From c10d38dda1774ed4540380333cabd229eff37094 Mon Sep 17 00:00:00 2001
From: Nick Piggin <npiggin@suse.de>
Date: Wed, 10 Sep 2008 13:37:17 +0200
Subject: x86: some lock annotations for user copy paths

copy_to/from_user and all its variants (except the atomic ones) can take a
page fault and perform non-trivial work like taking mmap_sem and entering
the filesyste/pagecache.

Unfortunately, this often escapes lockdep because a common pattern is to
use it to read in some arguments just set up from userspace, or write data
back to a hot buffer. In those cases, it will be unlikely for page reclaim
to get a window in to cause copy_*_user to fault.

With the new might_lock primitives, add some annotations to x86. I don't
know if I caught all possible faulting points (it's a bit of a maze, and I
didn't really look at 32-bit). But this is a starting point.

Boots and runs OK so far.

Signed-off-by: Nick Piggin <npiggin@suse.de>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/lib/usercopy_32.c | 7 ++++++-
 arch/x86/lib/usercopy_64.c | 4 ++++
 2 files changed, 10 insertions(+), 1 deletion(-)

(limited to 'arch/x86')

diff --git a/arch/x86/lib/usercopy_32.c b/arch/x86/lib/usercopy_32.c
index 24e60944971..8eedde2a9ca 100644
--- a/arch/x86/lib/usercopy_32.c
+++ b/arch/x86/lib/usercopy_32.c
@@ -33,6 +33,8 @@ static inline int __movsl_is_ok(unsigned long a1, unsigned long a2, unsigned lon
 do {									   \
 	int __d0, __d1, __d2;						   \
 	might_sleep();							   \
+	if (current->mm)						   \
+		might_lock_read(&current->mm->mmap_sem);		   \
 	__asm__ __volatile__(						   \
 		"	testl %1,%1\n"					   \
 		"	jz 2f\n"					   \
@@ -120,6 +122,8 @@ EXPORT_SYMBOL(strncpy_from_user);
 do {									\
 	int __d0;							\
 	might_sleep();							\
+	if (current->mm)						\
+		might_lock_read(&current->mm->mmap_sem);		\
 	__asm__ __volatile__(						\
 		"0:	rep; stosl\n"					\
 		"	movl %2,%0\n"					\
@@ -148,7 +152,6 @@ do {									\
 unsigned long
 clear_user(void __user *to, unsigned long n)
 {
-	might_sleep();
 	if (access_ok(VERIFY_WRITE, to, n))
 		__do_clear_user(to, n);
 	return n;
@@ -191,6 +194,8 @@ long strnlen_user(const char __user *s, long n)
 	unsigned long res, tmp;
 
 	might_sleep();
+	if (current->mm)
+		might_lock_read(&current->mm->mmap_sem);
 
 	__asm__ __volatile__(
 		"	testl %0, %0\n"
diff --git a/arch/x86/lib/usercopy_64.c b/arch/x86/lib/usercopy_64.c
index f4df6e7c718..847d1294599 100644
--- a/arch/x86/lib/usercopy_64.c
+++ b/arch/x86/lib/usercopy_64.c
@@ -16,6 +16,8 @@
 do {									   \
 	long __d0, __d1, __d2;						   \
 	might_sleep();							   \
+	if (current->mm)						   \
+		might_lock_read(&current->mm->mmap_sem);		   \
 	__asm__ __volatile__(						   \
 		"	testq %1,%1\n"					   \
 		"	jz 2f\n"					   \
@@ -65,6 +67,8 @@ unsigned long __clear_user(void __user *addr, unsigned long size)
 {
 	long __d0;
 	might_sleep();
+	if (current->mm)
+		might_lock_read(&current->mm->mmap_sem);
 	/* no memory constraint because it doesn't change any memory gcc knows
 	   about */
 	asm volatile(
-- 
cgit v1.2.3


From 3ee1afa308f2a38e5d1e2ad3752ad7abcf480da1 Mon Sep 17 00:00:00 2001
From: Nick Piggin <npiggin@suse.de>
Date: Wed, 10 Sep 2008 13:37:17 +0200
Subject: x86: some lock annotations for user copy paths, v2

 - introduce might_fault()
 - handle the atomic user copy paths correctly

[ mingo@elte.hu: move might_sleep() outside of in_atomic(). ]
Signed-off-by: Nick Piggin <npiggin@suse.de>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/lib/usercopy_32.c | 12 +++---------
 arch/x86/lib/usercopy_64.c |  8 ++------
 2 files changed, 5 insertions(+), 15 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/lib/usercopy_32.c b/arch/x86/lib/usercopy_32.c
index 8eedde2a9ca..7393152a252 100644
--- a/arch/x86/lib/usercopy_32.c
+++ b/arch/x86/lib/usercopy_32.c
@@ -32,9 +32,7 @@ static inline int __movsl_is_ok(unsigned long a1, unsigned long a2, unsigned lon
 #define __do_strncpy_from_user(dst, src, count, res)			   \
 do {									   \
 	int __d0, __d1, __d2;						   \
-	might_sleep();							   \
-	if (current->mm)						   \
-		might_lock_read(&current->mm->mmap_sem);		   \
+	might_fault();							   \
 	__asm__ __volatile__(						   \
 		"	testl %1,%1\n"					   \
 		"	jz 2f\n"					   \
@@ -121,9 +119,7 @@ EXPORT_SYMBOL(strncpy_from_user);
 #define __do_clear_user(addr,size)					\
 do {									\
 	int __d0;							\
-	might_sleep();							\
-	if (current->mm)						\
-		might_lock_read(&current->mm->mmap_sem);		\
+	might_fault();							\
 	__asm__ __volatile__(						\
 		"0:	rep; stosl\n"					\
 		"	movl %2,%0\n"					\
@@ -193,9 +189,7 @@ long strnlen_user(const char __user *s, long n)
 	unsigned long mask = -__addr_ok(s);
 	unsigned long res, tmp;
 
-	might_sleep();
-	if (current->mm)
-		might_lock_read(&current->mm->mmap_sem);
+	might_fault();
 
 	__asm__ __volatile__(
 		"	testl %0, %0\n"
diff --git a/arch/x86/lib/usercopy_64.c b/arch/x86/lib/usercopy_64.c
index 847d1294599..64d6c84e635 100644
--- a/arch/x86/lib/usercopy_64.c
+++ b/arch/x86/lib/usercopy_64.c
@@ -15,9 +15,7 @@
 #define __do_strncpy_from_user(dst,src,count,res)			   \
 do {									   \
 	long __d0, __d1, __d2;						   \
-	might_sleep();							   \
-	if (current->mm)						   \
-		might_lock_read(&current->mm->mmap_sem);		   \
+	might_fault();							   \
 	__asm__ __volatile__(						   \
 		"	testq %1,%1\n"					   \
 		"	jz 2f\n"					   \
@@ -66,9 +64,7 @@ EXPORT_SYMBOL(strncpy_from_user);
 unsigned long __clear_user(void __user *addr, unsigned long size)
 {
 	long __d0;
-	might_sleep();
-	if (current->mm)
-		might_lock_read(&current->mm->mmap_sem);
+	might_fault();
 	/* no memory constraint because it doesn't change any memory gcc knows
 	   about */
 	asm volatile(
-- 
cgit v1.2.3


From 1d18ef489509314506328b9e464dd47c24c1d68f Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Thu, 11 Sep 2008 20:53:21 +0200
Subject: x86: some lock annotations for user copy paths, v3

- add annotation back to clear_user()
- change probe_kernel_address() to _inatomic*() method

Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/lib/usercopy_32.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'arch/x86')

diff --git a/arch/x86/lib/usercopy_32.c b/arch/x86/lib/usercopy_32.c
index 7393152a252..fab5faba1d3 100644
--- a/arch/x86/lib/usercopy_32.c
+++ b/arch/x86/lib/usercopy_32.c
@@ -148,6 +148,7 @@ do {									\
 unsigned long
 clear_user(void __user *to, unsigned long n)
 {
+	might_fault();
 	if (access_ok(VERIFY_WRITE, to, n))
 		__do_clear_user(to, n);
 	return n;
-- 
cgit v1.2.3


From e45f2c07742d447597df001c878bc4a8aafcde37 Mon Sep 17 00:00:00 2001
From: "Denis V. Lunev" <den@openvz.org>
Date: Mon, 24 Nov 2008 11:28:36 +0300
Subject: x86: correct link to HPET timer specification

Impact: update documentation / help text

Original link is dead.

Signed-off-by: Denis V. Lunev <den@openvz.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/x86')

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index ac22bb7719f..19f0d97829e 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -482,7 +482,7 @@ config HPET_TIMER
          The HPET provides a stable time base on SMP
          systems, unlike the TSC, but it is more expensive to access,
          as it is off-chip.  You can find the HPET spec at
-         <http://www.intel.com/hardwaredesign/hpetspec.htm>.
+         <http://www.intel.com/hardwaredesign/hpetspec_1.pdf>.
 
          You can safely choose Y here.  However, HPET will only be
          activated if the platform and the BIOS support this feature.
-- 
cgit v1.2.3


From 9963d1aad40946b1b6d34f9bee8d8a1b9032ae22 Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Fri, 21 Nov 2008 21:07:16 +1030
Subject: [CPUFREQ] clean up speedstep-centrino and reduce cpumask_t usage

Impact: cleanup

1) The #ifdef CONFIG_HOTPLUG_CPU seems unnecessary these days.
2) The loop can simply skip over offline cpus, rather than creating a tmp mask.
3) set_mask is set to either a single cpu or all online cpus in a policy.
   Since it's just used for set_cpus_allowed(), any offline cpus in a policy
   don't matter, so we can just use cpumask_of_cpu() or the policy->cpus.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Mike Travis <travis@sgi.com>
Signed-off-by: Dave Jones <davej@redhat.com>
---
 arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c | 51 +++++++++++-------------
 1 file changed, 24 insertions(+), 27 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c b/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c
index 3b5f06423e7..f0ea6fa2f53 100644
--- a/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c
+++ b/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c
@@ -459,9 +459,7 @@ static int centrino_verify (struct cpufreq_policy *policy)
  * Sets a new CPUFreq policy.
  */
 struct allmasks {
-	cpumask_t		online_policy_cpus;
 	cpumask_t		saved_mask;
-	cpumask_t		set_mask;
 	cpumask_t		covered_cpus;
 };
 
@@ -475,9 +473,7 @@ static int centrino_target (struct cpufreq_policy *policy,
 	int			retval = 0;
 	unsigned int		j, k, first_cpu, tmp;
 	CPUMASK_ALLOC(allmasks);
-	CPUMASK_PTR(online_policy_cpus, allmasks);
 	CPUMASK_PTR(saved_mask, allmasks);
-	CPUMASK_PTR(set_mask, allmasks);
 	CPUMASK_PTR(covered_cpus, allmasks);
 
 	if (unlikely(allmasks == NULL))
@@ -497,30 +493,28 @@ static int centrino_target (struct cpufreq_policy *policy,
 		goto out;
 	}
 
-#ifdef CONFIG_HOTPLUG_CPU
-	/* cpufreq holds the hotplug lock, so we are safe from here on */
-	cpus_and(*online_policy_cpus, cpu_online_map, policy->cpus);
-#else
-	*online_policy_cpus = policy->cpus;
-#endif
-
 	*saved_mask = current->cpus_allowed;
 	first_cpu = 1;
 	cpus_clear(*covered_cpus);
-	for_each_cpu_mask_nr(j, *online_policy_cpus) {
+	for_each_cpu_mask_nr(j, policy->cpus) {
+		const cpumask_t *mask;
+
+		/* cpufreq holds the hotplug lock, so we are safe here */
+		if (!cpu_online(j))
+			continue;
+
 		/*
 		 * Support for SMP systems.
 		 * Make sure we are running on CPU that wants to change freq
 		 */
-		cpus_clear(*set_mask);
 		if (policy->shared_type == CPUFREQ_SHARED_TYPE_ANY)
-			cpus_or(*set_mask, *set_mask, *online_policy_cpus);
+			mask = &policy->cpus;
 		else
-			cpu_set(j, *set_mask);
+			mask = &cpumask_of_cpu(j);
 
-		set_cpus_allowed_ptr(current, set_mask);
+		set_cpus_allowed_ptr(current, mask);
 		preempt_disable();
-		if (unlikely(!cpu_isset(smp_processor_id(), *set_mask))) {
+		if (unlikely(!cpu_isset(smp_processor_id(), *mask))) {
 			dprintk("couldn't limit to CPUs in this domain\n");
 			retval = -EAGAIN;
 			if (first_cpu) {
@@ -548,7 +542,9 @@ static int centrino_target (struct cpufreq_policy *policy,
 			dprintk("target=%dkHz old=%d new=%d msr=%04x\n",
 				target_freq, freqs.old, freqs.new, msr);
 
-			for_each_cpu_mask_nr(k, *online_policy_cpus) {
+			for_each_cpu_mask_nr(k, policy->cpus) {
+				if (!cpu_online(k))
+					continue;
 				freqs.cpu = k;
 				cpufreq_notify_transition(&freqs,
 					CPUFREQ_PRECHANGE);
@@ -571,7 +567,9 @@ static int centrino_target (struct cpufreq_policy *policy,
 		preempt_enable();
 	}
 
-	for_each_cpu_mask_nr(k, *online_policy_cpus) {
+	for_each_cpu_mask_nr(k, policy->cpus) {
+		if (!cpu_online(k))
+			continue;
 		freqs.cpu = k;
 		cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
 	}
@@ -584,18 +582,17 @@ static int centrino_target (struct cpufreq_policy *policy,
 		 * Best effort undo..
 		 */
 
-		if (!cpus_empty(*covered_cpus))
-			for_each_cpu_mask_nr(j, *covered_cpus) {
-				set_cpus_allowed_ptr(current,
-						     &cpumask_of_cpu(j));
-				wrmsr(MSR_IA32_PERF_CTL, oldmsr, h);
-			}
+		for_each_cpu_mask_nr(j, *covered_cpus) {
+			set_cpus_allowed_ptr(current, &cpumask_of_cpu(j));
+			wrmsr(MSR_IA32_PERF_CTL, oldmsr, h);
+		}
 
 		tmp = freqs.new;
 		freqs.new = freqs.old;
 		freqs.old = tmp;
-		for_each_cpu_mask_nr(j, *online_policy_cpus) {
-			freqs.cpu = j;
+		for_each_cpu_mask_nr(j, policy->cpus) {
+			if (!cpu_online(j))
+				continue;
 			cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
 			cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
 		}
-- 
cgit v1.2.3


From 10db2e5cbda5b4e13d2e2f134b963bee2e129999 Mon Sep 17 00:00:00 2001
From: Dominik Brodowski <linux@dominikbrodowski.net>
Date: Fri, 17 Oct 2008 22:52:04 +0200
Subject: [CPUFREQ] p4-clockmod: reduce noise

On those CPUs which are SpeedStep (EST) capable, we do not care at all if
p4-clockmod does not work, since a technically superior CPU frequency
management technology is to be used.

Signed-off-by: Dominik Brodowski <linux@dominikbrodowski.net>
Signed-off-by: Dave Jones <davej@redhat.com>
---
 arch/x86/kernel/cpu/cpufreq/p4-clockmod.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c b/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c
index b8e05ee4f73..ba3a94a997c 100644
--- a/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c
+++ b/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c
@@ -171,7 +171,9 @@ static unsigned int cpufreq_p4_get_frequency(struct cpuinfo_x86 *c)
 	}
 
 	if (c->x86 != 0xF) {
-		printk(KERN_WARNING PFX "Unknown p4-clockmod-capable CPU. Please send an e-mail to <cpufreq@vger.kernel.org>\n");
+		if (!cpu_has(c, X86_FEATURE_EST))
+			printk(KERN_WARNING PFX "Unknown p4-clockmod-capable CPU. "
+				"Please send an e-mail to <cpufreq@vger.kernel.org>\n");
 		return 0;
 	}
 
-- 
cgit v1.2.3


From e088e4c9cdb618675874becb91b2fd581ee707e6 Mon Sep 17 00:00:00 2001
From: Matthew Garrett <mjg@redhat.com>
Date: Tue, 25 Nov 2008 13:29:47 -0500
Subject: [CPUFREQ] Disable sysfs ui for p4-clockmod.

p4-clockmod has a long history of abuse.   It pretends to be a CPU
frequency scaling driver, even though it doesn't actually change
the CPU frequency, but instead just modulates the frequency with
wait-states.
The biggest misconception is that when running at the lower 'frequency'
p4-clockmod is saving power.  This isn't the case, as workloads running
slower take longer to complete, preventing the CPU from entering deep C states.

However p4-clockmod does have a purpose.  It can prevent overheating.
Having it hooked up to the cpufreq interfaces is the wrong way to achieve
cooling however. It should instead be hooked up to ACPI.

This diff introduces a means for a cpufreq driver to register with the
cpufreq core, but not present a sysfs interface.

Signed-off-by: Matthew Garrett <mjg@redhat.com>
Signed-off-by: Dave Jones <davej@redhat.com>
---
 arch/x86/kernel/cpu/cpufreq/p4-clockmod.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c b/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c
index ba3a94a997c..0c43b224051 100644
--- a/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c
+++ b/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c
@@ -276,6 +276,7 @@ static struct cpufreq_driver p4clockmod_driver = {
 	.name		= "p4-clockmod",
 	.owner		= THIS_MODULE,
 	.attr		= p4clockmod_attr,
+	.hide_interface	= 1,
 };
 
 
-- 
cgit v1.2.3


From c60e19eb21d9a0fb0d78969884f32d88354abca9 Mon Sep 17 00:00:00 2001
From: Herton Ronaldo Krzesinski <herton@mandriva.com.br>
Date: Sat, 15 Nov 2008 17:02:46 -0200
Subject: [CPUFREQ] add to speedstep-lib additional fsb values for core
 processors

Add additional fsb values to pentium_core_get_frequency, from latest edition
(September 2008) of Intel 64 and IA-32 Architectures Software Develper's Manual,
Volume 3B: System Programming Guide, Part 2. Values added are to detect 800,
1067 and 1333 FSB types.

Signed-off-by: Herton Ronaldo Krzesinski <herton@mandriva.com.br>
Signed-off-by: Dave Jones <davej@redhat.com>
---
 arch/x86/kernel/cpu/cpufreq/speedstep-lib.c | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-lib.c b/arch/x86/kernel/cpu/cpufreq/speedstep-lib.c
index 98d4fdb7dc0..cdac7d62369 100644
--- a/arch/x86/kernel/cpu/cpufreq/speedstep-lib.c
+++ b/arch/x86/kernel/cpu/cpufreq/speedstep-lib.c
@@ -139,6 +139,15 @@ static unsigned int pentium_core_get_frequency(void)
 	case 3:
 		fsb = 166667;
 		break;
+	case 2:
+		fsb = 200000;
+		break;
+	case 0:
+		fsb = 266667;
+		break;
+	case 4:
+		fsb = 333333;
+		break;
 	default:
 		printk(KERN_ERR "PCORE - MSR_FSB_FREQ undefined value");
 	}
-- 
cgit v1.2.3


From 8529154ec3f3ac20344c65b7a040c604c7af7651 Mon Sep 17 00:00:00 2001
From: Herton Ronaldo Krzesinski <herton@mandriva.com.br>
Date: Sat, 15 Nov 2008 17:02:46 -0200
Subject: [CPUFREQ] Add Celeron Core support to p4-clockmod.

Add Celeron Core support to p4-clockmod.

Signed-off-by: Herton Ronaldo Krzesinski <herton@mandriva.com.br>
Signed-off-by: Dave Jones <davej@redhat.com>
---
 arch/x86/kernel/cpu/cpufreq/p4-clockmod.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c b/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c
index 0c43b224051..beea4466b06 100644
--- a/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c
+++ b/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c
@@ -160,6 +160,7 @@ static unsigned int cpufreq_p4_get_frequency(struct cpuinfo_x86 *c)
 		switch (c->x86_model) {
 		case 0x0E: /* Core */
 		case 0x0F: /* Core Duo */
+		case 0x16: /* Celeron Core */
 			p4clockmod_driver.flags |= CPUFREQ_CONST_LOOPS;
 			return speedstep_get_processor_frequency(SPEEDSTEP_PROCESSOR_PCORE);
 		case 0x0D: /* Pentium M (Dothan) */
-- 
cgit v1.2.3


From 0b8f1efad30bd58f89961b82dfe68b9edf8fd2ac Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yinghai@kernel.org>
Date: Fri, 5 Dec 2008 18:58:31 -0800
Subject: sparse irq_desc[] array: core kernel and x86 changes

Impact: new feature

Problem on distro kernels: irq_desc[NR_IRQS] takes megabytes of RAM with
NR_CPUS set to large values. The goal is to be able to scale up to much
larger NR_IRQS value without impacting the (important) common case.

To solve this, we generalize irq_desc[NR_IRQS] to an (optional) array of
irq_desc pointers.

When CONFIG_SPARSE_IRQ=y is used, we use kzalloc_node to get irq_desc,
this also makes the IRQ descriptors NUMA-local (to the site that calls
request_irq()).

This gets rid of the irq_cfg[] static array on x86 as well: irq_cfg now
uses desc->chip_data for x86 to store irq_cfg.

Signed-off-by: Yinghai Lu <yinghai@kernel.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/Kconfig                   |  10 ++
 arch/x86/include/asm/irq_vectors.h |   9 ++
 arch/x86/kernel/io_apic.c          | 301 +++++++++++++++++++++++--------------
 arch/x86/kernel/irq.c              |   3 +
 arch/x86/kernel/irq_32.c           |   2 +
 arch/x86/kernel/irq_64.c           |   2 +
 arch/x86/kernel/irqinit_32.c       |   1 -
 arch/x86/kernel/irqinit_64.c       |   1 -
 8 files changed, 216 insertions(+), 113 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index ac22bb7719f..48ac688de3c 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -238,6 +238,16 @@ config X86_HAS_BOOT_CPU_ID
 	def_bool y
 	depends on X86_VOYAGER
 
+config SPARSE_IRQ
+	bool "Support sparse irq numbering"
+	depends on PCI_MSI || HT_IRQ
+	default y
+	help
+	  This enables support for sparse irq, esp for msi/msi-x. You may need
+	  if you have lots of cards supports msi-x installed.
+
+	  If you don't know what to do here, say Y.
+
 config X86_FIND_SMP_CONFIG
 	def_bool y
 	depends on X86_MPPARSE || X86_VOYAGER
diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h
index 0005adb0f94..bb6b69a6b12 100644
--- a/arch/x86/include/asm/irq_vectors.h
+++ b/arch/x86/include/asm/irq_vectors.h
@@ -102,11 +102,20 @@
 #define invalid_vm86_irq(irq)	((irq) < 3 || (irq) > 15)
 
 #if defined(CONFIG_X86_IO_APIC) && !defined(CONFIG_X86_VOYAGER)
+
+#ifndef CONFIG_SPARSE_IRQ
 # if NR_CPUS < MAX_IO_APICS
 #  define NR_IRQS (NR_VECTORS + (32 * NR_CPUS))
 # else
 #  define NR_IRQS (NR_VECTORS + (32 * MAX_IO_APICS))
 # endif
+#else
+# if (8 * NR_CPUS) > (32 * MAX_IO_APICS)
+#  define NR_IRQS (NR_VECTORS + (8 * NR_CPUS))
+# else
+#  define NR_IRQS (NR_VECTORS + (32 * MAX_IO_APICS))
+# endif
+#endif
 
 #elif defined(CONFIG_X86_VOYAGER)
 
diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c
index 9043251210f..9de17f5c112 100644
--- a/arch/x86/kernel/io_apic.c
+++ b/arch/x86/kernel/io_apic.c
@@ -108,8 +108,33 @@ static int __init parse_noapic(char *str)
 early_param("noapic", parse_noapic);
 
 struct irq_pin_list;
+
+/*
+ * This is performance-critical, we want to do it O(1)
+ *
+ * the indexing order of this array favors 1:1 mappings
+ * between pins and IRQs.
+ */
+
+struct irq_pin_list {
+	int apic, pin;
+	struct irq_pin_list *next;
+};
+
+static struct irq_pin_list *get_one_free_irq_2_pin(int cpu)
+{
+	struct irq_pin_list *pin;
+	int node;
+
+	node = cpu_to_node(cpu);
+
+	pin = kzalloc_node(sizeof(*pin), GFP_ATOMIC, node);
+	printk(KERN_DEBUG "  alloc irq_2_pin on cpu %d node %d\n", cpu, node);
+
+	return pin;
+}
+
 struct irq_cfg {
-	unsigned int irq;
 	struct irq_pin_list *irq_2_pin;
 	cpumask_t domain;
 	cpumask_t old_domain;
@@ -119,83 +144,93 @@ struct irq_cfg {
 };
 
 /* irq_cfg is indexed by the sum of all RTEs in all I/O APICs. */
+#ifdef CONFIG_SPARSE_IRQ
+static struct irq_cfg irq_cfgx[] = {
+#else
 static struct irq_cfg irq_cfgx[NR_IRQS] = {
-	[0]  = { .irq =  0, .domain = CPU_MASK_ALL, .vector = IRQ0_VECTOR,  },
-	[1]  = { .irq =  1, .domain = CPU_MASK_ALL, .vector = IRQ1_VECTOR,  },
-	[2]  = { .irq =  2, .domain = CPU_MASK_ALL, .vector = IRQ2_VECTOR,  },
-	[3]  = { .irq =  3, .domain = CPU_MASK_ALL, .vector = IRQ3_VECTOR,  },
-	[4]  = { .irq =  4, .domain = CPU_MASK_ALL, .vector = IRQ4_VECTOR,  },
-	[5]  = { .irq =  5, .domain = CPU_MASK_ALL, .vector = IRQ5_VECTOR,  },
-	[6]  = { .irq =  6, .domain = CPU_MASK_ALL, .vector = IRQ6_VECTOR,  },
-	[7]  = { .irq =  7, .domain = CPU_MASK_ALL, .vector = IRQ7_VECTOR,  },
-	[8]  = { .irq =  8, .domain = CPU_MASK_ALL, .vector = IRQ8_VECTOR,  },
-	[9]  = { .irq =  9, .domain = CPU_MASK_ALL, .vector = IRQ9_VECTOR,  },
-	[10] = { .irq = 10, .domain = CPU_MASK_ALL, .vector = IRQ10_VECTOR, },
-	[11] = { .irq = 11, .domain = CPU_MASK_ALL, .vector = IRQ11_VECTOR, },
-	[12] = { .irq = 12, .domain = CPU_MASK_ALL, .vector = IRQ12_VECTOR, },
-	[13] = { .irq = 13, .domain = CPU_MASK_ALL, .vector = IRQ13_VECTOR, },
-	[14] = { .irq = 14, .domain = CPU_MASK_ALL, .vector = IRQ14_VECTOR, },
-	[15] = { .irq = 15, .domain = CPU_MASK_ALL, .vector = IRQ15_VECTOR, },
+#endif
+	[0]  = { .domain = CPU_MASK_ALL, .vector = IRQ0_VECTOR,  },
+	[1]  = { .domain = CPU_MASK_ALL, .vector = IRQ1_VECTOR,  },
+	[2]  = { .domain = CPU_MASK_ALL, .vector = IRQ2_VECTOR,  },
+	[3]  = { .domain = CPU_MASK_ALL, .vector = IRQ3_VECTOR,  },
+	[4]  = { .domain = CPU_MASK_ALL, .vector = IRQ4_VECTOR,  },
+	[5]  = { .domain = CPU_MASK_ALL, .vector = IRQ5_VECTOR,  },
+	[6]  = { .domain = CPU_MASK_ALL, .vector = IRQ6_VECTOR,  },
+	[7]  = { .domain = CPU_MASK_ALL, .vector = IRQ7_VECTOR,  },
+	[8]  = { .domain = CPU_MASK_ALL, .vector = IRQ8_VECTOR,  },
+	[9]  = { .domain = CPU_MASK_ALL, .vector = IRQ9_VECTOR,  },
+	[10] = { .domain = CPU_MASK_ALL, .vector = IRQ10_VECTOR, },
+	[11] = { .domain = CPU_MASK_ALL, .vector = IRQ11_VECTOR, },
+	[12] = { .domain = CPU_MASK_ALL, .vector = IRQ12_VECTOR, },
+	[13] = { .domain = CPU_MASK_ALL, .vector = IRQ13_VECTOR, },
+	[14] = { .domain = CPU_MASK_ALL, .vector = IRQ14_VECTOR, },
+	[15] = { .domain = CPU_MASK_ALL, .vector = IRQ15_VECTOR, },
 };
 
-#define for_each_irq_cfg(irq, cfg)		\
-	for (irq = 0, cfg = irq_cfgx; irq < nr_irqs; irq++, cfg++)
-
-static struct irq_cfg *irq_cfg(unsigned int irq)
+void __init arch_early_irq_init(void)
 {
-	return irq < nr_irqs ? irq_cfgx + irq : NULL;
-}
+	struct irq_cfg *cfg;
+	struct irq_desc *desc;
+	int count;
+	int i;
 
-static struct irq_cfg *irq_cfg_alloc(unsigned int irq)
-{
-	return irq_cfg(irq);
-}
+	cfg = irq_cfgx;
+	count = ARRAY_SIZE(irq_cfgx);
 
-/*
- * Rough estimation of how many shared IRQs there are, can be changed
- * anytime.
- */
-#define MAX_PLUS_SHARED_IRQS NR_IRQS
-#define PIN_MAP_SIZE (MAX_PLUS_SHARED_IRQS + NR_IRQS)
+	for (i = 0; i < count; i++) {
+		desc = irq_to_desc(i);
+		desc->chip_data = &cfg[i];
+	}
+}
 
-/*
- * This is performance-critical, we want to do it O(1)
- *
- * the indexing order of this array favors 1:1 mappings
- * between pins and IRQs.
- */
+#ifdef CONFIG_SPARSE_IRQ
+static struct irq_cfg *irq_cfg(unsigned int irq)
+{
+	struct irq_cfg *cfg = NULL;
+	struct irq_desc *desc;
 
-struct irq_pin_list {
-	int apic, pin;
-	struct irq_pin_list *next;
-};
+	desc = irq_to_desc(irq);
+	if (desc)
+		cfg = desc->chip_data;
 
-static struct irq_pin_list irq_2_pin_head[PIN_MAP_SIZE];
-static struct irq_pin_list *irq_2_pin_ptr;
+	return cfg;
+}
 
-static void __init irq_2_pin_init(void)
+static struct irq_cfg *get_one_free_irq_cfg(int cpu)
 {
-	struct irq_pin_list *pin = irq_2_pin_head;
-	int i;
+	struct irq_cfg *cfg;
+	int node;
+
+	node = cpu_to_node(cpu);
 
-	for (i = 1; i < PIN_MAP_SIZE; i++)
-		pin[i-1].next = &pin[i];
+	cfg = kzalloc_node(sizeof(*cfg), GFP_ATOMIC, node);
+	printk(KERN_DEBUG "  alloc irq_cfg on cpu %d node %d\n", cpu, node);
 
-	irq_2_pin_ptr = &pin[0];
+	return cfg;
 }
 
-static struct irq_pin_list *get_one_free_irq_2_pin(void)
+void arch_init_chip_data(struct irq_desc *desc, int cpu)
 {
-	struct irq_pin_list *pin = irq_2_pin_ptr;
+	struct irq_cfg *cfg;
 
-	if (!pin)
-		panic("can not get more irq_2_pin\n");
+	cfg = desc->chip_data;
+	if (!cfg) {
+		desc->chip_data = get_one_free_irq_cfg(cpu);
+		if (!desc->chip_data) {
+			printk(KERN_ERR "can not alloc irq_cfg\n");
+			BUG_ON(1);
+		}
+	}
+}
 
-	irq_2_pin_ptr = pin->next;
-	pin->next = NULL;
-	return pin;
+#else
+static struct irq_cfg *irq_cfg(unsigned int irq)
+{
+	return irq < nr_irqs ? irq_cfgx + irq : NULL;
 }
 
+#endif
+
 struct io_apic {
 	unsigned int index;
 	unsigned int unused[3];
@@ -397,16 +432,19 @@ static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask)
  * shared ISA-space IRQs, so we have to support them. We are super
  * fast in the common case, and fast for shared ISA-space IRQs.
  */
-static void add_pin_to_irq(unsigned int irq, int apic, int pin)
+static void add_pin_to_irq_cpu(unsigned int irq, int cpu, int apic, int pin)
 {
-	struct irq_cfg *cfg;
 	struct irq_pin_list *entry;
+	struct irq_cfg *cfg = irq_cfg(irq);
 
-	/* first time to refer irq_cfg, so with new */
-	cfg = irq_cfg_alloc(irq);
 	entry = cfg->irq_2_pin;
 	if (!entry) {
-		entry = get_one_free_irq_2_pin();
+		entry = get_one_free_irq_2_pin(cpu);
+		if (!entry) {
+			printk(KERN_ERR "can not alloc irq_2_pin to add %d - %d\n",
+					apic, pin);
+			return;
+		}
 		cfg->irq_2_pin = entry;
 		entry->apic = apic;
 		entry->pin = pin;
@@ -421,7 +459,7 @@ static void add_pin_to_irq(unsigned int irq, int apic, int pin)
 		entry = entry->next;
 	}
 
-	entry->next = get_one_free_irq_2_pin();
+	entry->next = get_one_free_irq_2_pin(cpu);
 	entry = entry->next;
 	entry->apic = apic;
 	entry->pin = pin;
@@ -430,7 +468,7 @@ static void add_pin_to_irq(unsigned int irq, int apic, int pin)
 /*
  * Reroute an IRQ to a different pin.
  */
-static void __init replace_pin_at_irq(unsigned int irq,
+static void __init replace_pin_at_irq(unsigned int irq, int cpu,
 				      int oldapic, int oldpin,
 				      int newapic, int newpin)
 {
@@ -451,7 +489,7 @@ static void __init replace_pin_at_irq(unsigned int irq,
 
 	/* why? call replace before add? */
 	if (!replaced)
-		add_pin_to_irq(irq, newapic, newpin);
+		add_pin_to_irq_cpu(irq, cpu, newapic, newpin);
 }
 
 static inline void io_apic_modify_irq(unsigned int irq,
@@ -1162,9 +1200,13 @@ void __setup_vector_irq(int cpu)
 	/* This function must be called with vector_lock held */
 	int irq, vector;
 	struct irq_cfg *cfg;
+	struct irq_desc *desc;
 
 	/* Mark the inuse vectors */
-	for_each_irq_cfg(irq, cfg) {
+	for_each_irq_desc(irq, desc) {
+		if (!desc)
+			continue;
+		cfg = desc->chip_data;
 		if (!cpu_isset(cpu, cfg->domain))
 			continue;
 		vector = cfg->vector;
@@ -1356,6 +1398,8 @@ static void __init setup_IO_APIC_irqs(void)
 {
 	int apic, pin, idx, irq;
 	int notcon = 0;
+	struct irq_desc *desc;
+	int cpu = boot_cpu_id;
 
 	apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n");
 
@@ -1387,7 +1431,12 @@ static void __init setup_IO_APIC_irqs(void)
 			if (multi_timer_check(apic, irq))
 				continue;
 #endif
-			add_pin_to_irq(irq, apic, pin);
+			desc = irq_to_desc_alloc_cpu(irq, cpu);
+			if (!desc) {
+				printk(KERN_INFO "can not get irq_desc for %d\n", irq);
+				continue;
+			}
+			add_pin_to_irq_cpu(irq, cpu, apic, pin);
 
 			setup_IO_APIC_irq(apic, pin, irq,
 					irq_trigger(idx), irq_polarity(idx));
@@ -1448,6 +1497,7 @@ __apicdebuginit(void) print_IO_APIC(void)
 	union IO_APIC_reg_03 reg_03;
 	unsigned long flags;
 	struct irq_cfg *cfg;
+	struct irq_desc *desc;
 	unsigned int irq;
 
 	if (apic_verbosity == APIC_QUIET)
@@ -1537,8 +1587,13 @@ __apicdebuginit(void) print_IO_APIC(void)
 	}
 	}
 	printk(KERN_DEBUG "IRQ to pin mappings:\n");
-	for_each_irq_cfg(irq, cfg) {
-		struct irq_pin_list *entry = cfg->irq_2_pin;
+	for_each_irq_desc(irq, desc) {
+		struct irq_pin_list *entry;
+
+		if (!desc)
+			continue;
+		cfg = desc->chip_data;
+		entry = cfg->irq_2_pin;
 		if (!entry)
 			continue;
 		printk(KERN_DEBUG "IRQ%d ", irq);
@@ -2022,6 +2077,7 @@ static unsigned int startup_ioapic_irq(unsigned int irq)
 {
 	int was_pending = 0;
 	unsigned long flags;
+	struct irq_cfg *cfg;
 
 	spin_lock_irqsave(&ioapic_lock, flags);
 	if (irq < 16) {
@@ -2029,6 +2085,7 @@ static unsigned int startup_ioapic_irq(unsigned int irq)
 		if (i8259A_irq_pending(irq))
 			was_pending = 1;
 	}
+	cfg = irq_cfg(irq);
 	__unmask_IO_APIC_irq(irq);
 	spin_unlock_irqrestore(&ioapic_lock, flags);
 
@@ -2178,6 +2235,9 @@ static void ir_irq_migration(struct work_struct *work)
 	struct irq_desc *desc;
 
 	for_each_irq_desc(irq, desc) {
+		if (!desc)
+			continue;
+
 		if (desc->status & IRQ_MOVE_PENDING) {
 			unsigned long flags;
 
@@ -2229,6 +2289,9 @@ asmlinkage void smp_irq_move_cleanup_interrupt(void)
 		struct irq_cfg *cfg;
 		irq = __get_cpu_var(vector_irq)[vector];
 
+		if (irq == -1)
+			continue;
+
 		desc = irq_to_desc(irq);
 		if (!desc)
 			continue;
@@ -2430,8 +2493,12 @@ static inline void init_IO_APIC_traps(void)
 	 * Also, we've got to be careful not to trash gate
 	 * 0x80, because int 0x80 is hm, kind of importantish. ;)
 	 */
-	for_each_irq_cfg(irq, cfg) {
-		if (IO_APIC_IRQ(irq) && !cfg->vector) {
+	for_each_irq_desc(irq, desc) {
+		if (!desc)
+			continue;
+
+		cfg = desc->chip_data;
+		if (IO_APIC_IRQ(irq) && cfg && !cfg->vector) {
 			/*
 			 * Hmm.. We don't have an entry for this,
 			 * so default to an old-fashioned 8259
@@ -2439,11 +2506,9 @@ static inline void init_IO_APIC_traps(void)
 			 */
 			if (irq < 16)
 				make_8259A_irq(irq);
-			else {
-				desc = irq_to_desc(irq);
+			else
 				/* Strange. Oh, well.. */
 				desc->chip = &no_irq_chip;
-			}
 		}
 	}
 }
@@ -2654,7 +2719,7 @@ static inline void __init check_timer(void)
 		 * Ok, does IRQ0 through the IOAPIC work?
 		 */
 		if (no_pin1) {
-			add_pin_to_irq(0, apic1, pin1);
+			add_pin_to_irq_cpu(0, boot_cpu_id, apic1, pin1);
 			setup_timer_IRQ0_pin(apic1, pin1, cfg->vector);
 		}
 		unmask_IO_APIC_irq(0);
@@ -2683,7 +2748,7 @@ static inline void __init check_timer(void)
 		/*
 		 * legacy devices should be connected to IO APIC #0
 		 */
-		replace_pin_at_irq(0, apic1, pin1, apic2, pin2);
+		replace_pin_at_irq(0, boot_cpu_id, apic1, pin1, apic2, pin2);
 		setup_timer_IRQ0_pin(apic2, pin2, cfg->vector);
 		unmask_IO_APIC_irq(0);
 		enable_8259A_irq(0);
@@ -2902,21 +2967,25 @@ unsigned int create_irq_nr(unsigned int irq_want)
 	unsigned int irq;
 	unsigned int new;
 	unsigned long flags;
-	struct irq_cfg *cfg_new;
-
-	irq_want = nr_irqs - 1;
+	struct irq_cfg *cfg_new = NULL;
+	int cpu = boot_cpu_id;
+	struct irq_desc *desc_new = NULL;
 
 	irq = 0;
 	spin_lock_irqsave(&vector_lock, flags);
 	for (new = irq_want; new > 0; new--) {
 		if (platform_legacy_irq(new))
 			continue;
-		cfg_new = irq_cfg(new);
-		if (cfg_new && cfg_new->vector != 0)
+
+		desc_new = irq_to_desc_alloc_cpu(new, cpu);
+		if (!desc_new) {
+			printk(KERN_INFO "can not get irq_desc for %d\n", new);
+			continue;
+		}
+		cfg_new = desc_new->chip_data;
+
+		if (cfg_new->vector != 0)
 			continue;
-		/* check if need to create one */
-		if (!cfg_new)
-			cfg_new = irq_cfg_alloc(new);
 		if (__assign_irq_vector(new, TARGET_CPUS) == 0)
 			irq = new;
 		break;
@@ -2925,6 +2994,9 @@ unsigned int create_irq_nr(unsigned int irq_want)
 
 	if (irq > 0) {
 		dynamic_irq_init(irq);
+		/* restore it, in case dynamic_irq_init clear it */
+		if (desc_new)
+			desc_new->chip_data = cfg_new;
 	}
 	return irq;
 }
@@ -2944,8 +3016,16 @@ int create_irq(void)
 void destroy_irq(unsigned int irq)
 {
 	unsigned long flags;
+	struct irq_cfg *cfg;
+	struct irq_desc *desc;
 
+	/* store it, in case dynamic_irq_cleanup clear it */
+	desc = irq_to_desc(irq);
+	cfg = desc->chip_data;
 	dynamic_irq_cleanup(irq);
+	/* connect back irq_cfg */
+	if (desc)
+		desc->chip_data = cfg;
 
 #ifdef CONFIG_INTR_REMAP
 	free_irte(irq);
@@ -3195,26 +3275,13 @@ static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc, int irq)
 	return 0;
 }
 
-static unsigned int build_irq_for_pci_dev(struct pci_dev *dev)
-{
-	unsigned int irq;
-
-	irq = dev->bus->number;
-	irq <<= 8;
-	irq |= dev->devfn;
-	irq <<= 12;
-
-	return irq;
-}
-
-int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc)
+int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc)
 {
 	unsigned int irq;
 	int ret;
 	unsigned int irq_want;
 
-	irq_want = build_irq_for_pci_dev(dev) + 0x100;
-
+	irq_want = nr_irqs - 1;
 	irq = create_irq_nr(irq_want);
 	if (irq == 0)
 		return -1;
@@ -3228,7 +3295,7 @@ int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc)
 		goto error;
 no_ir:
 #endif
-	ret = setup_msi_irq(dev, desc, irq);
+	ret = setup_msi_irq(dev, msidesc, irq);
 	if (ret < 0) {
 		destroy_irq(irq);
 		return ret;
@@ -3246,7 +3313,7 @@ int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
 {
 	unsigned int irq;
 	int ret, sub_handle;
-	struct msi_desc *desc;
+	struct msi_desc *msidesc;
 	unsigned int irq_want;
 
 #ifdef CONFIG_INTR_REMAP
@@ -3254,10 +3321,11 @@ int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
 	int index = 0;
 #endif
 
-	irq_want = build_irq_for_pci_dev(dev) + 0x100;
+	irq_want = nr_irqs - 1;
 	sub_handle = 0;
-	list_for_each_entry(desc, &dev->msi_list, list) {
-		irq = create_irq_nr(irq_want--);
+	list_for_each_entry(msidesc, &dev->msi_list, list) {
+		irq = create_irq_nr(irq_want);
+		irq_want--;
 		if (irq == 0)
 			return -1;
 #ifdef CONFIG_INTR_REMAP
@@ -3289,7 +3357,7 @@ int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
 		}
 no_ir:
 #endif
-		ret = setup_msi_irq(dev, desc, irq);
+		ret = setup_msi_irq(dev, msidesc, irq);
 		if (ret < 0)
 			goto error;
 		sub_handle++;
@@ -3707,17 +3775,29 @@ int __init io_apic_get_version(int ioapic)
 
 int io_apic_set_pci_routing (int ioapic, int pin, int irq, int triggering, int polarity)
 {
+	struct irq_desc *desc;
+	struct irq_cfg *cfg;
+	int cpu = boot_cpu_id;
+
 	if (!IO_APIC_IRQ(irq)) {
 		apic_printk(APIC_QUIET,KERN_ERR "IOAPIC[%d]: Invalid reference to IRQ 0\n",
 			ioapic);
 		return -EINVAL;
 	}
 
+	desc = irq_to_desc_alloc_cpu(irq, cpu);
+	if (!desc) {
+		printk(KERN_INFO "can not get irq_desc %d\n", irq);
+		return 0;
+	}
+
 	/*
 	 * IRQs < 16 are already in the irq_2_pin[] map
 	 */
-	if (irq >= 16)
-		add_pin_to_irq(irq, ioapic, pin);
+	if (irq >= 16) {
+		cfg = desc->chip_data;
+		add_pin_to_irq_cpu(irq, cpu, ioapic, pin);
+	}
 
 	setup_IO_APIC_irq(ioapic, pin, irq, triggering, polarity);
 
@@ -3773,7 +3853,8 @@ void __init setup_ioapic_dest(void)
 			 * when you have too many devices, because at that time only boot
 			 * cpu is online.
 			 */
-			cfg = irq_cfg(irq);
+			desc = irq_to_desc(irq);
+			cfg = desc->chip_data;
 			if (!cfg->vector) {
 				setup_IO_APIC_irq(ioapic, pin, irq,
 						  irq_trigger(irq_entry),
@@ -3785,7 +3866,6 @@ void __init setup_ioapic_dest(void)
 			/*
 			 * Honour affinities which have been set in early boot
 			 */
-			desc = irq_to_desc(irq);
 			if (desc->status &
 			    (IRQ_NO_BALANCING | IRQ_AFFINITY_SET))
 				mask = desc->affinity;
@@ -3846,7 +3926,6 @@ void __init ioapic_init_mappings(void)
 	struct resource *ioapic_res;
 	int i;
 
-	irq_2_pin_init();
 	ioapic_res = ioapic_setup_resources();
 	for (i = 0; i < nr_ioapics; i++) {
 		if (smp_found_config) {
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
index d1d4dc52f64..3f1d9d18df6 100644
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -118,6 +118,9 @@ int show_interrupts(struct seq_file *p, void *v)
 	}
 
 	desc = irq_to_desc(i);
+	if (!desc)
+		return 0;
+
 	spin_lock_irqsave(&desc->lock, flags);
 #ifndef CONFIG_SMP
 	any_count = kstat_irqs(i);
diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c
index a51382672de..119fc9c8ff7 100644
--- a/arch/x86/kernel/irq_32.c
+++ b/arch/x86/kernel/irq_32.c
@@ -242,6 +242,8 @@ void fixup_irqs(cpumask_t map)
 	for_each_irq_desc(irq, desc) {
 		cpumask_t mask;
 
+		if (!desc)
+			continue;
 		if (irq == 2)
 			continue;
 
diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c
index 60eb84eb77a..900009c7059 100644
--- a/arch/x86/kernel/irq_64.c
+++ b/arch/x86/kernel/irq_64.c
@@ -94,6 +94,8 @@ void fixup_irqs(cpumask_t map)
 		int break_affinity = 0;
 		int set_affinity = 1;
 
+		if (!desc)
+			continue;
 		if (irq == 2)
 			continue;
 
diff --git a/arch/x86/kernel/irqinit_32.c b/arch/x86/kernel/irqinit_32.c
index 845aa9803e8..5a5651b7f9e 100644
--- a/arch/x86/kernel/irqinit_32.c
+++ b/arch/x86/kernel/irqinit_32.c
@@ -69,7 +69,6 @@ void __init init_ISA_irqs (void)
 	 * 16 old-style INTA-cycle interrupts:
 	 */
 	for (i = 0; i < 16; i++) {
-		/* first time call this irq_desc */
 		struct irq_desc *desc = irq_to_desc(i);
 
 		desc->status = IRQ_DISABLED;
diff --git a/arch/x86/kernel/irqinit_64.c b/arch/x86/kernel/irqinit_64.c
index ff023539128..cd9f42d028d 100644
--- a/arch/x86/kernel/irqinit_64.c
+++ b/arch/x86/kernel/irqinit_64.c
@@ -143,7 +143,6 @@ void __init init_ISA_irqs(void)
 	init_8259A(0);
 
 	for (i = 0; i < 16; i++) {
-		/* first time call this irq_desc */
 		struct irq_desc *desc = irq_to_desc(i);
 
 		desc->status = IRQ_DISABLED;
-- 
cgit v1.2.3


From 99d093d12897562a253540a902bbf65ec16042ac Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yinghai@kernel.org>
Date: Fri, 5 Dec 2008 18:58:32 -0800
Subject: x86: use NR_IRQS_LEGACY

Impact: cleanup

Introduce NR_IRQS_LEGACY instead of hard coded number.

Signed-off-by: Yinghai Lu <yinghai@kernel.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/include/asm/irq_vectors.h |  2 ++
 arch/x86/kernel/io_apic.c          | 10 +++++-----
 arch/x86/kernel/irqinit_32.c       |  2 +-
 arch/x86/kernel/irqinit_64.c       |  2 +-
 4 files changed, 9 insertions(+), 7 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h
index bb6b69a6b12..f7ff65032b9 100644
--- a/arch/x86/include/asm/irq_vectors.h
+++ b/arch/x86/include/asm/irq_vectors.h
@@ -101,6 +101,8 @@
 #define LAST_VM86_IRQ		15
 #define invalid_vm86_irq(irq)	((irq) < 3 || (irq) > 15)
 
+#define NR_IRQS_LEGACY		16
+
 #if defined(CONFIG_X86_IO_APIC) && !defined(CONFIG_X86_VOYAGER)
 
 #ifndef CONFIG_SPARSE_IRQ
diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c
index 9de17f5c112..9870461ae93 100644
--- a/arch/x86/kernel/io_apic.c
+++ b/arch/x86/kernel/io_apic.c
@@ -847,7 +847,7 @@ EXPORT_SYMBOL(IO_APIC_get_PCI_irq_vector);
  */
 static int EISA_ELCR(unsigned int irq)
 {
-	if (irq < 16) {
+	if (irq < NR_IRQS_LEGACY) {
 		unsigned int port = 0x4d0 + (irq >> 3);
 		return (inb(port) >> (irq & 7)) & 1;
 	}
@@ -1388,7 +1388,7 @@ static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq,
 	}
 
 	ioapic_register_intr(irq, trigger);
-	if (irq < 16)
+	if (irq < NR_IRQS_LEGACY)
 		disable_8259A_irq(irq);
 
 	ioapic_write_entry(apic, pin, entry);
@@ -2080,7 +2080,7 @@ static unsigned int startup_ioapic_irq(unsigned int irq)
 	struct irq_cfg *cfg;
 
 	spin_lock_irqsave(&ioapic_lock, flags);
-	if (irq < 16) {
+	if (irq < NR_IRQS_LEGACY) {
 		disable_8259A_irq(irq);
 		if (i8259A_irq_pending(irq))
 			was_pending = 1;
@@ -2504,7 +2504,7 @@ static inline void init_IO_APIC_traps(void)
 			 * so default to an old-fashioned 8259
 			 * interrupt if we can..
 			 */
-			if (irq < 16)
+			if (irq < NR_IRQS_LEGACY)
 				make_8259A_irq(irq);
 			else
 				/* Strange. Oh, well.. */
@@ -3794,7 +3794,7 @@ int io_apic_set_pci_routing (int ioapic, int pin, int irq, int triggering, int p
 	/*
 	 * IRQs < 16 are already in the irq_2_pin[] map
 	 */
-	if (irq >= 16) {
+	if (irq >= NR_IRQS_LEGACY) {
 		cfg = desc->chip_data;
 		add_pin_to_irq_cpu(irq, cpu, ioapic, pin);
 	}
diff --git a/arch/x86/kernel/irqinit_32.c b/arch/x86/kernel/irqinit_32.c
index 5a5651b7f9e..6a92f47c52e 100644
--- a/arch/x86/kernel/irqinit_32.c
+++ b/arch/x86/kernel/irqinit_32.c
@@ -68,7 +68,7 @@ void __init init_ISA_irqs (void)
 	/*
 	 * 16 old-style INTA-cycle interrupts:
 	 */
-	for (i = 0; i < 16; i++) {
+	for (i = 0; i < NR_IRQS_LEGACY; i++) {
 		struct irq_desc *desc = irq_to_desc(i);
 
 		desc->status = IRQ_DISABLED;
diff --git a/arch/x86/kernel/irqinit_64.c b/arch/x86/kernel/irqinit_64.c
index cd9f42d028d..40c1e62ec78 100644
--- a/arch/x86/kernel/irqinit_64.c
+++ b/arch/x86/kernel/irqinit_64.c
@@ -142,7 +142,7 @@ void __init init_ISA_irqs(void)
 	init_bsp_APIC();
 	init_8259A(0);
 
-	for (i = 0; i < 16; i++) {
+	for (i = 0; i < NR_IRQS_LEGACY; i++) {
 		struct irq_desc *desc = irq_to_desc(i);
 
 		desc->status = IRQ_DISABLED;
-- 
cgit v1.2.3


From be5d5350a937cd8513b258739f1099420129e96f Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yinghai@kernel.org>
Date: Fri, 5 Dec 2008 18:58:33 -0800
Subject: x86: MSI start irq numbering from nr_irqs_gsi

Impact: sanitize MSI irq number ordering from top-down to bottom-up

Increase new MSI IRQs starting from nr_irqs_gsi (which is somewhere below
256), instead of decreasing from NR_IRQS. (The latter method can result
in confusingly high IRQ numbers - if NR_CPUS is set to a high value and
NR_IRQS scales up to a high value.)

Signed-off-by: Yinghai Lu <yinghai@kernel.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/include/asm/io_apic.h |  2 +-
 arch/x86/kernel/io_apic.c      | 24 +++++++++++++++++-------
 arch/x86/kernel/setup.c        |  2 +-
 3 files changed, 19 insertions(+), 9 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/io_apic.h b/arch/x86/include/asm/io_apic.h
index 6afd9933a7d..b35e94c2d6d 100644
--- a/arch/x86/include/asm/io_apic.h
+++ b/arch/x86/include/asm/io_apic.h
@@ -188,7 +188,7 @@ extern void restore_IO_APIC_setup(void);
 extern void reinit_intr_remapped_IO_APIC(int);
 #endif
 
-extern int probe_nr_irqs(void);
+extern void probe_nr_irqs_gsi(void);
 
 #else  /* !CONFIG_X86_IO_APIC */
 #define io_apic_assign_pci_irqs 0
diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c
index 9870461ae93..0dcde74abd1 100644
--- a/arch/x86/kernel/io_apic.c
+++ b/arch/x86/kernel/io_apic.c
@@ -2973,7 +2973,7 @@ unsigned int create_irq_nr(unsigned int irq_want)
 
 	irq = 0;
 	spin_lock_irqsave(&vector_lock, flags);
-	for (new = irq_want; new > 0; new--) {
+	for (new = irq_want; new < NR_IRQS; new++) {
 		if (platform_legacy_irq(new))
 			continue;
 
@@ -3001,11 +3001,14 @@ unsigned int create_irq_nr(unsigned int irq_want)
 	return irq;
 }
 
+static int nr_irqs_gsi = NR_IRQS_LEGACY;
 int create_irq(void)
 {
+	unsigned int irq_want;
 	int irq;
 
-	irq = create_irq_nr(nr_irqs - 1);
+	irq_want = nr_irqs_gsi;
+	irq = create_irq_nr(irq_want);
 
 	if (irq == 0)
 		irq = -1;
@@ -3281,7 +3284,7 @@ int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc)
 	int ret;
 	unsigned int irq_want;
 
-	irq_want = nr_irqs - 1;
+	irq_want = nr_irqs_gsi;
 	irq = create_irq_nr(irq_want);
 	if (irq == 0)
 		return -1;
@@ -3321,11 +3324,11 @@ int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
 	int index = 0;
 #endif
 
-	irq_want = nr_irqs - 1;
+	irq_want = nr_irqs_gsi;
 	sub_handle = 0;
 	list_for_each_entry(msidesc, &dev->msi_list, list) {
 		irq = create_irq_nr(irq_want);
-		irq_want--;
+		irq_want++;
 		if (irq == 0)
 			return -1;
 #ifdef CONFIG_INTR_REMAP
@@ -3674,9 +3677,16 @@ int __init io_apic_get_redir_entries (int ioapic)
 	return reg_01.bits.entries;
 }
 
-int __init probe_nr_irqs(void)
+void __init probe_nr_irqs_gsi(void)
 {
-	return NR_IRQS;
+	int idx;
+	int nr = 0;
+
+	for (idx = 0; idx < nr_ioapics; idx++)
+		nr += io_apic_get_redir_entries(idx) + 1;
+
+	if (nr > nr_irqs_gsi)
+		nr_irqs_gsi = nr;
 }
 
 /* --------------------------------------------------------------------------
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 9d5674f7b6c..a3834f12320 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -1082,7 +1082,7 @@ void __init setup_arch(char **cmdline_p)
 	ioapic_init_mappings();
 
 	/* need to wait for io_apic is mapped */
-	nr_irqs = probe_nr_irqs();
+	probe_nr_irqs_gsi();
 
 	kvm_guest_init();
 
-- 
cgit v1.2.3


From 3145e941fcfe2548fa2270afb1a05bab3a6bc418 Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yinghai@kernel.org>
Date: Fri, 5 Dec 2008 18:58:34 -0800
Subject: x86, MSI: pass irq_cfg and irq_desc

Impact: simplify code

Pass irq_desc and cfg around, instead of raw IRQ numbers - this way
we dont have to look it up again and again.

Signed-off-by: Yinghai Lu <yinghai@kernel.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/io_apic.c | 318 ++++++++++++++++++++++++++--------------------
 1 file changed, 181 insertions(+), 137 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c
index 0dcde74abd1..a1a2e070f31 100644
--- a/arch/x86/kernel/io_apic.c
+++ b/arch/x86/kernel/io_apic.c
@@ -231,6 +231,10 @@ static struct irq_cfg *irq_cfg(unsigned int irq)
 
 #endif
 
+static inline void set_extra_move_desc(struct irq_desc *desc, cpumask_t mask)
+{
+}
+
 struct io_apic {
 	unsigned int index;
 	unsigned int unused[3];
@@ -272,11 +276,10 @@ static inline void io_apic_modify(unsigned int apic, unsigned int reg, unsigned
 	writel(value, &io_apic->data);
 }
 
-static bool io_apic_level_ack_pending(unsigned int irq)
+static bool io_apic_level_ack_pending(struct irq_cfg *cfg)
 {
 	struct irq_pin_list *entry;
 	unsigned long flags;
-	struct irq_cfg *cfg = irq_cfg(irq);
 
 	spin_lock_irqsave(&ioapic_lock, flags);
 	entry = cfg->irq_2_pin;
@@ -358,13 +361,12 @@ static void ioapic_mask_entry(int apic, int pin)
 }
 
 #ifdef CONFIG_SMP
-static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, u8 vector)
+static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, struct irq_cfg *cfg)
 {
 	int apic, pin;
-	struct irq_cfg *cfg;
 	struct irq_pin_list *entry;
+	u8 vector = cfg->vector;
 
-	cfg = irq_cfg(irq);
 	entry = cfg->irq_2_pin;
 	for (;;) {
 		unsigned int reg;
@@ -394,24 +396,27 @@ static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, u8 vector)
 	}
 }
 
-static int assign_irq_vector(int irq, cpumask_t mask);
+static int assign_irq_vector(int irq, struct irq_cfg *cfg, cpumask_t mask);
 
-static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask)
+static void set_ioapic_affinity_irq_desc(struct irq_desc *desc, cpumask_t mask)
 {
 	struct irq_cfg *cfg;
 	unsigned long flags;
 	unsigned int dest;
 	cpumask_t tmp;
-	struct irq_desc *desc;
+	unsigned int irq;
 
 	cpus_and(tmp, mask, cpu_online_map);
 	if (cpus_empty(tmp))
 		return;
 
-	cfg = irq_cfg(irq);
-	if (assign_irq_vector(irq, mask))
+	irq = desc->irq;
+	cfg = desc->chip_data;
+	if (assign_irq_vector(irq, cfg, mask))
 		return;
 
+	set_extra_move_desc(desc, mask);
+
 	cpus_and(tmp, cfg->domain, mask);
 	dest = cpu_mask_to_apicid(tmp);
 	/*
@@ -419,12 +424,20 @@ static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask)
 	 */
 	dest = SET_APIC_LOGICAL_ID(dest);
 
-	desc = irq_to_desc(irq);
 	spin_lock_irqsave(&ioapic_lock, flags);
-	__target_IO_APIC_irq(irq, dest, cfg->vector);
+	__target_IO_APIC_irq(irq, dest, cfg);
 	desc->affinity = mask;
 	spin_unlock_irqrestore(&ioapic_lock, flags);
 }
+
+static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask)
+{
+	struct irq_desc *desc;
+
+	desc = irq_to_desc(irq);
+
+	set_ioapic_affinity_irq_desc(desc, mask);
+}
 #endif /* CONFIG_SMP */
 
 /*
@@ -432,10 +445,9 @@ static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask)
  * shared ISA-space IRQs, so we have to support them. We are super
  * fast in the common case, and fast for shared ISA-space IRQs.
  */
-static void add_pin_to_irq_cpu(unsigned int irq, int cpu, int apic, int pin)
+static void add_pin_to_irq_cpu(struct irq_cfg *cfg, int cpu, int apic, int pin)
 {
 	struct irq_pin_list *entry;
-	struct irq_cfg *cfg = irq_cfg(irq);
 
 	entry = cfg->irq_2_pin;
 	if (!entry) {
@@ -468,11 +480,10 @@ static void add_pin_to_irq_cpu(unsigned int irq, int cpu, int apic, int pin)
 /*
  * Reroute an IRQ to a different pin.
  */
-static void __init replace_pin_at_irq(unsigned int irq, int cpu,
+static void __init replace_pin_at_irq_cpu(struct irq_cfg *cfg, int cpu,
 				      int oldapic, int oldpin,
 				      int newapic, int newpin)
 {
-	struct irq_cfg *cfg = irq_cfg(irq);
 	struct irq_pin_list *entry = cfg->irq_2_pin;
 	int replaced = 0;
 
@@ -489,18 +500,16 @@ static void __init replace_pin_at_irq(unsigned int irq, int cpu,
 
 	/* why? call replace before add? */
 	if (!replaced)
-		add_pin_to_irq_cpu(irq, cpu, newapic, newpin);
+		add_pin_to_irq_cpu(cfg, cpu, newapic, newpin);
 }
 
-static inline void io_apic_modify_irq(unsigned int irq,
+static inline void io_apic_modify_irq(struct irq_cfg *cfg,
 				int mask_and, int mask_or,
 				void (*final)(struct irq_pin_list *entry))
 {
 	int pin;
-	struct irq_cfg *cfg;
 	struct irq_pin_list *entry;
 
-	cfg = irq_cfg(irq);
 	for (entry = cfg->irq_2_pin; entry != NULL; entry = entry->next) {
 		unsigned int reg;
 		pin = entry->pin;
@@ -513,9 +522,9 @@ static inline void io_apic_modify_irq(unsigned int irq,
 	}
 }
 
-static void __unmask_IO_APIC_irq(unsigned int irq)
+static void __unmask_IO_APIC_irq(struct irq_cfg *cfg)
 {
-	io_apic_modify_irq(irq, ~IO_APIC_REDIR_MASKED, 0, NULL);
+	io_apic_modify_irq(cfg, ~IO_APIC_REDIR_MASKED, 0, NULL);
 }
 
 #ifdef CONFIG_X86_64
@@ -530,47 +539,64 @@ void io_apic_sync(struct irq_pin_list *entry)
 	readl(&io_apic->data);
 }
 
-static void __mask_IO_APIC_irq(unsigned int irq)
+static void __mask_IO_APIC_irq(struct irq_cfg *cfg)
 {
-	io_apic_modify_irq(irq, ~0, IO_APIC_REDIR_MASKED, &io_apic_sync);
+	io_apic_modify_irq(cfg, ~0, IO_APIC_REDIR_MASKED, &io_apic_sync);
 }
 #else /* CONFIG_X86_32 */
-static void __mask_IO_APIC_irq(unsigned int irq)
+static void __mask_IO_APIC_irq(struct irq_cfg *cfg)
 {
-	io_apic_modify_irq(irq, ~0, IO_APIC_REDIR_MASKED, NULL);
+	io_apic_modify_irq(cfg, ~0, IO_APIC_REDIR_MASKED, NULL);
 }
 
-static void __mask_and_edge_IO_APIC_irq(unsigned int irq)
+static void __mask_and_edge_IO_APIC_irq(struct irq_cfg *cfg)
 {
-	io_apic_modify_irq(irq, ~IO_APIC_REDIR_LEVEL_TRIGGER,
+	io_apic_modify_irq(cfg, ~IO_APIC_REDIR_LEVEL_TRIGGER,
 			IO_APIC_REDIR_MASKED, NULL);
 }
 
-static void __unmask_and_level_IO_APIC_irq(unsigned int irq)
+static void __unmask_and_level_IO_APIC_irq(struct irq_cfg *cfg)
 {
-	io_apic_modify_irq(irq, ~IO_APIC_REDIR_MASKED,
+	io_apic_modify_irq(cfg, ~IO_APIC_REDIR_MASKED,
 			IO_APIC_REDIR_LEVEL_TRIGGER, NULL);
 }
 #endif /* CONFIG_X86_32 */
 
-static void mask_IO_APIC_irq (unsigned int irq)
+static void mask_IO_APIC_irq_desc(struct irq_desc *desc)
 {
+	struct irq_cfg *cfg = desc->chip_data;
 	unsigned long flags;
 
+	BUG_ON(!cfg);
+
 	spin_lock_irqsave(&ioapic_lock, flags);
-	__mask_IO_APIC_irq(irq);
+	__mask_IO_APIC_irq(cfg);
 	spin_unlock_irqrestore(&ioapic_lock, flags);
 }
 
-static void unmask_IO_APIC_irq (unsigned int irq)
+static void unmask_IO_APIC_irq_desc(struct irq_desc *desc)
 {
+	struct irq_cfg *cfg = desc->chip_data;
 	unsigned long flags;
 
 	spin_lock_irqsave(&ioapic_lock, flags);
-	__unmask_IO_APIC_irq(irq);
+	__unmask_IO_APIC_irq(cfg);
 	spin_unlock_irqrestore(&ioapic_lock, flags);
 }
 
+static void mask_IO_APIC_irq(unsigned int irq)
+{
+	struct irq_desc *desc = irq_to_desc(irq);
+
+	mask_IO_APIC_irq_desc(desc);
+}
+static void unmask_IO_APIC_irq(unsigned int irq)
+{
+	struct irq_desc *desc = irq_to_desc(irq);
+
+	unmask_IO_APIC_irq_desc(desc);
+}
+
 static void clear_IO_APIC_pin(unsigned int apic, unsigned int pin)
 {
 	struct IO_APIC_route_entry entry;
@@ -1072,7 +1098,7 @@ void unlock_vector_lock(void)
 	spin_unlock(&vector_lock);
 }
 
-static int __assign_irq_vector(int irq, cpumask_t mask)
+static int __assign_irq_vector(int irq, struct irq_cfg *cfg, cpumask_t mask)
 {
 	/*
 	 * NOTE! The local APIC isn't very good at handling
@@ -1088,16 +1114,13 @@ static int __assign_irq_vector(int irq, cpumask_t mask)
 	static int current_vector = FIRST_DEVICE_VECTOR, current_offset = 0;
 	unsigned int old_vector;
 	int cpu;
-	struct irq_cfg *cfg;
 
-	cfg = irq_cfg(irq);
+	if ((cfg->move_in_progress) || cfg->move_cleanup_count)
+		return -EBUSY;
 
 	/* Only try and allocate irqs on cpus that are present */
 	cpus_and(mask, mask, cpu_online_map);
 
-	if ((cfg->move_in_progress) || cfg->move_cleanup_count)
-		return -EBUSY;
-
 	old_vector = cfg->vector;
 	if (old_vector) {
 		cpumask_t tmp;
@@ -1151,24 +1174,22 @@ next:
 	return -ENOSPC;
 }
 
-static int assign_irq_vector(int irq, cpumask_t mask)
+static int assign_irq_vector(int irq, struct irq_cfg *cfg, cpumask_t mask)
 {
 	int err;
 	unsigned long flags;
 
 	spin_lock_irqsave(&vector_lock, flags);
-	err = __assign_irq_vector(irq, mask);
+	err = __assign_irq_vector(irq, cfg, mask);
 	spin_unlock_irqrestore(&vector_lock, flags);
 	return err;
 }
 
-static void __clear_irq_vector(int irq)
+static void __clear_irq_vector(int irq, struct irq_cfg *cfg)
 {
-	struct irq_cfg *cfg;
 	cpumask_t mask;
 	int cpu, vector;
 
-	cfg = irq_cfg(irq);
 	BUG_ON(!cfg->vector);
 
 	vector = cfg->vector;
@@ -1257,11 +1278,8 @@ static inline int IO_APIC_irq_trigger(int irq)
 }
 #endif
 
-static void ioapic_register_intr(int irq, unsigned long trigger)
+static void ioapic_register_intr(int irq, struct irq_desc *desc, unsigned long trigger)
 {
-	struct irq_desc *desc;
-
-	desc = irq_to_desc(irq);
 
 	if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) ||
 	    trigger == IOAPIC_LEVEL)
@@ -1353,7 +1371,7 @@ static int setup_ioapic_entry(int apic, int irq,
 	return 0;
 }
 
-static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq,
+static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq, struct irq_desc *desc,
 			      int trigger, int polarity)
 {
 	struct irq_cfg *cfg;
@@ -1363,10 +1381,10 @@ static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq,
 	if (!IO_APIC_IRQ(irq))
 		return;
 
-	cfg = irq_cfg(irq);
+	cfg = desc->chip_data;
 
 	mask = TARGET_CPUS;
-	if (assign_irq_vector(irq, mask))
+	if (assign_irq_vector(irq, cfg, mask))
 		return;
 
 	cpus_and(mask, cfg->domain, mask);
@@ -1383,11 +1401,11 @@ static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq,
 			       cfg->vector)) {
 		printk("Failed to setup ioapic entry for ioapic  %d, pin %d\n",
 		       mp_ioapics[apic].mp_apicid, pin);
-		__clear_irq_vector(irq);
+		__clear_irq_vector(irq, cfg);
 		return;
 	}
 
-	ioapic_register_intr(irq, trigger);
+	ioapic_register_intr(irq, desc, trigger);
 	if (irq < NR_IRQS_LEGACY)
 		disable_8259A_irq(irq);
 
@@ -1399,6 +1417,7 @@ static void __init setup_IO_APIC_irqs(void)
 	int apic, pin, idx, irq;
 	int notcon = 0;
 	struct irq_desc *desc;
+	struct irq_cfg *cfg;
 	int cpu = boot_cpu_id;
 
 	apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n");
@@ -1436,9 +1455,10 @@ static void __init setup_IO_APIC_irqs(void)
 				printk(KERN_INFO "can not get irq_desc for %d\n", irq);
 				continue;
 			}
-			add_pin_to_irq_cpu(irq, cpu, apic, pin);
+			cfg = desc->chip_data;
+			add_pin_to_irq_cpu(cfg, cpu, apic, pin);
 
-			setup_IO_APIC_irq(apic, pin, irq,
+			setup_IO_APIC_irq(apic, pin, irq, desc,
 					irq_trigger(idx), irq_polarity(idx));
 		}
 	}
@@ -2086,7 +2106,7 @@ static unsigned int startup_ioapic_irq(unsigned int irq)
 			was_pending = 1;
 	}
 	cfg = irq_cfg(irq);
-	__unmask_IO_APIC_irq(irq);
+	__unmask_IO_APIC_irq(cfg);
 	spin_unlock_irqrestore(&ioapic_lock, flags);
 
 	return was_pending;
@@ -2149,35 +2169,37 @@ static DECLARE_DELAYED_WORK(ir_migration_work, ir_irq_migration);
  * as simple as edge triggered migration and we can do the irq migration
  * with a simple atomic update to IO-APIC RTE.
  */
-static void migrate_ioapic_irq(int irq, cpumask_t mask)
+static void migrate_ioapic_irq_desc(struct irq_desc *desc, cpumask_t mask)
 {
 	struct irq_cfg *cfg;
-	struct irq_desc *desc;
 	cpumask_t tmp, cleanup_mask;
 	struct irte irte;
 	int modify_ioapic_rte;
 	unsigned int dest;
 	unsigned long flags;
+	unsigned int irq;
 
 	cpus_and(tmp, mask, cpu_online_map);
 	if (cpus_empty(tmp))
 		return;
 
+	irq = desc->irq;
 	if (get_irte(irq, &irte))
 		return;
 
-	if (assign_irq_vector(irq, mask))
+	cfg = desc->chip_data;
+	if (assign_irq_vector(irq, cfg, mask))
 		return;
 
-	cfg = irq_cfg(irq);
+	set_extra_move_desc(desc, mask);
+
 	cpus_and(tmp, cfg->domain, mask);
 	dest = cpu_mask_to_apicid(tmp);
 
-	desc = irq_to_desc(irq);
 	modify_ioapic_rte = desc->status & IRQ_LEVEL;
 	if (modify_ioapic_rte) {
 		spin_lock_irqsave(&ioapic_lock, flags);
-		__target_IO_APIC_irq(irq, dest, cfg->vector);
+		__target_IO_APIC_irq(irq, dest, cfg);
 		spin_unlock_irqrestore(&ioapic_lock, flags);
 	}
 
@@ -2199,14 +2221,14 @@ static void migrate_ioapic_irq(int irq, cpumask_t mask)
 	desc->affinity = mask;
 }
 
-static int migrate_irq_remapped_level(int irq)
+static int migrate_irq_remapped_level_desc(struct irq_desc *desc)
 {
 	int ret = -1;
-	struct irq_desc *desc = irq_to_desc(irq);
+	struct irq_cfg *cfg = desc->chip_data;
 
-	mask_IO_APIC_irq(irq);
+	mask_IO_APIC_irq_desc(desc);
 
-	if (io_apic_level_ack_pending(irq)) {
+	if (io_apic_level_ack_pending(cfg)) {
 		/*
 		 * Interrupt in progress. Migrating irq now will change the
 		 * vector information in the IO-APIC RTE and that will confuse
@@ -2218,14 +2240,15 @@ static int migrate_irq_remapped_level(int irq)
 	}
 
 	/* everthing is clear. we have right of way */
-	migrate_ioapic_irq(irq, desc->pending_mask);
+	migrate_ioapic_irq_desc(desc, desc->pending_mask);
 
 	ret = 0;
 	desc->status &= ~IRQ_MOVE_PENDING;
 	cpus_clear(desc->pending_mask);
 
 unmask:
-	unmask_IO_APIC_irq(irq);
+	unmask_IO_APIC_irq_desc(desc);
+
 	return ret;
 }
 
@@ -2258,18 +2281,22 @@ static void ir_irq_migration(struct work_struct *work)
 /*
  * Migrates the IRQ destination in the process context.
  */
-static void set_ir_ioapic_affinity_irq(unsigned int irq, cpumask_t mask)
+static void set_ir_ioapic_affinity_irq_desc(struct irq_desc *desc, cpumask_t mask)
 {
-	struct irq_desc *desc = irq_to_desc(irq);
-
 	if (desc->status & IRQ_LEVEL) {
 		desc->status |= IRQ_MOVE_PENDING;
 		desc->pending_mask = mask;
-		migrate_irq_remapped_level(irq);
+		migrate_irq_remapped_level_desc(desc);
 		return;
 	}
 
-	migrate_ioapic_irq(irq, mask);
+	migrate_ioapic_irq_desc(desc, mask);
+}
+static void set_ir_ioapic_affinity_irq(unsigned int irq, cpumask_t mask)
+{
+	struct irq_desc *desc = irq_to_desc(irq);
+
+	set_ir_ioapic_affinity_irq_desc(desc, mask);
 }
 #endif
 
@@ -2313,9 +2340,10 @@ unlock:
 	irq_exit();
 }
 
-static void irq_complete_move(unsigned int irq)
+static void irq_complete_move(struct irq_desc **descp)
 {
-	struct irq_cfg *cfg = irq_cfg(irq);
+	struct irq_desc *desc = *descp;
+	struct irq_cfg *cfg = desc->chip_data;
 	unsigned vector, me;
 
 	if (likely(!cfg->move_in_progress))
@@ -2333,8 +2361,9 @@ static void irq_complete_move(unsigned int irq)
 	}
 }
 #else
-static inline void irq_complete_move(unsigned int irq) {}
+static inline void irq_complete_move(struct irq_desc **descp) {}
 #endif
+
 #ifdef CONFIG_INTR_REMAP
 static void ack_x2apic_level(unsigned int irq)
 {
@@ -2345,11 +2374,14 @@ static void ack_x2apic_edge(unsigned int irq)
 {
 	ack_x2APIC_irq();
 }
+
 #endif
 
 static void ack_apic_edge(unsigned int irq)
 {
-	irq_complete_move(irq);
+	struct irq_desc *desc = irq_to_desc(irq);
+
+	irq_complete_move(&desc);
 	move_native_irq(irq);
 	ack_APIC_irq();
 }
@@ -2358,18 +2390,21 @@ atomic_t irq_mis_count;
 
 static void ack_apic_level(unsigned int irq)
 {
+	struct irq_desc *desc = irq_to_desc(irq);
+
 #ifdef CONFIG_X86_32
 	unsigned long v;
 	int i;
 #endif
+	struct irq_cfg *cfg;
 	int do_unmask_irq = 0;
 
-	irq_complete_move(irq);
+	irq_complete_move(&desc);
 #ifdef CONFIG_GENERIC_PENDING_IRQ
 	/* If we are moving the irq we need to mask it */
-	if (unlikely(irq_to_desc(irq)->status & IRQ_MOVE_PENDING)) {
+	if (unlikely(desc->status & IRQ_MOVE_PENDING)) {
 		do_unmask_irq = 1;
-		mask_IO_APIC_irq(irq);
+		mask_IO_APIC_irq_desc(desc);
 	}
 #endif
 
@@ -2393,7 +2428,8 @@ static void ack_apic_level(unsigned int irq)
 	* operation to prevent an edge-triggered interrupt escaping meanwhile.
 	* The idea is from Manfred Spraul.  --macro
 	*/
-	i = irq_cfg(irq)->vector;
+	cfg = desc->chip_data;
+	i = cfg->vector;
 
 	v = apic_read(APIC_TMR + ((i & ~0x1f) >> 1));
 #endif
@@ -2432,17 +2468,18 @@ static void ack_apic_level(unsigned int irq)
 		 * accurate and is causing problems then it is a hardware bug
 		 * and you can go talk to the chipset vendor about it.
 		 */
-		if (!io_apic_level_ack_pending(irq))
+		cfg = desc->chip_data;
+		if (!io_apic_level_ack_pending(cfg))
 			move_masked_irq(irq);
-		unmask_IO_APIC_irq(irq);
+		unmask_IO_APIC_irq_desc(desc);
 	}
 
 #ifdef CONFIG_X86_32
 	if (!(v & (1 << (i & 0x1f)))) {
 		atomic_inc(&irq_mis_count);
 		spin_lock(&ioapic_lock);
-		__mask_and_edge_IO_APIC_irq(irq);
-		__unmask_and_level_IO_APIC_irq(irq);
+		__mask_and_edge_IO_APIC_irq(cfg);
+		__unmask_and_level_IO_APIC_irq(cfg);
 		spin_unlock(&ioapic_lock);
 	}
 #endif
@@ -2533,7 +2570,7 @@ static void unmask_lapic_irq(unsigned int irq)
 	apic_write(APIC_LVT0, v & ~APIC_LVT_MASKED);
 }
 
-static void ack_lapic_irq (unsigned int irq)
+static void ack_lapic_irq(unsigned int irq)
 {
 	ack_APIC_irq();
 }
@@ -2545,11 +2582,8 @@ static struct irq_chip lapic_chip __read_mostly = {
 	.ack		= ack_lapic_irq,
 };
 
-static void lapic_register_intr(int irq)
+static void lapic_register_intr(int irq, struct irq_desc *desc)
 {
-	struct irq_desc *desc;
-
-	desc = irq_to_desc(irq);
 	desc->status &= ~IRQ_LEVEL;
 	set_irq_chip_and_handler_name(irq, &lapic_chip, handle_edge_irq,
 				      "edge");
@@ -2653,7 +2687,9 @@ int timer_through_8259 __initdata;
  */
 static inline void __init check_timer(void)
 {
-	struct irq_cfg *cfg = irq_cfg(0);
+	struct irq_desc *desc = irq_to_desc(0);
+	struct irq_cfg *cfg = desc->chip_data;
+	int cpu = boot_cpu_id;
 	int apic1, pin1, apic2, pin2;
 	unsigned long flags;
 	unsigned int ver;
@@ -2668,7 +2704,7 @@ static inline void __init check_timer(void)
 	 * get/set the timer IRQ vector:
 	 */
 	disable_8259A_irq(0);
-	assign_irq_vector(0, TARGET_CPUS);
+	assign_irq_vector(0, cfg, TARGET_CPUS);
 
 	/*
 	 * As IRQ0 is to be enabled in the 8259A, the virtual
@@ -2719,10 +2755,10 @@ static inline void __init check_timer(void)
 		 * Ok, does IRQ0 through the IOAPIC work?
 		 */
 		if (no_pin1) {
-			add_pin_to_irq_cpu(0, boot_cpu_id, apic1, pin1);
+			add_pin_to_irq_cpu(cfg, cpu, apic1, pin1);
 			setup_timer_IRQ0_pin(apic1, pin1, cfg->vector);
 		}
-		unmask_IO_APIC_irq(0);
+		unmask_IO_APIC_irq_desc(desc);
 		if (timer_irq_works()) {
 			if (nmi_watchdog == NMI_IO_APIC) {
 				setup_nmi();
@@ -2748,9 +2784,9 @@ static inline void __init check_timer(void)
 		/*
 		 * legacy devices should be connected to IO APIC #0
 		 */
-		replace_pin_at_irq(0, boot_cpu_id, apic1, pin1, apic2, pin2);
+		replace_pin_at_irq_cpu(cfg, cpu, apic1, pin1, apic2, pin2);
 		setup_timer_IRQ0_pin(apic2, pin2, cfg->vector);
-		unmask_IO_APIC_irq(0);
+		unmask_IO_APIC_irq_desc(desc);
 		enable_8259A_irq(0);
 		if (timer_irq_works()) {
 			apic_printk(APIC_QUIET, KERN_INFO "....... works.\n");
@@ -2782,7 +2818,7 @@ static inline void __init check_timer(void)
 	apic_printk(APIC_QUIET, KERN_INFO
 		    "...trying to set up timer as Virtual Wire IRQ...\n");
 
-	lapic_register_intr(0);
+	lapic_register_intr(0, desc);
 	apic_write(APIC_LVT0, APIC_DM_FIXED | cfg->vector);	/* Fixed mode */
 	enable_8259A_irq(0);
 
@@ -2986,7 +3022,7 @@ unsigned int create_irq_nr(unsigned int irq_want)
 
 		if (cfg_new->vector != 0)
 			continue;
-		if (__assign_irq_vector(new, TARGET_CPUS) == 0)
+		if (__assign_irq_vector(new, cfg_new, TARGET_CPUS) == 0)
 			irq = new;
 		break;
 	}
@@ -3034,7 +3070,7 @@ void destroy_irq(unsigned int irq)
 	free_irte(irq);
 #endif
 	spin_lock_irqsave(&vector_lock, flags);
-	__clear_irq_vector(irq);
+	__clear_irq_vector(irq, cfg);
 	spin_unlock_irqrestore(&vector_lock, flags);
 }
 
@@ -3049,12 +3085,12 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_ms
 	unsigned dest;
 	cpumask_t tmp;
 
+	cfg = irq_cfg(irq);
 	tmp = TARGET_CPUS;
-	err = assign_irq_vector(irq, tmp);
+	err = assign_irq_vector(irq, cfg, tmp);
 	if (err)
 		return err;
 
-	cfg = irq_cfg(irq);
 	cpus_and(tmp, cfg->domain, tmp);
 	dest = cpu_mask_to_apicid(tmp);
 
@@ -3112,35 +3148,35 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_ms
 #ifdef CONFIG_SMP
 static void set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
 {
+	struct irq_desc *desc = irq_to_desc(irq);
 	struct irq_cfg *cfg;
 	struct msi_msg msg;
 	unsigned int dest;
 	cpumask_t tmp;
-	struct irq_desc *desc;
 
 	cpus_and(tmp, mask, cpu_online_map);
 	if (cpus_empty(tmp))
 		return;
 
-	if (assign_irq_vector(irq, mask))
+	cfg = desc->chip_data;
+	if (assign_irq_vector(irq, cfg, mask))
 		return;
 
-	cfg = irq_cfg(irq);
+	set_extra_move_desc(desc, mask);
+
 	cpus_and(tmp, cfg->domain, mask);
 	dest = cpu_mask_to_apicid(tmp);
 
-	read_msi_msg(irq, &msg);
+	read_msi_msg_desc(desc, &msg);
 
 	msg.data &= ~MSI_DATA_VECTOR_MASK;
 	msg.data |= MSI_DATA_VECTOR(cfg->vector);
 	msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK;
 	msg.address_lo |= MSI_ADDR_DEST_ID(dest);
 
-	write_msi_msg(irq, &msg);
-	desc = irq_to_desc(irq);
+	write_msi_msg_desc(desc, &msg);
 	desc->affinity = mask;
 }
-
 #ifdef CONFIG_INTR_REMAP
 /*
  * Migrate the MSI irq to another cpumask. This migration is
@@ -3148,11 +3184,11 @@ static void set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
  */
 static void ir_set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
 {
+	struct irq_desc *desc = irq_to_desc(irq);
 	struct irq_cfg *cfg;
 	unsigned int dest;
 	cpumask_t tmp, cleanup_mask;
 	struct irte irte;
-	struct irq_desc *desc;
 
 	cpus_and(tmp, mask, cpu_online_map);
 	if (cpus_empty(tmp))
@@ -3161,10 +3197,12 @@ static void ir_set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
 	if (get_irte(irq, &irte))
 		return;
 
-	if (assign_irq_vector(irq, mask))
+	cfg = desc->chip_data;
+	if (assign_irq_vector(irq, cfg, mask))
 		return;
 
-	cfg = irq_cfg(irq);
+	set_extra_move_desc(desc, mask);
+
 	cpus_and(tmp, cfg->domain, mask);
 	dest = cpu_mask_to_apicid(tmp);
 
@@ -3188,9 +3226,9 @@ static void ir_set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
 		cfg->move_in_progress = 0;
 	}
 
-	desc = irq_to_desc(irq);
 	desc->affinity = mask;
 }
+
 #endif
 #endif /* CONFIG_SMP */
 
@@ -3249,7 +3287,7 @@ static int msi_alloc_irte(struct pci_dev *dev, int irq, int nvec)
 }
 #endif
 
-static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc, int irq)
+static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, int irq)
 {
 	int ret;
 	struct msi_msg msg;
@@ -3258,7 +3296,7 @@ static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc, int irq)
 	if (ret < 0)
 		return ret;
 
-	set_irq_msi(irq, desc);
+	set_irq_msi(irq, msidesc);
 	write_msi_msg(irq, &msg);
 
 #ifdef CONFIG_INTR_REMAP
@@ -3381,20 +3419,22 @@ void arch_teardown_msi_irq(unsigned int irq)
 #ifdef CONFIG_SMP
 static void dmar_msi_set_affinity(unsigned int irq, cpumask_t mask)
 {
+	struct irq_desc *desc = irq_to_desc(irq);
 	struct irq_cfg *cfg;
 	struct msi_msg msg;
 	unsigned int dest;
 	cpumask_t tmp;
-	struct irq_desc *desc;
 
 	cpus_and(tmp, mask, cpu_online_map);
 	if (cpus_empty(tmp))
 		return;
 
-	if (assign_irq_vector(irq, mask))
+	cfg = desc->chip_data;
+	if (assign_irq_vector(irq, cfg, mask))
 		return;
 
-	cfg = irq_cfg(irq);
+	set_extra_move_desc(desc, mask);
+
 	cpus_and(tmp, cfg->domain, mask);
 	dest = cpu_mask_to_apicid(tmp);
 
@@ -3406,9 +3446,9 @@ static void dmar_msi_set_affinity(unsigned int irq, cpumask_t mask)
 	msg.address_lo |= MSI_ADDR_DEST_ID(dest);
 
 	dmar_msi_write(irq, &msg);
-	desc = irq_to_desc(irq);
 	desc->affinity = mask;
 }
+
 #endif /* CONFIG_SMP */
 
 struct irq_chip dmar_msi_type = {
@@ -3442,8 +3482,8 @@ int arch_setup_dmar_msi(unsigned int irq)
 #ifdef CONFIG_SMP
 static void hpet_msi_set_affinity(unsigned int irq, cpumask_t mask)
 {
+	struct irq_desc *desc = irq_to_desc(irq);
 	struct irq_cfg *cfg;
-	struct irq_desc *desc;
 	struct msi_msg msg;
 	unsigned int dest;
 	cpumask_t tmp;
@@ -3452,10 +3492,12 @@ static void hpet_msi_set_affinity(unsigned int irq, cpumask_t mask)
 	if (cpus_empty(tmp))
 		return;
 
-	if (assign_irq_vector(irq, mask))
+	cfg = desc->chip_data;
+	if (assign_irq_vector(irq, cfg, mask))
 		return;
 
-	cfg = irq_cfg(irq);
+	set_extra_move_desc(desc, mask);
+
 	cpus_and(tmp, cfg->domain, mask);
 	dest = cpu_mask_to_apicid(tmp);
 
@@ -3467,9 +3509,9 @@ static void hpet_msi_set_affinity(unsigned int irq, cpumask_t mask)
 	msg.address_lo |= MSI_ADDR_DEST_ID(dest);
 
 	hpet_msi_write(irq, &msg);
-	desc = irq_to_desc(irq);
 	desc->affinity = mask;
 }
+
 #endif /* CONFIG_SMP */
 
 struct irq_chip hpet_msi_type = {
@@ -3524,26 +3566,28 @@ static void target_ht_irq(unsigned int irq, unsigned int dest, u8 vector)
 
 static void set_ht_irq_affinity(unsigned int irq, cpumask_t mask)
 {
+	struct irq_desc *desc = irq_to_desc(irq);
 	struct irq_cfg *cfg;
 	unsigned int dest;
 	cpumask_t tmp;
-	struct irq_desc *desc;
 
 	cpus_and(tmp, mask, cpu_online_map);
 	if (cpus_empty(tmp))
 		return;
 
-	if (assign_irq_vector(irq, mask))
+	cfg = desc->chip_data;
+	if (assign_irq_vector(irq, cfg, mask))
 		return;
 
-	cfg = irq_cfg(irq);
+	set_extra_move_desc(desc, mask);
+
 	cpus_and(tmp, cfg->domain, mask);
 	dest = cpu_mask_to_apicid(tmp);
 
 	target_ht_irq(irq, dest, cfg->vector);
-	desc = irq_to_desc(irq);
 	desc->affinity = mask;
 }
+
 #endif
 
 static struct irq_chip ht_irq_chip = {
@@ -3563,13 +3607,13 @@ int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev)
 	int err;
 	cpumask_t tmp;
 
+	cfg = irq_cfg(irq);
 	tmp = TARGET_CPUS;
-	err = assign_irq_vector(irq, tmp);
+	err = assign_irq_vector(irq, cfg, tmp);
 	if (!err) {
 		struct ht_irq_msg msg;
 		unsigned dest;
 
-		cfg = irq_cfg(irq);
 		cpus_and(tmp, cfg->domain, tmp);
 		dest = cpu_mask_to_apicid(tmp);
 
@@ -3615,7 +3659,9 @@ int arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade,
 	unsigned long flags;
 	int err;
 
-	err = assign_irq_vector(irq, *eligible_cpu);
+	cfg = irq_cfg(irq);
+
+	err = assign_irq_vector(irq, cfg, *eligible_cpu);
 	if (err != 0)
 		return err;
 
@@ -3624,8 +3670,6 @@ int arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade,
 				      irq_name);
 	spin_unlock_irqrestore(&vector_lock, flags);
 
-	cfg = irq_cfg(irq);
-
 	mmr_value = 0;
 	entry = (struct uv_IO_APIC_route_entry *)&mmr_value;
 	BUG_ON(sizeof(struct uv_IO_APIC_route_entry) != sizeof(unsigned long));
@@ -3806,10 +3850,10 @@ int io_apic_set_pci_routing (int ioapic, int pin, int irq, int triggering, int p
 	 */
 	if (irq >= NR_IRQS_LEGACY) {
 		cfg = desc->chip_data;
-		add_pin_to_irq_cpu(irq, cpu, ioapic, pin);
+		add_pin_to_irq_cpu(cfg, cpu, ioapic, pin);
 	}
 
-	setup_IO_APIC_irq(ioapic, pin, irq, triggering, polarity);
+	setup_IO_APIC_irq(ioapic, pin, irq, desc, triggering, polarity);
 
 	return 0;
 }
@@ -3866,7 +3910,7 @@ void __init setup_ioapic_dest(void)
 			desc = irq_to_desc(irq);
 			cfg = desc->chip_data;
 			if (!cfg->vector) {
-				setup_IO_APIC_irq(ioapic, pin, irq,
+				setup_IO_APIC_irq(ioapic, pin, irq, desc,
 						  irq_trigger(irq_entry),
 						  irq_polarity(irq_entry));
 				continue;
@@ -3884,10 +3928,10 @@ void __init setup_ioapic_dest(void)
 
 #ifdef CONFIG_INTR_REMAP
 			if (intr_remapping_enabled)
-				set_ir_ioapic_affinity_irq(irq, mask);
+				set_ir_ioapic_affinity_irq_desc(desc, mask);
 			else
 #endif
-				set_ioapic_affinity_irq(irq, mask);
+				set_ioapic_affinity_irq_desc(desc, mask);
 		}
 
 	}
-- 
cgit v1.2.3


From 50dd94e017ec39f85c26b6c10ed9fb2d7a7d8042 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Mon, 8 Dec 2008 18:47:51 +0100
Subject: sparseirq: fix typo in !CONFIG_IO_APIC case

Impact: build fix

Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/include/asm/io_apic.h | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/io_apic.h b/arch/x86/include/asm/io_apic.h
index b35e94c2d6d..25d527ca136 100644
--- a/arch/x86/include/asm/io_apic.h
+++ b/arch/x86/include/asm/io_apic.h
@@ -193,12 +193,9 @@ extern void probe_nr_irqs_gsi(void);
 #else  /* !CONFIG_X86_IO_APIC */
 #define io_apic_assign_pci_irqs 0
 static const int timer_through_8259 = 0;
-static inline void ioapic_init_mappings(void) { }
+static inline void ioapic_init_mappings(void)	{ }
 
-static inline int probe_nr_irqs(void)
-{
-	return NR_IRQS;
-}
+static inline void probe_nr_irqs_gsi(void)	{ }
 #endif
 
 #endif /* _ASM_X86_IO_APIC_H */
-- 
cgit v1.2.3


From 8a4830f8891be6b4e04809693a24771a4694e0b0 Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yinghai@kernel.org>
Date: Tue, 9 Dec 2008 11:56:20 -0800
Subject: sparseirq: fix !SMP && !PCI_MSI && !HT_IRQ build

Ingo Molnar wrote:

>>>  drivers/pci/intr_remapping.c: In function 'irq_2_iommu_alloc':
>>>  drivers/pci/intr_remapping.c:72: error: 'boot_cpu_id' undeclared (first use in this function)
>>>  drivers/pci/intr_remapping.c:72: error: (Each undeclared identifier is reported only once
>>>  drivers/pci/intr_remapping.c:72: error: for each function it appears in.)

sparseirq should only be used with SMP for now.
---
 arch/x86/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/x86')

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 48ac688de3c..8943c13502c 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -240,7 +240,7 @@ config X86_HAS_BOOT_CPU_ID
 
 config SPARSE_IRQ
 	bool "Support sparse irq numbering"
-	depends on PCI_MSI || HT_IRQ
+	depends on (PCI_MSI || HT_IRQ) && SMP
 	default y
 	help
 	  This enables support for sparse irq, esp for msi/msi-x. You may need
-- 
cgit v1.2.3


From fd13f6c85144bb2026c534a35be1d7cb7628a64a Mon Sep 17 00:00:00 2001
From: Robert Richter <robert.richter@amd.com>
Date: Sun, 19 Oct 2008 21:00:09 +0200
Subject: oprofile: comment cleanup

This fixes the coding style of some comments.

Signed-off-by: Robert Richter <robert.richter@amd.com>
---
 arch/x86/oprofile/op_model_amd.c | 31 ++++++++++++++++---------------
 1 file changed, 16 insertions(+), 15 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/oprofile/op_model_amd.c b/arch/x86/oprofile/op_model_amd.c
index 509513760a6..fb67e1999d8 100644
--- a/arch/x86/oprofile/op_model_amd.c
+++ b/arch/x86/oprofile/op_model_amd.c
@@ -65,8 +65,10 @@ static unsigned long reset_value[NUM_COUNTERS];
 #define IBS_FETCH_BEGIN 3
 #define IBS_OP_BEGIN    4
 
-/* The function interface needs to be fixed, something like add
-   data. Should then be added to linux/oprofile.h. */
+/*
+ * The function interface needs to be fixed, something like add
+ * data. Should then be added to linux/oprofile.h.
+ */
 extern void
 oprofile_add_ibs_sample(struct pt_regs *const regs,
 			unsigned int *const ibs_sample, int ibs_code);
@@ -106,7 +108,7 @@ struct ibs_op_sample {
 
 /*
  * unitialize the APIC for the IBS interrupts if needed on AMD Family10h+
-*/
+ */
 static void clear_ibs_nmi(void);
 
 static int ibs_allowed;	/* AMD Family10h and later */
@@ -223,7 +225,7 @@ op_amd_handle_ibs(struct pt_regs * const regs,
 						(unsigned int *)&ibs_fetch,
 						IBS_FETCH_BEGIN);
 
-			/*reenable the IRQ */
+			/* reenable the IRQ */
 			rdmsr(MSR_AMD64_IBSFETCHCTL, low, high);
 			high &= ~IBS_FETCH_HIGH_VALID_BIT;
 			high |= IBS_FETCH_HIGH_ENABLE;
@@ -331,8 +333,10 @@ static void op_amd_stop(struct op_msrs const * const msrs)
 	unsigned int low, high;
 	int i;
 
-	/* Subtle: stop on all counters to avoid race with
-	 * setting our pm callback */
+	/*
+	 * Subtle: stop on all counters to avoid race with setting our
+	 * pm callback
+	 */
 	for (i = 0 ; i < NUM_COUNTERS ; ++i) {
 		if (!reset_value[i])
 			continue;
@@ -343,13 +347,15 @@ static void op_amd_stop(struct op_msrs const * const msrs)
 
 #ifdef CONFIG_OPROFILE_IBS
 	if (ibs_allowed && ibs_config.fetch_enabled) {
-		low = 0;		/* clear max count and enable */
+		/* clear max count and enable */
+		low = 0;
 		high = 0;
 		wrmsr(MSR_AMD64_IBSFETCHCTL, low, high);
 	}
 
 	if (ibs_allowed && ibs_config.op_enabled) {
-		low = 0;		/* clear max count and enable */
+		/* clear max count and enable */
+		low = 0;
 		high = 0;
 		wrmsr(MSR_AMD64_IBSOPCTL, low, high);
 	}
@@ -443,10 +449,7 @@ static int pfm_amd64_setup_eilvt(void)
 	return 0;
 }
 
-/*
- * initialize the APIC for the IBS interrupts
- * if available (AMD Family10h rev B0 and later)
- */
+/* initialize the APIC for the IBS interrupts if available */
 static void setup_ibs(void)
 {
 	ibs_allowed = boot_cpu_has(X86_FEATURE_IBS);
@@ -463,9 +466,7 @@ static void setup_ibs(void)
 }
 
 
-/*
- * unitialize the APIC for the IBS interrupts if needed on AMD Family10h
- * rev B0 and later */
+/* uninitialize the APIC for the IBS interrupts if needed */
 static void clear_ibs_nmi(void)
 {
 	if (ibs_allowed)
-- 
cgit v1.2.3


From cdc1834d1aa2e5b574a25e66f82625b44cdd0d8f Mon Sep 17 00:00:00 2001
From: Robert Richter <robert.richter@amd.com>
Date: Fri, 26 Sep 2008 22:18:44 -0400
Subject: oprofile: whitspace changes only

Signed-off-by: Robert Richter <robert.richter@amd.com>
---
 arch/x86/oprofile/op_model_amd.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/oprofile/op_model_amd.c b/arch/x86/oprofile/op_model_amd.c
index fb67e1999d8..f71bd218b48 100644
--- a/arch/x86/oprofile/op_model_amd.c
+++ b/arch/x86/oprofile/op_model_amd.c
@@ -70,8 +70,8 @@ static unsigned long reset_value[NUM_COUNTERS];
  * data. Should then be added to linux/oprofile.h.
  */
 extern void
-oprofile_add_ibs_sample(struct pt_regs *const regs,
-			unsigned int *const ibs_sample, int ibs_code);
+oprofile_add_ibs_sample(struct pt_regs * const regs,
+			unsigned int * const ibs_sample, int ibs_code);
 
 struct ibs_fetch_sample {
 	/* MSRC001_1031 IBS Fetch Linear Address Register */
-- 
cgit v1.2.3


From 9fa6812dbab9207f7af52c3d0417f1f9eb89c386 Mon Sep 17 00:00:00 2001
From: Robert Richter <robert.richter@amd.com>
Date: Mon, 24 Nov 2008 14:21:03 +0100
Subject: x86/oprofile: reordering IBS code in op_model_amd.c

This is part of the cpu buffer rework.

Signed-off-by: Robert Richter <robert.richter@amd.com>
---
 arch/x86/oprofile/op_model_amd.c | 26 +++++++++++++-------------
 1 file changed, 13 insertions(+), 13 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/oprofile/op_model_amd.c b/arch/x86/oprofile/op_model_amd.c
index f71bd218b48..8ff657b3ff8 100644
--- a/arch/x86/oprofile/op_model_amd.c
+++ b/arch/x86/oprofile/op_model_amd.c
@@ -376,18 +376,7 @@ static void op_amd_shutdown(struct op_msrs const * const msrs)
 	}
 }
 
-#ifndef CONFIG_OPROFILE_IBS
-
-/* no IBS support */
-
-static int op_amd_init(struct oprofile_operations *ops)
-{
-	return 0;
-}
-
-static void op_amd_exit(void) {}
-
-#else
+#ifdef CONFIG_OPROFILE_IBS
 
 static u8 ibs_eilvt_off;
 
@@ -531,7 +520,18 @@ static void op_amd_exit(void)
 	clear_ibs_nmi();
 }
 
-#endif
+#else
+
+/* no IBS support */
+
+static int op_amd_init(struct oprofile_operations *ops)
+{
+	return 0;
+}
+
+static void op_amd_exit(void) {}
+
+#endif /* CONFIG_OPROFILE_IBS */
 
 struct op_x86_model_spec const op_amd_spec = {
 	.init			= op_amd_init,
-- 
cgit v1.2.3


From fe615cbf34fc6a1c53c359417da4696328a488ed Mon Sep 17 00:00:00 2001
From: Robert Richter <robert.richter@amd.com>
Date: Mon, 24 Nov 2008 14:58:03 +0100
Subject: x86/oprofile: cleanup IBS init/exit functions in op_model_amd.c

Implementation of pairwise init/exit funcions for IBS and IBS NMI
setup. There are also some function renames and the removal of forward
function declarations.

Signed-off-by: Robert Richter <robert.richter@amd.com>
---
 arch/x86/oprofile/op_model_amd.c | 32 +++++++++++++++++---------------
 1 file changed, 17 insertions(+), 15 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/oprofile/op_model_amd.c b/arch/x86/oprofile/op_model_amd.c
index 8ff657b3ff8..98658f25f54 100644
--- a/arch/x86/oprofile/op_model_amd.c
+++ b/arch/x86/oprofile/op_model_amd.c
@@ -106,11 +106,6 @@ struct ibs_op_sample {
 	unsigned int ibs_dc_phys_high;
 };
 
-/*
- * unitialize the APIC for the IBS interrupts if needed on AMD Family10h+
- */
-static void clear_ibs_nmi(void);
-
 static int ibs_allowed;	/* AMD Family10h and later */
 
 struct op_ibs_config {
@@ -390,7 +385,7 @@ static inline void apic_clear_ibs_nmi_per_cpu(void *arg)
 	setup_APIC_eilvt_ibs(0, APIC_EILVT_MSG_FIX, 1);
 }
 
-static int pfm_amd64_setup_eilvt(void)
+static int init_ibs_nmi(void)
 {
 #define IBSCTL_LVTOFFSETVAL		(1 << 8)
 #define IBSCTL				0x1cc
@@ -438,15 +433,22 @@ static int pfm_amd64_setup_eilvt(void)
 	return 0;
 }
 
+/* uninitialize the APIC for the IBS interrupts if needed */
+static void clear_ibs_nmi(void)
+{
+	if (ibs_allowed)
+		on_each_cpu(apic_clear_ibs_nmi_per_cpu, NULL, 1);
+}
+
 /* initialize the APIC for the IBS interrupts if available */
-static void setup_ibs(void)
+static void ibs_init(void)
 {
 	ibs_allowed = boot_cpu_has(X86_FEATURE_IBS);
 
 	if (!ibs_allowed)
 		return;
 
-	if (pfm_amd64_setup_eilvt()) {
+	if (init_ibs_nmi()) {
 		ibs_allowed = 0;
 		return;
 	}
@@ -454,12 +456,12 @@ static void setup_ibs(void)
 	printk(KERN_INFO "oprofile: AMD IBS detected\n");
 }
 
-
-/* uninitialize the APIC for the IBS interrupts if needed */
-static void clear_ibs_nmi(void)
+static void ibs_exit(void)
 {
-	if (ibs_allowed)
-		on_each_cpu(apic_clear_ibs_nmi_per_cpu, NULL, 1);
+	if (!ibs_allowed)
+		return;
+
+	clear_ibs_nmi();
 }
 
 static int (*create_arch_files)(struct super_block *sb, struct dentry *root);
@@ -509,7 +511,7 @@ static int setup_ibs_files(struct super_block *sb, struct dentry *root)
 
 static int op_amd_init(struct oprofile_operations *ops)
 {
-	setup_ibs();
+	ibs_init();
 	create_arch_files = ops->create_files;
 	ops->create_files = setup_ibs_files;
 	return 0;
@@ -517,7 +519,7 @@ static int op_amd_init(struct oprofile_operations *ops)
 
 static void op_amd_exit(void)
 {
-	clear_ibs_nmi();
+	ibs_exit();
 }
 
 #else
-- 
cgit v1.2.3


From 98a79d6a50181ca1ecf7400eda01d5dc1bc0dbf0 Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Sat, 13 Dec 2008 21:19:41 +1030
Subject: cpumask: centralize cpu_online_map and cpu_possible_map

Impact: cleanup

Each SMP arch defines these themselves.  Move them to a central
location.

Twists:
1) Some archs (m32, parisc, s390) set possible_map to all 1, so we add a
   CONFIG_INIT_ALL_POSSIBLE for this rather than break them.

2) mips and sparc32 '#define cpu_possible_map phys_cpu_present_map'.
   Those archs simply have phys_cpu_present_map replaced everywhere.

3) Alpha defined cpu_possible_map to cpu_present_map; this is tricky
   so I just manipulate them both in sync.

4) IA64, cris and m32r have gratuitous 'extern cpumask_t cpu_possible_map'
   declarations.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Reviewed-by: Grant Grundler <grundler@parisc-linux.org>
Tested-by: Tony Luck <tony.luck@intel.com>
Acked-by: Ingo Molnar <mingo@elte.hu>
Cc: Mike Travis <travis@sgi.com>
Cc: ink@jurassic.park.msu.ru
Cc: rmk@arm.linux.org.uk
Cc: starvik@axis.com
Cc: tony.luck@intel.com
Cc: takata@linux-m32r.org
Cc: ralf@linux-mips.org
Cc: grundler@parisc-linux.org
Cc: paulus@samba.org
Cc: schwidefsky@de.ibm.com
Cc: lethal@linux-sh.org
Cc: wli@holomorphy.com
Cc: davem@davemloft.net
Cc: jdike@addtoit.com
Cc: mingo@redhat.com
---
 arch/x86/kernel/smpboot.c           | 6 ------
 arch/x86/mach-voyager/voyager_smp.c | 7 -------
 2 files changed, 13 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 7b109339731..468c2f9d47a 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -101,14 +101,8 @@ EXPORT_SYMBOL(smp_num_siblings);
 /* Last level cache ID of each logical CPU */
 DEFINE_PER_CPU(u16, cpu_llc_id) = BAD_APICID;
 
-/* bitmap of online cpus */
-cpumask_t cpu_online_map __read_mostly;
-EXPORT_SYMBOL(cpu_online_map);
-
 cpumask_t cpu_callin_map;
 cpumask_t cpu_callout_map;
-cpumask_t cpu_possible_map;
-EXPORT_SYMBOL(cpu_possible_map);
 
 /* representing HT siblings of each logical CPU */
 DEFINE_PER_CPU(cpumask_t, cpu_sibling_map);
diff --git a/arch/x86/mach-voyager/voyager_smp.c b/arch/x86/mach-voyager/voyager_smp.c
index 52145007bd7..9c990185e9f 100644
--- a/arch/x86/mach-voyager/voyager_smp.c
+++ b/arch/x86/mach-voyager/voyager_smp.c
@@ -63,11 +63,6 @@ static int voyager_extended_cpus = 1;
 /* Used for the invalidate map that's also checked in the spinlock */
 static volatile unsigned long smp_invalidate_needed;
 
-/* Bitmask of currently online CPUs - used by setup.c for
-   /proc/cpuinfo, visible externally but still physical */
-cpumask_t cpu_online_map = CPU_MASK_NONE;
-EXPORT_SYMBOL(cpu_online_map);
-
 /* Bitmask of CPUs present in the system - exported by i386_syms.c, used
  * by scheduler but indexed physically */
 cpumask_t phys_cpu_present_map = CPU_MASK_NONE;
@@ -218,8 +213,6 @@ static cpumask_t smp_commenced_mask = CPU_MASK_NONE;
 /* This is for the new dynamic CPU boot code */
 cpumask_t cpu_callin_map = CPU_MASK_NONE;
 cpumask_t cpu_callout_map = CPU_MASK_NONE;
-cpumask_t cpu_possible_map = CPU_MASK_NONE;
-EXPORT_SYMBOL(cpu_possible_map);
 
 /* The per processor IRQ masks (these are usually kept in sync) */
 static __u16 vic_irq_mask[NR_CPUS] __cacheline_aligned;
-- 
cgit v1.2.3


From 29c0177e6a4ac094302bed54a1d4bbb6b740a9ef Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Sat, 13 Dec 2008 21:20:25 +1030
Subject: cpumask: change cpumask_scnprintf, cpumask_parse_user, cpulist_parse,
 and cpulist_scnprintf to take pointers.

Impact: change calling convention of existing cpumask APIs

Most cpumask functions started with cpus_: these have been replaced by
cpumask_ ones which take struct cpumask pointers as expected.

These four functions don't have good replacement names; fortunately
they're rarely used, so we just change them over.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Mike Travis <travis@sgi.com>
Acked-by: Ingo Molnar <mingo@elte.hu>
Cc: paulus@samba.org
Cc: mingo@redhat.com
Cc: tony.luck@intel.com
Cc: ralf@linux-mips.org
Cc: Greg Kroah-Hartman <gregkh@suse.de>
Cc: cl@linux-foundation.org
Cc: srostedt@redhat.com
---
 arch/x86/kernel/cpu/intel_cacheinfo.c | 4 ++--
 arch/x86/kernel/setup_percpu.c        | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c
index 3f46afbb1cf..43ea612d3e9 100644
--- a/arch/x86/kernel/cpu/intel_cacheinfo.c
+++ b/arch/x86/kernel/cpu/intel_cacheinfo.c
@@ -626,8 +626,8 @@ static ssize_t show_shared_cpu_map_func(struct _cpuid4_info *this_leaf,
 		cpumask_t *mask = &this_leaf->shared_cpu_map;
 
 		n = type?
-			cpulist_scnprintf(buf, len-2, *mask):
-			cpumask_scnprintf(buf, len-2, *mask);
+			cpulist_scnprintf(buf, len-2, mask) :
+			cpumask_scnprintf(buf, len-2, mask);
 		buf[n++] = '\n';
 		buf[n] = '\0';
 	}
diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c
index ae0c0d3bb77..1c2084291f9 100644
--- a/arch/x86/kernel/setup_percpu.c
+++ b/arch/x86/kernel/setup_percpu.c
@@ -282,7 +282,7 @@ static void __cpuinit numa_set_cpumask(int cpu, int enable)
 	else
 		cpu_clear(cpu, *mask);
 
-	cpulist_scnprintf(buf, sizeof(buf), *mask);
+	cpulist_scnprintf(buf, sizeof(buf), mask);
 	printk(KERN_DEBUG "%s cpu %d node %d: mask now %s\n",
 		enable? "numa_add_cpu":"numa_remove_cpu", cpu, node, buf);
  }
-- 
cgit v1.2.3


From 0de26520c7cabf36e1de090ea8092f011a6106ce Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Sat, 13 Dec 2008 21:20:26 +1030
Subject: cpumask: make irq_set_affinity() take a const struct cpumask

Impact: change existing irq_chip API

Not much point with gentle transition here: the struct irq_chip's
setaffinity method signature needs to change.

Fortunately, not widely used code, but hits a few architectures.

Note: In irq_select_affinity() I save a temporary in by mangling
irq_desc[irq].affinity directly.  Ingo, does this break anything?

(Folded in fix from KOSAKI Motohiro)

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Mike Travis <travis@sgi.com>
Reviewed-by: Grant Grundler <grundler@parisc-linux.org>
Acked-by: Ingo Molnar <mingo@redhat.com>
Cc: ralf@linux-mips.org
Cc: grundler@parisc-linux.org
Cc: jeremy@xensource.com
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
---
 arch/x86/kernel/hpet.c    |  4 +--
 arch/x86/kernel/io_apic.c | 81 +++++++++++++++++++++++------------------------
 arch/x86/kernel/irq_32.c  |  2 +-
 arch/x86/kernel/irq_64.c  |  2 +-
 4 files changed, 43 insertions(+), 46 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index 067d8de913f..940f25851e1 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -301,7 +301,7 @@ static void hpet_set_mode(enum clock_event_mode mode,
 			struct hpet_dev *hdev = EVT_TO_HPET_DEV(evt);
 			hpet_setup_msi_irq(hdev->irq);
 			disable_irq(hdev->irq);
-			irq_set_affinity(hdev->irq, cpumask_of_cpu(hdev->cpu));
+			irq_set_affinity(hdev->irq, cpumask_of(hdev->cpu));
 			enable_irq(hdev->irq);
 		}
 		break;
@@ -449,7 +449,7 @@ static int hpet_setup_irq(struct hpet_dev *dev)
 		return -1;
 
 	disable_irq(dev->irq);
-	irq_set_affinity(dev->irq, cpumask_of_cpu(dev->cpu));
+	irq_set_affinity(dev->irq, cpumask_of(dev->cpu));
 	enable_irq(dev->irq);
 
 	printk(KERN_DEBUG "hpet: %s irq %d for MSI\n",
diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c
index 9043251210f..1184210e6d0 100644
--- a/arch/x86/kernel/io_apic.c
+++ b/arch/x86/kernel/io_apic.c
@@ -361,7 +361,8 @@ static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, u8 vector)
 
 static int assign_irq_vector(int irq, cpumask_t mask);
 
-static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask)
+static void set_ioapic_affinity_irq(unsigned int irq,
+				    const struct cpumask *mask)
 {
 	struct irq_cfg *cfg;
 	unsigned long flags;
@@ -369,15 +370,14 @@ static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask)
 	cpumask_t tmp;
 	struct irq_desc *desc;
 
-	cpus_and(tmp, mask, cpu_online_map);
-	if (cpus_empty(tmp))
+	if (!cpumask_intersects(mask, cpu_online_mask))
 		return;
 
 	cfg = irq_cfg(irq);
-	if (assign_irq_vector(irq, mask))
+	if (assign_irq_vector(irq, *mask))
 		return;
 
-	cpus_and(tmp, cfg->domain, mask);
+	cpumask_and(&tmp, &cfg->domain, mask);
 	dest = cpu_mask_to_apicid(tmp);
 	/*
 	 * Only the high 8 bits are valid.
@@ -387,7 +387,7 @@ static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask)
 	desc = irq_to_desc(irq);
 	spin_lock_irqsave(&ioapic_lock, flags);
 	__target_IO_APIC_irq(irq, dest, cfg->vector);
-	desc->affinity = mask;
+	cpumask_copy(&desc->affinity, mask);
 	spin_unlock_irqrestore(&ioapic_lock, flags);
 }
 #endif /* CONFIG_SMP */
@@ -2189,7 +2189,7 @@ static void ir_irq_migration(struct work_struct *work)
 				continue;
 			}
 
-			desc->chip->set_affinity(irq, desc->pending_mask);
+			desc->chip->set_affinity(irq, &desc->pending_mask);
 			spin_unlock_irqrestore(&desc->lock, flags);
 		}
 	}
@@ -2198,18 +2198,19 @@ static void ir_irq_migration(struct work_struct *work)
 /*
  * Migrates the IRQ destination in the process context.
  */
-static void set_ir_ioapic_affinity_irq(unsigned int irq, cpumask_t mask)
+static void set_ir_ioapic_affinity_irq(unsigned int irq,
+				       const struct cpumask *mask)
 {
 	struct irq_desc *desc = irq_to_desc(irq);
 
 	if (desc->status & IRQ_LEVEL) {
 		desc->status |= IRQ_MOVE_PENDING;
-		desc->pending_mask = mask;
+		cpumask_copy(&desc->pending_mask, mask);
 		migrate_irq_remapped_level(irq);
 		return;
 	}
 
-	migrate_ioapic_irq(irq, mask);
+	migrate_ioapic_irq(irq, *mask);
 }
 #endif
 
@@ -3027,7 +3028,7 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_ms
 }
 
 #ifdef CONFIG_SMP
-static void set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
+static void set_msi_irq_affinity(unsigned int irq, const struct cpumask *mask)
 {
 	struct irq_cfg *cfg;
 	struct msi_msg msg;
@@ -3035,15 +3036,14 @@ static void set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
 	cpumask_t tmp;
 	struct irq_desc *desc;
 
-	cpus_and(tmp, mask, cpu_online_map);
-	if (cpus_empty(tmp))
+	if (!cpumask_intersects(mask, cpu_online_mask))
 		return;
 
-	if (assign_irq_vector(irq, mask))
+	if (assign_irq_vector(irq, *mask))
 		return;
 
 	cfg = irq_cfg(irq);
-	cpus_and(tmp, cfg->domain, mask);
+	cpumask_and(&tmp, &cfg->domain, mask);
 	dest = cpu_mask_to_apicid(tmp);
 
 	read_msi_msg(irq, &msg);
@@ -3055,7 +3055,7 @@ static void set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
 
 	write_msi_msg(irq, &msg);
 	desc = irq_to_desc(irq);
-	desc->affinity = mask;
+	cpumask_copy(&desc->affinity, mask);
 }
 
 #ifdef CONFIG_INTR_REMAP
@@ -3063,7 +3063,8 @@ static void set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
  * Migrate the MSI irq to another cpumask. This migration is
  * done in the process context using interrupt-remapping hardware.
  */
-static void ir_set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
+static void ir_set_msi_irq_affinity(unsigned int irq,
+				    const struct cpumask *mask)
 {
 	struct irq_cfg *cfg;
 	unsigned int dest;
@@ -3071,18 +3072,17 @@ static void ir_set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
 	struct irte irte;
 	struct irq_desc *desc;
 
-	cpus_and(tmp, mask, cpu_online_map);
-	if (cpus_empty(tmp))
+	if (!cpumask_intersects(mask, cpu_online_mask))
 		return;
 
 	if (get_irte(irq, &irte))
 		return;
 
-	if (assign_irq_vector(irq, mask))
+	if (assign_irq_vector(irq, *mask))
 		return;
 
 	cfg = irq_cfg(irq);
-	cpus_and(tmp, cfg->domain, mask);
+	cpumask_and(&tmp, &cfg->domain, mask);
 	dest = cpu_mask_to_apicid(tmp);
 
 	irte.vector = cfg->vector;
@@ -3106,7 +3106,7 @@ static void ir_set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
 	}
 
 	desc = irq_to_desc(irq);
-	desc->affinity = mask;
+	cpumask_copy(&desc->affinity, mask);
 }
 #endif
 #endif /* CONFIG_SMP */
@@ -3308,7 +3308,7 @@ void arch_teardown_msi_irq(unsigned int irq)
 
 #ifdef CONFIG_DMAR
 #ifdef CONFIG_SMP
-static void dmar_msi_set_affinity(unsigned int irq, cpumask_t mask)
+static void dmar_msi_set_affinity(unsigned int irq, const struct cpumask *mask)
 {
 	struct irq_cfg *cfg;
 	struct msi_msg msg;
@@ -3316,15 +3316,14 @@ static void dmar_msi_set_affinity(unsigned int irq, cpumask_t mask)
 	cpumask_t tmp;
 	struct irq_desc *desc;
 
-	cpus_and(tmp, mask, cpu_online_map);
-	if (cpus_empty(tmp))
+	if (!cpumask_intersects(mask, cpu_online_mask))
 		return;
 
-	if (assign_irq_vector(irq, mask))
+	if (assign_irq_vector(irq, *mask))
 		return;
 
 	cfg = irq_cfg(irq);
-	cpus_and(tmp, cfg->domain, mask);
+	cpumask_and(&tmp, &cfg->domain, mask);
 	dest = cpu_mask_to_apicid(tmp);
 
 	dmar_msi_read(irq, &msg);
@@ -3336,7 +3335,7 @@ static void dmar_msi_set_affinity(unsigned int irq, cpumask_t mask)
 
 	dmar_msi_write(irq, &msg);
 	desc = irq_to_desc(irq);
-	desc->affinity = mask;
+	cpumask_copy(&desc->affinity, mask);
 }
 #endif /* CONFIG_SMP */
 
@@ -3369,7 +3368,7 @@ int arch_setup_dmar_msi(unsigned int irq)
 #ifdef CONFIG_HPET_TIMER
 
 #ifdef CONFIG_SMP
-static void hpet_msi_set_affinity(unsigned int irq, cpumask_t mask)
+static void hpet_msi_set_affinity(unsigned int irq, const struct cpumask *mask)
 {
 	struct irq_cfg *cfg;
 	struct irq_desc *desc;
@@ -3377,15 +3376,14 @@ static void hpet_msi_set_affinity(unsigned int irq, cpumask_t mask)
 	unsigned int dest;
 	cpumask_t tmp;
 
-	cpus_and(tmp, mask, cpu_online_map);
-	if (cpus_empty(tmp))
+	if (!cpumask_intersects(mask, cpu_online_mask))
 		return;
 
-	if (assign_irq_vector(irq, mask))
+	if (assign_irq_vector(irq, *mask))
 		return;
 
 	cfg = irq_cfg(irq);
-	cpus_and(tmp, cfg->domain, mask);
+	cpumask_and(&tmp, &cfg->domain, mask);
 	dest = cpu_mask_to_apicid(tmp);
 
 	hpet_msi_read(irq, &msg);
@@ -3397,7 +3395,7 @@ static void hpet_msi_set_affinity(unsigned int irq, cpumask_t mask)
 
 	hpet_msi_write(irq, &msg);
 	desc = irq_to_desc(irq);
-	desc->affinity = mask;
+	cpumask_copy(&desc->affinity, mask);
 }
 #endif /* CONFIG_SMP */
 
@@ -3451,27 +3449,26 @@ static void target_ht_irq(unsigned int irq, unsigned int dest, u8 vector)
 	write_ht_irq_msg(irq, &msg);
 }
 
-static void set_ht_irq_affinity(unsigned int irq, cpumask_t mask)
+static void set_ht_irq_affinity(unsigned int irq, const struct cpumask *mask)
 {
 	struct irq_cfg *cfg;
 	unsigned int dest;
 	cpumask_t tmp;
 	struct irq_desc *desc;
 
-	cpus_and(tmp, mask, cpu_online_map);
-	if (cpus_empty(tmp))
+	if (!cpumask_intersects(mask, cpu_online_mask))
 		return;
 
-	if (assign_irq_vector(irq, mask))
+	if (assign_irq_vector(irq, *mask))
 		return;
 
 	cfg = irq_cfg(irq);
-	cpus_and(tmp, cfg->domain, mask);
+	cpumask_and(&tmp, &cfg->domain, mask);
 	dest = cpu_mask_to_apicid(tmp);
 
 	target_ht_irq(irq, dest, cfg->vector);
 	desc = irq_to_desc(irq);
-	desc->affinity = mask;
+	cpumask_copy(&desc->affinity, mask);
 }
 #endif
 
@@ -3794,10 +3791,10 @@ void __init setup_ioapic_dest(void)
 
 #ifdef CONFIG_INTR_REMAP
 			if (intr_remapping_enabled)
-				set_ir_ioapic_affinity_irq(irq, mask);
+				set_ir_ioapic_affinity_irq(irq, &mask);
 			else
 #endif
-				set_ioapic_affinity_irq(irq, mask);
+				set_ioapic_affinity_irq(irq, &mask);
 		}
 
 	}
diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c
index a51382672de..87870a49be4 100644
--- a/arch/x86/kernel/irq_32.c
+++ b/arch/x86/kernel/irq_32.c
@@ -251,7 +251,7 @@ void fixup_irqs(cpumask_t map)
 			mask = map;
 		}
 		if (desc->chip->set_affinity)
-			desc->chip->set_affinity(irq, mask);
+			desc->chip->set_affinity(irq, &mask);
 		else if (desc->action && !(warned++))
 			printk("Cannot set affinity for irq %i\n", irq);
 	}
diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c
index 60eb84eb77a..7d37f847544 100644
--- a/arch/x86/kernel/irq_64.c
+++ b/arch/x86/kernel/irq_64.c
@@ -116,7 +116,7 @@ void fixup_irqs(cpumask_t map)
 			desc->chip->mask(irq);
 
 		if (desc->chip->set_affinity)
-			desc->chip->set_affinity(irq, mask);
+			desc->chip->set_affinity(irq, &mask);
 		else if (!(warned++))
 			set_affinity = 0;
 
-- 
cgit v1.2.3


From 320ab2b0b1e08e3805a3e1084a2f0eb1938d5d67 Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Sat, 13 Dec 2008 21:20:26 +1030
Subject: cpumask: convert struct clock_event_device to cpumask pointers.

Impact: change calling convention of existing clock_event APIs

struct clock_event_timer's cpumask field gets changed to take pointer,
as does the ->broadcast function.

Another single-patch change.  For safety, we BUG_ON() in
clockevents_register_device() if it's not set.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Cc: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/apic.c        | 8 ++++----
 arch/x86/kernel/hpet.c        | 4 ++--
 arch/x86/kernel/i8253.c       | 2 +-
 arch/x86/kernel/mfgpt_32.c    | 2 +-
 arch/x86/kernel/vmiclock_32.c | 2 +-
 arch/x86/lguest/boot.c        | 2 +-
 arch/x86/xen/time.c           | 2 +-
 7 files changed, 11 insertions(+), 11 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/apic.c b/arch/x86/kernel/apic.c
index 16f94879b52..b2cef49f308 100644
--- a/arch/x86/kernel/apic.c
+++ b/arch/x86/kernel/apic.c
@@ -141,7 +141,7 @@ static int lapic_next_event(unsigned long delta,
 			    struct clock_event_device *evt);
 static void lapic_timer_setup(enum clock_event_mode mode,
 			      struct clock_event_device *evt);
-static void lapic_timer_broadcast(cpumask_t mask);
+static void lapic_timer_broadcast(const struct cpumask *mask);
 static void apic_pm_activate(void);
 
 /*
@@ -453,10 +453,10 @@ static void lapic_timer_setup(enum clock_event_mode mode,
 /*
  * Local APIC timer broadcast function
  */
-static void lapic_timer_broadcast(cpumask_t mask)
+static void lapic_timer_broadcast(const struct cpumask *mask)
 {
 #ifdef CONFIG_SMP
-	send_IPI_mask(mask, LOCAL_TIMER_VECTOR);
+	send_IPI_mask(*mask, LOCAL_TIMER_VECTOR);
 #endif
 }
 
@@ -469,7 +469,7 @@ static void __cpuinit setup_APIC_timer(void)
 	struct clock_event_device *levt = &__get_cpu_var(lapic_events);
 
 	memcpy(levt, &lapic_clockevent, sizeof(*levt));
-	levt->cpumask = cpumask_of_cpu(smp_processor_id());
+	levt->cpumask = cpumask_of(smp_processor_id());
 
 	clockevents_register_device(levt);
 }
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index 940f25851e1..e76d7e27297 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -246,7 +246,7 @@ static void hpet_legacy_clockevent_register(void)
 	 * Start hpet with the boot cpu mask and make it
 	 * global after the IO_APIC has been initialized.
 	 */
-	hpet_clockevent.cpumask = cpumask_of_cpu(smp_processor_id());
+	hpet_clockevent.cpumask = cpumask_of(smp_processor_id());
 	clockevents_register_device(&hpet_clockevent);
 	global_clock_event = &hpet_clockevent;
 	printk(KERN_DEBUG "hpet clockevent registered\n");
@@ -500,7 +500,7 @@ static void init_one_hpet_msi_clockevent(struct hpet_dev *hdev, int cpu)
 	/* 5 usec minimum reprogramming delta. */
 	evt->min_delta_ns = 5000;
 
-	evt->cpumask = cpumask_of_cpu(hdev->cpu);
+	evt->cpumask = cpumask_of(hdev->cpu);
 	clockevents_register_device(evt);
 }
 
diff --git a/arch/x86/kernel/i8253.c b/arch/x86/kernel/i8253.c
index c1b5e3ece1f..10f92fb532f 100644
--- a/arch/x86/kernel/i8253.c
+++ b/arch/x86/kernel/i8253.c
@@ -114,7 +114,7 @@ void __init setup_pit_timer(void)
 	 * Start pit with the boot cpu mask and make it global after the
 	 * IO_APIC has been initialized.
 	 */
-	pit_clockevent.cpumask = cpumask_of_cpu(smp_processor_id());
+	pit_clockevent.cpumask = cpumask_of(smp_processor_id());
 	pit_clockevent.mult = div_sc(CLOCK_TICK_RATE, NSEC_PER_SEC,
 				     pit_clockevent.shift);
 	pit_clockevent.max_delta_ns =
diff --git a/arch/x86/kernel/mfgpt_32.c b/arch/x86/kernel/mfgpt_32.c
index 3b599518c32..c12314c9e86 100644
--- a/arch/x86/kernel/mfgpt_32.c
+++ b/arch/x86/kernel/mfgpt_32.c
@@ -287,7 +287,7 @@ static struct clock_event_device mfgpt_clockevent = {
 	.set_mode = mfgpt_set_mode,
 	.set_next_event = mfgpt_next_event,
 	.rating = 250,
-	.cpumask = CPU_MASK_ALL,
+	.cpumask = cpu_all_mask,
 	.shift = 32
 };
 
diff --git a/arch/x86/kernel/vmiclock_32.c b/arch/x86/kernel/vmiclock_32.c
index 254ee07f863..c4c1f9e0940 100644
--- a/arch/x86/kernel/vmiclock_32.c
+++ b/arch/x86/kernel/vmiclock_32.c
@@ -226,7 +226,7 @@ static void __devinit vmi_time_init_clockevent(void)
 	/* Upper bound is clockevent's use of ulong for cycle deltas. */
 	evt->max_delta_ns = clockevent_delta2ns(ULONG_MAX, evt);
 	evt->min_delta_ns = clockevent_delta2ns(1, evt);
-	evt->cpumask = cpumask_of_cpu(cpu);
+	evt->cpumask = cpumask_of(cpu);
 
 	printk(KERN_WARNING "vmi: registering clock event %s. mult=%lu shift=%u\n",
 	       evt->name, evt->mult, evt->shift);
diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c
index a5d8e1ace1c..104c8220a38 100644
--- a/arch/x86/lguest/boot.c
+++ b/arch/x86/lguest/boot.c
@@ -737,7 +737,7 @@ static void lguest_time_init(void)
 
 	/* We can't set cpumask in the initializer: damn C limitations!  Set it
 	 * here and register our timer device. */
-	lguest_clockevent.cpumask = cpumask_of_cpu(0);
+	lguest_clockevent.cpumask = cpumask_of(0);
 	clockevents_register_device(&lguest_clockevent);
 
 	/* Finally, we unblock the timer interrupt. */
diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c
index c9f7cda48ed..65d75a6be0b 100644
--- a/arch/x86/xen/time.c
+++ b/arch/x86/xen/time.c
@@ -437,7 +437,7 @@ void xen_setup_timer(int cpu)
 	evt = &per_cpu(xen_clock_events, cpu);
 	memcpy(evt, xen_clockevent, sizeof(*evt));
 
-	evt->cpumask = cpumask_of_cpu(cpu);
+	evt->cpumask = cpumask_of(cpu);
 	evt->irq = irq;
 
 	setup_runstate_info(cpu);
-- 
cgit v1.2.3


From bacbe9994541c70aa3abd1a013ac738e58d4bfb2 Mon Sep 17 00:00:00 2001
From: Janne Kulmala <janne.t.kulmala@tut.fi>
Date: Tue, 16 Dec 2008 13:39:57 +0200
Subject: x86: enable HPET on Fujitsu u9200

Impact: auto-enable HPET on Fujitsu u9200

HPET timer is listed in the ACPI table, but needs a quirk entry in order to
work. Unfortunately, the quirk code runs after first HPET hpet_enable() which
has already determined that the timer doesn't work (reads 0xFFFFFFFF). This
patch allows hpet_enable() to be called again after running the quirk code.

Signed-off-by: Janne Kulmala <janne.t.kulmala@tut.fi>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/hpet.c   | 2 +-
 arch/x86/kernel/quirks.c | 2 ++
 2 files changed, 3 insertions(+), 1 deletion(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index 067d8de913f..84089dc8fd1 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -811,7 +811,7 @@ int __init hpet_enable(void)
 
 out_nohpet:
 	hpet_clear_mapping();
-	boot_hpet_disable = 1;
+	hpet_address = 0;
 	return 0;
 }
 
diff --git a/arch/x86/kernel/quirks.c b/arch/x86/kernel/quirks.c
index 67465ed8931..309949e9e1c 100644
--- a/arch/x86/kernel/quirks.c
+++ b/arch/x86/kernel/quirks.c
@@ -168,6 +168,8 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICH7_31,
 			 ich_force_enable_hpet);
 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICH8_1,
 			 ich_force_enable_hpet);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICH8_4,
+			 ich_force_enable_hpet);
 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICH9_7,
 			 ich_force_enable_hpet);
 
-- 
cgit v1.2.3


From a79b7a2a758c39315344f0d86b5adb21d90d786e Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Tue, 16 Dec 2008 12:17:25 -0800
Subject: x86: remove unused iommu_nr_pages

Impact: cleanup, remove dead code

The last usage was removed by the patch set culminating in

| commit e3c449f526cebb8d287241c7e82faafd9709668b
| Author: Joerg Roedel <joerg.roedel@amd.com>
| Date:   Wed Oct 15 22:02:11 2008 -0700
|
|     x86, AMD IOMMU: convert driver to generic iommu_num_pages function

Signed-off-by: Ian Campbell <ian.campbell@citrix.com>
Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/include/asm/iommu.h | 2 --
 arch/x86/kernel/pci-dma.c    | 7 -------
 2 files changed, 9 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/iommu.h b/arch/x86/include/asm/iommu.h
index 0b500c5b644..35276ec5925 100644
--- a/arch/x86/include/asm/iommu.h
+++ b/arch/x86/include/asm/iommu.h
@@ -7,8 +7,6 @@ extern struct dma_mapping_ops nommu_dma_ops;
 extern int force_iommu, no_iommu;
 extern int iommu_detected;
 
-extern unsigned long iommu_nr_pages(unsigned long addr, unsigned long len);
-
 /* 10 seconds */
 #define DMAR_OPERATION_TIMEOUT ((cycles_t) tsc_khz*10*1000)
 
diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c
index 19262482021..e150ad4f0cc 100644
--- a/arch/x86/kernel/pci-dma.c
+++ b/arch/x86/kernel/pci-dma.c
@@ -125,13 +125,6 @@ void __init pci_iommu_alloc(void)
 	pci_swiotlb_init();
 }
 
-unsigned long iommu_nr_pages(unsigned long addr, unsigned long len)
-{
-	unsigned long size = roundup((addr & ~PAGE_MASK) + len, PAGE_SIZE);
-
-	return size >> PAGE_SHIFT;
-}
-EXPORT_SYMBOL(iommu_nr_pages);
 #endif
 
 void *dma_generic_alloc_coherent(struct device *dev, size_t size,
-- 
cgit v1.2.3


From 39c04b55240342d0742ac48538d3d8c71bfc0a94 Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Tue, 16 Dec 2008 12:32:23 -0800
Subject: x86: make sure we really have an hpet mapping before using it

Impact: prepare the hpet code for Xen dom0 booting

When booting in Xen dom0, the hpet isn't really accessible, so make
sure the mapping is non-NULL before use.

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/hpet.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index 84089dc8fd1..a1f6ed5e1a0 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -834,10 +834,11 @@ static __init int hpet_late_init(void)
 
 		hpet_address = force_hpet_address;
 		hpet_enable();
-		if (!hpet_virt_address)
-			return -ENODEV;
 	}
 
+	if (!hpet_virt_address)
+		return -ENODEV;
+
 	hpet_reserve_platform_timers(hpet_readl(HPET_ID));
 
 	for_each_online_cpu(cpu) {
-- 
cgit v1.2.3


From c8cae544bba6aee0f5cb0756dbab1a71d2c68737 Mon Sep 17 00:00:00 2001
From: Mike Travis <travis@sgi.com>
Date: Tue, 16 Dec 2008 09:13:11 -0800
Subject: x86: fix build error with post-merge of tip/cpus4096 and
 rr-for-ingo/master.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Ingo Molnar wrote:

> allyes64 build failure:
>
> arch/x86/kernel/io_apic.c: In function â€˜set_ir_ioapic_affinity_irq_descâ€™:
> arch/x86/kernel/io_apic.c:2295: error: incompatible type for argument 2 of
> â€˜migrate_ioapic_irq_descâ€™
> arch/x86/kernel/io_apic.c: In function â€˜ir_set_msi_irq_affinityâ€™:
> arch/x86/kernel/io_apic.c:3205: error: incompatible type for argument 2 of
> â€˜set_extra_move_descâ€™
> make[1]: *** wait: No child processes.  Stop.

Here's a small patch to correct the build error with the post-merge tree.
Built and boot-tested.  I'll will reset the follow on patches in my brand
new git tree to accommodate this change.

Fix two references in io_apic.c that were incorrect.

Signed-off-by: Mike Travis <travis@sgi.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/io_apic.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c
index d7f0993b805..3d7d0d55253 100644
--- a/arch/x86/kernel/io_apic.c
+++ b/arch/x86/kernel/io_apic.c
@@ -2292,7 +2292,7 @@ static void set_ir_ioapic_affinity_irq_desc(struct irq_desc *desc,
 		return;
 	}
 
-	migrate_ioapic_irq_desc(desc, mask);
+	migrate_ioapic_irq_desc(desc, *mask);
 }
 static void set_ir_ioapic_affinity_irq(unsigned int irq,
 				       const struct cpumask *mask)
@@ -3203,7 +3203,7 @@ static void ir_set_msi_irq_affinity(unsigned int irq,
 	if (assign_irq_vector(irq, cfg, *mask))
 		return;
 
-	set_extra_move_desc(desc, mask);
+	set_extra_move_desc(desc, *mask);
 
 	cpumask_and(&tmp, &cfg->domain, mask);
 	dest = cpu_mask_to_apicid(tmp);
-- 
cgit v1.2.3


From 48a1b10aff588833b73994704c47bbd0deb73e9c Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yinghai@kernel.org>
Date: Thu, 11 Dec 2008 00:15:01 -0800
Subject: x86, sparseirq: move irq_desc according to smp_affinity, v7

Impact: improve NUMA handling by migrating irq_desc on smp_affinity changes

if CONFIG_NUMA_MIGRATE_IRQ_DESC is set:

-  make irq_desc to go with affinity aka irq_desc moving etc
-  call move_irq_desc in irq_complete_move()
-  legacy irq_desc is not moved, because they are allocated via static array

for logical apic mode, need to add move_desc_in_progress_in_same_domain,
otherwise it will not be moved ==> also could need two phases to get
irq_desc moved.

Signed-off-by: Yinghai Lu <yinghai@kernel.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/Kconfig          |   9 +++
 arch/x86/kernel/io_apic.c | 142 +++++++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 150 insertions(+), 1 deletion(-)

(limited to 'arch/x86')

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 8943c13502c..29073532f94 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -248,6 +248,15 @@ config SPARSE_IRQ
 
 	  If you don't know what to do here, say Y.
 
+config NUMA_MIGRATE_IRQ_DESC
+	bool "Move irq desc when changing irq smp_affinity"
+	depends on SPARSE_IRQ && SMP
+	default n
+	help
+	  This enables moving irq_desc to cpu/node that irq will use handled.
+
+	  If you don't know what to do here, say N.
+
 config X86_FIND_SMP_CONFIG
 	def_bool y
 	depends on X86_MPPARSE || X86_VOYAGER
diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c
index a1a2e070f31..bfe1245b1a3 100644
--- a/arch/x86/kernel/io_apic.c
+++ b/arch/x86/kernel/io_apic.c
@@ -141,6 +141,9 @@ struct irq_cfg {
 	unsigned move_cleanup_count;
 	u8 vector;
 	u8 move_in_progress : 1;
+#ifdef CONFIG_NUMA_MIGRATE_IRQ_DESC
+	u8 move_desc_pending : 1;
+#endif
 };
 
 /* irq_cfg is indexed by the sum of all RTEs in all I/O APICs. */
@@ -223,6 +226,121 @@ void arch_init_chip_data(struct irq_desc *desc, int cpu)
 	}
 }
 
+#ifdef CONFIG_NUMA_MIGRATE_IRQ_DESC
+
+static void
+init_copy_irq_2_pin(struct irq_cfg *old_cfg, struct irq_cfg *cfg, int cpu)
+{
+	struct irq_pin_list *old_entry, *head, *tail, *entry;
+
+	cfg->irq_2_pin = NULL;
+	old_entry = old_cfg->irq_2_pin;
+	if (!old_entry)
+		return;
+
+	entry = get_one_free_irq_2_pin(cpu);
+	if (!entry)
+		return;
+
+	entry->apic	= old_entry->apic;
+	entry->pin	= old_entry->pin;
+	head		= entry;
+	tail		= entry;
+	old_entry	= old_entry->next;
+	while (old_entry) {
+		entry = get_one_free_irq_2_pin(cpu);
+		if (!entry) {
+			entry = head;
+			while (entry) {
+				head = entry->next;
+				kfree(entry);
+				entry = head;
+			}
+			/* still use the old one */
+			return;
+		}
+		entry->apic	= old_entry->apic;
+		entry->pin	= old_entry->pin;
+		tail->next	= entry;
+		tail		= entry;
+		old_entry	= old_entry->next;
+	}
+
+	tail->next = NULL;
+	cfg->irq_2_pin = head;
+}
+
+static void free_irq_2_pin(struct irq_cfg *old_cfg, struct irq_cfg *cfg)
+{
+	struct irq_pin_list *entry, *next;
+
+	if (old_cfg->irq_2_pin == cfg->irq_2_pin)
+		return;
+
+	entry = old_cfg->irq_2_pin;
+
+	while (entry) {
+		next = entry->next;
+		kfree(entry);
+		entry = next;
+	}
+	old_cfg->irq_2_pin = NULL;
+}
+
+void arch_init_copy_chip_data(struct irq_desc *old_desc,
+				 struct irq_desc *desc, int cpu)
+{
+	struct irq_cfg *cfg;
+	struct irq_cfg *old_cfg;
+
+	cfg = get_one_free_irq_cfg(cpu);
+
+	if (!cfg)
+		return;
+
+	desc->chip_data = cfg;
+
+	old_cfg = old_desc->chip_data;
+
+	memcpy(cfg, old_cfg, sizeof(struct irq_cfg));
+
+	init_copy_irq_2_pin(old_cfg, cfg, cpu);
+}
+
+static void free_irq_cfg(struct irq_cfg *old_cfg)
+{
+	kfree(old_cfg);
+}
+
+void arch_free_chip_data(struct irq_desc *old_desc, struct irq_desc *desc)
+{
+	struct irq_cfg *old_cfg, *cfg;
+
+	old_cfg = old_desc->chip_data;
+	cfg = desc->chip_data;
+
+	if (old_cfg == cfg)
+		return;
+
+	if (old_cfg) {
+		free_irq_2_pin(old_cfg, cfg);
+		free_irq_cfg(old_cfg);
+		old_desc->chip_data = NULL;
+	}
+}
+
+static void set_extra_move_desc(struct irq_desc *desc, cpumask_t mask)
+{
+	struct irq_cfg *cfg = desc->chip_data;
+
+	if (!cfg->move_in_progress) {
+		/* it means that domain is not changed */
+		if (!cpus_intersects(desc->affinity, mask))
+			cfg->move_desc_pending = 1;
+	}
+}
+#endif
+
 #else
 static struct irq_cfg *irq_cfg(unsigned int irq)
 {
@@ -231,9 +349,11 @@ static struct irq_cfg *irq_cfg(unsigned int irq)
 
 #endif
 
+#ifndef CONFIG_NUMA_MIGRATE_IRQ_DESC
 static inline void set_extra_move_desc(struct irq_desc *desc, cpumask_t mask)
 {
 }
+#endif
 
 struct io_apic {
 	unsigned int index;
@@ -2346,14 +2466,34 @@ static void irq_complete_move(struct irq_desc **descp)
 	struct irq_cfg *cfg = desc->chip_data;
 	unsigned vector, me;
 
-	if (likely(!cfg->move_in_progress))
+	if (likely(!cfg->move_in_progress)) {
+#ifdef CONFIG_NUMA_MIGRATE_IRQ_DESC
+		if (likely(!cfg->move_desc_pending))
+			return;
+
+		/* domain is not change, but affinity is changed */
+		me = smp_processor_id();
+		if (cpu_isset(me, desc->affinity)) {
+			*descp = desc = move_irq_desc(desc, me);
+			/* get the new one */
+			cfg = desc->chip_data;
+			cfg->move_desc_pending = 0;
+		}
+#endif
 		return;
+	}
 
 	vector = ~get_irq_regs()->orig_ax;
 	me = smp_processor_id();
 	if ((vector == cfg->vector) && cpu_isset(me, cfg->domain)) {
 		cpumask_t cleanup_mask;
 
+#ifdef CONFIG_NUMA_MIGRATE_IRQ_DESC
+		*descp = desc = move_irq_desc(desc, me);
+		/* get the new one */
+		cfg = desc->chip_data;
+#endif
+
 		cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map);
 		cfg->move_cleanup_count = cpus_weight(cleanup_mask);
 		send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
-- 
cgit v1.2.3


From 17483a1f34c970e6c2cb8c082d4441bfabbe88a9 Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yinghai@kernel.org>
Date: Fri, 12 Dec 2008 13:14:18 -0800
Subject: sparseirq: fix !SMP building, #2

Impact: build fix

make intr_remapping.c to include smp.h, so could use boot_cpu_id there

also remove old change that disabling sparseirq with !SMP

Signed-off-by: Yinghai Lu <yinghai@kernel.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/x86')

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 29073532f94..60a008857a3 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -240,7 +240,7 @@ config X86_HAS_BOOT_CPU_ID
 
 config SPARSE_IRQ
 	bool "Support sparse irq numbering"
-	depends on (PCI_MSI || HT_IRQ) && SMP
+	depends on PCI_MSI || HT_IRQ
 	default y
 	help
 	  This enables support for sparse irq, esp for msi/msi-x. You may need
-- 
cgit v1.2.3


From 36f5101a60de8f79c0d1ca06e50660bf5129e02c Mon Sep 17 00:00:00 2001
From: Mike Travis <travis@sgi.com>
Date: Tue, 16 Dec 2008 17:33:51 -0800
Subject: x86: enable MAXSMP

Impact: activates new off-stack cpumask code on MAXSMP (non-default) x86 configs

Set MAXSMP to enable CONFIG_CPUMASK_OFFSTACK which moves cpumask's off
the stack (and in structs) when using cpumask_var_t.

Signed-off-by: Mike Travis <travis@sgi.com>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Acked-by: Ingo Molnar <mingo@elte.hy>
---
 arch/x86/Kconfig | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index d99eeb7915c..1fd44352f27 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -591,16 +591,17 @@ config IOMMU_HELPER
 
 config MAXSMP
 	bool "Configure Maximum number of SMP Processors and NUMA Nodes"
-	depends on X86_64 && SMP && BROKEN
+	depends on X86_64 && SMP && DEBUG_KERNEL && EXPERIMENTAL
+	select CPUMASK_OFFSTACK
 	default n
 	help
 	  Configure maximum number of CPUS and NUMA Nodes for this architecture.
 	  If unsure, say N.
 
 config NR_CPUS
-	int "Maximum number of CPUs (2-512)" if !MAXSMP
-	range 2 512
 	depends on SMP
+	int "Maximum number of CPUs" if SMP && !MAXSMP
+	range 2 512 if SMP && !MAXSMP
 	default "4096" if MAXSMP
 	default "32" if X86_NUMAQ || X86_SUMMIT || X86_BIGSMP || X86_ES7000
 	default "8"
-- 
cgit v1.2.3


From e7986739a76cde5079da08809d8bbc6878387ae0 Mon Sep 17 00:00:00 2001
From: Mike Travis <travis@sgi.com>
Date: Tue, 16 Dec 2008 17:33:52 -0800
Subject: x86 smp: modify send_IPI_mask interface to accept cpumask_t pointers

Impact: cleanup, change parameter passing

  * Change genapic interfaces to accept cpumask_t pointers where possible.

  * Modify external callers to use cpumask_t pointers in function calls.

  * Create new send_IPI_mask_allbutself which is the same as the
    send_IPI_mask functions but removes smp_processor_id() from list.
    This removes another common need for a temporary cpumask_t variable.

  * Functions that used a temp cpumask_t variable for:

	cpumask_t allbutme = cpu_online_map;

	cpu_clear(smp_processor_id(), allbutme);
	if (!cpus_empty(allbutme))
		...

    become:

	if (!cpus_equal(cpu_online_map, cpumask_of_cpu(cpu)))
		...

  * Other minor code optimizations (like using cpus_clear instead of
    CPU_MASK_NONE, etc.)

Applies to linux-2.6.tip/master.

Signed-off-by: Mike Travis <travis@sgi.com>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Acked-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/include/asm/bigsmp/apic.h            |  14 +--
 arch/x86/include/asm/bigsmp/ipi.h             |   9 +-
 arch/x86/include/asm/es7000/apic.h            |  38 +++----
 arch/x86/include/asm/es7000/ipi.h             |   9 +-
 arch/x86/include/asm/genapic_32.h             |   9 +-
 arch/x86/include/asm/genapic_64.h             |  11 +-
 arch/x86/include/asm/ipi.h                    |  21 +++-
 arch/x86/include/asm/mach-default/mach_apic.h |  17 ++-
 arch/x86/include/asm/mach-default/mach_ipi.h  |  18 ++--
 arch/x86/include/asm/numaq/apic.h             |   6 +-
 arch/x86/include/asm/numaq/ipi.h              |   9 +-
 arch/x86/include/asm/smp.h                    |   6 +-
 arch/x86/include/asm/summit/apic.h            |  12 +--
 arch/x86/include/asm/summit/ipi.h             |   9 +-
 arch/x86/kernel/apic.c                        |   6 +-
 arch/x86/kernel/crash.c                       |   5 +-
 arch/x86/kernel/genapic_flat_64.c             |  76 +++++++++-----
 arch/x86/kernel/genx2apic_cluster.c           |  60 +++++++----
 arch/x86/kernel/genx2apic_phys.c              |  55 +++++++---
 arch/x86/kernel/genx2apic_uv_x.c              |  43 +++++---
 arch/x86/kernel/io_apic.c                     | 145 +++++++++++++-------------
 arch/x86/kernel/ipi.c                         |  26 +++--
 arch/x86/kernel/smp.c                         |   8 +-
 arch/x86/kernel/tlb_32.c                      |   2 +-
 arch/x86/kernel/tlb_64.c                      |   2 +-
 arch/x86/mach-generic/bigsmp.c                |   5 +-
 arch/x86/mach-generic/es7000.c                |   5 +-
 arch/x86/mach-generic/numaq.c                 |   5 +-
 arch/x86/mach-generic/summit.c                |   5 +-
 arch/x86/xen/smp.c                            |  17 ++-
 30 files changed, 380 insertions(+), 273 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/bigsmp/apic.h b/arch/x86/include/asm/bigsmp/apic.h
index ce547f24a1c..dc6225ca48a 100644
--- a/arch/x86/include/asm/bigsmp/apic.h
+++ b/arch/x86/include/asm/bigsmp/apic.h
@@ -9,12 +9,12 @@ static inline int apic_id_registered(void)
 	return (1);
 }
 
-static inline cpumask_t target_cpus(void)
+static inline const cpumask_t *target_cpus(void)
 {
 #ifdef CONFIG_SMP
-        return cpu_online_map;
+	return &cpu_online_map;
 #else
-        return cpumask_of_cpu(0);
+	return &cpumask_of_cpu(0);
 #endif
 }
 
@@ -79,7 +79,7 @@ static inline int apicid_to_node(int logical_apicid)
 
 static inline int cpu_present_to_apicid(int mps_cpu)
 {
-	if (mps_cpu < NR_CPUS)
+	if (mps_cpu < nr_cpu_ids)
 		return (int) per_cpu(x86_bios_cpu_apicid, mps_cpu);
 
 	return BAD_APICID;
@@ -94,7 +94,7 @@ extern u8 cpu_2_logical_apicid[];
 /* Mapping from cpu number to logical apicid */
 static inline int cpu_to_logical_apicid(int cpu)
 {
-	if (cpu >= NR_CPUS)
+	if (cpu >= nr_cpu_ids)
 		return BAD_APICID;
 	return cpu_physical_id(cpu);
 }
@@ -119,12 +119,12 @@ static inline int check_phys_apicid_present(int boot_cpu_physical_apicid)
 }
 
 /* As we are using single CPU as destination, pick only one CPU here */
-static inline unsigned int cpu_mask_to_apicid(cpumask_t cpumask)
+static inline unsigned int cpu_mask_to_apicid(const cpumask_t *cpumask)
 {
 	int cpu;
 	int apicid;	
 
-	cpu = first_cpu(cpumask);
+	cpu = first_cpu(*cpumask);
 	apicid = cpu_to_logical_apicid(cpu);
 	return apicid;
 }
diff --git a/arch/x86/include/asm/bigsmp/ipi.h b/arch/x86/include/asm/bigsmp/ipi.h
index 9404c535b7e..63553e9f22b 100644
--- a/arch/x86/include/asm/bigsmp/ipi.h
+++ b/arch/x86/include/asm/bigsmp/ipi.h
@@ -1,9 +1,10 @@
 #ifndef __ASM_MACH_IPI_H
 #define __ASM_MACH_IPI_H
 
-void send_IPI_mask_sequence(cpumask_t mask, int vector);
+void send_IPI_mask_sequence(const cpumask_t *mask, int vector);
+void send_IPI_mask_allbutself(const cpumask_t *mask, int vector);
 
-static inline void send_IPI_mask(cpumask_t mask, int vector)
+static inline void send_IPI_mask(const cpumask_t *mask, int vector)
 {
 	send_IPI_mask_sequence(mask, vector);
 }
@@ -14,12 +15,12 @@ static inline void send_IPI_allbutself(int vector)
 	cpu_clear(smp_processor_id(), mask);
 
 	if (!cpus_empty(mask))
-		send_IPI_mask(mask, vector);
+		send_IPI_mask(&mask, vector);
 }
 
 static inline void send_IPI_all(int vector)
 {
-	send_IPI_mask(cpu_online_map, vector);
+	send_IPI_mask(&cpu_online_map, vector);
 }
 
 #endif /* __ASM_MACH_IPI_H */
diff --git a/arch/x86/include/asm/es7000/apic.h b/arch/x86/include/asm/es7000/apic.h
index e24ef876915..4cac0837bb4 100644
--- a/arch/x86/include/asm/es7000/apic.h
+++ b/arch/x86/include/asm/es7000/apic.h
@@ -9,14 +9,14 @@ static inline int apic_id_registered(void)
 	        return (1);
 }
 
-static inline cpumask_t target_cpus_cluster(void)
+static inline const cpumask_t *target_cpus_cluster(void)
 {
-	return CPU_MASK_ALL;
+	return &CPU_MASK_ALL;
 }
 
-static inline cpumask_t target_cpus(void)
+static inline const cpumask_t *target_cpus(void)
 {
-	return cpumask_of_cpu(smp_processor_id());
+	return &cpumask_of_cpu(smp_processor_id());
 }
 
 #define APIC_DFR_VALUE_CLUSTER		(APIC_DFR_CLUSTER)
@@ -80,9 +80,10 @@ extern int apic_version [MAX_APICS];
 static inline void setup_apic_routing(void)
 {
 	int apic = per_cpu(x86_bios_cpu_apicid, smp_processor_id());
-	printk("Enabling APIC mode:  %s.  Using %d I/O APICs, target cpus %lx\n",
+	printk("Enabling APIC mode:  %s. Using %d I/O APICs, target cpus %lx\n",
 		(apic_version[apic] == 0x14) ?
-		"Physical Cluster" : "Logical Cluster", nr_ioapics, cpus_addr(target_cpus())[0]);
+			"Physical Cluster" : "Logical Cluster",
+			nr_ioapics, cpus_addr(*target_cpus())[0]);
 }
 
 static inline int multi_timer_check(int apic, int irq)
@@ -100,7 +101,7 @@ static inline int cpu_present_to_apicid(int mps_cpu)
 {
 	if (!mps_cpu)
 		return boot_cpu_physical_apicid;
-	else if (mps_cpu < NR_CPUS)
+	else if (mps_cpu < nr_cpu_ids)
 		return (int) per_cpu(x86_bios_cpu_apicid, mps_cpu);
 	else
 		return BAD_APICID;
@@ -120,9 +121,9 @@ extern u8 cpu_2_logical_apicid[];
 static inline int cpu_to_logical_apicid(int cpu)
 {
 #ifdef CONFIG_SMP
-       if (cpu >= NR_CPUS)
-	       return BAD_APICID;
-       return (int)cpu_2_logical_apicid[cpu];
+	if (cpu >= nr_cpu_ids)
+		return BAD_APICID;
+	return (int)cpu_2_logical_apicid[cpu];
 #else
 	return logical_smp_processor_id();
 #endif
@@ -146,14 +147,15 @@ static inline int check_phys_apicid_present(int cpu_physical_apicid)
 	return (1);
 }
 
-static inline unsigned int cpu_mask_to_apicid_cluster(cpumask_t cpumask)
+static inline unsigned int
+cpu_mask_to_apicid_cluster(const struct cpumask *cpumask)
 {
 	int num_bits_set;
 	int cpus_found = 0;
 	int cpu;
 	int apicid;
 
-	num_bits_set = cpus_weight(cpumask);
+	num_bits_set = cpumask_weight(cpumask);
 	/* Return id to all */
 	if (num_bits_set == NR_CPUS)
 		return 0xFF;
@@ -161,10 +163,10 @@ static inline unsigned int cpu_mask_to_apicid_cluster(cpumask_t cpumask)
 	 * The cpus in the mask must all be on the apic cluster.  If are not
 	 * on the same apicid cluster return default value of TARGET_CPUS.
 	 */
-	cpu = first_cpu(cpumask);
+	cpu = cpumask_first(cpumask);
 	apicid = cpu_to_logical_apicid(cpu);
 	while (cpus_found < num_bits_set) {
-		if (cpu_isset(cpu, cpumask)) {
+		if (cpumask_test_cpu(cpu, cpumask)) {
 			int new_apicid = cpu_to_logical_apicid(cpu);
 			if (apicid_cluster(apicid) !=
 					apicid_cluster(new_apicid)){
@@ -179,14 +181,14 @@ static inline unsigned int cpu_mask_to_apicid_cluster(cpumask_t cpumask)
 	return apicid;
 }
 
-static inline unsigned int cpu_mask_to_apicid(cpumask_t cpumask)
+static inline unsigned int cpu_mask_to_apicid(const cpumask_t *cpumask)
 {
 	int num_bits_set;
 	int cpus_found = 0;
 	int cpu;
 	int apicid;
 
-	num_bits_set = cpus_weight(cpumask);
+	num_bits_set = cpus_weight(*cpumask);
 	/* Return id to all */
 	if (num_bits_set == NR_CPUS)
 		return cpu_to_logical_apicid(0);
@@ -194,10 +196,10 @@ static inline unsigned int cpu_mask_to_apicid(cpumask_t cpumask)
 	 * The cpus in the mask must all be on the apic cluster.  If are not
 	 * on the same apicid cluster return default value of TARGET_CPUS.
 	 */
-	cpu = first_cpu(cpumask);
+	cpu = first_cpu(*cpumask);
 	apicid = cpu_to_logical_apicid(cpu);
 	while (cpus_found < num_bits_set) {
-		if (cpu_isset(cpu, cpumask)) {
+		if (cpu_isset(cpu, *cpumask)) {
 			int new_apicid = cpu_to_logical_apicid(cpu);
 			if (apicid_cluster(apicid) !=
 					apicid_cluster(new_apicid)){
diff --git a/arch/x86/include/asm/es7000/ipi.h b/arch/x86/include/asm/es7000/ipi.h
index 632a955fcc0..1a8507265f9 100644
--- a/arch/x86/include/asm/es7000/ipi.h
+++ b/arch/x86/include/asm/es7000/ipi.h
@@ -1,9 +1,10 @@
 #ifndef __ASM_ES7000_IPI_H
 #define __ASM_ES7000_IPI_H
 
-void send_IPI_mask_sequence(cpumask_t mask, int vector);
+void send_IPI_mask_sequence(const cpumask_t *mask, int vector);
+void send_IPI_mask_allbutself(const cpumask_t *mask, int vector);
 
-static inline void send_IPI_mask(cpumask_t mask, int vector)
+static inline void send_IPI_mask(const cpumask_t *mask, int vector)
 {
 	send_IPI_mask_sequence(mask, vector);
 }
@@ -13,12 +14,12 @@ static inline void send_IPI_allbutself(int vector)
 	cpumask_t mask = cpu_online_map;
 	cpu_clear(smp_processor_id(), mask);
 	if (!cpus_empty(mask))
-		send_IPI_mask(mask, vector);
+		send_IPI_mask(&mask, vector);
 }
 
 static inline void send_IPI_all(int vector)
 {
-	send_IPI_mask(cpu_online_map, vector);
+	send_IPI_mask(&cpu_online_map, vector);
 }
 
 #endif /* __ASM_ES7000_IPI_H */
diff --git a/arch/x86/include/asm/genapic_32.h b/arch/x86/include/asm/genapic_32.h
index 0ac17d33a8c..b21ed21c574 100644
--- a/arch/x86/include/asm/genapic_32.h
+++ b/arch/x86/include/asm/genapic_32.h
@@ -24,7 +24,7 @@ struct genapic {
 	int (*probe)(void);
 
 	int (*apic_id_registered)(void);
-	cpumask_t (*target_cpus)(void);
+	const cpumask_t *(*target_cpus)(void);
 	int int_delivery_mode;
 	int int_dest_mode;
 	int ESR_DISABLE;
@@ -57,12 +57,13 @@ struct genapic {
 
 	unsigned (*get_apic_id)(unsigned long x);
 	unsigned long apic_id_mask;
-	unsigned int (*cpu_mask_to_apicid)(cpumask_t cpumask);
-	cpumask_t (*vector_allocation_domain)(int cpu);
+	unsigned int (*cpu_mask_to_apicid)(const cpumask_t *cpumask);
+	void (*vector_allocation_domain)(int cpu, cpumask_t *retmask);
 
 #ifdef CONFIG_SMP
 	/* ipi */
-	void (*send_IPI_mask)(cpumask_t mask, int vector);
+	void (*send_IPI_mask)(const cpumask_t *mask, int vector);
+	void (*send_IPI_mask_allbutself)(const cpumask_t *mask, int vector);
 	void (*send_IPI_allbutself)(int vector);
 	void (*send_IPI_all)(int vector);
 #endif
diff --git a/arch/x86/include/asm/genapic_64.h b/arch/x86/include/asm/genapic_64.h
index 2cae011668b..a020e7d35a4 100644
--- a/arch/x86/include/asm/genapic_64.h
+++ b/arch/x86/include/asm/genapic_64.h
@@ -1,6 +1,8 @@
 #ifndef _ASM_X86_GENAPIC_64_H
 #define _ASM_X86_GENAPIC_64_H
 
+#include <linux/cpumask.h>
+
 /*
  * Copyright 2004 James Cleverdon, IBM.
  * Subject to the GNU Public License, v.2
@@ -18,16 +20,17 @@ struct genapic {
 	u32 int_delivery_mode;
 	u32 int_dest_mode;
 	int (*apic_id_registered)(void);
-	cpumask_t (*target_cpus)(void);
-	cpumask_t (*vector_allocation_domain)(int cpu);
+	const cpumask_t *(*target_cpus)(void);
+	void (*vector_allocation_domain)(int cpu, cpumask_t *retmask);
 	void (*init_apic_ldr)(void);
 	/* ipi */
-	void (*send_IPI_mask)(cpumask_t mask, int vector);
+	void (*send_IPI_mask)(const cpumask_t *mask, int vector);
+	void (*send_IPI_mask_allbutself)(const cpumask_t *mask, int vector);
 	void (*send_IPI_allbutself)(int vector);
 	void (*send_IPI_all)(int vector);
 	void (*send_IPI_self)(int vector);
 	/* */
-	unsigned int (*cpu_mask_to_apicid)(cpumask_t cpumask);
+	unsigned int (*cpu_mask_to_apicid)(const cpumask_t *cpumask);
 	unsigned int (*phys_pkg_id)(int index_msb);
 	unsigned int (*get_apic_id)(unsigned long x);
 	unsigned long (*set_apic_id)(unsigned int id);
diff --git a/arch/x86/include/asm/ipi.h b/arch/x86/include/asm/ipi.h
index f89dffb28aa..24b6e613edf 100644
--- a/arch/x86/include/asm/ipi.h
+++ b/arch/x86/include/asm/ipi.h
@@ -117,7 +117,7 @@ static inline void __send_IPI_dest_field(unsigned int mask, int vector,
 	native_apic_mem_write(APIC_ICR, cfg);
 }
 
-static inline void send_IPI_mask_sequence(cpumask_t mask, int vector)
+static inline void send_IPI_mask_sequence(const cpumask_t *mask, int vector)
 {
 	unsigned long flags;
 	unsigned long query_cpu;
@@ -128,11 +128,28 @@ static inline void send_IPI_mask_sequence(cpumask_t mask, int vector)
 	 * - mbligh
 	 */
 	local_irq_save(flags);
-	for_each_cpu_mask_nr(query_cpu, mask) {
+	for_each_cpu_mask_nr(query_cpu, *mask) {
 		__send_IPI_dest_field(per_cpu(x86_cpu_to_apicid, query_cpu),
 				      vector, APIC_DEST_PHYSICAL);
 	}
 	local_irq_restore(flags);
 }
 
+static inline void send_IPI_mask_allbutself(const cpumask_t *mask, int vector)
+{
+	unsigned long flags;
+	unsigned int query_cpu;
+	unsigned int this_cpu = smp_processor_id();
+
+	/* See Hack comment above */
+
+	local_irq_save(flags);
+	for_each_cpu_mask_nr(query_cpu, *mask)
+		if (query_cpu != this_cpu)
+			__send_IPI_dest_field(
+				per_cpu(x86_cpu_to_apicid, query_cpu),
+				vector, APIC_DEST_PHYSICAL);
+	local_irq_restore(flags);
+}
+
 #endif /* _ASM_X86_IPI_H */
diff --git a/arch/x86/include/asm/mach-default/mach_apic.h b/arch/x86/include/asm/mach-default/mach_apic.h
index 6cb3a467e06..c18896b0508 100644
--- a/arch/x86/include/asm/mach-default/mach_apic.h
+++ b/arch/x86/include/asm/mach-default/mach_apic.h
@@ -8,12 +8,12 @@
 
 #define APIC_DFR_VALUE	(APIC_DFR_FLAT)
 
-static inline cpumask_t target_cpus(void)
+static inline const cpumask_t *target_cpus(void)
 { 
 #ifdef CONFIG_SMP
-	return cpu_online_map;
+	return &cpu_online_map;
 #else
-	return cpumask_of_cpu(0);
+	return &cpumask_of_cpu(0);
 #endif
 } 
 
@@ -61,9 +61,9 @@ static inline int apic_id_registered(void)
 	return physid_isset(read_apic_id(), phys_cpu_present_map);
 }
 
-static inline unsigned int cpu_mask_to_apicid(cpumask_t cpumask)
+static inline unsigned int cpu_mask_to_apicid(const cpumask_t *cpumask)
 {
-	return cpus_addr(cpumask)[0];
+	return cpus_addr(*cpumask)[0];
 }
 
 static inline u32 phys_pkg_id(u32 cpuid_apic, int index_msb)
@@ -88,7 +88,7 @@ static inline int apicid_to_node(int logical_apicid)
 #endif
 }
 
-static inline cpumask_t vector_allocation_domain(int cpu)
+static inline void vector_allocation_domain(int cpu, cpumask_t *retmask)
 {
         /* Careful. Some cpus do not strictly honor the set of cpus
          * specified in the interrupt destination when using lowest
@@ -98,8 +98,7 @@ static inline cpumask_t vector_allocation_domain(int cpu)
          * deliver interrupts to the wrong hyperthread when only one
          * hyperthread was specified in the interrupt desitination.
          */
-        cpumask_t domain = { { [0] = APIC_ALL_CPUS, } };
-        return domain;
+	*retmask = (cpumask_t) { { [0] = APIC_ALL_CPUS } };
 }
 #endif
 
@@ -131,7 +130,7 @@ static inline int cpu_to_logical_apicid(int cpu)
 
 static inline int cpu_present_to_apicid(int mps_cpu)
 {
-	if (mps_cpu < NR_CPUS && cpu_present(mps_cpu))
+	if (mps_cpu < nr_cpu_ids && cpu_present(mps_cpu))
 		return (int)per_cpu(x86_bios_cpu_apicid, mps_cpu);
 	else
 		return BAD_APICID;
diff --git a/arch/x86/include/asm/mach-default/mach_ipi.h b/arch/x86/include/asm/mach-default/mach_ipi.h
index fabca01ebac..9353ab854a1 100644
--- a/arch/x86/include/asm/mach-default/mach_ipi.h
+++ b/arch/x86/include/asm/mach-default/mach_ipi.h
@@ -4,7 +4,8 @@
 /* Avoid include hell */
 #define NMI_VECTOR 0x02
 
-void send_IPI_mask_bitmask(cpumask_t mask, int vector);
+void send_IPI_mask_bitmask(const cpumask_t *mask, int vector);
+void send_IPI_mask_allbutself(const cpumask_t *mask, int vector);
 void __send_IPI_shortcut(unsigned int shortcut, int vector);
 
 extern int no_broadcast;
@@ -12,28 +13,27 @@ extern int no_broadcast;
 #ifdef CONFIG_X86_64
 #include <asm/genapic.h>
 #define send_IPI_mask (genapic->send_IPI_mask)
+#define send_IPI_mask_allbutself (genapic->send_IPI_mask_allbutself)
 #else
-static inline void send_IPI_mask(cpumask_t mask, int vector)
+static inline void send_IPI_mask(const cpumask_t *mask, int vector)
 {
 	send_IPI_mask_bitmask(mask, vector);
 }
+void send_IPI_mask_allbutself(const cpumask_t *mask, int vector);
 #endif
 
 static inline void __local_send_IPI_allbutself(int vector)
 {
-	if (no_broadcast || vector == NMI_VECTOR) {
-		cpumask_t mask = cpu_online_map;
-
-		cpu_clear(smp_processor_id(), mask);
-		send_IPI_mask(mask, vector);
-	} else
+	if (no_broadcast || vector == NMI_VECTOR)
+		send_IPI_mask_allbutself(&cpu_online_map, vector);
+	else
 		__send_IPI_shortcut(APIC_DEST_ALLBUT, vector);
 }
 
 static inline void __local_send_IPI_all(int vector)
 {
 	if (no_broadcast || vector == NMI_VECTOR)
-		send_IPI_mask(cpu_online_map, vector);
+		send_IPI_mask(&cpu_online_map, vector);
 	else
 		__send_IPI_shortcut(APIC_DEST_ALLINC, vector);
 }
diff --git a/arch/x86/include/asm/numaq/apic.h b/arch/x86/include/asm/numaq/apic.h
index 0bf2a06b7a4..1df7ebe738e 100644
--- a/arch/x86/include/asm/numaq/apic.h
+++ b/arch/x86/include/asm/numaq/apic.h
@@ -7,9 +7,9 @@
 
 #define APIC_DFR_VALUE	(APIC_DFR_CLUSTER)
 
-static inline cpumask_t target_cpus(void)
+static inline const cpumask_t *target_cpus(void)
 {
-	return CPU_MASK_ALL;
+	return &CPU_MASK_ALL;
 }
 
 #define NO_BALANCE_IRQ (1)
@@ -122,7 +122,7 @@ static inline void enable_apic_mode(void)
  * We use physical apicids here, not logical, so just return the default
  * physical broadcast to stop people from breaking us
  */
-static inline unsigned int cpu_mask_to_apicid(cpumask_t cpumask)
+static inline unsigned int cpu_mask_to_apicid(const cpumask_t *cpumask)
 {
 	return (int) 0xF;
 }
diff --git a/arch/x86/include/asm/numaq/ipi.h b/arch/x86/include/asm/numaq/ipi.h
index 935588d286c..c734d7acc43 100644
--- a/arch/x86/include/asm/numaq/ipi.h
+++ b/arch/x86/include/asm/numaq/ipi.h
@@ -1,9 +1,10 @@
 #ifndef __ASM_NUMAQ_IPI_H
 #define __ASM_NUMAQ_IPI_H
 
-void send_IPI_mask_sequence(cpumask_t, int vector);
+void send_IPI_mask_sequence(const cpumask_t *mask, int vector);
+void send_IPI_mask_allbutself(const cpumask_t *mask, int vector);
 
-static inline void send_IPI_mask(cpumask_t mask, int vector)
+static inline void send_IPI_mask(const cpumask_t *mask, int vector)
 {
 	send_IPI_mask_sequence(mask, vector);
 }
@@ -14,12 +15,12 @@ static inline void send_IPI_allbutself(int vector)
 	cpu_clear(smp_processor_id(), mask);
 
 	if (!cpus_empty(mask))
-		send_IPI_mask(mask, vector);
+		send_IPI_mask(&mask, vector);
 }
 
 static inline void send_IPI_all(int vector)
 {
-	send_IPI_mask(cpu_online_map, vector);
+	send_IPI_mask(&cpu_online_map, vector);
 }
 
 #endif /* __ASM_NUMAQ_IPI_H */
diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h
index d12811ce51d..c4a9aa52df6 100644
--- a/arch/x86/include/asm/smp.h
+++ b/arch/x86/include/asm/smp.h
@@ -60,7 +60,7 @@ struct smp_ops {
 	void (*cpu_die)(unsigned int cpu);
 	void (*play_dead)(void);
 
-	void (*send_call_func_ipi)(cpumask_t mask);
+	void (*send_call_func_ipi)(const cpumask_t *mask);
 	void (*send_call_func_single_ipi)(int cpu);
 };
 
@@ -125,7 +125,7 @@ static inline void arch_send_call_function_single_ipi(int cpu)
 
 static inline void arch_send_call_function_ipi(cpumask_t mask)
 {
-	smp_ops.send_call_func_ipi(mask);
+	smp_ops.send_call_func_ipi(&mask);
 }
 
 void cpu_disable_common(void);
@@ -138,7 +138,7 @@ void native_cpu_die(unsigned int cpu);
 void native_play_dead(void);
 void play_dead_common(void);
 
-void native_send_call_func_ipi(cpumask_t mask);
+void native_send_call_func_ipi(const cpumask_t *mask);
 void native_send_call_func_single_ipi(int cpu);
 
 extern void prefill_possible_map(void);
diff --git a/arch/x86/include/asm/summit/apic.h b/arch/x86/include/asm/summit/apic.h
index 9b3070f1c2a..437dc83725c 100644
--- a/arch/x86/include/asm/summit/apic.h
+++ b/arch/x86/include/asm/summit/apic.h
@@ -14,13 +14,13 @@
 
 #define APIC_DFR_VALUE	(APIC_DFR_CLUSTER)
 
-static inline cpumask_t target_cpus(void)
+static inline const cpumask_t *target_cpus(void)
 {
 	/* CPU_MASK_ALL (0xff) has undefined behaviour with
 	 * dest_LowestPrio mode logical clustered apic interrupt routing
 	 * Just start on cpu 0.  IRQ balancing will spread load
 	 */
-	return cpumask_of_cpu(0);
+	return &cpumask_of_cpu(0);
 }
 
 #define INT_DELIVERY_MODE (dest_LowestPrio)
@@ -137,14 +137,14 @@ static inline void enable_apic_mode(void)
 {
 }
 
-static inline unsigned int cpu_mask_to_apicid(cpumask_t cpumask)
+static inline unsigned int cpu_mask_to_apicid(const cpumask_t *cpumask)
 {
 	int num_bits_set;
 	int cpus_found = 0;
 	int cpu;
 	int apicid;
 
-	num_bits_set = cpus_weight(cpumask);
+	num_bits_set = cpus_weight(*cpumask);
 	/* Return id to all */
 	if (num_bits_set == NR_CPUS)
 		return (int) 0xFF;
@@ -152,10 +152,10 @@ static inline unsigned int cpu_mask_to_apicid(cpumask_t cpumask)
 	 * The cpus in the mask must all be on the apic cluster.  If are not
 	 * on the same apicid cluster return default value of TARGET_CPUS.
 	 */
-	cpu = first_cpu(cpumask);
+	cpu = first_cpu(*cpumask);
 	apicid = cpu_to_logical_apicid(cpu);
 	while (cpus_found < num_bits_set) {
-		if (cpu_isset(cpu, cpumask)) {
+		if (cpu_isset(cpu, *cpumask)) {
 			int new_apicid = cpu_to_logical_apicid(cpu);
 			if (apicid_cluster(apicid) !=
 					apicid_cluster(new_apicid)){
diff --git a/arch/x86/include/asm/summit/ipi.h b/arch/x86/include/asm/summit/ipi.h
index 53bd1e7bd7b..a8a2c24f50c 100644
--- a/arch/x86/include/asm/summit/ipi.h
+++ b/arch/x86/include/asm/summit/ipi.h
@@ -1,9 +1,10 @@
 #ifndef __ASM_SUMMIT_IPI_H
 #define __ASM_SUMMIT_IPI_H
 
-void send_IPI_mask_sequence(cpumask_t mask, int vector);
+void send_IPI_mask_sequence(const cpumask_t *mask, int vector);
+void send_IPI_mask_allbutself(const cpumask_t *mask, int vector);
 
-static inline void send_IPI_mask(cpumask_t mask, int vector)
+static inline void send_IPI_mask(const cpumask_t *mask, int vector)
 {
 	send_IPI_mask_sequence(mask, vector);
 }
@@ -14,12 +15,12 @@ static inline void send_IPI_allbutself(int vector)
 	cpu_clear(smp_processor_id(), mask);
 
 	if (!cpus_empty(mask))
-		send_IPI_mask(mask, vector);
+		send_IPI_mask(&mask, vector);
 }
 
 static inline void send_IPI_all(int vector)
 {
-	send_IPI_mask(cpu_online_map, vector);
+	send_IPI_mask(&cpu_online_map, vector);
 }
 
 #endif /* __ASM_SUMMIT_IPI_H */
diff --git a/arch/x86/kernel/apic.c b/arch/x86/kernel/apic.c
index b2cef49f308..a375791c08c 100644
--- a/arch/x86/kernel/apic.c
+++ b/arch/x86/kernel/apic.c
@@ -141,7 +141,7 @@ static int lapic_next_event(unsigned long delta,
 			    struct clock_event_device *evt);
 static void lapic_timer_setup(enum clock_event_mode mode,
 			      struct clock_event_device *evt);
-static void lapic_timer_broadcast(const struct cpumask *mask);
+static void lapic_timer_broadcast(const cpumask_t *mask);
 static void apic_pm_activate(void);
 
 /*
@@ -453,10 +453,10 @@ static void lapic_timer_setup(enum clock_event_mode mode,
 /*
  * Local APIC timer broadcast function
  */
-static void lapic_timer_broadcast(const struct cpumask *mask)
+static void lapic_timer_broadcast(const cpumask_t *mask)
 {
 #ifdef CONFIG_SMP
-	send_IPI_mask(*mask, LOCAL_TIMER_VECTOR);
+	send_IPI_mask(mask, LOCAL_TIMER_VECTOR);
 #endif
 }
 
diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c
index 26855381790..81e01f7b1d1 100644
--- a/arch/x86/kernel/crash.c
+++ b/arch/x86/kernel/crash.c
@@ -77,10 +77,7 @@ static int crash_nmi_callback(struct notifier_block *self,
 
 static void smp_send_nmi_allbutself(void)
 {
-	cpumask_t mask = cpu_online_map;
-	cpu_clear(safe_smp_processor_id(), mask);
-	if (!cpus_empty(mask))
-		send_IPI_mask(mask, NMI_VECTOR);
+	send_IPI_allbutself(NMI_VECTOR);
 }
 
 static struct notifier_block crash_nmi_nb = {
diff --git a/arch/x86/kernel/genapic_flat_64.c b/arch/x86/kernel/genapic_flat_64.c
index c0262791bda..50eebd0328f 100644
--- a/arch/x86/kernel/genapic_flat_64.c
+++ b/arch/x86/kernel/genapic_flat_64.c
@@ -30,12 +30,12 @@ static int flat_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
 	return 1;
 }
 
-static cpumask_t flat_target_cpus(void)
+static const cpumask_t *flat_target_cpus(void)
 {
-	return cpu_online_map;
+	return &cpu_online_map;
 }
 
-static cpumask_t flat_vector_allocation_domain(int cpu)
+static void flat_vector_allocation_domain(int cpu, cpumask_t *retmask)
 {
 	/* Careful. Some cpus do not strictly honor the set of cpus
 	 * specified in the interrupt destination when using lowest
@@ -45,8 +45,7 @@ static cpumask_t flat_vector_allocation_domain(int cpu)
 	 * deliver interrupts to the wrong hyperthread when only one
 	 * hyperthread was specified in the interrupt desitination.
 	 */
-	cpumask_t domain = { { [0] = APIC_ALL_CPUS, } };
-	return domain;
+	*retmask = (cpumask_t) { {[0] = APIC_ALL_CPUS, } };
 }
 
 /*
@@ -69,9 +68,8 @@ static void flat_init_apic_ldr(void)
 	apic_write(APIC_LDR, val);
 }
 
-static void flat_send_IPI_mask(cpumask_t cpumask, int vector)
+static inline void _flat_send_IPI_mask(unsigned long mask, int vector)
 {
-	unsigned long mask = cpus_addr(cpumask)[0];
 	unsigned long flags;
 
 	local_irq_save(flags);
@@ -79,20 +77,40 @@ static void flat_send_IPI_mask(cpumask_t cpumask, int vector)
 	local_irq_restore(flags);
 }
 
+static void flat_send_IPI_mask(const cpumask_t *cpumask, int vector)
+{
+	unsigned long mask = cpus_addr(*cpumask)[0];
+
+	_flat_send_IPI_mask(mask, vector);
+}
+
+static void flat_send_IPI_mask_allbutself(const cpumask_t *cpumask, int vector)
+{
+	unsigned long mask = cpus_addr(*cpumask)[0];
+	int cpu = smp_processor_id();
+
+	if (cpu < BITS_PER_LONG)
+		clear_bit(cpu, &mask);
+	_flat_send_IPI_mask(mask, vector);
+}
+
 static void flat_send_IPI_allbutself(int vector)
 {
+	int cpu = smp_processor_id();
 #ifdef	CONFIG_HOTPLUG_CPU
 	int hotplug = 1;
 #else
 	int hotplug = 0;
 #endif
 	if (hotplug || vector == NMI_VECTOR) {
-		cpumask_t allbutme = cpu_online_map;
+		if (!cpus_equal(cpu_online_map, cpumask_of_cpu(cpu))) {
+			unsigned long mask = cpus_addr(cpu_online_map)[0];
 
-		cpu_clear(smp_processor_id(), allbutme);
+			if (cpu < BITS_PER_LONG)
+				clear_bit(cpu, &mask);
 
-		if (!cpus_empty(allbutme))
-			flat_send_IPI_mask(allbutme, vector);
+			_flat_send_IPI_mask(mask, vector);
+		}
 	} else if (num_online_cpus() > 1) {
 		__send_IPI_shortcut(APIC_DEST_ALLBUT, vector,APIC_DEST_LOGICAL);
 	}
@@ -101,7 +119,7 @@ static void flat_send_IPI_allbutself(int vector)
 static void flat_send_IPI_all(int vector)
 {
 	if (vector == NMI_VECTOR)
-		flat_send_IPI_mask(cpu_online_map, vector);
+		flat_send_IPI_mask(&cpu_online_map, vector);
 	else
 		__send_IPI_shortcut(APIC_DEST_ALLINC, vector, APIC_DEST_LOGICAL);
 }
@@ -135,9 +153,9 @@ static int flat_apic_id_registered(void)
 	return physid_isset(read_xapic_id(), phys_cpu_present_map);
 }
 
-static unsigned int flat_cpu_mask_to_apicid(cpumask_t cpumask)
+static unsigned int flat_cpu_mask_to_apicid(const cpumask_t *cpumask)
 {
-	return cpus_addr(cpumask)[0] & APIC_ALL_CPUS;
+	return cpus_addr(*cpumask)[0] & APIC_ALL_CPUS;
 }
 
 static unsigned int phys_pkg_id(int index_msb)
@@ -157,6 +175,7 @@ struct genapic apic_flat =  {
 	.send_IPI_all = flat_send_IPI_all,
 	.send_IPI_allbutself = flat_send_IPI_allbutself,
 	.send_IPI_mask = flat_send_IPI_mask,
+	.send_IPI_mask_allbutself = flat_send_IPI_mask_allbutself,
 	.send_IPI_self = apic_send_IPI_self,
 	.cpu_mask_to_apicid = flat_cpu_mask_to_apicid,
 	.phys_pkg_id = phys_pkg_id,
@@ -188,35 +207,39 @@ static int physflat_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
 	return 0;
 }
 
-static cpumask_t physflat_target_cpus(void)
+static const cpumask_t *physflat_target_cpus(void)
 {
-	return cpu_online_map;
+	return &cpu_online_map;
 }
 
-static cpumask_t physflat_vector_allocation_domain(int cpu)
+static void physflat_vector_allocation_domain(int cpu, cpumask_t *retmask)
 {
-	return cpumask_of_cpu(cpu);
+	cpus_clear(*retmask);
+	cpu_set(cpu, *retmask);
 }
 
-static void physflat_send_IPI_mask(cpumask_t cpumask, int vector)
+static void physflat_send_IPI_mask(const cpumask_t *cpumask, int vector)
 {
 	send_IPI_mask_sequence(cpumask, vector);
 }
 
-static void physflat_send_IPI_allbutself(int vector)
+static void physflat_send_IPI_mask_allbutself(const cpumask_t *cpumask,
+					      int vector)
 {
-	cpumask_t allbutme = cpu_online_map;
+	send_IPI_mask_allbutself(cpumask, vector);
+}
 
-	cpu_clear(smp_processor_id(), allbutme);
-	physflat_send_IPI_mask(allbutme, vector);
+static void physflat_send_IPI_allbutself(int vector)
+{
+	send_IPI_mask_allbutself(&cpu_online_map, vector);
 }
 
 static void physflat_send_IPI_all(int vector)
 {
-	physflat_send_IPI_mask(cpu_online_map, vector);
+	physflat_send_IPI_mask(&cpu_online_map, vector);
 }
 
-static unsigned int physflat_cpu_mask_to_apicid(cpumask_t cpumask)
+static unsigned int physflat_cpu_mask_to_apicid(const cpumask_t *cpumask)
 {
 	int cpu;
 
@@ -224,7 +247,7 @@ static unsigned int physflat_cpu_mask_to_apicid(cpumask_t cpumask)
 	 * We're using fixed IRQ delivery, can only return one phys APIC ID.
 	 * May as well be the first.
 	 */
-	cpu = first_cpu(cpumask);
+	cpu = first_cpu(*cpumask);
 	if ((unsigned)cpu < nr_cpu_ids)
 		return per_cpu(x86_cpu_to_apicid, cpu);
 	else
@@ -243,6 +266,7 @@ struct genapic apic_physflat =  {
 	.send_IPI_all = physflat_send_IPI_all,
 	.send_IPI_allbutself = physflat_send_IPI_allbutself,
 	.send_IPI_mask = physflat_send_IPI_mask,
+	.send_IPI_mask_allbutself = physflat_send_IPI_mask_allbutself,
 	.send_IPI_self = apic_send_IPI_self,
 	.cpu_mask_to_apicid = physflat_cpu_mask_to_apicid,
 	.phys_pkg_id = phys_pkg_id,
diff --git a/arch/x86/kernel/genx2apic_cluster.c b/arch/x86/kernel/genx2apic_cluster.c
index f6a2c8eb48a..f5fa9a91ad3 100644
--- a/arch/x86/kernel/genx2apic_cluster.c
+++ b/arch/x86/kernel/genx2apic_cluster.c
@@ -22,19 +22,18 @@ static int x2apic_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
 
 /* Start with all IRQs pointing to boot CPU.  IRQ balancing will shift them. */
 
-static cpumask_t x2apic_target_cpus(void)
+static const cpumask_t *x2apic_target_cpus(void)
 {
-	return cpumask_of_cpu(0);
+	return &cpumask_of_cpu(0);
 }
 
 /*
  * for now each logical cpu is in its own vector allocation domain.
  */
-static cpumask_t x2apic_vector_allocation_domain(int cpu)
+static void x2apic_vector_allocation_domain(int cpu, cpumask_t *retmask)
 {
-	cpumask_t domain = CPU_MASK_NONE;
-	cpu_set(cpu, domain);
-	return domain;
+	cpus_clear(*retmask);
+	cpu_set(cpu, *retmask);
 }
 
 static void __x2apic_send_IPI_dest(unsigned int apicid, int vector,
@@ -56,32 +55,52 @@ static void __x2apic_send_IPI_dest(unsigned int apicid, int vector,
  * at once. We have 16 cpu's in a cluster. This will minimize IPI register
  * writes.
  */
-static void x2apic_send_IPI_mask(cpumask_t mask, int vector)
+static void x2apic_send_IPI_mask(const cpumask_t *mask, int vector)
 {
 	unsigned long flags;
 	unsigned long query_cpu;
 
 	local_irq_save(flags);
-	for_each_cpu_mask(query_cpu, mask) {
-		__x2apic_send_IPI_dest(per_cpu(x86_cpu_to_logical_apicid, query_cpu),
-				       vector, APIC_DEST_LOGICAL);
-	}
+	for_each_cpu_mask_nr(query_cpu, *mask)
+		__x2apic_send_IPI_dest(
+			per_cpu(x86_cpu_to_logical_apicid, query_cpu),
+			vector, APIC_DEST_LOGICAL);
 	local_irq_restore(flags);
 }
 
-static void x2apic_send_IPI_allbutself(int vector)
+static void x2apic_send_IPI_mask_allbutself(const cpumask_t *mask, int vector)
 {
-	cpumask_t mask = cpu_online_map;
+	unsigned long flags;
+	unsigned long query_cpu;
+	unsigned long this_cpu = smp_processor_id();
 
-	cpu_clear(smp_processor_id(), mask);
+	local_irq_save(flags);
+	for_each_cpu_mask_nr(query_cpu, *mask)
+		if (query_cpu != this_cpu)
+			__x2apic_send_IPI_dest(
+				per_cpu(x86_cpu_to_logical_apicid, query_cpu),
+				vector, APIC_DEST_LOGICAL);
+	local_irq_restore(flags);
+}
 
-	if (!cpus_empty(mask))
-		x2apic_send_IPI_mask(mask, vector);
+static void x2apic_send_IPI_allbutself(int vector)
+{
+	unsigned long flags;
+	unsigned long query_cpu;
+	unsigned long this_cpu = smp_processor_id();
+
+	local_irq_save(flags);
+	for_each_online_cpu(query_cpu)
+		if (query_cpu != this_cpu)
+			__x2apic_send_IPI_dest(
+				per_cpu(x86_cpu_to_logical_apicid, query_cpu),
+				vector, APIC_DEST_LOGICAL);
+	local_irq_restore(flags);
 }
 
 static void x2apic_send_IPI_all(int vector)
 {
-	x2apic_send_IPI_mask(cpu_online_map, vector);
+	x2apic_send_IPI_mask(&cpu_online_map, vector);
 }
 
 static int x2apic_apic_id_registered(void)
@@ -89,7 +108,7 @@ static int x2apic_apic_id_registered(void)
 	return 1;
 }
 
-static unsigned int x2apic_cpu_mask_to_apicid(cpumask_t cpumask)
+static unsigned int x2apic_cpu_mask_to_apicid(const cpumask_t *cpumask)
 {
 	int cpu;
 
@@ -97,8 +116,8 @@ static unsigned int x2apic_cpu_mask_to_apicid(cpumask_t cpumask)
 	 * We're using fixed IRQ delivery, can only return one phys APIC ID.
 	 * May as well be the first.
 	 */
-	cpu = first_cpu(cpumask);
-	if ((unsigned)cpu < NR_CPUS)
+	cpu = first_cpu(*cpumask);
+	if ((unsigned)cpu < nr_cpu_ids)
 		return per_cpu(x86_cpu_to_logical_apicid, cpu);
 	else
 		return BAD_APICID;
@@ -150,6 +169,7 @@ struct genapic apic_x2apic_cluster = {
 	.send_IPI_all = x2apic_send_IPI_all,
 	.send_IPI_allbutself = x2apic_send_IPI_allbutself,
 	.send_IPI_mask = x2apic_send_IPI_mask,
+	.send_IPI_mask_allbutself = x2apic_send_IPI_mask_allbutself,
 	.send_IPI_self = x2apic_send_IPI_self,
 	.cpu_mask_to_apicid = x2apic_cpu_mask_to_apicid,
 	.phys_pkg_id = phys_pkg_id,
diff --git a/arch/x86/kernel/genx2apic_phys.c b/arch/x86/kernel/genx2apic_phys.c
index d042211768b..41c27b2f3d0 100644
--- a/arch/x86/kernel/genx2apic_phys.c
+++ b/arch/x86/kernel/genx2apic_phys.c
@@ -29,16 +29,15 @@ static int x2apic_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
 
 /* Start with all IRQs pointing to boot CPU.  IRQ balancing will shift them. */
 
-static cpumask_t x2apic_target_cpus(void)
+static const cpumask_t *x2apic_target_cpus(void)
 {
-	return cpumask_of_cpu(0);
+	return &cpumask_of_cpu(0);
 }
 
-static cpumask_t x2apic_vector_allocation_domain(int cpu)
+static void x2apic_vector_allocation_domain(int cpu, cpumask_t *retmask)
 {
-	cpumask_t domain = CPU_MASK_NONE;
-	cpu_set(cpu, domain);
-	return domain;
+	cpus_clear(*retmask);
+	cpu_set(cpu, *retmask);
 }
 
 static void __x2apic_send_IPI_dest(unsigned int apicid, int vector,
@@ -54,32 +53,53 @@ static void __x2apic_send_IPI_dest(unsigned int apicid, int vector,
 	x2apic_icr_write(cfg, apicid);
 }
 
-static void x2apic_send_IPI_mask(cpumask_t mask, int vector)
+static void x2apic_send_IPI_mask(const cpumask_t *mask, int vector)
 {
 	unsigned long flags;
 	unsigned long query_cpu;
 
 	local_irq_save(flags);
-	for_each_cpu_mask(query_cpu, mask) {
+	for_each_cpu_mask_nr(query_cpu, *mask) {
 		__x2apic_send_IPI_dest(per_cpu(x86_cpu_to_apicid, query_cpu),
 				       vector, APIC_DEST_PHYSICAL);
 	}
 	local_irq_restore(flags);
 }
 
-static void x2apic_send_IPI_allbutself(int vector)
+static void x2apic_send_IPI_mask_allbutself(const cpumask_t *mask, int vector)
 {
-	cpumask_t mask = cpu_online_map;
+	unsigned long flags;
+	unsigned long query_cpu;
+	unsigned long this_cpu = smp_processor_id();
+
+	local_irq_save(flags);
+	for_each_cpu_mask_nr(query_cpu, *mask) {
+		if (query_cpu != this_cpu)
+			__x2apic_send_IPI_dest(
+				per_cpu(x86_cpu_to_apicid, query_cpu),
+				vector, APIC_DEST_PHYSICAL);
+	}
+	local_irq_restore(flags);
+}
 
-	cpu_clear(smp_processor_id(), mask);
+static void x2apic_send_IPI_allbutself(int vector)
+{
+	unsigned long flags;
+	unsigned long query_cpu;
+	unsigned long this_cpu = smp_processor_id();
 
-	if (!cpus_empty(mask))
-		x2apic_send_IPI_mask(mask, vector);
+	local_irq_save(flags);
+	for_each_online_cpu(query_cpu)
+		if (query_cpu != this_cpu)
+			__x2apic_send_IPI_dest(
+				per_cpu(x86_cpu_to_apicid, query_cpu),
+				vector, APIC_DEST_PHYSICAL);
+	local_irq_restore(flags);
 }
 
 static void x2apic_send_IPI_all(int vector)
 {
-	x2apic_send_IPI_mask(cpu_online_map, vector);
+	x2apic_send_IPI_mask(&cpu_online_map, vector);
 }
 
 static int x2apic_apic_id_registered(void)
@@ -87,7 +107,7 @@ static int x2apic_apic_id_registered(void)
 	return 1;
 }
 
-static unsigned int x2apic_cpu_mask_to_apicid(cpumask_t cpumask)
+static unsigned int x2apic_cpu_mask_to_apicid(const cpumask_t *cpumask)
 {
 	int cpu;
 
@@ -95,8 +115,8 @@ static unsigned int x2apic_cpu_mask_to_apicid(cpumask_t cpumask)
 	 * We're using fixed IRQ delivery, can only return one phys APIC ID.
 	 * May as well be the first.
 	 */
-	cpu = first_cpu(cpumask);
-	if ((unsigned)cpu < NR_CPUS)
+	cpu = first_cpu(*cpumask);
+	if ((unsigned)cpu < nr_cpu_ids)
 		return per_cpu(x86_cpu_to_apicid, cpu);
 	else
 		return BAD_APICID;
@@ -145,6 +165,7 @@ struct genapic apic_x2apic_phys = {
 	.send_IPI_all = x2apic_send_IPI_all,
 	.send_IPI_allbutself = x2apic_send_IPI_allbutself,
 	.send_IPI_mask = x2apic_send_IPI_mask,
+	.send_IPI_mask_allbutself = x2apic_send_IPI_mask_allbutself,
 	.send_IPI_self = x2apic_send_IPI_self,
 	.cpu_mask_to_apicid = x2apic_cpu_mask_to_apicid,
 	.phys_pkg_id = phys_pkg_id,
diff --git a/arch/x86/kernel/genx2apic_uv_x.c b/arch/x86/kernel/genx2apic_uv_x.c
index 2c7dbdb9827..010659415ae 100644
--- a/arch/x86/kernel/genx2apic_uv_x.c
+++ b/arch/x86/kernel/genx2apic_uv_x.c
@@ -75,16 +75,15 @@ EXPORT_SYMBOL(sn_rtc_cycles_per_second);
 
 /* Start with all IRQs pointing to boot CPU.  IRQ balancing will shift them. */
 
-static cpumask_t uv_target_cpus(void)
+static const cpumask_t *uv_target_cpus(void)
 {
-	return cpumask_of_cpu(0);
+	return &cpumask_of_cpu(0);
 }
 
-static cpumask_t uv_vector_allocation_domain(int cpu)
+static void uv_vector_allocation_domain(int cpu, cpumask_t *retmask)
 {
-	cpumask_t domain = CPU_MASK_NONE;
-	cpu_set(cpu, domain);
-	return domain;
+	cpus_clear(*retmask);
+	cpu_set(cpu, *retmask);
 }
 
 int uv_wakeup_secondary(int phys_apicid, unsigned int start_rip)
@@ -123,28 +122,37 @@ static void uv_send_IPI_one(int cpu, int vector)
 	uv_write_global_mmr64(pnode, UVH_IPI_INT, val);
 }
 
-static void uv_send_IPI_mask(cpumask_t mask, int vector)
+static void uv_send_IPI_mask(const cpumask_t *mask, int vector)
 {
 	unsigned int cpu;
 
-	for_each_possible_cpu(cpu)
-		if (cpu_isset(cpu, mask))
+	for_each_cpu_mask_nr(cpu, *mask)
+		uv_send_IPI_one(cpu, vector);
+}
+
+static void uv_send_IPI_mask_allbutself(const cpumask_t *mask, int vector)
+{
+	unsigned int cpu;
+	unsigned int this_cpu = smp_processor_id();
+
+	for_each_cpu_mask_nr(cpu, *mask)
+		if (cpu != this_cpu)
 			uv_send_IPI_one(cpu, vector);
 }
 
 static void uv_send_IPI_allbutself(int vector)
 {
-	cpumask_t mask = cpu_online_map;
-
-	cpu_clear(smp_processor_id(), mask);
+	unsigned int cpu;
+	unsigned int this_cpu = smp_processor_id();
 
-	if (!cpus_empty(mask))
-		uv_send_IPI_mask(mask, vector);
+	for_each_online_cpu(cpu)
+		if (cpu != this_cpu)
+			uv_send_IPI_one(cpu, vector);
 }
 
 static void uv_send_IPI_all(int vector)
 {
-	uv_send_IPI_mask(cpu_online_map, vector);
+	uv_send_IPI_mask(&cpu_online_map, vector);
 }
 
 static int uv_apic_id_registered(void)
@@ -156,7 +164,7 @@ static void uv_init_apic_ldr(void)
 {
 }
 
-static unsigned int uv_cpu_mask_to_apicid(cpumask_t cpumask)
+static unsigned int uv_cpu_mask_to_apicid(const cpumask_t *cpumask)
 {
 	int cpu;
 
@@ -164,7 +172,7 @@ static unsigned int uv_cpu_mask_to_apicid(cpumask_t cpumask)
 	 * We're using fixed IRQ delivery, can only return one phys APIC ID.
 	 * May as well be the first.
 	 */
-	cpu = first_cpu(cpumask);
+	cpu = first_cpu(*cpumask);
 	if ((unsigned)cpu < nr_cpu_ids)
 		return per_cpu(x86_cpu_to_apicid, cpu);
 	else
@@ -218,6 +226,7 @@ struct genapic apic_x2apic_uv_x = {
 	.send_IPI_all = uv_send_IPI_all,
 	.send_IPI_allbutself = uv_send_IPI_allbutself,
 	.send_IPI_mask = uv_send_IPI_mask,
+	.send_IPI_mask_allbutself = uv_send_IPI_mask_allbutself,
 	.send_IPI_self = uv_send_IPI_self,
 	.cpu_mask_to_apicid = uv_cpu_mask_to_apicid,
 	.phys_pkg_id = phys_pkg_id,
diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c
index 3d7d0d55253..7f23ce7f551 100644
--- a/arch/x86/kernel/io_apic.c
+++ b/arch/x86/kernel/io_apic.c
@@ -231,7 +231,8 @@ static struct irq_cfg *irq_cfg(unsigned int irq)
 
 #endif
 
-static inline void set_extra_move_desc(struct irq_desc *desc, cpumask_t mask)
+static inline void
+set_extra_move_desc(struct irq_desc *desc, const struct cpumask *mask)
 {
 }
 
@@ -396,7 +397,8 @@ static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, struct irq
 	}
 }
 
-static int assign_irq_vector(int irq, struct irq_cfg *cfg, cpumask_t mask);
+static int
+assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask);
 
 static void set_ioapic_affinity_irq_desc(struct irq_desc *desc,
 					 const struct cpumask *mask)
@@ -412,13 +414,13 @@ static void set_ioapic_affinity_irq_desc(struct irq_desc *desc,
 
 	irq = desc->irq;
 	cfg = desc->chip_data;
-	if (assign_irq_vector(irq, cfg, *mask))
+	if (assign_irq_vector(irq, cfg, mask))
 		return;
 
-	set_extra_move_desc(desc, *mask);
+	set_extra_move_desc(desc, mask);
 
 	cpumask_and(&tmp, &cfg->domain, mask);
-	dest = cpu_mask_to_apicid(tmp);
+	dest = cpu_mask_to_apicid(&tmp);
 	/*
 	 * Only the high 8 bits are valid.
 	 */
@@ -1099,7 +1101,8 @@ void unlock_vector_lock(void)
 	spin_unlock(&vector_lock);
 }
 
-static int __assign_irq_vector(int irq, struct irq_cfg *cfg, cpumask_t mask)
+static int
+__assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask)
 {
 	/*
 	 * NOTE! The local APIC isn't very good at handling
@@ -1115,35 +1118,32 @@ static int __assign_irq_vector(int irq, struct irq_cfg *cfg, cpumask_t mask)
 	static int current_vector = FIRST_DEVICE_VECTOR, current_offset = 0;
 	unsigned int old_vector;
 	int cpu;
+	cpumask_t tmp_mask;
 
 	if ((cfg->move_in_progress) || cfg->move_cleanup_count)
 		return -EBUSY;
 
-	/* Only try and allocate irqs on cpus that are present */
-	cpus_and(mask, mask, cpu_online_map);
-
 	old_vector = cfg->vector;
 	if (old_vector) {
-		cpumask_t tmp;
-		cpus_and(tmp, cfg->domain, mask);
-		if (!cpus_empty(tmp))
+		cpus_and(tmp_mask, *mask, cpu_online_map);
+		cpus_and(tmp_mask, cfg->domain, tmp_mask);
+		if (!cpus_empty(tmp_mask))
 			return 0;
 	}
 
-	for_each_cpu_mask_nr(cpu, mask) {
-		cpumask_t domain, new_mask;
+	/* Only try and allocate irqs on cpus that are present */
+	for_each_cpu_and(cpu, mask, &cpu_online_map) {
 		int new_cpu;
 		int vector, offset;
 
-		domain = vector_allocation_domain(cpu);
-		cpus_and(new_mask, domain, cpu_online_map);
+		vector_allocation_domain(cpu, &tmp_mask);
 
 		vector = current_vector;
 		offset = current_offset;
 next:
 		vector += 8;
 		if (vector >= first_system_vector) {
-			/* If we run out of vectors on large boxen, must share them. */
+			/* If out of vectors on large boxen, must share them. */
 			offset = (offset + 1) % 8;
 			vector = FIRST_DEVICE_VECTOR + offset;
 		}
@@ -1156,7 +1156,7 @@ next:
 		if (vector == SYSCALL_VECTOR)
 			goto next;
 #endif
-		for_each_cpu_mask_nr(new_cpu, new_mask)
+		for_each_cpu_and(new_cpu, &tmp_mask, &cpu_online_map)
 			if (per_cpu(vector_irq, new_cpu)[vector] != -1)
 				goto next;
 		/* Found one! */
@@ -1166,16 +1166,17 @@ next:
 			cfg->move_in_progress = 1;
 			cfg->old_domain = cfg->domain;
 		}
-		for_each_cpu_mask_nr(new_cpu, new_mask)
+		for_each_cpu_and(new_cpu, &tmp_mask, &cpu_online_map)
 			per_cpu(vector_irq, new_cpu)[vector] = irq;
 		cfg->vector = vector;
-		cfg->domain = domain;
+		cfg->domain = tmp_mask;
 		return 0;
 	}
 	return -ENOSPC;
 }
 
-static int assign_irq_vector(int irq, struct irq_cfg *cfg, cpumask_t mask)
+static int
+assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask)
 {
 	int err;
 	unsigned long flags;
@@ -1384,8 +1385,8 @@ static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq, struct irq_de
 
 	cfg = desc->chip_data;
 
-	mask = TARGET_CPUS;
-	if (assign_irq_vector(irq, cfg, mask))
+	mask = *TARGET_CPUS;
+	if (assign_irq_vector(irq, cfg, &mask))
 		return;
 
 	cpus_and(mask, cfg->domain, mask);
@@ -1398,7 +1399,7 @@ static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq, struct irq_de
 
 
 	if (setup_ioapic_entry(mp_ioapics[apic].mp_apicid, irq, &entry,
-			       cpu_mask_to_apicid(mask), trigger, polarity,
+			       cpu_mask_to_apicid(&mask), trigger, polarity,
 			       cfg->vector)) {
 		printk("Failed to setup ioapic entry for ioapic  %d, pin %d\n",
 		       mp_ioapics[apic].mp_apicid, pin);
@@ -2121,7 +2122,7 @@ static int ioapic_retrigger_irq(unsigned int irq)
 	unsigned long flags;
 
 	spin_lock_irqsave(&vector_lock, flags);
-	send_IPI_mask(cpumask_of_cpu(first_cpu(cfg->domain)), cfg->vector);
+	send_IPI_mask(&cpumask_of_cpu(first_cpu(cfg->domain)), cfg->vector);
 	spin_unlock_irqrestore(&vector_lock, flags);
 
 	return 1;
@@ -2170,18 +2171,19 @@ static DECLARE_DELAYED_WORK(ir_migration_work, ir_irq_migration);
  * as simple as edge triggered migration and we can do the irq migration
  * with a simple atomic update to IO-APIC RTE.
  */
-static void migrate_ioapic_irq_desc(struct irq_desc *desc, cpumask_t mask)
+static void
+migrate_ioapic_irq_desc(struct irq_desc *desc, const struct cpumask *mask)
 {
 	struct irq_cfg *cfg;
-	cpumask_t tmp, cleanup_mask;
+	cpumask_t tmpmask;
 	struct irte irte;
 	int modify_ioapic_rte;
 	unsigned int dest;
 	unsigned long flags;
 	unsigned int irq;
 
-	cpus_and(tmp, mask, cpu_online_map);
-	if (cpus_empty(tmp))
+	cpus_and(tmpmask, *mask, cpu_online_map);
+	if (cpus_empty(tmpmask))
 		return;
 
 	irq = desc->irq;
@@ -2194,8 +2196,8 @@ static void migrate_ioapic_irq_desc(struct irq_desc *desc, cpumask_t mask)
 
 	set_extra_move_desc(desc, mask);
 
-	cpus_and(tmp, cfg->domain, mask);
-	dest = cpu_mask_to_apicid(tmp);
+	cpus_and(tmpmask, cfg->domain, *mask);
+	dest = cpu_mask_to_apicid(&tmpmask);
 
 	modify_ioapic_rte = desc->status & IRQ_LEVEL;
 	if (modify_ioapic_rte) {
@@ -2213,13 +2215,13 @@ static void migrate_ioapic_irq_desc(struct irq_desc *desc, cpumask_t mask)
 	modify_irte(irq, &irte);
 
 	if (cfg->move_in_progress) {
-		cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map);
-		cfg->move_cleanup_count = cpus_weight(cleanup_mask);
-		send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
+		cpus_and(tmpmask, cfg->old_domain, cpu_online_map);
+		cfg->move_cleanup_count = cpus_weight(tmpmask);
+		send_IPI_mask(&tmpmask, IRQ_MOVE_CLEANUP_VECTOR);
 		cfg->move_in_progress = 0;
 	}
 
-	desc->affinity = mask;
+	desc->affinity = *mask;
 }
 
 static int migrate_irq_remapped_level_desc(struct irq_desc *desc)
@@ -2241,7 +2243,7 @@ static int migrate_irq_remapped_level_desc(struct irq_desc *desc)
 	}
 
 	/* everthing is clear. we have right of way */
-	migrate_ioapic_irq_desc(desc, desc->pending_mask);
+	migrate_ioapic_irq_desc(desc, &desc->pending_mask);
 
 	ret = 0;
 	desc->status &= ~IRQ_MOVE_PENDING;
@@ -2292,7 +2294,7 @@ static void set_ir_ioapic_affinity_irq_desc(struct irq_desc *desc,
 		return;
 	}
 
-	migrate_ioapic_irq_desc(desc, *mask);
+	migrate_ioapic_irq_desc(desc, mask);
 }
 static void set_ir_ioapic_affinity_irq(unsigned int irq,
 				       const struct cpumask *mask)
@@ -2359,7 +2361,7 @@ static void irq_complete_move(struct irq_desc **descp)
 
 		cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map);
 		cfg->move_cleanup_count = cpus_weight(cleanup_mask);
-		send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
+		send_IPI_mask(&cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
 		cfg->move_in_progress = 0;
 	}
 }
@@ -3089,13 +3091,13 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_ms
 	cpumask_t tmp;
 
 	cfg = irq_cfg(irq);
-	tmp = TARGET_CPUS;
-	err = assign_irq_vector(irq, cfg, tmp);
+	tmp = *TARGET_CPUS;
+	err = assign_irq_vector(irq, cfg, &tmp);
 	if (err)
 		return err;
 
 	cpus_and(tmp, cfg->domain, tmp);
-	dest = cpu_mask_to_apicid(tmp);
+	dest = cpu_mask_to_apicid(&tmp);
 
 #ifdef CONFIG_INTR_REMAP
 	if (irq_remapped(irq)) {
@@ -3161,13 +3163,13 @@ static void set_msi_irq_affinity(unsigned int irq, const struct cpumask *mask)
 		return;
 
 	cfg = desc->chip_data;
-	if (assign_irq_vector(irq, cfg, *mask))
+	if (assign_irq_vector(irq, cfg, mask))
 		return;
 
-	set_extra_move_desc(desc, *mask);
+	set_extra_move_desc(desc, mask);
 
 	cpumask_and(&tmp, &cfg->domain, mask);
-	dest = cpu_mask_to_apicid(tmp);
+	dest = cpu_mask_to_apicid(&tmp);
 
 	read_msi_msg_desc(desc, &msg);
 
@@ -3184,8 +3186,8 @@ static void set_msi_irq_affinity(unsigned int irq, const struct cpumask *mask)
  * Migrate the MSI irq to another cpumask. This migration is
  * done in the process context using interrupt-remapping hardware.
  */
-static void ir_set_msi_irq_affinity(unsigned int irq,
-				    const struct cpumask *mask)
+static void
+ir_set_msi_irq_affinity(unsigned int irq, const struct cpumask *mask)
 {
 	struct irq_desc *desc = irq_to_desc(irq);
 	struct irq_cfg *cfg;
@@ -3200,13 +3202,13 @@ static void ir_set_msi_irq_affinity(unsigned int irq,
 		return;
 
 	cfg = desc->chip_data;
-	if (assign_irq_vector(irq, cfg, *mask))
+	if (assign_irq_vector(irq, cfg, mask))
 		return;
 
-	set_extra_move_desc(desc, *mask);
+	set_extra_move_desc(desc, mask);
 
 	cpumask_and(&tmp, &cfg->domain, mask);
-	dest = cpu_mask_to_apicid(tmp);
+	dest = cpu_mask_to_apicid(&tmp);
 
 	irte.vector = cfg->vector;
 	irte.dest_id = IRTE_DEST(dest);
@@ -3224,7 +3226,7 @@ static void ir_set_msi_irq_affinity(unsigned int irq,
 	if (cfg->move_in_progress) {
 		cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map);
 		cfg->move_cleanup_count = cpus_weight(cleanup_mask);
-		send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
+		send_IPI_mask(&cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
 		cfg->move_in_progress = 0;
 	}
 
@@ -3419,7 +3421,7 @@ void arch_teardown_msi_irq(unsigned int irq)
 
 #ifdef CONFIG_DMAR
 #ifdef CONFIG_SMP
-static void dmar_msi_set_affinity(unsigned int irq, const struct cpumask *mask)
+static void dmar_msi_set_affinity(unsigned int irq, const cpumask_t *mask)
 {
 	struct irq_desc *desc = irq_to_desc(irq);
 	struct irq_cfg *cfg;
@@ -3431,13 +3433,13 @@ static void dmar_msi_set_affinity(unsigned int irq, const struct cpumask *mask)
 		return;
 
 	cfg = desc->chip_data;
-	if (assign_irq_vector(irq, cfg, *mask))
+	if (assign_irq_vector(irq, cfg, mask))
 		return;
 
-	set_extra_move_desc(desc, *mask);
+	set_extra_move_desc(desc, mask);
 
 	cpumask_and(&tmp, &cfg->domain, mask);
-	dest = cpu_mask_to_apicid(tmp);
+	dest = cpu_mask_to_apicid(&tmp);
 
 	dmar_msi_read(irq, &msg);
 
@@ -3481,7 +3483,7 @@ int arch_setup_dmar_msi(unsigned int irq)
 #ifdef CONFIG_HPET_TIMER
 
 #ifdef CONFIG_SMP
-static void hpet_msi_set_affinity(unsigned int irq, const struct cpumask *mask)
+static void hpet_msi_set_affinity(unsigned int irq, const cpumask_t *mask)
 {
 	struct irq_desc *desc = irq_to_desc(irq);
 	struct irq_cfg *cfg;
@@ -3493,13 +3495,13 @@ static void hpet_msi_set_affinity(unsigned int irq, const struct cpumask *mask)
 		return;
 
 	cfg = desc->chip_data;
-	if (assign_irq_vector(irq, cfg, *mask))
+	if (assign_irq_vector(irq, cfg, mask))
 		return;
 
-	set_extra_move_desc(desc, *mask);
+	set_extra_move_desc(desc, mask);
 
 	cpumask_and(&tmp, &cfg->domain, mask);
-	dest = cpu_mask_to_apicid(tmp);
+	dest = cpu_mask_to_apicid(&tmp);
 
 	hpet_msi_read(irq, &msg);
 
@@ -3564,7 +3566,7 @@ static void target_ht_irq(unsigned int irq, unsigned int dest, u8 vector)
 	write_ht_irq_msg(irq, &msg);
 }
 
-static void set_ht_irq_affinity(unsigned int irq, const struct cpumask *mask)
+static void set_ht_irq_affinity(unsigned int irq, const cpumask_t *mask)
 {
 	struct irq_desc *desc = irq_to_desc(irq);
 	struct irq_cfg *cfg;
@@ -3575,13 +3577,13 @@ static void set_ht_irq_affinity(unsigned int irq, const struct cpumask *mask)
 		return;
 
 	cfg = desc->chip_data;
-	if (assign_irq_vector(irq, cfg, *mask))
+	if (assign_irq_vector(irq, cfg, mask))
 		return;
 
-	set_extra_move_desc(desc, *mask);
+	set_extra_move_desc(desc, mask);
 
 	cpumask_and(&tmp, &cfg->domain, mask);
-	dest = cpu_mask_to_apicid(tmp);
+	dest = cpu_mask_to_apicid(&tmp);
 
 	target_ht_irq(irq, dest, cfg->vector);
 	cpumask_copy(&desc->affinity, mask);
@@ -3607,14 +3609,13 @@ int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev)
 	cpumask_t tmp;
 
 	cfg = irq_cfg(irq);
-	tmp = TARGET_CPUS;
-	err = assign_irq_vector(irq, cfg, tmp);
+	err = assign_irq_vector(irq, cfg, TARGET_CPUS);
 	if (!err) {
 		struct ht_irq_msg msg;
 		unsigned dest;
 
 		cpus_and(tmp, cfg->domain, tmp);
-		dest = cpu_mask_to_apicid(tmp);
+		dest = cpu_mask_to_apicid(&tmp);
 
 		msg.address_hi = HT_IRQ_HIGH_DEST_ID(dest);
 
@@ -3650,7 +3651,7 @@ int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev)
 int arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade,
 		       unsigned long mmr_offset)
 {
-	const cpumask_t *eligible_cpu = get_cpu_mask(cpu);
+	const cpumask_t *eligible_cpu = &cpumask_of_cpu(cpu);
 	struct irq_cfg *cfg;
 	int mmr_pnode;
 	unsigned long mmr_value;
@@ -3660,7 +3661,7 @@ int arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade,
 
 	cfg = irq_cfg(irq);
 
-	err = assign_irq_vector(irq, cfg, *eligible_cpu);
+	err = assign_irq_vector(irq, cfg, eligible_cpu);
 	if (err != 0)
 		return err;
 
@@ -3679,7 +3680,7 @@ int arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade,
 	entry->polarity = 0;
 	entry->trigger = 0;
 	entry->mask = 0;
-	entry->dest = cpu_mask_to_apicid(*eligible_cpu);
+	entry->dest = cpu_mask_to_apicid(eligible_cpu);
 
 	mmr_pnode = uv_blade_to_pnode(mmr_blade);
 	uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value);
@@ -3890,7 +3891,7 @@ void __init setup_ioapic_dest(void)
 	int pin, ioapic, irq, irq_entry;
 	struct irq_desc *desc;
 	struct irq_cfg *cfg;
-	cpumask_t mask;
+	const cpumask_t *mask;
 
 	if (skip_ioapic_setup == 1)
 		return;
@@ -3921,16 +3922,16 @@ void __init setup_ioapic_dest(void)
 			 */
 			if (desc->status &
 			    (IRQ_NO_BALANCING | IRQ_AFFINITY_SET))
-				mask = desc->affinity;
+				mask = &desc->affinity;
 			else
 				mask = TARGET_CPUS;
 
 #ifdef CONFIG_INTR_REMAP
 			if (intr_remapping_enabled)
-				set_ir_ioapic_affinity_irq_desc(desc, &mask);
+				set_ir_ioapic_affinity_irq_desc(desc, mask);
 			else
 #endif
-				set_ioapic_affinity_irq_desc(desc, &mask);
+				set_ioapic_affinity_irq_desc(desc, mask);
 		}
 
 	}
diff --git a/arch/x86/kernel/ipi.c b/arch/x86/kernel/ipi.c
index f1c688e46f3..86aa50fc65a 100644
--- a/arch/x86/kernel/ipi.c
+++ b/arch/x86/kernel/ipi.c
@@ -116,9 +116,9 @@ static inline void __send_IPI_dest_field(unsigned long mask, int vector)
 /*
  * This is only used on smaller machines.
  */
-void send_IPI_mask_bitmask(cpumask_t cpumask, int vector)
+void send_IPI_mask_bitmask(const cpumask_t *cpumask, int vector)
 {
-	unsigned long mask = cpus_addr(cpumask)[0];
+	unsigned long mask = cpus_addr(*cpumask)[0];
 	unsigned long flags;
 
 	local_irq_save(flags);
@@ -127,7 +127,7 @@ void send_IPI_mask_bitmask(cpumask_t cpumask, int vector)
 	local_irq_restore(flags);
 }
 
-void send_IPI_mask_sequence(cpumask_t mask, int vector)
+void send_IPI_mask_sequence(const cpumask_t *mask, int vector)
 {
 	unsigned long flags;
 	unsigned int query_cpu;
@@ -139,12 +139,24 @@ void send_IPI_mask_sequence(cpumask_t mask, int vector)
 	 */
 
 	local_irq_save(flags);
-	for_each_possible_cpu(query_cpu) {
-		if (cpu_isset(query_cpu, mask)) {
+	for_each_cpu_mask_nr(query_cpu, *mask)
+		__send_IPI_dest_field(cpu_to_logical_apicid(query_cpu), vector);
+	local_irq_restore(flags);
+}
+
+void send_IPI_mask_allbutself(const cpumask_t *mask, int vector)
+{
+	unsigned long flags;
+	unsigned int query_cpu;
+	unsigned int this_cpu = smp_processor_id();
+
+	/* See Hack comment above */
+
+	local_irq_save(flags);
+	for_each_cpu_mask_nr(query_cpu, *mask)
+		if (query_cpu != this_cpu)
 			__send_IPI_dest_field(cpu_to_logical_apicid(query_cpu),
 					      vector);
-		}
-	}
 	local_irq_restore(flags);
 }
 
diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c
index 3f92b134ab9..341df946f9a 100644
--- a/arch/x86/kernel/smp.c
+++ b/arch/x86/kernel/smp.c
@@ -118,22 +118,22 @@ static void native_smp_send_reschedule(int cpu)
 		WARN_ON(1);
 		return;
 	}
-	send_IPI_mask(cpumask_of_cpu(cpu), RESCHEDULE_VECTOR);
+	send_IPI_mask(&cpumask_of_cpu(cpu), RESCHEDULE_VECTOR);
 }
 
 void native_send_call_func_single_ipi(int cpu)
 {
-	send_IPI_mask(cpumask_of_cpu(cpu), CALL_FUNCTION_SINGLE_VECTOR);
+	send_IPI_mask(&cpumask_of_cpu(cpu), CALL_FUNCTION_SINGLE_VECTOR);
 }
 
-void native_send_call_func_ipi(cpumask_t mask)
+void native_send_call_func_ipi(const cpumask_t *mask)
 {
 	cpumask_t allbutself;
 
 	allbutself = cpu_online_map;
 	cpu_clear(smp_processor_id(), allbutself);
 
-	if (cpus_equal(mask, allbutself) &&
+	if (cpus_equal(*mask, allbutself) &&
 	    cpus_equal(cpu_online_map, cpu_callout_map))
 		send_IPI_allbutself(CALL_FUNCTION_VECTOR);
 	else
diff --git a/arch/x86/kernel/tlb_32.c b/arch/x86/kernel/tlb_32.c
index f4049f3513b..174ea90d1cb 100644
--- a/arch/x86/kernel/tlb_32.c
+++ b/arch/x86/kernel/tlb_32.c
@@ -164,7 +164,7 @@ void native_flush_tlb_others(const cpumask_t *cpumaskp, struct mm_struct *mm,
 	 * We have to send the IPI only to
 	 * CPUs affected.
 	 */
-	send_IPI_mask(cpumask, INVALIDATE_TLB_VECTOR);
+	send_IPI_mask(&cpumask, INVALIDATE_TLB_VECTOR);
 
 	while (!cpus_empty(flush_cpumask))
 		/* nothing. lockup detection does not belong here */
diff --git a/arch/x86/kernel/tlb_64.c b/arch/x86/kernel/tlb_64.c
index 8f919ca6949..de6f1bda0c5 100644
--- a/arch/x86/kernel/tlb_64.c
+++ b/arch/x86/kernel/tlb_64.c
@@ -191,7 +191,7 @@ void native_flush_tlb_others(const cpumask_t *cpumaskp, struct mm_struct *mm,
 	 * We have to send the IPI only to
 	 * CPUs affected.
 	 */
-	send_IPI_mask(cpumask, INVALIDATE_TLB_VECTOR_START + sender);
+	send_IPI_mask(&cpumask, INVALIDATE_TLB_VECTOR_START + sender);
 
 	while (!cpus_empty(f->flush_cpumask))
 		cpu_relax();
diff --git a/arch/x86/mach-generic/bigsmp.c b/arch/x86/mach-generic/bigsmp.c
index 3624a364b7f..bc4c7840b2a 100644
--- a/arch/x86/mach-generic/bigsmp.c
+++ b/arch/x86/mach-generic/bigsmp.c
@@ -42,9 +42,10 @@ static const struct dmi_system_id bigsmp_dmi_table[] = {
 	 { }
 };
 
-static cpumask_t vector_allocation_domain(int cpu)
+static void vector_allocation_domain(int cpu, cpumask_t *retmask)
 {
-        return cpumask_of_cpu(cpu);
+	cpus_clear(*retmask);
+	cpu_set(cpu, *retmask);
 }
 
 static int probe_bigsmp(void)
diff --git a/arch/x86/mach-generic/es7000.c b/arch/x86/mach-generic/es7000.c
index 7b4e6d0d169..4ba5ccaa158 100644
--- a/arch/x86/mach-generic/es7000.c
+++ b/arch/x86/mach-generic/es7000.c
@@ -87,7 +87,7 @@ static int __init acpi_madt_oem_check(char *oem_id, char *oem_table_id)
 }
 #endif
 
-static cpumask_t vector_allocation_domain(int cpu)
+static void vector_allocation_domain(int cpu, cpumask_t *retmask)
 {
 	/* Careful. Some cpus do not strictly honor the set of cpus
 	 * specified in the interrupt destination when using lowest
@@ -97,8 +97,7 @@ static cpumask_t vector_allocation_domain(int cpu)
 	 * deliver interrupts to the wrong hyperthread when only one
 	 * hyperthread was specified in the interrupt desitination.
 	 */
-	cpumask_t domain = { { [0] = APIC_ALL_CPUS, } };
-	return domain;
+	*retmask = (cpumask_t){ { [0] = APIC_ALL_CPUS, } };
 }
 
 struct genapic __initdata_refok apic_es7000 = APIC_INIT("es7000", probe_es7000);
diff --git a/arch/x86/mach-generic/numaq.c b/arch/x86/mach-generic/numaq.c
index 71a309b122e..511d7941364 100644
--- a/arch/x86/mach-generic/numaq.c
+++ b/arch/x86/mach-generic/numaq.c
@@ -38,7 +38,7 @@ static int acpi_madt_oem_check(char *oem_id, char *oem_table_id)
 	return 0;
 }
 
-static cpumask_t vector_allocation_domain(int cpu)
+static void vector_allocation_domain(int cpu, cpumask_t *retmask)
 {
 	/* Careful. Some cpus do not strictly honor the set of cpus
 	 * specified in the interrupt destination when using lowest
@@ -48,8 +48,7 @@ static cpumask_t vector_allocation_domain(int cpu)
 	 * deliver interrupts to the wrong hyperthread when only one
 	 * hyperthread was specified in the interrupt desitination.
 	 */
-	cpumask_t domain = { { [0] = APIC_ALL_CPUS, } };
-	return domain;
+	*retmask = (cpumask_t){ { [0] = APIC_ALL_CPUS, } };
 }
 
 struct genapic apic_numaq = APIC_INIT("NUMAQ", probe_numaq);
diff --git a/arch/x86/mach-generic/summit.c b/arch/x86/mach-generic/summit.c
index 2c6d234e000..2821ffc188b 100644
--- a/arch/x86/mach-generic/summit.c
+++ b/arch/x86/mach-generic/summit.c
@@ -24,7 +24,7 @@ static int probe_summit(void)
 	return 0;
 }
 
-static cpumask_t vector_allocation_domain(int cpu)
+static void vector_allocation_domain(int cpu, cpumask_t *retmask)
 {
 	/* Careful. Some cpus do not strictly honor the set of cpus
 	 * specified in the interrupt destination when using lowest
@@ -34,8 +34,7 @@ static cpumask_t vector_allocation_domain(int cpu)
 	 * deliver interrupts to the wrong hyperthread when only one
 	 * hyperthread was specified in the interrupt desitination.
 	 */
-	cpumask_t domain = { { [0] = APIC_ALL_CPUS, } };
-	return domain;
+	*retmask = (cpumask_t){ { [0] = APIC_ALL_CPUS, } };
 }
 
 struct genapic apic_summit = APIC_INIT("summit", probe_summit);
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
index acd9b6705e0..2cce362c987 100644
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -158,7 +158,7 @@ static void __init xen_fill_possible_map(void)
 {
 	int i, rc;
 
-	for (i = 0; i < NR_CPUS; i++) {
+	for (i = 0; i < nr_cpu_ids; i++) {
 		rc = HYPERVISOR_vcpu_op(VCPUOP_is_up, i, NULL);
 		if (rc >= 0) {
 			num_processors++;
@@ -196,7 +196,7 @@ static void __init xen_smp_prepare_cpus(unsigned int max_cpus)
 
 	/* Restrict the possible_map according to max_cpus. */
 	while ((num_possible_cpus() > 1) && (num_possible_cpus() > max_cpus)) {
-		for (cpu = NR_CPUS - 1; !cpu_possible(cpu); cpu--)
+		for (cpu = nr_cpu_ids - 1; !cpu_possible(cpu); cpu--)
 			continue;
 		cpu_clear(cpu, cpu_possible_map);
 	}
@@ -408,24 +408,22 @@ static void xen_smp_send_reschedule(int cpu)
 	xen_send_IPI_one(cpu, XEN_RESCHEDULE_VECTOR);
 }
 
-static void xen_send_IPI_mask(cpumask_t mask, enum ipi_vector vector)
+static void xen_send_IPI_mask(const cpumask_t *mask, enum ipi_vector vector)
 {
 	unsigned cpu;
 
-	cpus_and(mask, mask, cpu_online_map);
-
-	for_each_cpu_mask_nr(cpu, mask)
+	for_each_cpu_and(cpu, mask, &cpu_online_map)
 		xen_send_IPI_one(cpu, vector);
 }
 
-static void xen_smp_send_call_function_ipi(cpumask_t mask)
+static void xen_smp_send_call_function_ipi(const cpumask_t *mask)
 {
 	int cpu;
 
 	xen_send_IPI_mask(mask, XEN_CALL_FUNCTION_VECTOR);
 
 	/* Make sure other vcpus get a chance to run if they need to. */
-	for_each_cpu_mask_nr(cpu, mask) {
+	for_each_cpu_mask_nr(cpu, *mask) {
 		if (xen_vcpu_stolen(cpu)) {
 			HYPERVISOR_sched_op(SCHEDOP_yield, 0);
 			break;
@@ -435,7 +433,8 @@ static void xen_smp_send_call_function_ipi(cpumask_t mask)
 
 static void xen_smp_send_call_function_single_ipi(int cpu)
 {
-	xen_send_IPI_mask(cpumask_of_cpu(cpu), XEN_CALL_FUNCTION_SINGLE_VECTOR);
+	xen_send_IPI_mask(&cpumask_of_cpu(cpu),
+			  XEN_CALL_FUNCTION_SINGLE_VECTOR);
 }
 
 static irqreturn_t xen_call_function_interrupt(int irq, void *dev_id)
-- 
cgit v1.2.3


From a1681965011916c2f1f0f1f87e70784f5d5d5be5 Mon Sep 17 00:00:00 2001
From: Mike Travis <travis@sgi.com>
Date: Tue, 16 Dec 2008 17:33:53 -0800
Subject: x86: move and enhance debug printk for nr_cpu_ids etc.

Impact: cleanup, better debugging

This has proven useful in debugging, *before* we try to use
for_each_possible_cpu().  It also now shows nr_cpumask_bits.

Signed-off-by: Mike Travis <travis@sgi.com>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
---
 arch/x86/kernel/setup_percpu.c | 17 +++++++++++------
 1 file changed, 11 insertions(+), 6 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c
index 1c2084291f9..0b63b08e753 100644
--- a/arch/x86/kernel/setup_percpu.c
+++ b/arch/x86/kernel/setup_percpu.c
@@ -152,6 +152,11 @@ void __init setup_per_cpu_areas(void)
 	old_size = PERCPU_ENOUGH_ROOM;
 	align = max_t(unsigned long, PAGE_SIZE, align);
 	size = roundup(old_size, align);
+
+	printk(KERN_INFO
+		"NR_CPUS:%d nr_cpumask_bits:%d nr_cpu_ids:%d nr_node_ids:%d\n",
+		NR_CPUS, nr_cpumask_bits, nr_cpu_ids, nr_node_ids);
+
 	printk(KERN_INFO "PERCPU: Allocating %zd bytes of per cpu data\n",
 			  size);
 
@@ -168,24 +173,24 @@ void __init setup_per_cpu_areas(void)
 			       "cpu %d has no node %d or node-local memory\n",
 				cpu, node);
 			if (ptr)
-				printk(KERN_DEBUG "per cpu data for cpu%d at %016lx\n",
+				printk(KERN_DEBUG
+					"per cpu data for cpu%d at %016lx\n",
 					 cpu, __pa(ptr));
 		}
 		else {
 			ptr = __alloc_bootmem_node(NODE_DATA(node), size, align,
 							__pa(MAX_DMA_ADDRESS));
 			if (ptr)
-				printk(KERN_DEBUG "per cpu data for cpu%d on node%d at %016lx\n",
-					 cpu, node, __pa(ptr));
+				printk(KERN_DEBUG
+					"per cpu data for cpu%d on node%d "
+					"at %016lx\n",
+					cpu, node, __pa(ptr));
 		}
 #endif
 		per_cpu_offset(cpu) = ptr - __per_cpu_start;
 		memcpy(ptr, __per_cpu_start, __per_cpu_end - __per_cpu_start);
 	}
 
-	printk(KERN_DEBUG "NR_CPUS: %d, nr_cpu_ids: %d, nr_node_ids %d\n",
-		NR_CPUS, nr_cpu_ids, nr_node_ids);
-
 	/* Setup percpu data maps */
 	setup_per_cpu_maps();
 
-- 
cgit v1.2.3


From 95d313cf1c1ecedc8bec5727b09bdacbf67dfc45 Mon Sep 17 00:00:00 2001
From: Mike Travis <travis@sgi.com>
Date: Tue, 16 Dec 2008 17:33:54 -0800
Subject: x86: Add cpu_mask_to_apicid_and

Impact: new API

Add a helper function that takes two cpumask's, and's them and then
returns the apicid of the result.  This removes a need in io_apic.c
that uses a temporary cpumask to hold (mask & cfg->domain).

Signed-off-by: Mike Travis <travis@sgi.com>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
---
 arch/x86/include/asm/bigsmp/apic.h            | 16 +++++++++
 arch/x86/include/asm/es7000/apic.h            | 47 +++++++++++++++++++++++++++
 arch/x86/include/asm/genapic_32.h             |  3 ++
 arch/x86/include/asm/genapic_64.h             |  2 ++
 arch/x86/include/asm/mach-default/mach_apic.h | 10 ++++++
 arch/x86/include/asm/mach-generic/mach_apic.h |  1 +
 arch/x86/include/asm/numaq/apic.h             |  6 ++++
 arch/x86/include/asm/summit/apic.h            | 39 ++++++++++++++++++++++
 arch/x86/kernel/genapic_flat_64.c             | 26 +++++++++++++++
 arch/x86/kernel/genx2apic_cluster.c           | 16 +++++++++
 arch/x86/kernel/genx2apic_phys.c              | 16 +++++++++
 arch/x86/kernel/genx2apic_uv_x.c              | 16 +++++++++
 12 files changed, 198 insertions(+)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/bigsmp/apic.h b/arch/x86/include/asm/bigsmp/apic.h
index dc6225ca48a..99f9abacf6a 100644
--- a/arch/x86/include/asm/bigsmp/apic.h
+++ b/arch/x86/include/asm/bigsmp/apic.h
@@ -129,6 +129,22 @@ static inline unsigned int cpu_mask_to_apicid(const cpumask_t *cpumask)
 	return apicid;
 }
 
+static inline unsigned int cpu_mask_to_apicid_and(const cpumask_t *cpumask,
+						  const cpumask_t *andmask)
+{
+	int cpu;
+
+	/*
+	 * We're using fixed IRQ delivery, can only return one phys APIC ID.
+	 * May as well be the first.
+	 */
+	while ((cpu = next_cpu(-1, *cpumask)) < nr_cpu_ids)
+		if (cpu_isset(cpu, *andmask))
+			return cpu_to_logical_apicid(cpu);
+
+	return BAD_APICID;
+}
+
 static inline u32 phys_pkg_id(u32 cpuid_apic, int index_msb)
 {
 	return cpuid_apic >> index_msb;
diff --git a/arch/x86/include/asm/es7000/apic.h b/arch/x86/include/asm/es7000/apic.h
index 4cac0837bb4..c2bed772af8 100644
--- a/arch/x86/include/asm/es7000/apic.h
+++ b/arch/x86/include/asm/es7000/apic.h
@@ -214,6 +214,53 @@ static inline unsigned int cpu_mask_to_apicid(const cpumask_t *cpumask)
 	return apicid;
 }
 
+static inline unsigned int cpu_mask_to_apicid_and(const cpumask_t *cpumask,
+						  const cpumask_t *andmask)
+{
+	int num_bits_set;
+	int num_bits_set2;
+	int cpus_found = 0;
+	int cpu;
+	int apicid = 0;
+
+	num_bits_set = cpus_weight(*cpumask);
+	num_bits_set2 = cpus_weight(*andmask);
+	num_bits_set = min_t(int, num_bits_set, num_bits_set2);
+	/* Return id to all */
+	if (num_bits_set >= nr_cpu_ids)
+#if defined CONFIG_ES7000_CLUSTERED_APIC
+		return 0xFF;
+#else
+		return cpu_to_logical_apicid(0);
+#endif
+	/*
+	 * The cpus in the mask must all be on the apic cluster.  If are not
+	 * on the same apicid cluster return default value of TARGET_CPUS.
+	 */
+	while ((cpu = next_cpu(-1, *cpumask)) < nr_cpu_ids)
+		if (cpu_isset(cpu, *andmask)
+			apicid = cpu_to_logical_apicid(cpu);
+	while (cpus_found < num_bits_set) {
+		if (cpu_isset(cpu, *cpumask) && cpu_isset(cpu, *andmask)) {
+			int new_apicid = cpu_to_logical_apicid(cpu);
+			if (apicid_cluster(apicid) !=
+					apicid_cluster(new_apicid)) {
+				printk(KERN_WARNING
+					"%s: Not a valid mask!\n", __func__);
+#if defined CONFIG_ES7000_CLUSTERED_APIC
+				return 0xFF;
+#else
+				return cpu_to_logical_apicid(0);
+#endif
+			}
+			apicid = new_apicid;
+			cpus_found++;
+		}
+		cpu++;
+	}
+	return apicid;
+}
+
 static inline u32 phys_pkg_id(u32 cpuid_apic, int index_msb)
 {
 	return cpuid_apic >> index_msb;
diff --git a/arch/x86/include/asm/genapic_32.h b/arch/x86/include/asm/genapic_32.h
index b21ed21c574..325298a8223 100644
--- a/arch/x86/include/asm/genapic_32.h
+++ b/arch/x86/include/asm/genapic_32.h
@@ -58,6 +58,8 @@ struct genapic {
 	unsigned (*get_apic_id)(unsigned long x);
 	unsigned long apic_id_mask;
 	unsigned int (*cpu_mask_to_apicid)(const cpumask_t *cpumask);
+	unsigned int (*cpu_mask_to_apicid_and)(const cpumask_t *cpumask,
+					       const cpumask_t *andmask);
 	void (*vector_allocation_domain)(int cpu, cpumask_t *retmask);
 
 #ifdef CONFIG_SMP
@@ -115,6 +117,7 @@ struct genapic {
 	APICFUNC(get_apic_id)				\
 	.apic_id_mask = APIC_ID_MASK,			\
 	APICFUNC(cpu_mask_to_apicid)			\
+	APICFUNC(cpu_mask_to_apicid_and)		\
 	APICFUNC(vector_allocation_domain)		\
 	APICFUNC(acpi_madt_oem_check)			\
 	IPIFUNC(send_IPI_mask)				\
diff --git a/arch/x86/include/asm/genapic_64.h b/arch/x86/include/asm/genapic_64.h
index a020e7d35a4..301c7f41125 100644
--- a/arch/x86/include/asm/genapic_64.h
+++ b/arch/x86/include/asm/genapic_64.h
@@ -31,6 +31,8 @@ struct genapic {
 	void (*send_IPI_self)(int vector);
 	/* */
 	unsigned int (*cpu_mask_to_apicid)(const cpumask_t *cpumask);
+	unsigned int (*cpu_mask_to_apicid_and)(const cpumask_t *cpumask,
+					       const cpumask_t *andmask);
 	unsigned int (*phys_pkg_id)(int index_msb);
 	unsigned int (*get_apic_id)(unsigned long x);
 	unsigned long (*set_apic_id)(unsigned int id);
diff --git a/arch/x86/include/asm/mach-default/mach_apic.h b/arch/x86/include/asm/mach-default/mach_apic.h
index c18896b0508..229b605d104 100644
--- a/arch/x86/include/asm/mach-default/mach_apic.h
+++ b/arch/x86/include/asm/mach-default/mach_apic.h
@@ -28,6 +28,7 @@ static inline const cpumask_t *target_cpus(void)
 #define apic_id_registered (genapic->apic_id_registered)
 #define init_apic_ldr (genapic->init_apic_ldr)
 #define cpu_mask_to_apicid (genapic->cpu_mask_to_apicid)
+#define cpu_mask_to_apicid_and (genapic->cpu_mask_to_apicid_and)
 #define phys_pkg_id	(genapic->phys_pkg_id)
 #define vector_allocation_domain    (genapic->vector_allocation_domain)
 #define read_apic_id()  (GET_APIC_ID(apic_read(APIC_ID)))
@@ -66,6 +67,15 @@ static inline unsigned int cpu_mask_to_apicid(const cpumask_t *cpumask)
 	return cpus_addr(*cpumask)[0];
 }
 
+static inline unsigned int cpu_mask_to_apicid(const cpumask_t *cpumask,
+					      const cpumask_t *andmask)
+{
+	unsigned long mask1 = cpus_addr(*cpumask)[0];
+	unsigned long mask2 = cpus_addr(*andmask)[0];
+
+	return (unsigned int)(mask1 & mask2);
+}
+
 static inline u32 phys_pkg_id(u32 cpuid_apic, int index_msb)
 {
 	return cpuid_apic >> index_msb;
diff --git a/arch/x86/include/asm/mach-generic/mach_apic.h b/arch/x86/include/asm/mach-generic/mach_apic.h
index e430f47df66..48553e958ad 100644
--- a/arch/x86/include/asm/mach-generic/mach_apic.h
+++ b/arch/x86/include/asm/mach-generic/mach_apic.h
@@ -24,6 +24,7 @@
 #define check_phys_apicid_present (genapic->check_phys_apicid_present)
 #define check_apicid_used (genapic->check_apicid_used)
 #define cpu_mask_to_apicid (genapic->cpu_mask_to_apicid)
+#define cpu_mask_to_apicid_and (genapic->cpu_mask_to_apicid_and)
 #define vector_allocation_domain (genapic->vector_allocation_domain)
 #define enable_apic_mode (genapic->enable_apic_mode)
 #define phys_pkg_id (genapic->phys_pkg_id)
diff --git a/arch/x86/include/asm/numaq/apic.h b/arch/x86/include/asm/numaq/apic.h
index 1df7ebe738e..abf668ced50 100644
--- a/arch/x86/include/asm/numaq/apic.h
+++ b/arch/x86/include/asm/numaq/apic.h
@@ -127,6 +127,12 @@ static inline unsigned int cpu_mask_to_apicid(const cpumask_t *cpumask)
 	return (int) 0xF;
 }
 
+static inline unsigned int cpu_mask_to_apicid_and(const cpumask_t *cpumask,
+						  const cpumask_t *andmask)
+{
+	return (int) 0xF;
+}
+
 /* No NUMA-Q box has a HT CPU, but it can't hurt to use the default code. */
 static inline u32 phys_pkg_id(u32 cpuid_apic, int index_msb)
 {
diff --git a/arch/x86/include/asm/summit/apic.h b/arch/x86/include/asm/summit/apic.h
index 437dc83725c..cbcc2c7eb1d 100644
--- a/arch/x86/include/asm/summit/apic.h
+++ b/arch/x86/include/asm/summit/apic.h
@@ -170,6 +170,45 @@ static inline unsigned int cpu_mask_to_apicid(const cpumask_t *cpumask)
 	return apicid;
 }
 
+static inline unsigned int cpu_mask_to_apicid_and(const cpumask_t *cpumask,
+						  const cpumask_t *andmask)
+{
+	int num_bits_set;
+	int num_bits_set2;
+	int cpus_found = 0;
+	int cpu;
+	int apicid = 0;
+
+	num_bits_set = cpus_weight(*cpumask);
+	num_bits_set2 = cpus_weight(*andmask);
+	num_bits_set = min_t(int, num_bits_set, num_bits_set2);
+	/* Return id to all */
+	if (num_bits_set >= nr_cpu_ids)
+		return 0xFF;
+	/*
+	 * The cpus in the mask must all be on the apic cluster.  If are not
+	 * on the same apicid cluster return default value of TARGET_CPUS.
+	 */
+	while ((cpu = next_cpu(-1, *cpumask)) < nr_cpu_ids)
+		if (cpu_isset(cpu, *andmask)
+			apicid = cpu_to_logical_apicid(cpu);
+	while (cpus_found < num_bits_set) {
+		if (cpu_isset(cpu, *cpumask) && cpu_isset(cpu, *andmask)) {
+			int new_apicid = cpu_to_logical_apicid(cpu);
+			if (apicid_cluster(apicid) !=
+					apicid_cluster(new_apicid)) {
+				printk(KERN_WARNING
+					"%s: Not a valid mask!\n", __func__);
+				return 0xFF;
+			}
+			apicid = apicid | new_apicid;
+			cpus_found++;
+		}
+		cpu++;
+	}
+	return apicid;
+}
+
 /* cpuid returns the value latched in the HW at reset, not the APIC ID
  * register's value.  For any box whose BIOS changes APIC IDs, like
  * clustered APIC systems, we must use hard_smp_processor_id.
diff --git a/arch/x86/kernel/genapic_flat_64.c b/arch/x86/kernel/genapic_flat_64.c
index 50eebd0328f..1efecd206a7 100644
--- a/arch/x86/kernel/genapic_flat_64.c
+++ b/arch/x86/kernel/genapic_flat_64.c
@@ -158,6 +158,15 @@ static unsigned int flat_cpu_mask_to_apicid(const cpumask_t *cpumask)
 	return cpus_addr(*cpumask)[0] & APIC_ALL_CPUS;
 }
 
+static unsigned int flat_cpu_mask_to_apicid_and(const cpumask_t *cpumask,
+						const cpumask_t *andmask)
+{
+	unsigned long mask1 = cpus_addr(*cpumask)[0] & APIC_ALL_CPUS;
+	unsigned long mask2 = cpus_addr(*andmask)[0] & APIC_ALL_CPUS;
+
+	return (int)(mask1 & mask2);
+}
+
 static unsigned int phys_pkg_id(int index_msb)
 {
 	return hard_smp_processor_id() >> index_msb;
@@ -178,6 +187,7 @@ struct genapic apic_flat =  {
 	.send_IPI_mask_allbutself = flat_send_IPI_mask_allbutself,
 	.send_IPI_self = apic_send_IPI_self,
 	.cpu_mask_to_apicid = flat_cpu_mask_to_apicid,
+	.cpu_mask_to_apicid_and = flat_cpu_mask_to_apicid_and,
 	.phys_pkg_id = phys_pkg_id,
 	.get_apic_id = get_apic_id,
 	.set_apic_id = set_apic_id,
@@ -254,6 +264,21 @@ static unsigned int physflat_cpu_mask_to_apicid(const cpumask_t *cpumask)
 		return BAD_APICID;
 }
 
+static unsigned int physflat_cpu_mask_to_apicid_and(const cpumask_t *cpumask,
+						    const cpumask_t *andmask)
+{
+	int cpu;
+
+	/*
+	 * We're using fixed IRQ delivery, can only return one phys APIC ID.
+	 * May as well be the first.
+	 */
+	while ((cpu = next_cpu(-1, *cpumask)) < nr_cpu_ids)
+		if (cpu_isset(cpu, *andmask))
+			return per_cpu(x86_cpu_to_apicid, cpu);
+	return BAD_APICID;
+}
+
 struct genapic apic_physflat =  {
 	.name = "physical flat",
 	.acpi_madt_oem_check = physflat_acpi_madt_oem_check,
@@ -269,6 +294,7 @@ struct genapic apic_physflat =  {
 	.send_IPI_mask_allbutself = physflat_send_IPI_mask_allbutself,
 	.send_IPI_self = apic_send_IPI_self,
 	.cpu_mask_to_apicid = physflat_cpu_mask_to_apicid,
+	.cpu_mask_to_apicid_and = physflat_cpu_mask_to_apicid_and,
 	.phys_pkg_id = phys_pkg_id,
 	.get_apic_id = get_apic_id,
 	.set_apic_id = set_apic_id,
diff --git a/arch/x86/kernel/genx2apic_cluster.c b/arch/x86/kernel/genx2apic_cluster.c
index f5fa9a91ad3..fd8047f4e45 100644
--- a/arch/x86/kernel/genx2apic_cluster.c
+++ b/arch/x86/kernel/genx2apic_cluster.c
@@ -123,6 +123,21 @@ static unsigned int x2apic_cpu_mask_to_apicid(const cpumask_t *cpumask)
 		return BAD_APICID;
 }
 
+static unsigned int x2apic_cpu_mask_to_apicid_and(const cpumask_t *cpumask,
+						  const cpumask_t *andmask)
+{
+	int cpu;
+
+	/*
+	 * We're using fixed IRQ delivery, can only return one phys APIC ID.
+	 * May as well be the first.
+	 */
+	while ((cpu = next_cpu(-1, *cpumask)) < nr_cpu_ids)
+		if (cpu_isset(cpu, *andmask))
+			return per_cpu(x86_cpu_to_apicid, cpu);
+	return BAD_APICID;
+}
+
 static unsigned int get_apic_id(unsigned long x)
 {
 	unsigned int id;
@@ -172,6 +187,7 @@ struct genapic apic_x2apic_cluster = {
 	.send_IPI_mask_allbutself = x2apic_send_IPI_mask_allbutself,
 	.send_IPI_self = x2apic_send_IPI_self,
 	.cpu_mask_to_apicid = x2apic_cpu_mask_to_apicid,
+	.cpu_mask_to_apicid_and = x2apic_cpu_mask_to_apicid_and,
 	.phys_pkg_id = phys_pkg_id,
 	.get_apic_id = get_apic_id,
 	.set_apic_id = set_apic_id,
diff --git a/arch/x86/kernel/genx2apic_phys.c b/arch/x86/kernel/genx2apic_phys.c
index 41c27b2f3d0..d5578bb8f16 100644
--- a/arch/x86/kernel/genx2apic_phys.c
+++ b/arch/x86/kernel/genx2apic_phys.c
@@ -122,6 +122,21 @@ static unsigned int x2apic_cpu_mask_to_apicid(const cpumask_t *cpumask)
 		return BAD_APICID;
 }
 
+static unsigned int x2apic_cpu_mask_to_apicid_and(const cpumask_t *cpumask,
+						  const cpumask_t *andmask)
+{
+	int cpu;
+
+	/*
+	 * We're using fixed IRQ delivery, can only return one phys APIC ID.
+	 * May as well be the first.
+	 */
+	while ((cpu = next_cpu(-1, *cpumask)) < nr_cpu_ids)
+		if (cpu_isset(cpu, *andmask))
+			return per_cpu(x86_cpu_to_apicid, cpu);
+	return BAD_APICID;
+}
+
 static unsigned int get_apic_id(unsigned long x)
 {
 	unsigned int id;
@@ -168,6 +183,7 @@ struct genapic apic_x2apic_phys = {
 	.send_IPI_mask_allbutself = x2apic_send_IPI_mask_allbutself,
 	.send_IPI_self = x2apic_send_IPI_self,
 	.cpu_mask_to_apicid = x2apic_cpu_mask_to_apicid,
+	.cpu_mask_to_apicid_and = x2apic_cpu_mask_to_apicid_and,
 	.phys_pkg_id = phys_pkg_id,
 	.get_apic_id = get_apic_id,
 	.set_apic_id = set_apic_id,
diff --git a/arch/x86/kernel/genx2apic_uv_x.c b/arch/x86/kernel/genx2apic_uv_x.c
index 010659415ae..53bd2570272 100644
--- a/arch/x86/kernel/genx2apic_uv_x.c
+++ b/arch/x86/kernel/genx2apic_uv_x.c
@@ -179,6 +179,21 @@ static unsigned int uv_cpu_mask_to_apicid(const cpumask_t *cpumask)
 		return BAD_APICID;
 }
 
+static unsigned int uv_cpu_mask_to_apicid_and(const cpumask_t *cpumask,
+					      const cpumask_t *andmask)
+{
+	int cpu;
+
+	/*
+	 * We're using fixed IRQ delivery, can only return one phys APIC ID.
+	 * May as well be the first.
+	 */
+	while ((cpu = next_cpu(-1, *cpumask)) < nr_cpu_ids)
+		if (cpu_isset(cpu, *andmask))
+			return per_cpu(x86_cpu_to_apicid, cpu);
+	return BAD_APICID;
+}
+
 static unsigned int get_apic_id(unsigned long x)
 {
 	unsigned int id;
@@ -229,6 +244,7 @@ struct genapic apic_x2apic_uv_x = {
 	.send_IPI_mask_allbutself = uv_send_IPI_mask_allbutself,
 	.send_IPI_self = uv_send_IPI_self,
 	.cpu_mask_to_apicid = uv_cpu_mask_to_apicid,
+	.cpu_mask_to_apicid_and = uv_cpu_mask_to_apicid_and,
 	.phys_pkg_id = phys_pkg_id,
 	.get_apic_id = get_apic_id,
 	.set_apic_id = set_apic_id,
-- 
cgit v1.2.3


From 6eeb7c5a99434596c5953a95baa17d2f085664e3 Mon Sep 17 00:00:00 2001
From: Mike Travis <travis@sgi.com>
Date: Tue, 16 Dec 2008 17:33:55 -0800
Subject: x86: update add-cpu_mask_to_apicid_and to use struct cpumask*

Impact: use updated APIs

Various API updates for x86:add-cpu_mask_to_apicid_and

(Note: separate because previous patch has been "backported" to 2.6.27.)

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Mike Travis <travis@sgi.com>
---
 arch/x86/include/asm/bigsmp/apic.h            | 10 +++++-----
 arch/x86/include/asm/es7000/apic.h            | 19 ++++++++++---------
 arch/x86/include/asm/genapic_32.h             |  4 ++--
 arch/x86/include/asm/genapic_64.h             |  4 ++--
 arch/x86/include/asm/mach-default/mach_apic.h |  8 ++++----
 arch/x86/include/asm/numaq/apic.h             |  4 ++--
 arch/x86/include/asm/summit/apic.h            | 18 +++++++++---------
 arch/x86/kernel/genapic_flat_64.c             | 21 +++++++++++----------
 arch/x86/kernel/genx2apic_cluster.c           | 10 +++++-----
 arch/x86/kernel/genx2apic_phys.c              | 10 +++++-----
 arch/x86/kernel/genx2apic_uv_x.c              | 10 +++++-----
 11 files changed, 60 insertions(+), 58 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/bigsmp/apic.h b/arch/x86/include/asm/bigsmp/apic.h
index 99f9abacf6a..976399debb3 100644
--- a/arch/x86/include/asm/bigsmp/apic.h
+++ b/arch/x86/include/asm/bigsmp/apic.h
@@ -129,8 +129,8 @@ static inline unsigned int cpu_mask_to_apicid(const cpumask_t *cpumask)
 	return apicid;
 }
 
-static inline unsigned int cpu_mask_to_apicid_and(const cpumask_t *cpumask,
-						  const cpumask_t *andmask)
+static inline unsigned int cpu_mask_to_apicid_and(const struct cpumask *cpumask,
+						  const struct cpumask *andmask)
 {
 	int cpu;
 
@@ -138,9 +138,9 @@ static inline unsigned int cpu_mask_to_apicid_and(const cpumask_t *cpumask,
 	 * We're using fixed IRQ delivery, can only return one phys APIC ID.
 	 * May as well be the first.
 	 */
-	while ((cpu = next_cpu(-1, *cpumask)) < nr_cpu_ids)
-		if (cpu_isset(cpu, *andmask))
-			return cpu_to_logical_apicid(cpu);
+	cpu = cpumask_any_and(cpumask, andmask);
+	if (cpu < nr_cpu_ids)
+		return cpu_to_logical_apicid(cpu);
 
 	return BAD_APICID;
 }
diff --git a/arch/x86/include/asm/es7000/apic.h b/arch/x86/include/asm/es7000/apic.h
index c2bed772af8..ba8423c5363 100644
--- a/arch/x86/include/asm/es7000/apic.h
+++ b/arch/x86/include/asm/es7000/apic.h
@@ -214,8 +214,8 @@ static inline unsigned int cpu_mask_to_apicid(const cpumask_t *cpumask)
 	return apicid;
 }
 
-static inline unsigned int cpu_mask_to_apicid_and(const cpumask_t *cpumask,
-						  const cpumask_t *andmask)
+static inline unsigned int cpu_mask_to_apicid_and(const struct cpumask *cpumask,
+						  const struct cpumask *andmask)
 {
 	int num_bits_set;
 	int num_bits_set2;
@@ -223,9 +223,9 @@ static inline unsigned int cpu_mask_to_apicid_and(const cpumask_t *cpumask,
 	int cpu;
 	int apicid = 0;
 
-	num_bits_set = cpus_weight(*cpumask);
-	num_bits_set2 = cpus_weight(*andmask);
-	num_bits_set = min_t(int, num_bits_set, num_bits_set2);
+	num_bits_set = cpumask_weight(cpumask);
+	num_bits_set2 = cpumask_weight(andmask);
+	num_bits_set = min(num_bits_set, num_bits_set2);
 	/* Return id to all */
 	if (num_bits_set >= nr_cpu_ids)
 #if defined CONFIG_ES7000_CLUSTERED_APIC
@@ -237,11 +237,12 @@ static inline unsigned int cpu_mask_to_apicid_and(const cpumask_t *cpumask,
 	 * The cpus in the mask must all be on the apic cluster.  If are not
 	 * on the same apicid cluster return default value of TARGET_CPUS.
 	 */
-	while ((cpu = next_cpu(-1, *cpumask)) < nr_cpu_ids)
-		if (cpu_isset(cpu, *andmask)
-			apicid = cpu_to_logical_apicid(cpu);
+	cpu = cpumask_first_and(cpumask, andmask);
+	apicid = cpu_to_logical_apicid(cpu);
+
 	while (cpus_found < num_bits_set) {
-		if (cpu_isset(cpu, *cpumask) && cpu_isset(cpu, *andmask)) {
+		if (cpumask_test_cpu(cpu, cpumask) &&
+		    cpumask_test_cpu(cpu, andmask)) {
 			int new_apicid = cpu_to_logical_apicid(cpu);
 			if (apicid_cluster(apicid) !=
 					apicid_cluster(new_apicid)) {
diff --git a/arch/x86/include/asm/genapic_32.h b/arch/x86/include/asm/genapic_32.h
index 325298a8223..eed6e305291 100644
--- a/arch/x86/include/asm/genapic_32.h
+++ b/arch/x86/include/asm/genapic_32.h
@@ -58,8 +58,8 @@ struct genapic {
 	unsigned (*get_apic_id)(unsigned long x);
 	unsigned long apic_id_mask;
 	unsigned int (*cpu_mask_to_apicid)(const cpumask_t *cpumask);
-	unsigned int (*cpu_mask_to_apicid_and)(const cpumask_t *cpumask,
-					       const cpumask_t *andmask);
+	unsigned int (*cpu_mask_to_apicid_and)(const struct cpumask *cpumask,
+					       const struct cpumask *andmask);
 	void (*vector_allocation_domain)(int cpu, cpumask_t *retmask);
 
 #ifdef CONFIG_SMP
diff --git a/arch/x86/include/asm/genapic_64.h b/arch/x86/include/asm/genapic_64.h
index 301c7f41125..244b71729ec 100644
--- a/arch/x86/include/asm/genapic_64.h
+++ b/arch/x86/include/asm/genapic_64.h
@@ -31,8 +31,8 @@ struct genapic {
 	void (*send_IPI_self)(int vector);
 	/* */
 	unsigned int (*cpu_mask_to_apicid)(const cpumask_t *cpumask);
-	unsigned int (*cpu_mask_to_apicid_and)(const cpumask_t *cpumask,
-					       const cpumask_t *andmask);
+	unsigned int (*cpu_mask_to_apicid_and)(const struct cpumask *cpumask,
+					       const struct cpumask *andmask);
 	unsigned int (*phys_pkg_id)(int index_msb);
 	unsigned int (*get_apic_id)(unsigned long x);
 	unsigned long (*set_apic_id)(unsigned int id);
diff --git a/arch/x86/include/asm/mach-default/mach_apic.h b/arch/x86/include/asm/mach-default/mach_apic.h
index 229b605d104..df8e024c43c 100644
--- a/arch/x86/include/asm/mach-default/mach_apic.h
+++ b/arch/x86/include/asm/mach-default/mach_apic.h
@@ -67,11 +67,11 @@ static inline unsigned int cpu_mask_to_apicid(const cpumask_t *cpumask)
 	return cpus_addr(*cpumask)[0];
 }
 
-static inline unsigned int cpu_mask_to_apicid(const cpumask_t *cpumask,
-					      const cpumask_t *andmask)
+static inline unsigned int cpu_mask_to_apicid_and(const struct cpumask *cpumask,
+						  const struct cpumask *andmask)
 {
-	unsigned long mask1 = cpus_addr(*cpumask)[0];
-	unsigned long mask2 = cpus_addr(*andmask)[0];
+	unsigned long mask1 = cpumask_bits(cpumask)[0];
+	unsigned long mask2 = cpumask_bits(andmask)[0];
 
 	return (unsigned int)(mask1 & mask2);
 }
diff --git a/arch/x86/include/asm/numaq/apic.h b/arch/x86/include/asm/numaq/apic.h
index abf668ced50..c80f00d2996 100644
--- a/arch/x86/include/asm/numaq/apic.h
+++ b/arch/x86/include/asm/numaq/apic.h
@@ -127,8 +127,8 @@ static inline unsigned int cpu_mask_to_apicid(const cpumask_t *cpumask)
 	return (int) 0xF;
 }
 
-static inline unsigned int cpu_mask_to_apicid_and(const cpumask_t *cpumask,
-						  const cpumask_t *andmask)
+static inline unsigned int cpu_mask_to_apicid_and(const struct cpumask *cpumask,
+						  const struct cpumask *andmask)
 {
 	return (int) 0xF;
 }
diff --git a/arch/x86/include/asm/summit/apic.h b/arch/x86/include/asm/summit/apic.h
index cbcc2c7eb1d..651a9384934 100644
--- a/arch/x86/include/asm/summit/apic.h
+++ b/arch/x86/include/asm/summit/apic.h
@@ -170,8 +170,8 @@ static inline unsigned int cpu_mask_to_apicid(const cpumask_t *cpumask)
 	return apicid;
 }
 
-static inline unsigned int cpu_mask_to_apicid_and(const cpumask_t *cpumask,
-						  const cpumask_t *andmask)
+static inline unsigned int cpu_mask_to_apicid_and(const struct cpumask *cpumask,
+						  const struct cpumask *andmask)
 {
 	int num_bits_set;
 	int num_bits_set2;
@@ -179,9 +179,9 @@ static inline unsigned int cpu_mask_to_apicid_and(const cpumask_t *cpumask,
 	int cpu;
 	int apicid = 0;
 
-	num_bits_set = cpus_weight(*cpumask);
-	num_bits_set2 = cpus_weight(*andmask);
-	num_bits_set = min_t(int, num_bits_set, num_bits_set2);
+	num_bits_set = cpumask_weight(cpumask);
+	num_bits_set2 = cpumask_weight(andmask);
+	num_bits_set = min(num_bits_set, num_bits_set2);
 	/* Return id to all */
 	if (num_bits_set >= nr_cpu_ids)
 		return 0xFF;
@@ -189,11 +189,11 @@ static inline unsigned int cpu_mask_to_apicid_and(const cpumask_t *cpumask,
 	 * The cpus in the mask must all be on the apic cluster.  If are not
 	 * on the same apicid cluster return default value of TARGET_CPUS.
 	 */
-	while ((cpu = next_cpu(-1, *cpumask)) < nr_cpu_ids)
-		if (cpu_isset(cpu, *andmask)
-			apicid = cpu_to_logical_apicid(cpu);
+	cpu = cpumask_first_and(cpumask, andmask);
+	apicid = cpu_to_logical_apicid(cpu);
 	while (cpus_found < num_bits_set) {
-		if (cpu_isset(cpu, *cpumask) && cpu_isset(cpu, *andmask)) {
+		if (cpumask_test_cpu(cpu, cpumask)
+		    && cpumask_test_cpu(cpu, andmask)) {
 			int new_apicid = cpu_to_logical_apicid(cpu);
 			if (apicid_cluster(apicid) !=
 					apicid_cluster(new_apicid)) {
diff --git a/arch/x86/kernel/genapic_flat_64.c b/arch/x86/kernel/genapic_flat_64.c
index 1efecd206a7..c772bb10b17 100644
--- a/arch/x86/kernel/genapic_flat_64.c
+++ b/arch/x86/kernel/genapic_flat_64.c
@@ -158,13 +158,13 @@ static unsigned int flat_cpu_mask_to_apicid(const cpumask_t *cpumask)
 	return cpus_addr(*cpumask)[0] & APIC_ALL_CPUS;
 }
 
-static unsigned int flat_cpu_mask_to_apicid_and(const cpumask_t *cpumask,
-						const cpumask_t *andmask)
+static unsigned int flat_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
+						const struct cpumask *andmask)
 {
-	unsigned long mask1 = cpus_addr(*cpumask)[0] & APIC_ALL_CPUS;
-	unsigned long mask2 = cpus_addr(*andmask)[0] & APIC_ALL_CPUS;
+	unsigned long mask1 = cpumask_bits(cpumask)[0] & APIC_ALL_CPUS;
+	unsigned long mask2 = cpumask_bits(andmask)[0] & APIC_ALL_CPUS;
 
-	return (int)(mask1 & mask2);
+	return mask1 & mask2;
 }
 
 static unsigned int phys_pkg_id(int index_msb)
@@ -264,8 +264,9 @@ static unsigned int physflat_cpu_mask_to_apicid(const cpumask_t *cpumask)
 		return BAD_APICID;
 }
 
-static unsigned int physflat_cpu_mask_to_apicid_and(const cpumask_t *cpumask,
-						    const cpumask_t *andmask)
+static unsigned int
+physflat_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
+				const struct cpumask *andmask)
 {
 	int cpu;
 
@@ -273,9 +274,9 @@ static unsigned int physflat_cpu_mask_to_apicid_and(const cpumask_t *cpumask,
 	 * We're using fixed IRQ delivery, can only return one phys APIC ID.
 	 * May as well be the first.
 	 */
-	while ((cpu = next_cpu(-1, *cpumask)) < nr_cpu_ids)
-		if (cpu_isset(cpu, *andmask))
-			return per_cpu(x86_cpu_to_apicid, cpu);
+	cpu = cpumask_any_and(cpumask, andmask);
+	if (cpu < nr_cpu_ids)
+		return per_cpu(x86_cpu_to_apicid, cpu);
 	return BAD_APICID;
 }
 
diff --git a/arch/x86/kernel/genx2apic_cluster.c b/arch/x86/kernel/genx2apic_cluster.c
index fd8047f4e45..e7d16f53b9c 100644
--- a/arch/x86/kernel/genx2apic_cluster.c
+++ b/arch/x86/kernel/genx2apic_cluster.c
@@ -123,8 +123,8 @@ static unsigned int x2apic_cpu_mask_to_apicid(const cpumask_t *cpumask)
 		return BAD_APICID;
 }
 
-static unsigned int x2apic_cpu_mask_to_apicid_and(const cpumask_t *cpumask,
-						  const cpumask_t *andmask)
+static unsigned int x2apic_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
+						  const struct cpumask *andmask)
 {
 	int cpu;
 
@@ -132,9 +132,9 @@ static unsigned int x2apic_cpu_mask_to_apicid_and(const cpumask_t *cpumask,
 	 * We're using fixed IRQ delivery, can only return one phys APIC ID.
 	 * May as well be the first.
 	 */
-	while ((cpu = next_cpu(-1, *cpumask)) < nr_cpu_ids)
-		if (cpu_isset(cpu, *andmask))
-			return per_cpu(x86_cpu_to_apicid, cpu);
+	cpu = cpumask_any_and(cpumask, andmask);
+	if (cpu < nr_cpu_ids)
+		return per_cpu(x86_cpu_to_apicid, cpu);
 	return BAD_APICID;
 }
 
diff --git a/arch/x86/kernel/genx2apic_phys.c b/arch/x86/kernel/genx2apic_phys.c
index d5578bb8f16..9d0386c7e79 100644
--- a/arch/x86/kernel/genx2apic_phys.c
+++ b/arch/x86/kernel/genx2apic_phys.c
@@ -122,8 +122,8 @@ static unsigned int x2apic_cpu_mask_to_apicid(const cpumask_t *cpumask)
 		return BAD_APICID;
 }
 
-static unsigned int x2apic_cpu_mask_to_apicid_and(const cpumask_t *cpumask,
-						  const cpumask_t *andmask)
+static unsigned int x2apic_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
+						  const struct cpumask *andmask)
 {
 	int cpu;
 
@@ -131,9 +131,9 @@ static unsigned int x2apic_cpu_mask_to_apicid_and(const cpumask_t *cpumask,
 	 * We're using fixed IRQ delivery, can only return one phys APIC ID.
 	 * May as well be the first.
 	 */
-	while ((cpu = next_cpu(-1, *cpumask)) < nr_cpu_ids)
-		if (cpu_isset(cpu, *andmask))
-			return per_cpu(x86_cpu_to_apicid, cpu);
+	cpu = cpumask_any_and(cpumask, andmask);
+	if (cpu < nr_cpu_ids)
+		return per_cpu(x86_cpu_to_apicid, cpu);
 	return BAD_APICID;
 }
 
diff --git a/arch/x86/kernel/genx2apic_uv_x.c b/arch/x86/kernel/genx2apic_uv_x.c
index 53bd2570272..22596ec94c8 100644
--- a/arch/x86/kernel/genx2apic_uv_x.c
+++ b/arch/x86/kernel/genx2apic_uv_x.c
@@ -179,8 +179,8 @@ static unsigned int uv_cpu_mask_to_apicid(const cpumask_t *cpumask)
 		return BAD_APICID;
 }
 
-static unsigned int uv_cpu_mask_to_apicid_and(const cpumask_t *cpumask,
-					      const cpumask_t *andmask)
+static unsigned int uv_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
+					      const struct cpumask *andmask)
 {
 	int cpu;
 
@@ -188,9 +188,9 @@ static unsigned int uv_cpu_mask_to_apicid_and(const cpumask_t *cpumask,
 	 * We're using fixed IRQ delivery, can only return one phys APIC ID.
 	 * May as well be the first.
 	 */
-	while ((cpu = next_cpu(-1, *cpumask)) < nr_cpu_ids)
-		if (cpu_isset(cpu, *andmask))
-			return per_cpu(x86_cpu_to_apicid, cpu);
+	cpu = cpumask_any_and(cpumask, andmask);
+	if (cpu < nr_cpu_ids)
+		return per_cpu(x86_cpu_to_apicid, cpu);
 	return BAD_APICID;
 }
 
-- 
cgit v1.2.3


From 22f65d31b25a320a5246592160bcb102d2791c45 Mon Sep 17 00:00:00 2001
From: Mike Travis <travis@sgi.com>
Date: Tue, 16 Dec 2008 17:33:56 -0800
Subject: x86: Update io_apic.c to use new cpumask API

Impact: cleanup, consolidate patches, use new API

Consolidate the following into a single patch to adapt to new
sparseirq code in arch/x86/kernel/io_apic.c, add allocation of
cpumask_var_t's in domain and old_domain, and reduce further
merge conflicts.  Only one file (arch/x86/kernel/io_apic.c) is
changed in all of these patches.

	0006-x86-io_apic-change-irq_cfg-domain-old_domain-to.patch
	0007-x86-io_apic-set_desc_affinity.patch
	0008-x86-io_apic-send_cleanup_vector.patch
	0009-x86-io_apic-eliminate-remaining-cpumask_ts-from-st.patch
	0021-x86-final-cleanups-in-io_apic-to-use-new-cpumask-AP.patch

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Mike Travis <travis@sgi.com>
---
 arch/x86/kernel/io_apic.c | 302 ++++++++++++++++++++++------------------------
 1 file changed, 145 insertions(+), 157 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c
index 7f23ce7f551..60bb8b19f4c 100644
--- a/arch/x86/kernel/io_apic.c
+++ b/arch/x86/kernel/io_apic.c
@@ -136,8 +136,8 @@ static struct irq_pin_list *get_one_free_irq_2_pin(int cpu)
 
 struct irq_cfg {
 	struct irq_pin_list *irq_2_pin;
-	cpumask_t domain;
-	cpumask_t old_domain;
+	cpumask_var_t domain;
+	cpumask_var_t old_domain;
 	unsigned move_cleanup_count;
 	u8 vector;
 	u8 move_in_progress : 1;
@@ -149,22 +149,22 @@ static struct irq_cfg irq_cfgx[] = {
 #else
 static struct irq_cfg irq_cfgx[NR_IRQS] = {
 #endif
-	[0]  = { .domain = CPU_MASK_ALL, .vector = IRQ0_VECTOR,  },
-	[1]  = { .domain = CPU_MASK_ALL, .vector = IRQ1_VECTOR,  },
-	[2]  = { .domain = CPU_MASK_ALL, .vector = IRQ2_VECTOR,  },
-	[3]  = { .domain = CPU_MASK_ALL, .vector = IRQ3_VECTOR,  },
-	[4]  = { .domain = CPU_MASK_ALL, .vector = IRQ4_VECTOR,  },
-	[5]  = { .domain = CPU_MASK_ALL, .vector = IRQ5_VECTOR,  },
-	[6]  = { .domain = CPU_MASK_ALL, .vector = IRQ6_VECTOR,  },
-	[7]  = { .domain = CPU_MASK_ALL, .vector = IRQ7_VECTOR,  },
-	[8]  = { .domain = CPU_MASK_ALL, .vector = IRQ8_VECTOR,  },
-	[9]  = { .domain = CPU_MASK_ALL, .vector = IRQ9_VECTOR,  },
-	[10] = { .domain = CPU_MASK_ALL, .vector = IRQ10_VECTOR, },
-	[11] = { .domain = CPU_MASK_ALL, .vector = IRQ11_VECTOR, },
-	[12] = { .domain = CPU_MASK_ALL, .vector = IRQ12_VECTOR, },
-	[13] = { .domain = CPU_MASK_ALL, .vector = IRQ13_VECTOR, },
-	[14] = { .domain = CPU_MASK_ALL, .vector = IRQ14_VECTOR, },
-	[15] = { .domain = CPU_MASK_ALL, .vector = IRQ15_VECTOR, },
+	[0]  = { .vector = IRQ0_VECTOR,  },
+	[1]  = { .vector = IRQ1_VECTOR,  },
+	[2]  = { .vector = IRQ2_VECTOR,  },
+	[3]  = { .vector = IRQ3_VECTOR,  },
+	[4]  = { .vector = IRQ4_VECTOR,  },
+	[5]  = { .vector = IRQ5_VECTOR,  },
+	[6]  = { .vector = IRQ6_VECTOR,  },
+	[7]  = { .vector = IRQ7_VECTOR,  },
+	[8]  = { .vector = IRQ8_VECTOR,  },
+	[9]  = { .vector = IRQ9_VECTOR,  },
+	[10] = { .vector = IRQ10_VECTOR, },
+	[11] = { .vector = IRQ11_VECTOR, },
+	[12] = { .vector = IRQ12_VECTOR, },
+	[13] = { .vector = IRQ13_VECTOR, },
+	[14] = { .vector = IRQ14_VECTOR, },
+	[15] = { .vector = IRQ15_VECTOR, },
 };
 
 void __init arch_early_irq_init(void)
@@ -180,6 +180,10 @@ void __init arch_early_irq_init(void)
 	for (i = 0; i < count; i++) {
 		desc = irq_to_desc(i);
 		desc->chip_data = &cfg[i];
+		alloc_bootmem_cpumask_var(&cfg[i].domain);
+		alloc_bootmem_cpumask_var(&cfg[i].old_domain);
+		if (i < NR_IRQS_LEGACY)
+			cpumask_setall(cfg[i].domain);
 	}
 }
 
@@ -204,6 +208,20 @@ static struct irq_cfg *get_one_free_irq_cfg(int cpu)
 	node = cpu_to_node(cpu);
 
 	cfg = kzalloc_node(sizeof(*cfg), GFP_ATOMIC, node);
+	if (cfg) {
+		/* FIXME: needs alloc_cpumask_var_node() */
+		if (!alloc_cpumask_var(&cfg->domain, GFP_ATOMIC)) {
+			kfree(cfg);
+			cfg = NULL;
+		} else if (!alloc_cpumask_var(&cfg->old_domain, GFP_ATOMIC)) {
+			free_cpumask_var(cfg->domain);
+			kfree(cfg);
+			cfg = NULL;
+		} else {
+			cpumask_clear(cfg->domain);
+			cpumask_clear(cfg->old_domain);
+		}
+	}
 	printk(KERN_DEBUG "  alloc irq_cfg on cpu %d node %d\n", cpu, node);
 
 	return cfg;
@@ -362,6 +380,26 @@ static void ioapic_mask_entry(int apic, int pin)
 }
 
 #ifdef CONFIG_SMP
+static void send_cleanup_vector(struct irq_cfg *cfg)
+{
+	cpumask_var_t cleanup_mask;
+
+	if (unlikely(!alloc_cpumask_var(&cleanup_mask, GFP_ATOMIC))) {
+		unsigned int i;
+		cfg->move_cleanup_count = 0;
+		for_each_cpu_and(i, cfg->old_domain, cpu_online_mask)
+			cfg->move_cleanup_count++;
+		for_each_cpu_and(i, cfg->old_domain, cpu_online_mask)
+			send_IPI_mask(cpumask_of(i), IRQ_MOVE_CLEANUP_VECTOR);
+	} else {
+		cpumask_and(cleanup_mask, cfg->old_domain, cpu_online_mask);
+		cfg->move_cleanup_count = cpumask_weight(cleanup_mask);
+		send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
+		free_cpumask_var(cleanup_mask);
+	}
+	cfg->move_in_progress = 0;
+}
+
 static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, struct irq_cfg *cfg)
 {
 	int apic, pin;
@@ -400,40 +438,52 @@ static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, struct irq
 static int
 assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask);
 
-static void set_ioapic_affinity_irq_desc(struct irq_desc *desc,
-					 const struct cpumask *mask)
+/*
+ * Either sets desc->affinity to a valid value, and returns cpu_mask_to_apicid
+ * of that, or returns BAD_APICID and leaves desc->affinity untouched.
+ */
+static unsigned int
+set_desc_affinity(struct irq_desc *desc, const struct cpumask *mask)
 {
 	struct irq_cfg *cfg;
-	unsigned long flags;
-	unsigned int dest;
-	cpumask_t tmp;
 	unsigned int irq;
 
 	if (!cpumask_intersects(mask, cpu_online_mask))
-		return;
+		return BAD_APICID;
 
 	irq = desc->irq;
 	cfg = desc->chip_data;
 	if (assign_irq_vector(irq, cfg, mask))
-		return;
+		return BAD_APICID;
 
+	cpumask_and(&desc->affinity, cfg->domain, mask);
 	set_extra_move_desc(desc, mask);
+	return cpu_mask_to_apicid_and(&desc->affinity, cpu_online_mask);
+}
 
-	cpumask_and(&tmp, &cfg->domain, mask);
-	dest = cpu_mask_to_apicid(&tmp);
-	/*
-	 * Only the high 8 bits are valid.
-	 */
-	dest = SET_APIC_LOGICAL_ID(dest);
+static void
+set_ioapic_affinity_irq_desc(struct irq_desc *desc, const struct cpumask *mask)
+{
+	struct irq_cfg *cfg;
+	unsigned long flags;
+	unsigned int dest;
+	unsigned int irq;
+
+	irq = desc->irq;
+	cfg = desc->chip_data;
 
 	spin_lock_irqsave(&ioapic_lock, flags);
-	__target_IO_APIC_irq(irq, dest, cfg);
-	cpumask_copy(&desc->affinity, mask);
+	dest = set_desc_affinity(desc, mask);
+	if (dest != BAD_APICID) {
+		/* Only the high 8 bits are valid. */
+		dest = SET_APIC_LOGICAL_ID(dest);
+		__target_IO_APIC_irq(irq, dest, cfg);
+	}
 	spin_unlock_irqrestore(&ioapic_lock, flags);
 }
 
-static void set_ioapic_affinity_irq(unsigned int irq,
-				    const struct cpumask *mask)
+static void
+set_ioapic_affinity_irq(unsigned int irq, const struct cpumask *mask)
 {
 	struct irq_desc *desc;
 
@@ -1117,26 +1167,32 @@ __assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask)
 	 */
 	static int current_vector = FIRST_DEVICE_VECTOR, current_offset = 0;
 	unsigned int old_vector;
-	int cpu;
-	cpumask_t tmp_mask;
+	int cpu, err;
+	cpumask_var_t tmp_mask;
 
 	if ((cfg->move_in_progress) || cfg->move_cleanup_count)
 		return -EBUSY;
 
+	if (!alloc_cpumask_var(&tmp_mask, GFP_ATOMIC))
+		return -ENOMEM;
+
 	old_vector = cfg->vector;
 	if (old_vector) {
-		cpus_and(tmp_mask, *mask, cpu_online_map);
-		cpus_and(tmp_mask, cfg->domain, tmp_mask);
-		if (!cpus_empty(tmp_mask))
+		cpumask_and(tmp_mask, mask, cpu_online_mask);
+		cpumask_and(tmp_mask, cfg->domain, tmp_mask);
+		if (!cpumask_empty(tmp_mask)) {
+			free_cpumask_var(tmp_mask);
 			return 0;
+		}
 	}
 
 	/* Only try and allocate irqs on cpus that are present */
-	for_each_cpu_and(cpu, mask, &cpu_online_map) {
+	err = -ENOSPC;
+	for_each_cpu_and(cpu, mask, cpu_online_mask) {
 		int new_cpu;
 		int vector, offset;
 
-		vector_allocation_domain(cpu, &tmp_mask);
+		vector_allocation_domain(cpu, tmp_mask);
 
 		vector = current_vector;
 		offset = current_offset;
@@ -1156,7 +1212,7 @@ next:
 		if (vector == SYSCALL_VECTOR)
 			goto next;
 #endif
-		for_each_cpu_and(new_cpu, &tmp_mask, &cpu_online_map)
+		for_each_cpu_and(new_cpu, tmp_mask, cpu_online_mask)
 			if (per_cpu(vector_irq, new_cpu)[vector] != -1)
 				goto next;
 		/* Found one! */
@@ -1164,15 +1220,17 @@ next:
 		current_offset = offset;
 		if (old_vector) {
 			cfg->move_in_progress = 1;
-			cfg->old_domain = cfg->domain;
+			cpumask_copy(cfg->old_domain, cfg->domain);
 		}
-		for_each_cpu_and(new_cpu, &tmp_mask, &cpu_online_map)
+		for_each_cpu_and(new_cpu, tmp_mask, cpu_online_mask)
 			per_cpu(vector_irq, new_cpu)[vector] = irq;
 		cfg->vector = vector;
-		cfg->domain = tmp_mask;
-		return 0;
+		cpumask_copy(cfg->domain, tmp_mask);
+		err = 0;
+		break;
 	}
-	return -ENOSPC;
+	free_cpumask_var(tmp_mask);
+	return err;
 }
 
 static int
@@ -1189,23 +1247,20 @@ assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask)
 
 static void __clear_irq_vector(int irq, struct irq_cfg *cfg)
 {
-	cpumask_t mask;
 	int cpu, vector;
 
 	BUG_ON(!cfg->vector);
 
 	vector = cfg->vector;
-	cpus_and(mask, cfg->domain, cpu_online_map);
-	for_each_cpu_mask_nr(cpu, mask)
+	for_each_cpu_and(cpu, cfg->domain, cpu_online_mask)
 		per_cpu(vector_irq, cpu)[vector] = -1;
 
 	cfg->vector = 0;
-	cpus_clear(cfg->domain);
+	cpumask_clear(cfg->domain);
 
 	if (likely(!cfg->move_in_progress))
 		return;
-	cpus_and(mask, cfg->old_domain, cpu_online_map);
-	for_each_cpu_mask_nr(cpu, mask) {
+	for_each_cpu_and(cpu, cfg->old_domain, cpu_online_mask) {
 		for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS;
 								vector++) {
 			if (per_cpu(vector_irq, cpu)[vector] != irq)
@@ -1230,7 +1285,7 @@ void __setup_vector_irq(int cpu)
 		if (!desc)
 			continue;
 		cfg = desc->chip_data;
-		if (!cpu_isset(cpu, cfg->domain))
+		if (!cpumask_test_cpu(cpu, cfg->domain))
 			continue;
 		vector = cfg->vector;
 		per_cpu(vector_irq, cpu)[vector] = irq;
@@ -1242,7 +1297,7 @@ void __setup_vector_irq(int cpu)
 			continue;
 
 		cfg = irq_cfg(irq);
-		if (!cpu_isset(cpu, cfg->domain))
+		if (!cpumask_test_cpu(cpu, cfg->domain))
 			per_cpu(vector_irq, cpu)[vector] = -1;
 	}
 }
@@ -1378,18 +1433,17 @@ static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq, struct irq_de
 {
 	struct irq_cfg *cfg;
 	struct IO_APIC_route_entry entry;
-	cpumask_t mask;
+	unsigned int dest;
 
 	if (!IO_APIC_IRQ(irq))
 		return;
 
 	cfg = desc->chip_data;
 
-	mask = *TARGET_CPUS;
-	if (assign_irq_vector(irq, cfg, &mask))
+	if (assign_irq_vector(irq, cfg, TARGET_CPUS))
 		return;
 
-	cpus_and(mask, cfg->domain, mask);
+	dest = cpu_mask_to_apicid_and(cfg->domain, TARGET_CPUS);
 
 	apic_printk(APIC_VERBOSE,KERN_DEBUG
 		    "IOAPIC[%d]: Set routing entry (%d-%d -> 0x%x -> "
@@ -1399,8 +1453,7 @@ static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq, struct irq_de
 
 
 	if (setup_ioapic_entry(mp_ioapics[apic].mp_apicid, irq, &entry,
-			       cpu_mask_to_apicid(&mask), trigger, polarity,
-			       cfg->vector)) {
+			       dest, trigger, polarity, cfg->vector)) {
 		printk("Failed to setup ioapic entry for ioapic  %d, pin %d\n",
 		       mp_ioapics[apic].mp_apicid, pin);
 		__clear_irq_vector(irq, cfg);
@@ -2122,7 +2175,7 @@ static int ioapic_retrigger_irq(unsigned int irq)
 	unsigned long flags;
 
 	spin_lock_irqsave(&vector_lock, flags);
-	send_IPI_mask(&cpumask_of_cpu(first_cpu(cfg->domain)), cfg->vector);
+	send_IPI_mask(cpumask_of(cpumask_first(cfg->domain)), cfg->vector);
 	spin_unlock_irqrestore(&vector_lock, flags);
 
 	return 1;
@@ -2175,15 +2228,13 @@ static void
 migrate_ioapic_irq_desc(struct irq_desc *desc, const struct cpumask *mask)
 {
 	struct irq_cfg *cfg;
-	cpumask_t tmpmask;
 	struct irte irte;
 	int modify_ioapic_rte;
 	unsigned int dest;
 	unsigned long flags;
 	unsigned int irq;
 
-	cpus_and(tmpmask, *mask, cpu_online_map);
-	if (cpus_empty(tmpmask))
+	if (!cpumask_intersects(mask, cpu_online_mask))
 		return;
 
 	irq = desc->irq;
@@ -2196,8 +2247,7 @@ migrate_ioapic_irq_desc(struct irq_desc *desc, const struct cpumask *mask)
 
 	set_extra_move_desc(desc, mask);
 
-	cpus_and(tmpmask, cfg->domain, *mask);
-	dest = cpu_mask_to_apicid(&tmpmask);
+	dest = cpu_mask_to_apicid_and(cfg->domain, mask);
 
 	modify_ioapic_rte = desc->status & IRQ_LEVEL;
 	if (modify_ioapic_rte) {
@@ -2214,14 +2264,10 @@ migrate_ioapic_irq_desc(struct irq_desc *desc, const struct cpumask *mask)
 	 */
 	modify_irte(irq, &irte);
 
-	if (cfg->move_in_progress) {
-		cpus_and(tmpmask, cfg->old_domain, cpu_online_map);
-		cfg->move_cleanup_count = cpus_weight(tmpmask);
-		send_IPI_mask(&tmpmask, IRQ_MOVE_CLEANUP_VECTOR);
-		cfg->move_in_progress = 0;
-	}
+	if (cfg->move_in_progress)
+		send_cleanup_vector(cfg);
 
-	desc->affinity = *mask;
+	cpumask_copy(&desc->affinity, mask);
 }
 
 static int migrate_irq_remapped_level_desc(struct irq_desc *desc)
@@ -2247,7 +2293,7 @@ static int migrate_irq_remapped_level_desc(struct irq_desc *desc)
 
 	ret = 0;
 	desc->status &= ~IRQ_MOVE_PENDING;
-	cpus_clear(desc->pending_mask);
+	cpumask_clear(&desc->pending_mask);
 
 unmask:
 	unmask_IO_APIC_irq_desc(desc);
@@ -2333,7 +2379,7 @@ asmlinkage void smp_irq_move_cleanup_interrupt(void)
 		if (!cfg->move_cleanup_count)
 			goto unlock;
 
-		if ((vector == cfg->vector) && cpu_isset(me, cfg->domain))
+		if (vector == cfg->vector && cpumask_test_cpu(me, cfg->domain))
 			goto unlock;
 
 		__get_cpu_var(vector_irq)[vector] = -1;
@@ -2356,14 +2402,8 @@ static void irq_complete_move(struct irq_desc **descp)
 
 	vector = ~get_irq_regs()->orig_ax;
 	me = smp_processor_id();
-	if ((vector == cfg->vector) && cpu_isset(me, cfg->domain)) {
-		cpumask_t cleanup_mask;
-
-		cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map);
-		cfg->move_cleanup_count = cpus_weight(cleanup_mask);
-		send_IPI_mask(&cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
-		cfg->move_in_progress = 0;
-	}
+	if (vector == cfg->vector && cpumask_test_cpu(me, cfg->domain))
+		send_cleanup_vector(cfg);
 }
 #else
 static inline void irq_complete_move(struct irq_desc **descp) {}
@@ -3088,16 +3128,13 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_ms
 	struct irq_cfg *cfg;
 	int err;
 	unsigned dest;
-	cpumask_t tmp;
 
 	cfg = irq_cfg(irq);
-	tmp = *TARGET_CPUS;
-	err = assign_irq_vector(irq, cfg, &tmp);
+	err = assign_irq_vector(irq, cfg, TARGET_CPUS);
 	if (err)
 		return err;
 
-	cpus_and(tmp, cfg->domain, tmp);
-	dest = cpu_mask_to_apicid(&tmp);
+	dest = cpu_mask_to_apicid_and(cfg->domain, TARGET_CPUS);
 
 #ifdef CONFIG_INTR_REMAP
 	if (irq_remapped(irq)) {
@@ -3157,19 +3194,12 @@ static void set_msi_irq_affinity(unsigned int irq, const struct cpumask *mask)
 	struct irq_cfg *cfg;
 	struct msi_msg msg;
 	unsigned int dest;
-	cpumask_t tmp;
 
-	if (!cpumask_intersects(mask, cpu_online_mask))
+	dest = set_desc_affinity(desc, mask);
+	if (dest == BAD_APICID)
 		return;
 
 	cfg = desc->chip_data;
-	if (assign_irq_vector(irq, cfg, mask))
-		return;
-
-	set_extra_move_desc(desc, mask);
-
-	cpumask_and(&tmp, &cfg->domain, mask);
-	dest = cpu_mask_to_apicid(&tmp);
 
 	read_msi_msg_desc(desc, &msg);
 
@@ -3179,7 +3209,6 @@ static void set_msi_irq_affinity(unsigned int irq, const struct cpumask *mask)
 	msg.address_lo |= MSI_ADDR_DEST_ID(dest);
 
 	write_msi_msg_desc(desc, &msg);
-	cpumask_copy(&desc->affinity, mask);
 }
 #ifdef CONFIG_INTR_REMAP
 /*
@@ -3192,24 +3221,15 @@ ir_set_msi_irq_affinity(unsigned int irq, const struct cpumask *mask)
 	struct irq_desc *desc = irq_to_desc(irq);
 	struct irq_cfg *cfg;
 	unsigned int dest;
-	cpumask_t tmp, cleanup_mask;
 	struct irte irte;
 
-	if (!cpumask_intersects(mask, cpu_online_mask))
-		return;
-
 	if (get_irte(irq, &irte))
 		return;
 
-	cfg = desc->chip_data;
-	if (assign_irq_vector(irq, cfg, mask))
+	dest = set_desc_affinity(desc, mask);
+	if (dest == BAD_APICID)
 		return;
 
-	set_extra_move_desc(desc, mask);
-
-	cpumask_and(&tmp, &cfg->domain, mask);
-	dest = cpu_mask_to_apicid(&tmp);
-
 	irte.vector = cfg->vector;
 	irte.dest_id = IRTE_DEST(dest);
 
@@ -3223,14 +3243,8 @@ ir_set_msi_irq_affinity(unsigned int irq, const struct cpumask *mask)
 	 * at the new destination. So, time to cleanup the previous
 	 * vector allocation.
 	 */
-	if (cfg->move_in_progress) {
-		cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map);
-		cfg->move_cleanup_count = cpus_weight(cleanup_mask);
-		send_IPI_mask(&cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
-		cfg->move_in_progress = 0;
-	}
-
-	cpumask_copy(&desc->affinity, mask);
+	if (cfg->move_in_progress)
+		send_cleanup_vector(cfg);
 }
 
 #endif
@@ -3421,25 +3435,18 @@ void arch_teardown_msi_irq(unsigned int irq)
 
 #ifdef CONFIG_DMAR
 #ifdef CONFIG_SMP
-static void dmar_msi_set_affinity(unsigned int irq, const cpumask_t *mask)
+static void dmar_msi_set_affinity(unsigned int irq, const struct cpumask *mask)
 {
 	struct irq_desc *desc = irq_to_desc(irq);
 	struct irq_cfg *cfg;
 	struct msi_msg msg;
 	unsigned int dest;
-	cpumask_t tmp;
 
-	if (!cpumask_intersects(mask, cpu_online_mask))
+	dest = set_desc_affinity(desc, mask);
+	if (dest == BAD_APICID)
 		return;
 
 	cfg = desc->chip_data;
-	if (assign_irq_vector(irq, cfg, mask))
-		return;
-
-	set_extra_move_desc(desc, mask);
-
-	cpumask_and(&tmp, &cfg->domain, mask);
-	dest = cpu_mask_to_apicid(&tmp);
 
 	dmar_msi_read(irq, &msg);
 
@@ -3449,7 +3456,6 @@ static void dmar_msi_set_affinity(unsigned int irq, const cpumask_t *mask)
 	msg.address_lo |= MSI_ADDR_DEST_ID(dest);
 
 	dmar_msi_write(irq, &msg);
-	cpumask_copy(&desc->affinity, mask);
 }
 
 #endif /* CONFIG_SMP */
@@ -3483,25 +3489,18 @@ int arch_setup_dmar_msi(unsigned int irq)
 #ifdef CONFIG_HPET_TIMER
 
 #ifdef CONFIG_SMP
-static void hpet_msi_set_affinity(unsigned int irq, const cpumask_t *mask)
+static void hpet_msi_set_affinity(unsigned int irq, const struct cpumask *mask)
 {
 	struct irq_desc *desc = irq_to_desc(irq);
 	struct irq_cfg *cfg;
 	struct msi_msg msg;
 	unsigned int dest;
-	cpumask_t tmp;
 
-	if (!cpumask_intersects(mask, cpu_online_mask))
+	dest = set_desc_affinity(desc, mask);
+	if (dest == BAD_APICID)
 		return;
 
 	cfg = desc->chip_data;
-	if (assign_irq_vector(irq, cfg, mask))
-		return;
-
-	set_extra_move_desc(desc, mask);
-
-	cpumask_and(&tmp, &cfg->domain, mask);
-	dest = cpu_mask_to_apicid(&tmp);
 
 	hpet_msi_read(irq, &msg);
 
@@ -3511,7 +3510,6 @@ static void hpet_msi_set_affinity(unsigned int irq, const cpumask_t *mask)
 	msg.address_lo |= MSI_ADDR_DEST_ID(dest);
 
 	hpet_msi_write(irq, &msg);
-	cpumask_copy(&desc->affinity, mask);
 }
 
 #endif /* CONFIG_SMP */
@@ -3566,27 +3564,19 @@ static void target_ht_irq(unsigned int irq, unsigned int dest, u8 vector)
 	write_ht_irq_msg(irq, &msg);
 }
 
-static void set_ht_irq_affinity(unsigned int irq, const cpumask_t *mask)
+static void set_ht_irq_affinity(unsigned int irq, const struct cpumask *mask)
 {
 	struct irq_desc *desc = irq_to_desc(irq);
 	struct irq_cfg *cfg;
 	unsigned int dest;
-	cpumask_t tmp;
 
-	if (!cpumask_intersects(mask, cpu_online_mask))
+	dest = set_desc_affinity(desc, mask);
+	if (dest == BAD_APICID)
 		return;
 
 	cfg = desc->chip_data;
-	if (assign_irq_vector(irq, cfg, mask))
-		return;
-
-	set_extra_move_desc(desc, mask);
-
-	cpumask_and(&tmp, &cfg->domain, mask);
-	dest = cpu_mask_to_apicid(&tmp);
 
 	target_ht_irq(irq, dest, cfg->vector);
-	cpumask_copy(&desc->affinity, mask);
 }
 
 #endif
@@ -3606,7 +3596,6 @@ int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev)
 {
 	struct irq_cfg *cfg;
 	int err;
-	cpumask_t tmp;
 
 	cfg = irq_cfg(irq);
 	err = assign_irq_vector(irq, cfg, TARGET_CPUS);
@@ -3614,8 +3603,7 @@ int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev)
 		struct ht_irq_msg msg;
 		unsigned dest;
 
-		cpus_and(tmp, cfg->domain, tmp);
-		dest = cpu_mask_to_apicid(&tmp);
+		dest = cpu_mask_to_apicid_and(cfg->domain, TARGET_CPUS);
 
 		msg.address_hi = HT_IRQ_HIGH_DEST_ID(dest);
 
@@ -3651,7 +3639,7 @@ int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev)
 int arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade,
 		       unsigned long mmr_offset)
 {
-	const cpumask_t *eligible_cpu = &cpumask_of_cpu(cpu);
+	const struct cpumask *eligible_cpu = cpumask_of(cpu);
 	struct irq_cfg *cfg;
 	int mmr_pnode;
 	unsigned long mmr_value;
@@ -3891,7 +3879,7 @@ void __init setup_ioapic_dest(void)
 	int pin, ioapic, irq, irq_entry;
 	struct irq_desc *desc;
 	struct irq_cfg *cfg;
-	const cpumask_t *mask;
+	const struct cpumask *mask;
 
 	if (skip_ioapic_setup == 1)
 		return;
-- 
cgit v1.2.3


From b78936e14ee47b6b2d628501a0eab5270db80132 Mon Sep 17 00:00:00 2001
From: Mike Travis <travis@sgi.com>
Date: Tue, 16 Dec 2008 17:33:57 -0800
Subject: xen: convert to cpumask_var_t and new cpumask primitives.

Simple change, and eventual space saving when NR_CPUS >> nr_cpu_ids.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Mike Travis <travis@sgi.com>
Cc: Jeremy Fitzhardinge <jeremy@xensource.com>
---
 arch/x86/xen/smp.c     | 9 ++++++---
 arch/x86/xen/suspend.c | 3 ++-
 arch/x86/xen/xen-ops.h | 2 +-
 3 files changed, 9 insertions(+), 5 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
index 2cce362c987..b3a95868839 100644
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -33,7 +33,7 @@
 #include "xen-ops.h"
 #include "mmu.h"
 
-cpumask_t xen_cpu_initialized_map;
+cpumask_var_t xen_cpu_initialized_map;
 
 static DEFINE_PER_CPU(int, resched_irq);
 static DEFINE_PER_CPU(int, callfunc_irq);
@@ -192,7 +192,10 @@ static void __init xen_smp_prepare_cpus(unsigned int max_cpus)
 	if (xen_smp_intr_init(0))
 		BUG();
 
-	xen_cpu_initialized_map = cpumask_of_cpu(0);
+	if (!alloc_cpumask_var(&xen_cpu_initialized_map, GFP_KERNEL))
+		panic("could not allocate xen_cpu_initialized_map\n");
+
+	cpumask_copy(xen_cpu_initialized_map, cpumask_of(0));
 
 	/* Restrict the possible_map according to max_cpus. */
 	while ((num_possible_cpus() > 1) && (num_possible_cpus() > max_cpus)) {
@@ -221,7 +224,7 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle)
 	struct vcpu_guest_context *ctxt;
 	struct desc_struct *gdt;
 
-	if (cpu_test_and_set(cpu, xen_cpu_initialized_map))
+	if (cpumask_test_and_set_cpu(cpu, xen_cpu_initialized_map))
 		return 0;
 
 	ctxt = kzalloc(sizeof(*ctxt), GFP_KERNEL);
diff --git a/arch/x86/xen/suspend.c b/arch/x86/xen/suspend.c
index 2a234db5949..212ffe012b7 100644
--- a/arch/x86/xen/suspend.c
+++ b/arch/x86/xen/suspend.c
@@ -35,7 +35,8 @@ void xen_post_suspend(int suspend_cancelled)
 			pfn_to_mfn(xen_start_info->console.domU.mfn);
 	} else {
 #ifdef CONFIG_SMP
-		xen_cpu_initialized_map = cpu_online_map;
+		BUG_ON(xen_cpu_initialized_map == NULL);
+		cpumask_copy(xen_cpu_initialized_map, cpu_online_mask);
 #endif
 		xen_vcpu_restore();
 	}
diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h
index 9e1afae8461..c1f8faf0a2c 100644
--- a/arch/x86/xen/xen-ops.h
+++ b/arch/x86/xen/xen-ops.h
@@ -58,7 +58,7 @@ void __init xen_init_spinlocks(void);
 __cpuinit void xen_init_lock_cpu(int cpu);
 void xen_uninit_lock_cpu(int cpu);
 
-extern cpumask_t xen_cpu_initialized_map;
+extern cpumask_var_t xen_cpu_initialized_map;
 #else
 static inline void xen_smp_init(void) {}
 #endif
-- 
cgit v1.2.3


From d7b381bb7b1ad69ff008ea063d26e988b686c8de Mon Sep 17 00:00:00 2001
From: Mike Travis <travis@sgi.com>
Date: Tue, 16 Dec 2008 17:33:58 -0800
Subject: x86: fixup_irqs() doesnt need an argument.

Impact: cleanup, remove on-stack cpumask.

The "map" arg is always cpu_online_mask.  Importantly, set_affinity
always ands the argument with cpu_online_mask anyway, so we don't need
to do it in fixup_irqs(), avoiding a temporary.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Mike Travis <travis@sgi.com>
---
 arch/x86/include/asm/irq.h |  2 +-
 arch/x86/kernel/irq_32.c   | 13 +++++++------
 arch/x86/kernel/irq_64.c   | 15 ++++++++-------
 arch/x86/kernel/smpboot.c  |  2 +-
 4 files changed, 17 insertions(+), 15 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/irq.h b/arch/x86/include/asm/irq.h
index bae0eda9548..8766d30fb74 100644
--- a/arch/x86/include/asm/irq.h
+++ b/arch/x86/include/asm/irq.h
@@ -37,7 +37,7 @@ extern int irqbalance_disable(char *str);
 
 #ifdef CONFIG_HOTPLUG_CPU
 #include <linux/cpumask.h>
-extern void fixup_irqs(cpumask_t map);
+extern void fixup_irqs(void);
 #endif
 
 extern unsigned int do_IRQ(struct pt_regs *regs);
diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c
index 9cf9cbbf7a0..9dc5588f336 100644
--- a/arch/x86/kernel/irq_32.c
+++ b/arch/x86/kernel/irq_32.c
@@ -233,27 +233,28 @@ unsigned int do_IRQ(struct pt_regs *regs)
 #ifdef CONFIG_HOTPLUG_CPU
 #include <mach_apic.h>
 
-void fixup_irqs(cpumask_t map)
+/* A cpu has been removed from cpu_online_mask.  Reset irq affinities. */
+void fixup_irqs(void)
 {
 	unsigned int irq;
 	static int warned;
 	struct irq_desc *desc;
 
 	for_each_irq_desc(irq, desc) {
-		cpumask_t mask;
+		const struct cpumask *affinity;
 
 		if (!desc)
 			continue;
 		if (irq == 2)
 			continue;
 
-		cpus_and(mask, desc->affinity, map);
-		if (any_online_cpu(mask) == NR_CPUS) {
+		affinity = &desc->affinity;
+		if (cpumask_any_and(affinity, cpu_online_mask) >= nr_cpu_ids) {
 			printk("Breaking affinity for irq %i\n", irq);
-			mask = map;
+			affinity = cpu_all_mask;
 		}
 		if (desc->chip->set_affinity)
-			desc->chip->set_affinity(irq, &mask);
+			desc->chip->set_affinity(irq, affinity);
 		else if (desc->action && !(warned++))
 			printk("Cannot set affinity for irq %i\n", irq);
 	}
diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c
index 27f2307b0a3..fca2991443f 100644
--- a/arch/x86/kernel/irq_64.c
+++ b/arch/x86/kernel/irq_64.c
@@ -83,16 +83,17 @@ asmlinkage unsigned int do_IRQ(struct pt_regs *regs)
 }
 
 #ifdef CONFIG_HOTPLUG_CPU
-void fixup_irqs(cpumask_t map)
+/* A cpu has been removed from cpu_online_mask.  Reset irq affinities. */
+void fixup_irqs(void)
 {
 	unsigned int irq;
 	static int warned;
 	struct irq_desc *desc;
 
 	for_each_irq_desc(irq, desc) {
-		cpumask_t mask;
 		int break_affinity = 0;
 		int set_affinity = 1;
+		const struct cpumask *affinity;
 
 		if (!desc)
 			continue;
@@ -102,23 +103,23 @@ void fixup_irqs(cpumask_t map)
 		/* interrupt's are disabled at this point */
 		spin_lock(&desc->lock);
 
+		affinity = &desc->affinity;
 		if (!irq_has_action(irq) ||
-		    cpus_equal(desc->affinity, map)) {
+		    cpumask_equal(affinity, cpu_online_mask)) {
 			spin_unlock(&desc->lock);
 			continue;
 		}
 
-		cpus_and(mask, desc->affinity, map);
-		if (cpus_empty(mask)) {
+		if (cpumask_any_and(affinity, cpu_online_mask) >= nr_cpu_ids) {
 			break_affinity = 1;
-			mask = map;
+			affinity = cpu_all_mask;
 		}
 
 		if (desc->chip->mask)
 			desc->chip->mask(irq);
 
 		if (desc->chip->set_affinity)
-			desc->chip->set_affinity(irq, &mask);
+			desc->chip->set_affinity(irq, affinity);
 		else if (!(warned++))
 			set_affinity = 0;
 
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 9d58134e023..8b6f675b363 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -1346,7 +1346,7 @@ void cpu_disable_common(void)
 	lock_vector_lock();
 	remove_cpu_from_maps(cpu);
 	unlock_vector_lock();
-	fixup_irqs(cpu_online_map);
+	fixup_irqs();
 }
 
 int native_cpu_disable(void)
-- 
cgit v1.2.3


From bcda016eddd7a8b374bb371473c821a91ff1d8cc Mon Sep 17 00:00:00 2001
From: Mike Travis <travis@sgi.com>
Date: Tue, 16 Dec 2008 17:33:59 -0800
Subject: x86: cosmetic changes apic-related files.

This patch simply changes cpumask_t to struct cpumask and similar
trivial modernizations.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Mike Travis <travis@sgi.com>
---
 arch/x86/include/asm/bigsmp/ipi.h             | 14 +++-----
 arch/x86/include/asm/es7000/ipi.h             | 13 +++----
 arch/x86/include/asm/genapic_32.h             | 11 +++---
 arch/x86/include/asm/genapic_64.h             | 11 +++---
 arch/x86/include/asm/ipi.h                    | 10 +++---
 arch/x86/include/asm/mach-default/mach_apic.h | 12 +++----
 arch/x86/include/asm/mach-default/mach_ipi.h  | 12 +++----
 arch/x86/include/asm/numaq/ipi.h              | 14 +++-----
 arch/x86/include/asm/smp.h                    |  4 +--
 arch/x86/kernel/genapic_flat_64.c             | 50 ++++++++++++++-------------
 arch/x86/kernel/genx2apic_cluster.c           | 25 +++++++-------
 arch/x86/kernel/genx2apic_phys.c              | 25 +++++++-------
 arch/x86/kernel/genx2apic_uv_x.c              | 24 ++++++-------
 arch/x86/kernel/ipi.c                         | 14 ++++----
 arch/x86/kernel/smp.c                         |  6 ++--
 arch/x86/xen/smp.c                            | 11 +++---
 16 files changed, 127 insertions(+), 129 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/bigsmp/ipi.h b/arch/x86/include/asm/bigsmp/ipi.h
index 63553e9f22b..27fcd01b3ae 100644
--- a/arch/x86/include/asm/bigsmp/ipi.h
+++ b/arch/x86/include/asm/bigsmp/ipi.h
@@ -1,26 +1,22 @@
 #ifndef __ASM_MACH_IPI_H
 #define __ASM_MACH_IPI_H
 
-void send_IPI_mask_sequence(const cpumask_t *mask, int vector);
-void send_IPI_mask_allbutself(const cpumask_t *mask, int vector);
+void send_IPI_mask_sequence(const struct cpumask *mask, int vector);
+void send_IPI_mask_allbutself(const struct cpumask *mask, int vector);
 
-static inline void send_IPI_mask(const cpumask_t *mask, int vector)
+static inline void send_IPI_mask(const struct cpumask *mask, int vector)
 {
 	send_IPI_mask_sequence(mask, vector);
 }
 
 static inline void send_IPI_allbutself(int vector)
 {
-	cpumask_t mask = cpu_online_map;
-	cpu_clear(smp_processor_id(), mask);
-
-	if (!cpus_empty(mask))
-		send_IPI_mask(&mask, vector);
+	send_IPI_mask_allbutself(cpu_online_mask, vector);
 }
 
 static inline void send_IPI_all(int vector)
 {
-	send_IPI_mask(&cpu_online_map, vector);
+	send_IPI_mask(cpu_online_mask, vector);
 }
 
 #endif /* __ASM_MACH_IPI_H */
diff --git a/arch/x86/include/asm/es7000/ipi.h b/arch/x86/include/asm/es7000/ipi.h
index 1a8507265f9..7e8ed24d4b8 100644
--- a/arch/x86/include/asm/es7000/ipi.h
+++ b/arch/x86/include/asm/es7000/ipi.h
@@ -1,25 +1,22 @@
 #ifndef __ASM_ES7000_IPI_H
 #define __ASM_ES7000_IPI_H
 
-void send_IPI_mask_sequence(const cpumask_t *mask, int vector);
-void send_IPI_mask_allbutself(const cpumask_t *mask, int vector);
+void send_IPI_mask_sequence(const struct cpumask *mask, int vector);
+void send_IPI_mask_allbutself(const struct cpumask *mask, int vector);
 
-static inline void send_IPI_mask(const cpumask_t *mask, int vector)
+static inline void send_IPI_mask(const struct cpumask *mask, int vector)
 {
 	send_IPI_mask_sequence(mask, vector);
 }
 
 static inline void send_IPI_allbutself(int vector)
 {
-	cpumask_t mask = cpu_online_map;
-	cpu_clear(smp_processor_id(), mask);
-	if (!cpus_empty(mask))
-		send_IPI_mask(&mask, vector);
+	send_IPI_mask_allbutself(cpu_online_mask, vector);
 }
 
 static inline void send_IPI_all(int vector)
 {
-	send_IPI_mask(&cpu_online_map, vector);
+	send_IPI_mask(cpu_online_mask, vector);
 }
 
 #endif /* __ASM_ES7000_IPI_H */
diff --git a/arch/x86/include/asm/genapic_32.h b/arch/x86/include/asm/genapic_32.h
index eed6e305291..746f37a7963 100644
--- a/arch/x86/include/asm/genapic_32.h
+++ b/arch/x86/include/asm/genapic_32.h
@@ -24,7 +24,7 @@ struct genapic {
 	int (*probe)(void);
 
 	int (*apic_id_registered)(void);
-	const cpumask_t *(*target_cpus)(void);
+	const struct cpumask *(*target_cpus)(void);
 	int int_delivery_mode;
 	int int_dest_mode;
 	int ESR_DISABLE;
@@ -57,15 +57,16 @@ struct genapic {
 
 	unsigned (*get_apic_id)(unsigned long x);
 	unsigned long apic_id_mask;
-	unsigned int (*cpu_mask_to_apicid)(const cpumask_t *cpumask);
+	unsigned int (*cpu_mask_to_apicid)(const struct cpumask *cpumask);
 	unsigned int (*cpu_mask_to_apicid_and)(const struct cpumask *cpumask,
 					       const struct cpumask *andmask);
-	void (*vector_allocation_domain)(int cpu, cpumask_t *retmask);
+	void (*vector_allocation_domain)(int cpu, struct cpumask *retmask);
 
 #ifdef CONFIG_SMP
 	/* ipi */
-	void (*send_IPI_mask)(const cpumask_t *mask, int vector);
-	void (*send_IPI_mask_allbutself)(const cpumask_t *mask, int vector);
+	void (*send_IPI_mask)(const struct cpumask *mask, int vector);
+	void (*send_IPI_mask_allbutself)(const struct cpumask *mask,
+					 int vector);
 	void (*send_IPI_allbutself)(int vector);
 	void (*send_IPI_all)(int vector);
 #endif
diff --git a/arch/x86/include/asm/genapic_64.h b/arch/x86/include/asm/genapic_64.h
index 244b71729ec..adf32fb56aa 100644
--- a/arch/x86/include/asm/genapic_64.h
+++ b/arch/x86/include/asm/genapic_64.h
@@ -20,17 +20,18 @@ struct genapic {
 	u32 int_delivery_mode;
 	u32 int_dest_mode;
 	int (*apic_id_registered)(void);
-	const cpumask_t *(*target_cpus)(void);
-	void (*vector_allocation_domain)(int cpu, cpumask_t *retmask);
+	const struct cpumask *(*target_cpus)(void);
+	void (*vector_allocation_domain)(int cpu, struct cpumask *retmask);
 	void (*init_apic_ldr)(void);
 	/* ipi */
-	void (*send_IPI_mask)(const cpumask_t *mask, int vector);
-	void (*send_IPI_mask_allbutself)(const cpumask_t *mask, int vector);
+	void (*send_IPI_mask)(const struct cpumask *mask, int vector);
+	void (*send_IPI_mask_allbutself)(const struct cpumask *mask,
+					 int vector);
 	void (*send_IPI_allbutself)(int vector);
 	void (*send_IPI_all)(int vector);
 	void (*send_IPI_self)(int vector);
 	/* */
-	unsigned int (*cpu_mask_to_apicid)(const cpumask_t *cpumask);
+	unsigned int (*cpu_mask_to_apicid)(const struct cpumask *cpumask);
 	unsigned int (*cpu_mask_to_apicid_and)(const struct cpumask *cpumask,
 					       const struct cpumask *andmask);
 	unsigned int (*phys_pkg_id)(int index_msb);
diff --git a/arch/x86/include/asm/ipi.h b/arch/x86/include/asm/ipi.h
index 24b6e613edf..c745a306f7d 100644
--- a/arch/x86/include/asm/ipi.h
+++ b/arch/x86/include/asm/ipi.h
@@ -117,7 +117,8 @@ static inline void __send_IPI_dest_field(unsigned int mask, int vector,
 	native_apic_mem_write(APIC_ICR, cfg);
 }
 
-static inline void send_IPI_mask_sequence(const cpumask_t *mask, int vector)
+static inline void send_IPI_mask_sequence(const struct cpumask *mask,
+					  int vector)
 {
 	unsigned long flags;
 	unsigned long query_cpu;
@@ -128,14 +129,15 @@ static inline void send_IPI_mask_sequence(const cpumask_t *mask, int vector)
 	 * - mbligh
 	 */
 	local_irq_save(flags);
-	for_each_cpu_mask_nr(query_cpu, *mask) {
+	for_each_cpu(query_cpu, mask) {
 		__send_IPI_dest_field(per_cpu(x86_cpu_to_apicid, query_cpu),
 				      vector, APIC_DEST_PHYSICAL);
 	}
 	local_irq_restore(flags);
 }
 
-static inline void send_IPI_mask_allbutself(const cpumask_t *mask, int vector)
+static inline void send_IPI_mask_allbutself(const struct cpumask *mask,
+					    int vector)
 {
 	unsigned long flags;
 	unsigned int query_cpu;
@@ -144,7 +146,7 @@ static inline void send_IPI_mask_allbutself(const cpumask_t *mask, int vector)
 	/* See Hack comment above */
 
 	local_irq_save(flags);
-	for_each_cpu_mask_nr(query_cpu, *mask)
+	for_each_cpu(query_cpu, mask)
 		if (query_cpu != this_cpu)
 			__send_IPI_dest_field(
 				per_cpu(x86_cpu_to_apicid, query_cpu),
diff --git a/arch/x86/include/asm/mach-default/mach_apic.h b/arch/x86/include/asm/mach-default/mach_apic.h
index df8e024c43c..8863d978cb9 100644
--- a/arch/x86/include/asm/mach-default/mach_apic.h
+++ b/arch/x86/include/asm/mach-default/mach_apic.h
@@ -8,12 +8,12 @@
 
 #define APIC_DFR_VALUE	(APIC_DFR_FLAT)
 
-static inline const cpumask_t *target_cpus(void)
+static inline const struct cpumask *target_cpus(void)
 { 
 #ifdef CONFIG_SMP
-	return &cpu_online_map;
+	return cpu_online_mask;
 #else
-	return &cpumask_of_cpu(0);
+	return cpumask_of(0);
 #endif
 } 
 
@@ -62,9 +62,9 @@ static inline int apic_id_registered(void)
 	return physid_isset(read_apic_id(), phys_cpu_present_map);
 }
 
-static inline unsigned int cpu_mask_to_apicid(const cpumask_t *cpumask)
+static inline unsigned int cpu_mask_to_apicid(const struct cpumask *cpumask)
 {
-	return cpus_addr(*cpumask)[0];
+	return cpumask_bits(cpumask)[0];
 }
 
 static inline unsigned int cpu_mask_to_apicid_and(const struct cpumask *cpumask,
@@ -98,7 +98,7 @@ static inline int apicid_to_node(int logical_apicid)
 #endif
 }
 
-static inline void vector_allocation_domain(int cpu, cpumask_t *retmask)
+static inline void vector_allocation_domain(int cpu, struct cpumask *retmask)
 {
         /* Careful. Some cpus do not strictly honor the set of cpus
          * specified in the interrupt destination when using lowest
diff --git a/arch/x86/include/asm/mach-default/mach_ipi.h b/arch/x86/include/asm/mach-default/mach_ipi.h
index 9353ab854a1..191312d155d 100644
--- a/arch/x86/include/asm/mach-default/mach_ipi.h
+++ b/arch/x86/include/asm/mach-default/mach_ipi.h
@@ -4,8 +4,8 @@
 /* Avoid include hell */
 #define NMI_VECTOR 0x02
 
-void send_IPI_mask_bitmask(const cpumask_t *mask, int vector);
-void send_IPI_mask_allbutself(const cpumask_t *mask, int vector);
+void send_IPI_mask_bitmask(const struct cpumask *mask, int vector);
+void send_IPI_mask_allbutself(const struct cpumask *mask, int vector);
 void __send_IPI_shortcut(unsigned int shortcut, int vector);
 
 extern int no_broadcast;
@@ -15,17 +15,17 @@ extern int no_broadcast;
 #define send_IPI_mask (genapic->send_IPI_mask)
 #define send_IPI_mask_allbutself (genapic->send_IPI_mask_allbutself)
 #else
-static inline void send_IPI_mask(const cpumask_t *mask, int vector)
+static inline void send_IPI_mask(const struct cpumask *mask, int vector)
 {
 	send_IPI_mask_bitmask(mask, vector);
 }
-void send_IPI_mask_allbutself(const cpumask_t *mask, int vector);
+void send_IPI_mask_allbutself(const struct cpumask *mask, int vector);
 #endif
 
 static inline void __local_send_IPI_allbutself(int vector)
 {
 	if (no_broadcast || vector == NMI_VECTOR)
-		send_IPI_mask_allbutself(&cpu_online_map, vector);
+		send_IPI_mask_allbutself(cpu_online_mask, vector);
 	else
 		__send_IPI_shortcut(APIC_DEST_ALLBUT, vector);
 }
@@ -33,7 +33,7 @@ static inline void __local_send_IPI_allbutself(int vector)
 static inline void __local_send_IPI_all(int vector)
 {
 	if (no_broadcast || vector == NMI_VECTOR)
-		send_IPI_mask(&cpu_online_map, vector);
+		send_IPI_mask(cpu_online_mask, vector);
 	else
 		__send_IPI_shortcut(APIC_DEST_ALLINC, vector);
 }
diff --git a/arch/x86/include/asm/numaq/ipi.h b/arch/x86/include/asm/numaq/ipi.h
index c734d7acc43..a8374c65277 100644
--- a/arch/x86/include/asm/numaq/ipi.h
+++ b/arch/x86/include/asm/numaq/ipi.h
@@ -1,26 +1,22 @@
 #ifndef __ASM_NUMAQ_IPI_H
 #define __ASM_NUMAQ_IPI_H
 
-void send_IPI_mask_sequence(const cpumask_t *mask, int vector);
-void send_IPI_mask_allbutself(const cpumask_t *mask, int vector);
+void send_IPI_mask_sequence(const struct cpumask *mask, int vector);
+void send_IPI_mask_allbutself(const struct cpumask *mask, int vector);
 
-static inline void send_IPI_mask(const cpumask_t *mask, int vector)
+static inline void send_IPI_mask(const struct cpumask *mask, int vector)
 {
 	send_IPI_mask_sequence(mask, vector);
 }
 
 static inline void send_IPI_allbutself(int vector)
 {
-	cpumask_t mask = cpu_online_map;
-	cpu_clear(smp_processor_id(), mask);
-
-	if (!cpus_empty(mask))
-		send_IPI_mask(&mask, vector);
+	send_IPI_mask_allbutself(cpu_online_mask, vector);
 }
 
 static inline void send_IPI_all(int vector)
 {
-	send_IPI_mask(&cpu_online_map, vector);
+	send_IPI_mask(cpu_online_mask, vector);
 }
 
 #endif /* __ASM_NUMAQ_IPI_H */
diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h
index c4a9aa52df6..830b9fcb642 100644
--- a/arch/x86/include/asm/smp.h
+++ b/arch/x86/include/asm/smp.h
@@ -60,7 +60,7 @@ struct smp_ops {
 	void (*cpu_die)(unsigned int cpu);
 	void (*play_dead)(void);
 
-	void (*send_call_func_ipi)(const cpumask_t *mask);
+	void (*send_call_func_ipi)(const struct cpumask *mask);
 	void (*send_call_func_single_ipi)(int cpu);
 };
 
@@ -138,7 +138,7 @@ void native_cpu_die(unsigned int cpu);
 void native_play_dead(void);
 void play_dead_common(void);
 
-void native_send_call_func_ipi(const cpumask_t *mask);
+void native_send_call_func_ipi(const struct cpumask *mask);
 void native_send_call_func_single_ipi(int cpu);
 
 extern void prefill_possible_map(void);
diff --git a/arch/x86/kernel/genapic_flat_64.c b/arch/x86/kernel/genapic_flat_64.c
index c772bb10b17..7fa5f49c2dd 100644
--- a/arch/x86/kernel/genapic_flat_64.c
+++ b/arch/x86/kernel/genapic_flat_64.c
@@ -30,12 +30,12 @@ static int flat_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
 	return 1;
 }
 
-static const cpumask_t *flat_target_cpus(void)
+static const struct cpumask *flat_target_cpus(void)
 {
-	return &cpu_online_map;
+	return cpu_online_mask;
 }
 
-static void flat_vector_allocation_domain(int cpu, cpumask_t *retmask)
+static void flat_vector_allocation_domain(int cpu, struct cpumask *retmask)
 {
 	/* Careful. Some cpus do not strictly honor the set of cpus
 	 * specified in the interrupt destination when using lowest
@@ -45,7 +45,8 @@ static void flat_vector_allocation_domain(int cpu, cpumask_t *retmask)
 	 * deliver interrupts to the wrong hyperthread when only one
 	 * hyperthread was specified in the interrupt desitination.
 	 */
-	*retmask = (cpumask_t) { {[0] = APIC_ALL_CPUS, } };
+	cpumask_clear(retmask);
+	cpumask_bits(retmask)[0] = APIC_ALL_CPUS;
 }
 
 /*
@@ -77,16 +78,17 @@ static inline void _flat_send_IPI_mask(unsigned long mask, int vector)
 	local_irq_restore(flags);
 }
 
-static void flat_send_IPI_mask(const cpumask_t *cpumask, int vector)
+static void flat_send_IPI_mask(const struct cpumask *cpumask, int vector)
 {
-	unsigned long mask = cpus_addr(*cpumask)[0];
+	unsigned long mask = cpumask_bits(cpumask)[0];
 
 	_flat_send_IPI_mask(mask, vector);
 }
 
-static void flat_send_IPI_mask_allbutself(const cpumask_t *cpumask, int vector)
+static void flat_send_IPI_mask_allbutself(const struct cpumask *cpumask,
+					  int vector)
 {
-	unsigned long mask = cpus_addr(*cpumask)[0];
+	unsigned long mask = cpumask_bits(cpumask)[0];
 	int cpu = smp_processor_id();
 
 	if (cpu < BITS_PER_LONG)
@@ -103,8 +105,8 @@ static void flat_send_IPI_allbutself(int vector)
 	int hotplug = 0;
 #endif
 	if (hotplug || vector == NMI_VECTOR) {
-		if (!cpus_equal(cpu_online_map, cpumask_of_cpu(cpu))) {
-			unsigned long mask = cpus_addr(cpu_online_map)[0];
+		if (!cpumask_equal(cpu_online_mask, cpumask_of(cpu))) {
+			unsigned long mask = cpumask_bits(cpu_online_mask)[0];
 
 			if (cpu < BITS_PER_LONG)
 				clear_bit(cpu, &mask);
@@ -119,7 +121,7 @@ static void flat_send_IPI_allbutself(int vector)
 static void flat_send_IPI_all(int vector)
 {
 	if (vector == NMI_VECTOR)
-		flat_send_IPI_mask(&cpu_online_map, vector);
+		flat_send_IPI_mask(cpu_online_mask, vector);
 	else
 		__send_IPI_shortcut(APIC_DEST_ALLINC, vector, APIC_DEST_LOGICAL);
 }
@@ -153,9 +155,9 @@ static int flat_apic_id_registered(void)
 	return physid_isset(read_xapic_id(), phys_cpu_present_map);
 }
 
-static unsigned int flat_cpu_mask_to_apicid(const cpumask_t *cpumask)
+static unsigned int flat_cpu_mask_to_apicid(const struct cpumask *cpumask)
 {
-	return cpus_addr(*cpumask)[0] & APIC_ALL_CPUS;
+	return cpumask_bits(cpumask)[0] & APIC_ALL_CPUS;
 }
 
 static unsigned int flat_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
@@ -217,23 +219,23 @@ static int physflat_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
 	return 0;
 }
 
-static const cpumask_t *physflat_target_cpus(void)
+static const struct cpumask *physflat_target_cpus(void)
 {
-	return &cpu_online_map;
+	return cpu_online_mask;
 }
 
-static void physflat_vector_allocation_domain(int cpu, cpumask_t *retmask)
+static void physflat_vector_allocation_domain(int cpu, struct cpumask *retmask)
 {
-	cpus_clear(*retmask);
-	cpu_set(cpu, *retmask);
+	cpumask_clear(retmask);
+	cpumask_set_cpu(cpu, retmask);
 }
 
-static void physflat_send_IPI_mask(const cpumask_t *cpumask, int vector)
+static void physflat_send_IPI_mask(const struct cpumask *cpumask, int vector)
 {
 	send_IPI_mask_sequence(cpumask, vector);
 }
 
-static void physflat_send_IPI_mask_allbutself(const cpumask_t *cpumask,
+static void physflat_send_IPI_mask_allbutself(const struct cpumask *cpumask,
 					      int vector)
 {
 	send_IPI_mask_allbutself(cpumask, vector);
@@ -241,15 +243,15 @@ static void physflat_send_IPI_mask_allbutself(const cpumask_t *cpumask,
 
 static void physflat_send_IPI_allbutself(int vector)
 {
-	send_IPI_mask_allbutself(&cpu_online_map, vector);
+	send_IPI_mask_allbutself(cpu_online_mask, vector);
 }
 
 static void physflat_send_IPI_all(int vector)
 {
-	physflat_send_IPI_mask(&cpu_online_map, vector);
+	physflat_send_IPI_mask(cpu_online_mask, vector);
 }
 
-static unsigned int physflat_cpu_mask_to_apicid(const cpumask_t *cpumask)
+static unsigned int physflat_cpu_mask_to_apicid(const struct cpumask *cpumask)
 {
 	int cpu;
 
@@ -257,7 +259,7 @@ static unsigned int physflat_cpu_mask_to_apicid(const cpumask_t *cpumask)
 	 * We're using fixed IRQ delivery, can only return one phys APIC ID.
 	 * May as well be the first.
 	 */
-	cpu = first_cpu(*cpumask);
+	cpu = cpumask_first(cpumask);
 	if ((unsigned)cpu < nr_cpu_ids)
 		return per_cpu(x86_cpu_to_apicid, cpu);
 	else
diff --git a/arch/x86/kernel/genx2apic_cluster.c b/arch/x86/kernel/genx2apic_cluster.c
index e7d16f53b9c..4716a0c9f93 100644
--- a/arch/x86/kernel/genx2apic_cluster.c
+++ b/arch/x86/kernel/genx2apic_cluster.c
@@ -22,18 +22,18 @@ static int x2apic_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
 
 /* Start with all IRQs pointing to boot CPU.  IRQ balancing will shift them. */
 
-static const cpumask_t *x2apic_target_cpus(void)
+static const struct cpumask *x2apic_target_cpus(void)
 {
-	return &cpumask_of_cpu(0);
+	return cpumask_of(0);
 }
 
 /*
  * for now each logical cpu is in its own vector allocation domain.
  */
-static void x2apic_vector_allocation_domain(int cpu, cpumask_t *retmask)
+static void x2apic_vector_allocation_domain(int cpu, struct cpumask *retmask)
 {
-	cpus_clear(*retmask);
-	cpu_set(cpu, *retmask);
+	cpumask_clear(retmask);
+	cpumask_set_cpu(cpu, retmask);
 }
 
 static void __x2apic_send_IPI_dest(unsigned int apicid, int vector,
@@ -55,27 +55,28 @@ static void __x2apic_send_IPI_dest(unsigned int apicid, int vector,
  * at once. We have 16 cpu's in a cluster. This will minimize IPI register
  * writes.
  */
-static void x2apic_send_IPI_mask(const cpumask_t *mask, int vector)
+static void x2apic_send_IPI_mask(const struct cpumask *mask, int vector)
 {
 	unsigned long flags;
 	unsigned long query_cpu;
 
 	local_irq_save(flags);
-	for_each_cpu_mask_nr(query_cpu, *mask)
+	for_each_cpu(query_cpu, mask)
 		__x2apic_send_IPI_dest(
 			per_cpu(x86_cpu_to_logical_apicid, query_cpu),
 			vector, APIC_DEST_LOGICAL);
 	local_irq_restore(flags);
 }
 
-static void x2apic_send_IPI_mask_allbutself(const cpumask_t *mask, int vector)
+static void x2apic_send_IPI_mask_allbutself(const struct cpumask *mask,
+					    int vector)
 {
 	unsigned long flags;
 	unsigned long query_cpu;
 	unsigned long this_cpu = smp_processor_id();
 
 	local_irq_save(flags);
-	for_each_cpu_mask_nr(query_cpu, *mask)
+	for_each_cpu(query_cpu, mask)
 		if (query_cpu != this_cpu)
 			__x2apic_send_IPI_dest(
 				per_cpu(x86_cpu_to_logical_apicid, query_cpu),
@@ -100,7 +101,7 @@ static void x2apic_send_IPI_allbutself(int vector)
 
 static void x2apic_send_IPI_all(int vector)
 {
-	x2apic_send_IPI_mask(&cpu_online_map, vector);
+	x2apic_send_IPI_mask(cpu_online_mask, vector);
 }
 
 static int x2apic_apic_id_registered(void)
@@ -108,7 +109,7 @@ static int x2apic_apic_id_registered(void)
 	return 1;
 }
 
-static unsigned int x2apic_cpu_mask_to_apicid(const cpumask_t *cpumask)
+static unsigned int x2apic_cpu_mask_to_apicid(const struct cpumask *cpumask)
 {
 	int cpu;
 
@@ -116,7 +117,7 @@ static unsigned int x2apic_cpu_mask_to_apicid(const cpumask_t *cpumask)
 	 * We're using fixed IRQ delivery, can only return one phys APIC ID.
 	 * May as well be the first.
 	 */
-	cpu = first_cpu(*cpumask);
+	cpu = cpumask_first(cpumask);
 	if ((unsigned)cpu < nr_cpu_ids)
 		return per_cpu(x86_cpu_to_logical_apicid, cpu);
 	else
diff --git a/arch/x86/kernel/genx2apic_phys.c b/arch/x86/kernel/genx2apic_phys.c
index 9d0386c7e79..b255507884f 100644
--- a/arch/x86/kernel/genx2apic_phys.c
+++ b/arch/x86/kernel/genx2apic_phys.c
@@ -29,15 +29,15 @@ static int x2apic_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
 
 /* Start with all IRQs pointing to boot CPU.  IRQ balancing will shift them. */
 
-static const cpumask_t *x2apic_target_cpus(void)
+static const struct cpumask *x2apic_target_cpus(void)
 {
-	return &cpumask_of_cpu(0);
+	return cpumask_of(0);
 }
 
-static void x2apic_vector_allocation_domain(int cpu, cpumask_t *retmask)
+static void x2apic_vector_allocation_domain(int cpu, struct cpumask *retmask)
 {
-	cpus_clear(*retmask);
-	cpu_set(cpu, *retmask);
+	cpumask_clear(retmask);
+	cpumask_set_cpu(cpu, retmask);
 }
 
 static void __x2apic_send_IPI_dest(unsigned int apicid, int vector,
@@ -53,27 +53,28 @@ static void __x2apic_send_IPI_dest(unsigned int apicid, int vector,
 	x2apic_icr_write(cfg, apicid);
 }
 
-static void x2apic_send_IPI_mask(const cpumask_t *mask, int vector)
+static void x2apic_send_IPI_mask(const struct cpumask *mask, int vector)
 {
 	unsigned long flags;
 	unsigned long query_cpu;
 
 	local_irq_save(flags);
-	for_each_cpu_mask_nr(query_cpu, *mask) {
+	for_each_cpu(query_cpu, mask) {
 		__x2apic_send_IPI_dest(per_cpu(x86_cpu_to_apicid, query_cpu),
 				       vector, APIC_DEST_PHYSICAL);
 	}
 	local_irq_restore(flags);
 }
 
-static void x2apic_send_IPI_mask_allbutself(const cpumask_t *mask, int vector)
+static void x2apic_send_IPI_mask_allbutself(const struct cpumask *mask,
+					    int vector)
 {
 	unsigned long flags;
 	unsigned long query_cpu;
 	unsigned long this_cpu = smp_processor_id();
 
 	local_irq_save(flags);
-	for_each_cpu_mask_nr(query_cpu, *mask) {
+	for_each_cpu(query_cpu, mask) {
 		if (query_cpu != this_cpu)
 			__x2apic_send_IPI_dest(
 				per_cpu(x86_cpu_to_apicid, query_cpu),
@@ -99,7 +100,7 @@ static void x2apic_send_IPI_allbutself(int vector)
 
 static void x2apic_send_IPI_all(int vector)
 {
-	x2apic_send_IPI_mask(&cpu_online_map, vector);
+	x2apic_send_IPI_mask(cpu_online_mask, vector);
 }
 
 static int x2apic_apic_id_registered(void)
@@ -107,7 +108,7 @@ static int x2apic_apic_id_registered(void)
 	return 1;
 }
 
-static unsigned int x2apic_cpu_mask_to_apicid(const cpumask_t *cpumask)
+static unsigned int x2apic_cpu_mask_to_apicid(const struct cpumask *cpumask)
 {
 	int cpu;
 
@@ -115,7 +116,7 @@ static unsigned int x2apic_cpu_mask_to_apicid(const cpumask_t *cpumask)
 	 * We're using fixed IRQ delivery, can only return one phys APIC ID.
 	 * May as well be the first.
 	 */
-	cpu = first_cpu(*cpumask);
+	cpu = cpumask_first(cpumask);
 	if ((unsigned)cpu < nr_cpu_ids)
 		return per_cpu(x86_cpu_to_apicid, cpu);
 	else
diff --git a/arch/x86/kernel/genx2apic_uv_x.c b/arch/x86/kernel/genx2apic_uv_x.c
index 22596ec94c8..3984682cd84 100644
--- a/arch/x86/kernel/genx2apic_uv_x.c
+++ b/arch/x86/kernel/genx2apic_uv_x.c
@@ -75,15 +75,15 @@ EXPORT_SYMBOL(sn_rtc_cycles_per_second);
 
 /* Start with all IRQs pointing to boot CPU.  IRQ balancing will shift them. */
 
-static const cpumask_t *uv_target_cpus(void)
+static const struct cpumask *uv_target_cpus(void)
 {
-	return &cpumask_of_cpu(0);
+	return cpumask_of(0);
 }
 
-static void uv_vector_allocation_domain(int cpu, cpumask_t *retmask)
+static void uv_vector_allocation_domain(int cpu, struct cpumask *retmask)
 {
-	cpus_clear(*retmask);
-	cpu_set(cpu, *retmask);
+	cpumask_clear(retmask);
+	cpumask_set_cpu(cpu, retmask);
 }
 
 int uv_wakeup_secondary(int phys_apicid, unsigned int start_rip)
@@ -122,20 +122,20 @@ static void uv_send_IPI_one(int cpu, int vector)
 	uv_write_global_mmr64(pnode, UVH_IPI_INT, val);
 }
 
-static void uv_send_IPI_mask(const cpumask_t *mask, int vector)
+static void uv_send_IPI_mask(const struct cpumask *mask, int vector)
 {
 	unsigned int cpu;
 
-	for_each_cpu_mask_nr(cpu, *mask)
+	for_each_cpu(cpu, mask)
 		uv_send_IPI_one(cpu, vector);
 }
 
-static void uv_send_IPI_mask_allbutself(const cpumask_t *mask, int vector)
+static void uv_send_IPI_mask_allbutself(const struct cpumask *mask, int vector)
 {
 	unsigned int cpu;
 	unsigned int this_cpu = smp_processor_id();
 
-	for_each_cpu_mask_nr(cpu, *mask)
+	for_each_cpu(cpu, mask)
 		if (cpu != this_cpu)
 			uv_send_IPI_one(cpu, vector);
 }
@@ -152,7 +152,7 @@ static void uv_send_IPI_allbutself(int vector)
 
 static void uv_send_IPI_all(int vector)
 {
-	uv_send_IPI_mask(&cpu_online_map, vector);
+	uv_send_IPI_mask(cpu_online_mask, vector);
 }
 
 static int uv_apic_id_registered(void)
@@ -164,7 +164,7 @@ static void uv_init_apic_ldr(void)
 {
 }
 
-static unsigned int uv_cpu_mask_to_apicid(const cpumask_t *cpumask)
+static unsigned int uv_cpu_mask_to_apicid(const struct cpumask *cpumask)
 {
 	int cpu;
 
@@ -172,7 +172,7 @@ static unsigned int uv_cpu_mask_to_apicid(const cpumask_t *cpumask)
 	 * We're using fixed IRQ delivery, can only return one phys APIC ID.
 	 * May as well be the first.
 	 */
-	cpu = first_cpu(*cpumask);
+	cpu = cpumask_first(cpumask);
 	if ((unsigned)cpu < nr_cpu_ids)
 		return per_cpu(x86_cpu_to_apicid, cpu);
 	else
diff --git a/arch/x86/kernel/ipi.c b/arch/x86/kernel/ipi.c
index 86aa50fc65a..285bbf8831f 100644
--- a/arch/x86/kernel/ipi.c
+++ b/arch/x86/kernel/ipi.c
@@ -116,18 +116,18 @@ static inline void __send_IPI_dest_field(unsigned long mask, int vector)
 /*
  * This is only used on smaller machines.
  */
-void send_IPI_mask_bitmask(const cpumask_t *cpumask, int vector)
+void send_IPI_mask_bitmask(const struct cpumask *cpumask, int vector)
 {
-	unsigned long mask = cpus_addr(*cpumask)[0];
+	unsigned long mask = cpumask_bits(cpumask)[0];
 	unsigned long flags;
 
 	local_irq_save(flags);
-	WARN_ON(mask & ~cpus_addr(cpu_online_map)[0]);
+	WARN_ON(mask & ~cpumask_bits(cpu_online_mask)[0]);
 	__send_IPI_dest_field(mask, vector);
 	local_irq_restore(flags);
 }
 
-void send_IPI_mask_sequence(const cpumask_t *mask, int vector)
+void send_IPI_mask_sequence(const struct cpumask *mask, int vector)
 {
 	unsigned long flags;
 	unsigned int query_cpu;
@@ -139,12 +139,12 @@ void send_IPI_mask_sequence(const cpumask_t *mask, int vector)
 	 */
 
 	local_irq_save(flags);
-	for_each_cpu_mask_nr(query_cpu, *mask)
+	for_each_cpu(query_cpu, mask)
 		__send_IPI_dest_field(cpu_to_logical_apicid(query_cpu), vector);
 	local_irq_restore(flags);
 }
 
-void send_IPI_mask_allbutself(const cpumask_t *mask, int vector)
+void send_IPI_mask_allbutself(const struct cpumask *mask, int vector)
 {
 	unsigned long flags;
 	unsigned int query_cpu;
@@ -153,7 +153,7 @@ void send_IPI_mask_allbutself(const cpumask_t *mask, int vector)
 	/* See Hack comment above */
 
 	local_irq_save(flags);
-	for_each_cpu_mask_nr(query_cpu, *mask)
+	for_each_cpu(query_cpu, mask)
 		if (query_cpu != this_cpu)
 			__send_IPI_dest_field(cpu_to_logical_apicid(query_cpu),
 					      vector);
diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c
index 341df946f9a..49ed667b06f 100644
--- a/arch/x86/kernel/smp.c
+++ b/arch/x86/kernel/smp.c
@@ -118,15 +118,15 @@ static void native_smp_send_reschedule(int cpu)
 		WARN_ON(1);
 		return;
 	}
-	send_IPI_mask(&cpumask_of_cpu(cpu), RESCHEDULE_VECTOR);
+	send_IPI_mask(cpumask_of(cpu), RESCHEDULE_VECTOR);
 }
 
 void native_send_call_func_single_ipi(int cpu)
 {
-	send_IPI_mask(&cpumask_of_cpu(cpu), CALL_FUNCTION_SINGLE_VECTOR);
+	send_IPI_mask(cpumask_of(cpu), CALL_FUNCTION_SINGLE_VECTOR);
 }
 
-void native_send_call_func_ipi(const cpumask_t *mask)
+void native_send_call_func_ipi(const struct cpumask *mask)
 {
 	cpumask_t allbutself;
 
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
index b3a95868839..c44e2069c7c 100644
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -411,22 +411,23 @@ static void xen_smp_send_reschedule(int cpu)
 	xen_send_IPI_one(cpu, XEN_RESCHEDULE_VECTOR);
 }
 
-static void xen_send_IPI_mask(const cpumask_t *mask, enum ipi_vector vector)
+static void xen_send_IPI_mask(const struct cpumask *mask,
+			      enum ipi_vector vector)
 {
 	unsigned cpu;
 
-	for_each_cpu_and(cpu, mask, &cpu_online_map)
+	for_each_cpu_and(cpu, mask, cpu_online_mask)
 		xen_send_IPI_one(cpu, vector);
 }
 
-static void xen_smp_send_call_function_ipi(const cpumask_t *mask)
+static void xen_smp_send_call_function_ipi(const struct cpumask *mask)
 {
 	int cpu;
 
 	xen_send_IPI_mask(mask, XEN_CALL_FUNCTION_VECTOR);
 
 	/* Make sure other vcpus get a chance to run if they need to. */
-	for_each_cpu_mask_nr(cpu, *mask) {
+	for_each_cpu(cpu, mask) {
 		if (xen_vcpu_stolen(cpu)) {
 			HYPERVISOR_sched_op(SCHEDOP_yield, 0);
 			break;
@@ -436,7 +437,7 @@ static void xen_smp_send_call_function_ipi(const cpumask_t *mask)
 
 static void xen_smp_send_call_function_single_ipi(int cpu)
 {
-	xen_send_IPI_mask(&cpumask_of_cpu(cpu),
+	xen_send_IPI_mask(cpumask_of(cpu),
 			  XEN_CALL_FUNCTION_SINGLE_VECTOR);
 }
 
-- 
cgit v1.2.3


From 78637a97b7fe1df51f40a460448df0b93d511176 Mon Sep 17 00:00:00 2001
From: Mike Travis <travis@sgi.com>
Date: Tue, 16 Dec 2008 17:34:00 -0800
Subject: x86: Set CONFIG_NR_CPUS even on UP

Impact: cleanup

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Mike Travis <travis@sgi.com>
---
 arch/x86/Kconfig | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 1fd44352f27..4a3f5851ec6 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -599,12 +599,12 @@ config MAXSMP
 	  If unsure, say N.
 
 config NR_CPUS
-	depends on SMP
 	int "Maximum number of CPUs" if SMP && !MAXSMP
 	range 2 512 if SMP && !MAXSMP
+	default "1" if !SMP
 	default "4096" if MAXSMP
-	default "32" if X86_NUMAQ || X86_SUMMIT || X86_BIGSMP || X86_ES7000
-	default "8"
+	default "32" if SMP && (X86_NUMAQ || X86_SUMMIT || X86_BIGSMP || X86_ES7000)
+	default "8" if SMP
 	help
 	  This allows you to specify the maximum number of CPUs which this
 	  kernel will support.  The maximum supported value is 512 and the
-- 
cgit v1.2.3


From 168ef543a43678146e06b3911e987ac021d575b8 Mon Sep 17 00:00:00 2001
From: Mike Travis <travis@sgi.com>
Date: Tue, 16 Dec 2008 17:34:01 -0800
Subject: x86: prepare for cpumask iterators to only go to nr_cpu_ids

Impact: cleanup, futureproof

In fact, all cpumask ops will only be valid (in general) for bit
numbers < nr_cpu_ids.  So use that instead of NR_CPUS in various
places.

This is always safe: no cpu number can be >= nr_cpu_ids, and
nr_cpu_ids is initialized to NR_CPUS at boot.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Mike Travis <travis@sgi.com>
Acked-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/apic.c              | 2 +-
 arch/x86/mach-voyager/voyager_smp.c | 2 +-
 arch/x86/mm/numa_64.c               | 4 ++--
 arch/x86/mm/srat_64.c               | 2 +-
 4 files changed, 5 insertions(+), 5 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/apic.c b/arch/x86/kernel/apic.c
index a375791c08c..3b630ec2493 100644
--- a/arch/x86/kernel/apic.c
+++ b/arch/x86/kernel/apic.c
@@ -2106,7 +2106,7 @@ __cpuinit int apic_is_clustered_box(void)
 	bios_cpu_apicid = early_per_cpu_ptr(x86_bios_cpu_apicid);
 	bitmap_zero(clustermap, NUM_APIC_CLUSTERS);
 
-	for (i = 0; i < NR_CPUS; i++) {
+	for (i = 0; i < nr_cpu_ids; i++) {
 		/* are we being called early in kernel startup? */
 		if (bios_cpu_apicid) {
 			id = bios_cpu_apicid[i];
diff --git a/arch/x86/mach-voyager/voyager_smp.c b/arch/x86/mach-voyager/voyager_smp.c
index 9c990185e9f..a5bc05492b1 100644
--- a/arch/x86/mach-voyager/voyager_smp.c
+++ b/arch/x86/mach-voyager/voyager_smp.c
@@ -672,7 +672,7 @@ void __init smp_boot_cpus(void)
 
 	/* loop over all the extended VIC CPUs and boot them.  The
 	 * Quad CPUs must be bootstrapped by their extended VIC cpu */
-	for (i = 0; i < NR_CPUS; i++) {
+	for (i = 0; i < nr_cpu_ids; i++) {
 		if (i == boot_cpu_id || !cpu_isset(i, phys_cpu_present_map))
 			continue;
 		do_boot_cpu(i);
diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c
index cebcbf152d4..71a14f89f89 100644
--- a/arch/x86/mm/numa_64.c
+++ b/arch/x86/mm/numa_64.c
@@ -278,7 +278,7 @@ void __init numa_init_array(void)
 	int rr, i;
 
 	rr = first_node(node_online_map);
-	for (i = 0; i < NR_CPUS; i++) {
+	for (i = 0; i < nr_cpu_ids; i++) {
 		if (early_cpu_to_node(i) != NUMA_NO_NODE)
 			continue;
 		numa_set_node(i, rr);
@@ -549,7 +549,7 @@ void __init initmem_init(unsigned long start_pfn, unsigned long last_pfn)
 	memnodemap[0] = 0;
 	node_set_online(0);
 	node_set(0, node_possible_map);
-	for (i = 0; i < NR_CPUS; i++)
+	for (i = 0; i < nr_cpu_ids; i++)
 		numa_set_node(i, 0);
 	e820_register_active_regions(0, start_pfn, last_pfn);
 	setup_node_bootmem(0, start_pfn << PAGE_SHIFT, last_pfn << PAGE_SHIFT);
diff --git a/arch/x86/mm/srat_64.c b/arch/x86/mm/srat_64.c
index 51c0a2fc14f..09737c8af07 100644
--- a/arch/x86/mm/srat_64.c
+++ b/arch/x86/mm/srat_64.c
@@ -382,7 +382,7 @@ int __init acpi_scan_nodes(unsigned long start, unsigned long end)
 		if (!node_online(i))
 			setup_node_bootmem(i, nodes[i].start, nodes[i].end);
 
-	for (i = 0; i < NR_CPUS; i++) {
+	for (i = 0; i < nr_cpu_ids; i++) {
 		int node = early_cpu_to_node(i);
 
 		if (node == NUMA_NO_NODE)
-- 
cgit v1.2.3


From 1de88cd4a33fcc2fcf70cbce01688723f728675d Mon Sep 17 00:00:00 2001
From: Mike Travis <travis@sgi.com>
Date: Tue, 16 Dec 2008 17:34:02 -0800
Subject: x86: Use cpumask accessors code for possible/present maps.

Impact: use new API

Use the accessors rather than frobbing bits directly.  Most of this is
in arch code I haven't even compiled, but is straightforward.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Mike Travis <travis@sgi.com>
---
 arch/x86/kernel/apic.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/apic.c b/arch/x86/kernel/apic.c
index 3b630ec2493..edda4c00e3d 100644
--- a/arch/x86/kernel/apic.c
+++ b/arch/x86/kernel/apic.c
@@ -1903,8 +1903,8 @@ void __cpuinit generic_processor_info(int apicid, int version)
 	}
 #endif
 
-	cpu_set(cpu, cpu_possible_map);
-	cpu_set(cpu, cpu_present_map);
+	set_cpu_possible(cpu, true);
+	set_cpu_present(cpu, true);
 }
 
 #ifdef CONFIG_X86_64
-- 
cgit v1.2.3


From b2bb85549134c005e997e5a7ed303bda6a1ae738 Mon Sep 17 00:00:00 2001
From: Mike Travis <travis@sgi.com>
Date: Tue, 16 Dec 2008 17:34:03 -0800
Subject: x86: Remove cpumask games in x86/kernel/cpu/intel_cacheinfo.c

Impact: remove cpumask_t from stack.

We should not try to save and restore cpus_allowed on current.

We can't use work_on_cpu() here, since it's in the hotplug cpu path
(if anyone else tries to get the hotplug lock from a workqueue we
could deadlock against them).

Fortunately, we can just use smp_call_function_single() since the
function can run from an interrupt.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Mike Travis <travis@sgi.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Oleg Nesterov <oleg@tv-sign.ru>
---
 arch/x86/kernel/cpu/intel_cacheinfo.c | 41 ++++++++++++++++-------------------
 1 file changed, 19 insertions(+), 22 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c
index 43ea612d3e9..fb7f946cb65 100644
--- a/arch/x86/kernel/cpu/intel_cacheinfo.c
+++ b/arch/x86/kernel/cpu/intel_cacheinfo.c
@@ -534,31 +534,16 @@ static void __cpuinit free_cache_attributes(unsigned int cpu)
 	per_cpu(cpuid4_info, cpu) = NULL;
 }
 
-static int __cpuinit detect_cache_attributes(unsigned int cpu)
+static void get_cpu_leaves(void *_retval)
 {
-	struct _cpuid4_info	*this_leaf;
-	unsigned long		j;
-	int			retval;
-	cpumask_t		oldmask;
-
-	if (num_cache_leaves == 0)
-		return -ENOENT;
-
-	per_cpu(cpuid4_info, cpu) = kzalloc(
-	    sizeof(struct _cpuid4_info) * num_cache_leaves, GFP_KERNEL);
-	if (per_cpu(cpuid4_info, cpu) == NULL)
-		return -ENOMEM;
-
-	oldmask = current->cpus_allowed;
-	retval = set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu));
-	if (retval)
-		goto out;
+	int j, *retval = _retval, cpu = smp_processor_id();
 
 	/* Do cpuid and store the results */
 	for (j = 0; j < num_cache_leaves; j++) {
+		struct _cpuid4_info *this_leaf;
 		this_leaf = CPUID4_INFO_IDX(cpu, j);
-		retval = cpuid4_cache_lookup(j, this_leaf);
-		if (unlikely(retval < 0)) {
+		*retval = cpuid4_cache_lookup(j, this_leaf);
+		if (unlikely(*retval < 0)) {
 			int i;
 
 			for (i = 0; i < j; i++)
@@ -567,9 +552,21 @@ static int __cpuinit detect_cache_attributes(unsigned int cpu)
 		}
 		cache_shared_cpu_map_setup(cpu, j);
 	}
-	set_cpus_allowed_ptr(current, &oldmask);
+}
+
+static int __cpuinit detect_cache_attributes(unsigned int cpu)
+{
+	int			retval;
+
+	if (num_cache_leaves == 0)
+		return -ENOENT;
+
+	per_cpu(cpuid4_info, cpu) = kzalloc(
+	    sizeof(struct _cpuid4_info) * num_cache_leaves, GFP_KERNEL);
+	if (per_cpu(cpuid4_info, cpu) == NULL)
+		return -ENOMEM;
 
-out:
+	smp_call_function_single(cpu, get_cpu_leaves, &retval, true);
 	if (retval) {
 		kfree(per_cpu(cpuid4_info, cpu));
 		per_cpu(cpuid4_info, cpu) = NULL;
-- 
cgit v1.2.3


From 4cd4601d592d07b26e4b7d2bb8fcd55bbfd6cf6e Mon Sep 17 00:00:00 2001
From: Mike Travis <travis@sgi.com>
Date: Tue, 16 Dec 2008 17:34:04 -0800
Subject: x86: use work_on_cpu in x86/kernel/cpu/mcheck/mce_amd_64.c

Impact: Remove cpumask_t's from stack.

Simple transition to work_on_cpu(), rather than cpumask games.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Mike Travis <travis@sgi.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Robert Richter <robert.richter@amd.com>
Cc: jacob.shin@amd.com
---
 arch/x86/kernel/cpu/mcheck/mce_amd_64.c | 108 ++++++++++++++++----------------
 1 file changed, 55 insertions(+), 53 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd_64.c b/arch/x86/kernel/cpu/mcheck/mce_amd_64.c
index 5eb390a4b2e..a1de80f368f 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_amd_64.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd_64.c
@@ -83,34 +83,41 @@ static DEFINE_PER_CPU(unsigned char, bank_map);	/* see which banks are on */
  * CPU Initialization
  */
 
+struct thresh_restart {
+	struct threshold_block *b;
+	int reset;
+	u16 old_limit;
+};
+
 /* must be called with correct cpu affinity */
-static void threshold_restart_bank(struct threshold_block *b,
-				   int reset, u16 old_limit)
+static long threshold_restart_bank(void *_tr)
 {
+	struct thresh_restart *tr = _tr;
 	u32 mci_misc_hi, mci_misc_lo;
 
-	rdmsr(b->address, mci_misc_lo, mci_misc_hi);
+	rdmsr(tr->b->address, mci_misc_lo, mci_misc_hi);
 
-	if (b->threshold_limit < (mci_misc_hi & THRESHOLD_MAX))
-		reset = 1;	/* limit cannot be lower than err count */
+	if (tr->b->threshold_limit < (mci_misc_hi & THRESHOLD_MAX))
+		tr->reset = 1;	/* limit cannot be lower than err count */
 
-	if (reset) {		/* reset err count and overflow bit */
+	if (tr->reset) {		/* reset err count and overflow bit */
 		mci_misc_hi =
 		    (mci_misc_hi & ~(MASK_ERR_COUNT_HI | MASK_OVERFLOW_HI)) |
-		    (THRESHOLD_MAX - b->threshold_limit);
-	} else if (old_limit) {	/* change limit w/o reset */
+		    (THRESHOLD_MAX - tr->b->threshold_limit);
+	} else if (tr->old_limit) {	/* change limit w/o reset */
 		int new_count = (mci_misc_hi & THRESHOLD_MAX) +
-		    (old_limit - b->threshold_limit);
+		    (tr->old_limit - tr->b->threshold_limit);
 		mci_misc_hi = (mci_misc_hi & ~MASK_ERR_COUNT_HI) |
 		    (new_count & THRESHOLD_MAX);
 	}
 
-	b->interrupt_enable ?
+	tr->b->interrupt_enable ?
 	    (mci_misc_hi = (mci_misc_hi & ~MASK_INT_TYPE_HI) | INT_TYPE_APIC) :
 	    (mci_misc_hi &= ~MASK_INT_TYPE_HI);
 
 	mci_misc_hi |= MASK_COUNT_EN_HI;
-	wrmsr(b->address, mci_misc_lo, mci_misc_hi);
+	wrmsr(tr->b->address, mci_misc_lo, mci_misc_hi);
+	return 0;
 }
 
 /* cpu init entry point, called from mce.c with preempt off */
@@ -120,6 +127,7 @@ void __cpuinit mce_amd_feature_init(struct cpuinfo_x86 *c)
 	unsigned int cpu = smp_processor_id();
 	u8 lvt_off;
 	u32 low = 0, high = 0, address = 0;
+	struct thresh_restart tr;
 
 	for (bank = 0; bank < NR_BANKS; ++bank) {
 		for (block = 0; block < NR_BLOCKS; ++block) {
@@ -162,7 +170,10 @@ void __cpuinit mce_amd_feature_init(struct cpuinfo_x86 *c)
 			wrmsr(address, low, high);
 
 			threshold_defaults.address = address;
-			threshold_restart_bank(&threshold_defaults, 0, 0);
+			tr.b = &threshold_defaults;
+			tr.reset = 0;
+			tr.old_limit = 0;
+			threshold_restart_bank(&tr);
 		}
 	}
 }
@@ -251,20 +262,6 @@ struct threshold_attr {
 	ssize_t(*store) (struct threshold_block *, const char *, size_t count);
 };
 
-static void affinity_set(unsigned int cpu, cpumask_t *oldmask,
-					   cpumask_t *newmask)
-{
-	*oldmask = current->cpus_allowed;
-	cpus_clear(*newmask);
-	cpu_set(cpu, *newmask);
-	set_cpus_allowed_ptr(current, newmask);
-}
-
-static void affinity_restore(const cpumask_t *oldmask)
-{
-	set_cpus_allowed_ptr(current, oldmask);
-}
-
 #define SHOW_FIELDS(name)                                           \
 static ssize_t show_ ## name(struct threshold_block * b, char *buf) \
 {                                                                   \
@@ -277,15 +274,16 @@ static ssize_t store_interrupt_enable(struct threshold_block *b,
 				      const char *buf, size_t count)
 {
 	char *end;
-	cpumask_t oldmask, newmask;
+	struct thresh_restart tr;
 	unsigned long new = simple_strtoul(buf, &end, 0);
 	if (end == buf)
 		return -EINVAL;
 	b->interrupt_enable = !!new;
 
-	affinity_set(b->cpu, &oldmask, &newmask);
-	threshold_restart_bank(b, 0, 0);
-	affinity_restore(&oldmask);
+	tr.b = b;
+	tr.reset = 0;
+	tr.old_limit = 0;
+	work_on_cpu(b->cpu, threshold_restart_bank, &tr);
 
 	return end - buf;
 }
@@ -294,8 +292,7 @@ static ssize_t store_threshold_limit(struct threshold_block *b,
 				     const char *buf, size_t count)
 {
 	char *end;
-	cpumask_t oldmask, newmask;
-	u16 old;
+	struct thresh_restart tr;
 	unsigned long new = simple_strtoul(buf, &end, 0);
 	if (end == buf)
 		return -EINVAL;
@@ -303,34 +300,36 @@ static ssize_t store_threshold_limit(struct threshold_block *b,
 		new = THRESHOLD_MAX;
 	if (new < 1)
 		new = 1;
-	old = b->threshold_limit;
+	tr.old_limit = b->threshold_limit;
 	b->threshold_limit = new;
+	tr.b = b;
+	tr.reset = 0;
 
-	affinity_set(b->cpu, &oldmask, &newmask);
-	threshold_restart_bank(b, 0, old);
-	affinity_restore(&oldmask);
+	work_on_cpu(b->cpu, threshold_restart_bank, &tr);
 
 	return end - buf;
 }
 
-static ssize_t show_error_count(struct threshold_block *b, char *buf)
+static long local_error_count(void *_b)
 {
-	u32 high, low;
-	cpumask_t oldmask, newmask;
-	affinity_set(b->cpu, &oldmask, &newmask);
+	struct threshold_block *b = _b;
+	u32 low, high;
+
 	rdmsr(b->address, low, high);
-	affinity_restore(&oldmask);
-	return sprintf(buf, "%x\n",
-		       (high & 0xFFF) - (THRESHOLD_MAX - b->threshold_limit));
+	return (high & 0xFFF) - (THRESHOLD_MAX - b->threshold_limit);
+}
+
+static ssize_t show_error_count(struct threshold_block *b, char *buf)
+{
+	return sprintf(buf, "%lx\n", work_on_cpu(b->cpu, local_error_count, b));
 }
 
 static ssize_t store_error_count(struct threshold_block *b,
 				 const char *buf, size_t count)
 {
-	cpumask_t oldmask, newmask;
-	affinity_set(b->cpu, &oldmask, &newmask);
-	threshold_restart_bank(b, 1, 0);
-	affinity_restore(&oldmask);
+	struct thresh_restart tr = { .b = b, .reset = 1, .old_limit = 0 };
+
+	work_on_cpu(b->cpu, threshold_restart_bank, &tr);
 	return 1;
 }
 
@@ -463,12 +462,19 @@ out_free:
 	return err;
 }
 
+static long local_allocate_threshold_blocks(void *_bank)
+{
+	unsigned int *bank = _bank;
+
+	return allocate_threshold_blocks(smp_processor_id(), *bank, 0,
+					 MSR_IA32_MC0_MISC + *bank * 4);
+}
+
 /* symlinks sibling shared banks to first core.  first core owns dir/files. */
 static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank)
 {
 	int i, err = 0;
 	struct threshold_bank *b = NULL;
-	cpumask_t oldmask, newmask;
 	char name[32];
 
 	sprintf(name, "threshold_bank%i", bank);
@@ -519,11 +525,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank)
 
 	per_cpu(threshold_banks, cpu)[bank] = b;
 
-	affinity_set(cpu, &oldmask, &newmask);
-	err = allocate_threshold_blocks(cpu, bank, 0,
-					MSR_IA32_MC0_MISC + bank * 4);
-	affinity_restore(&oldmask);
-
+	err = work_on_cpu(cpu, local_allocate_threshold_blocks, &bank);
 	if (err)
 		goto out_free;
 
-- 
cgit v1.2.3


From e4d98207ea3f3d15eb664282df16d18c4ac86f80 Mon Sep 17 00:00:00 2001
From: Mike Travis <travis@sgi.com>
Date: Tue, 16 Dec 2008 17:34:05 -0800
Subject: x86: xen: use smp_call_function_many()

Impact: use new API, remove cpumask from stack.

Change smp_call_function_mask() callers to smp_call_function_many().

This removes a cpumask from the stack, and falls back should allocating
the cpumask var fail (only possible with CONFIG_CPUMASKS_OFFSTACK).

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Mike Travis <travis@sgi.com>
Cc: jeremy@xensource.com
---
 arch/x86/xen/mmu.c | 20 +++++++++++++++-----
 1 file changed, 15 insertions(+), 5 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index 636ef4caa52..e59e53b11e2 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -1079,7 +1079,7 @@ static void drop_other_mm_ref(void *info)
 
 static void xen_drop_mm_ref(struct mm_struct *mm)
 {
-	cpumask_t mask;
+	cpumask_var_t mask;
 	unsigned cpu;
 
 	if (current->active_mm == mm) {
@@ -1091,7 +1091,16 @@ static void xen_drop_mm_ref(struct mm_struct *mm)
 	}
 
 	/* Get the "official" set of cpus referring to our pagetable. */
-	mask = mm->cpu_vm_mask;
+	if (!alloc_cpumask_var(&mask, GFP_ATOMIC)) {
+		for_each_online_cpu(cpu) {
+			if (!cpumask_test_cpu(cpu, &mm->cpu_vm_mask)
+			    && per_cpu(xen_current_cr3, cpu) != __pa(mm->pgd))
+				continue;
+			smp_call_function_single(cpu, drop_other_mm_ref, mm, 1);
+		}
+		return;
+	}
+	cpumask_copy(mask, &mm->cpu_vm_mask);
 
 	/* It's possible that a vcpu may have a stale reference to our
 	   cr3, because its in lazy mode, and it hasn't yet flushed
@@ -1100,11 +1109,12 @@ static void xen_drop_mm_ref(struct mm_struct *mm)
 	   if needed. */
 	for_each_online_cpu(cpu) {
 		if (per_cpu(xen_current_cr3, cpu) == __pa(mm->pgd))
-			cpu_set(cpu, mask);
+			cpumask_set_cpu(cpu, mask);
 	}
 
-	if (!cpus_empty(mask))
-		smp_call_function_mask(mask, drop_other_mm_ref, mm, 1);
+	if (!cpumask_empty(mask))
+		smp_call_function_many(mask, drop_other_mm_ref, mm, 1);
+	free_cpumask_var(mask);
 }
 #else
 static void xen_drop_mm_ref(struct mm_struct *mm)
-- 
cgit v1.2.3


From 83b19597f793fd5f91533bda0dc2eb3d21936798 Mon Sep 17 00:00:00 2001
From: Mike Travis <travis@sgi.com>
Date: Tue, 16 Dec 2008 17:34:06 -0800
Subject: x86: Introduce topology_core_cpumask()/topology_thread_cpumask()

Impact: new API

The old topology_core_siblings() and topology_thread_siblings() return
a cpumask_t; these new ones return a (const) struct cpumask *.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Mike Travis <travis@sgi.com>
---
 arch/x86/include/asm/topology.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h
index ff386ff50ed..79e31e9dcdd 100644
--- a/arch/x86/include/asm/topology.h
+++ b/arch/x86/include/asm/topology.h
@@ -226,6 +226,8 @@ extern cpumask_t cpu_coregroup_map(int cpu);
 #define topology_core_id(cpu)			(cpu_data(cpu).cpu_core_id)
 #define topology_core_siblings(cpu)		(per_cpu(cpu_core_map, cpu))
 #define topology_thread_siblings(cpu)		(per_cpu(cpu_sibling_map, cpu))
+#define topology_core_cpumask(cpu)		(&per_cpu(cpu_core_map, cpu))
+#define topology_thread_cpumask(cpu)		(&per_cpu(cpu_sibling_map, cpu))
 
 /* indicates that pointers to the topology cpumask_t maps are valid */
 #define arch_provides_topology_pointers		yes
-- 
cgit v1.2.3


From d733e00d7c10cc68333fbb88108bb15bb044f61b Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Wed, 17 Dec 2008 13:35:51 +0100
Subject: x86: update io_apic.c to the new cpumask code

Impact: build fix

The sparseirq tree crossed with the cpumask changes, fix the fallout.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/io_apic.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c
index 6bd51ce3ce3..58938cc4b7d 100644
--- a/arch/x86/kernel/io_apic.c
+++ b/arch/x86/kernel/io_apic.c
@@ -347,13 +347,14 @@ void arch_free_chip_data(struct irq_desc *old_desc, struct irq_desc *desc)
 	}
 }
 
-static void set_extra_move_desc(struct irq_desc *desc, cpumask_t mask)
+static void
+set_extra_move_desc(struct irq_desc *desc, const struct cpumask *mask)
 {
 	struct irq_cfg *cfg = desc->chip_data;
 
 	if (!cfg->move_in_progress) {
 		/* it means that domain is not changed */
-		if (!cpus_intersects(desc->affinity, mask))
+		if (!cpumask_intersects(&desc->affinity, mask))
 			cfg->move_desc_pending = 1;
 	}
 }
-- 
cgit v1.2.3


From 8ce7996009bab7b2d23e7af7ad831fed7eb6faa1 Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Tue, 16 Dec 2008 12:17:35 -0800
Subject: x86: add swiotlb allocation functions

Add x86-specific swiotlb allocation functions.  These are purely
default for the moment.

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/pci-swiotlb_64.c | 12 ++++++++++++
 1 file changed, 12 insertions(+)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/pci-swiotlb_64.c b/arch/x86/kernel/pci-swiotlb_64.c
index 3c539d111ab..f47a097a135 100644
--- a/arch/x86/kernel/pci-swiotlb_64.c
+++ b/arch/x86/kernel/pci-swiotlb_64.c
@@ -3,6 +3,8 @@
 #include <linux/pci.h>
 #include <linux/cache.h>
 #include <linux/module.h>
+#include <linux/swiotlb.h>
+#include <linux/bootmem.h>
 #include <linux/dma-mapping.h>
 
 #include <asm/iommu.h>
@@ -11,6 +13,16 @@
 
 int swiotlb __read_mostly;
 
+void *swiotlb_alloc_boot(size_t size, unsigned long nslabs)
+{
+	return alloc_bootmem_low_pages(size);
+}
+
+void *swiotlb_alloc(unsigned order, unsigned long nslabs)
+{
+	return (void *)__get_free_pages(GFP_DMA | __GFP_NOWARN, order);
+}
+
 static dma_addr_t
 swiotlb_map_single_phys(struct device *hwdev, phys_addr_t paddr, size_t size,
 			int direction)
-- 
cgit v1.2.3


From cfb80c9eae8c7ed8f2ee81090062d15ead51cbe8 Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Tue, 16 Dec 2008 12:17:36 -0800
Subject: x86: unify pci iommu setup and allow swiotlb to compile for 32 bit

swiotlb on 32 bit will be used by Xen domain 0 support.

Signed-off-by: Ian Campbell <ian.campbell@citrix.com>
Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/include/asm/dma-mapping.h | 2 +-
 arch/x86/include/asm/pci.h         | 2 ++
 arch/x86/include/asm/pci_64.h      | 1 -
 arch/x86/kernel/Makefile           | 3 ++-
 arch/x86/kernel/pci-dma.c          | 6 ++++--
 arch/x86/kernel/pci-swiotlb_64.c   | 2 ++
 arch/x86/mm/init_32.c              | 3 +++
 7 files changed, 14 insertions(+), 5 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/dma-mapping.h b/arch/x86/include/asm/dma-mapping.h
index 097794ff6b7..3b43a65894c 100644
--- a/arch/x86/include/asm/dma-mapping.h
+++ b/arch/x86/include/asm/dma-mapping.h
@@ -65,7 +65,7 @@ static inline struct dma_mapping_ops *get_dma_ops(struct device *dev)
 		return dma_ops;
 	else
 		return dev->archdata.dma_ops;
-#endif /* _ASM_X86_DMA_MAPPING_H */
+#endif
 }
 
 /* Make sure we keep the same behaviour */
diff --git a/arch/x86/include/asm/pci.h b/arch/x86/include/asm/pci.h
index 875b38edf19..50ac542c938 100644
--- a/arch/x86/include/asm/pci.h
+++ b/arch/x86/include/asm/pci.h
@@ -82,6 +82,8 @@ static inline void pci_dma_burst_advice(struct pci_dev *pdev,
 static inline void early_quirks(void) { }
 #endif
 
+extern void pci_iommu_alloc(void);
+
 #endif  /* __KERNEL__ */
 
 #ifdef CONFIG_X86_32
diff --git a/arch/x86/include/asm/pci_64.h b/arch/x86/include/asm/pci_64.h
index d02d936840a..4da20798277 100644
--- a/arch/x86/include/asm/pci_64.h
+++ b/arch/x86/include/asm/pci_64.h
@@ -23,7 +23,6 @@ extern int (*pci_config_write)(int seg, int bus, int dev, int fn,
 			       int reg, int len, u32 value);
 
 extern void dma32_reserve_bootmem(void);
-extern void pci_iommu_alloc(void);
 
 /* The PCI address space does equal the physical memory
  * address space.  The networking and block device layers use
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index b62a7667828..a9c656f2d66 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -105,6 +105,8 @@ microcode-$(CONFIG_MICROCODE_INTEL)	+= microcode_intel.o
 microcode-$(CONFIG_MICROCODE_AMD)	+= microcode_amd.o
 obj-$(CONFIG_MICROCODE)			+= microcode.o
 
+obj-$(CONFIG_SWIOTLB)			+= pci-swiotlb_64.o # NB rename without _64
+
 ###
 # 64 bit specific files
 ifeq ($(CONFIG_X86_64),y)
@@ -118,7 +120,6 @@ ifeq ($(CONFIG_X86_64),y)
         obj-$(CONFIG_GART_IOMMU)	+= pci-gart_64.o aperture_64.o
         obj-$(CONFIG_CALGARY_IOMMU)	+= pci-calgary_64.o tce_64.o
         obj-$(CONFIG_AMD_IOMMU)		+= amd_iommu_init.o amd_iommu.o
-        obj-$(CONFIG_SWIOTLB)		+= pci-swiotlb_64.o
 
         obj-$(CONFIG_PCI_MMCONFIG)	+= mmconf-fam10h_64.o
 endif
diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c
index e150ad4f0cc..00e07447a5b 100644
--- a/arch/x86/kernel/pci-dma.c
+++ b/arch/x86/kernel/pci-dma.c
@@ -105,11 +105,15 @@ static void __init dma32_free_bootmem(void)
 	dma32_bootmem_ptr = NULL;
 	dma32_bootmem_size = 0;
 }
+#endif
 
 void __init pci_iommu_alloc(void)
 {
+#ifdef CONFIG_X86_64
 	/* free the range so iommu could get some range less than 4G */
 	dma32_free_bootmem();
+#endif
+
 	/*
 	 * The order of these functions is important for
 	 * fall-back/fail-over reasons
@@ -125,8 +129,6 @@ void __init pci_iommu_alloc(void)
 	pci_swiotlb_init();
 }
 
-#endif
-
 void *dma_generic_alloc_coherent(struct device *dev, size_t size,
 				 dma_addr_t *dma_addr, gfp_t flag)
 {
diff --git a/arch/x86/kernel/pci-swiotlb_64.c b/arch/x86/kernel/pci-swiotlb_64.c
index f47a097a135..a991afea670 100644
--- a/arch/x86/kernel/pci-swiotlb_64.c
+++ b/arch/x86/kernel/pci-swiotlb_64.c
@@ -62,8 +62,10 @@ struct dma_mapping_ops swiotlb_dma_ops = {
 void __init pci_swiotlb_init(void)
 {
 	/* don't initialize swiotlb if iommu=off (no_iommu=1) */
+#ifdef CONFIG_X86_64
 	if (!iommu_detected && !no_iommu && max_pfn > MAX_DMA32_PFN)
 	       swiotlb = 1;
+#endif
 	if (swiotlb_force)
 		swiotlb = 1;
 	if (swiotlb) {
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index c483f424207..2b4b14fc0c0 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -21,6 +21,7 @@
 #include <linux/init.h>
 #include <linux/highmem.h>
 #include <linux/pagemap.h>
+#include <linux/pci.h>
 #include <linux/pfn.h>
 #include <linux/poison.h>
 #include <linux/bootmem.h>
@@ -971,6 +972,8 @@ void __init mem_init(void)
 
 	start_periodic_check_for_corruption();
 
+	pci_iommu_alloc();
+
 #ifdef CONFIG_FLATMEM
 	BUG_ON(!mem_map);
 #endif
-- 
cgit v1.2.3


From 1d32251e846ccbcf9d2da041dffd1199f94b2a3b Mon Sep 17 00:00:00 2001
From: Ian Campbell <ian.campbell@citrix.com>
Date: Tue, 16 Dec 2008 12:17:37 -0800
Subject: x86/swiotlb: add default phys<->bus conversion

Xen will override these later on.

Signed-off-by: Ian Campbell <ian.campbell@citrix.com>
Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/pci-swiotlb_64.c | 10 ++++++++++
 1 file changed, 10 insertions(+)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/pci-swiotlb_64.c b/arch/x86/kernel/pci-swiotlb_64.c
index a991afea670..93a8371f2c2 100644
--- a/arch/x86/kernel/pci-swiotlb_64.c
+++ b/arch/x86/kernel/pci-swiotlb_64.c
@@ -23,6 +23,16 @@ void *swiotlb_alloc(unsigned order, unsigned long nslabs)
 	return (void *)__get_free_pages(GFP_DMA | __GFP_NOWARN, order);
 }
 
+dma_addr_t swiotlb_phys_to_bus(phys_addr_t paddr)
+{
+	return paddr;
+}
+
+phys_addr_t swiotlb_bus_to_phys(dma_addr_t baddr)
+{
+	return baddr;
+}
+
 static dma_addr_t
 swiotlb_map_single_phys(struct device *hwdev, phys_addr_t paddr, size_t size,
 			int direction)
-- 
cgit v1.2.3


From a08636690d06b2e36cfb4c2b3ee133a81c47e1e0 Mon Sep 17 00:00:00 2001
From: Ian Campbell <ian.campbell@citrix.com>
Date: Tue, 16 Dec 2008 12:17:38 -0800
Subject: x86/swiotlb: add default swiotlb_arch_range_needs_mapping

Xen will override these later on.

Signed-off-by: Ian Campbell <ian.campbell@citrix.com>
Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/pci-swiotlb_64.c | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/pci-swiotlb_64.c b/arch/x86/kernel/pci-swiotlb_64.c
index 93a8371f2c2..242c3440687 100644
--- a/arch/x86/kernel/pci-swiotlb_64.c
+++ b/arch/x86/kernel/pci-swiotlb_64.c
@@ -33,6 +33,11 @@ phys_addr_t swiotlb_bus_to_phys(dma_addr_t baddr)
 	return baddr;
 }
 
+int __weak swiotlb_arch_range_needs_mapping(void *ptr, size_t size)
+{
+	return 0;
+}
+
 static dma_addr_t
 swiotlb_map_single_phys(struct device *hwdev, phys_addr_t paddr, size_t size,
 			int direction)
-- 
cgit v1.2.3


From a775a38b1353161a6d7af86b667d6523c12c1a37 Mon Sep 17 00:00:00 2001
From: Mike Travis <travis@sgi.com>
Date: Wed, 17 Dec 2008 15:21:39 -0800
Subject: x86: fix cpu_mask_to_apicid_and to include cpu_online_mask

Impact: fix potential APIC crash

In determining the destination apicid, there are usually three cpumasks
that are considered: the incoming cpumask arg, cfg->domain and the
cpu_online_mask.  Since we are just introducing the cpu_mask_to_apicid_and
function, make sure it includes the cpu_online_mask in it's evaluation.
[Added with this patch.]

There are two io_apic.c functions that did not previously use the
cpu_online_mask:  setup_IO_APIC_irq and msi_compose_msg.  Both of these
simply used cpu_mask_to_apicid(cfg->domain & TARGET_CPUS), and all but
one arch (NUMAQ[*]) returns only online cpus in the TARGET_CPUS mask,
so the behavior is identical for all cases.

[*: NUMAQ bug?]

Note that alloc_cpumask_var is only used for the 32-bit cases where
it's highly likely that the cpumask set size will be small and therefore
CPUMASK_OFFSTACK=n.  But if that's not the case, failing the allocate
will cause the same return value as the default.

Signed-off-by: Mike Travis <travis@sgi.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/include/asm/bigsmp/apic.h            |  4 ++-
 arch/x86/include/asm/es7000/apic.h            | 40 ++++++++++++---------------
 arch/x86/include/asm/mach-default/mach_apic.h |  3 +-
 arch/x86/include/asm/summit/apic.h            | 30 +++++++++++---------
 arch/x86/kernel/genapic_flat_64.c             |  4 ++-
 arch/x86/kernel/genx2apic_cluster.c           |  4 ++-
 arch/x86/kernel/genx2apic_phys.c              |  4 ++-
 arch/x86/kernel/genx2apic_uv_x.c              |  4 ++-
 8 files changed, 52 insertions(+), 41 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/bigsmp/apic.h b/arch/x86/include/asm/bigsmp/apic.h
index 976399debb3..d8dd9f53791 100644
--- a/arch/x86/include/asm/bigsmp/apic.h
+++ b/arch/x86/include/asm/bigsmp/apic.h
@@ -138,7 +138,9 @@ static inline unsigned int cpu_mask_to_apicid_and(const struct cpumask *cpumask,
 	 * We're using fixed IRQ delivery, can only return one phys APIC ID.
 	 * May as well be the first.
 	 */
-	cpu = cpumask_any_and(cpumask, andmask);
+	for_each_cpu_and(cpu, cpumask, andmask)
+		if (cpumask_test_cpu(cpu, cpu_online_mask))
+			break;
 	if (cpu < nr_cpu_ids)
 		return cpu_to_logical_apicid(cpu);
 
diff --git a/arch/x86/include/asm/es7000/apic.h b/arch/x86/include/asm/es7000/apic.h
index ba8423c5363..51ac1230294 100644
--- a/arch/x86/include/asm/es7000/apic.h
+++ b/arch/x86/include/asm/es7000/apic.h
@@ -214,51 +214,47 @@ static inline unsigned int cpu_mask_to_apicid(const cpumask_t *cpumask)
 	return apicid;
 }
 
-static inline unsigned int cpu_mask_to_apicid_and(const struct cpumask *cpumask,
+
+static inline unsigned int cpu_mask_to_apicid_and(const struct cpumask *inmask,
 						  const struct cpumask *andmask)
 {
 	int num_bits_set;
-	int num_bits_set2;
 	int cpus_found = 0;
 	int cpu;
-	int apicid = 0;
+	int apicid = cpu_to_logical_apicid(0);
+	cpumask_var_t cpumask;
+
+	if (!alloc_cpumask_var(&cpumask, GFP_ATOMIC))
+		return apicid;
+
+	cpumask_and(cpumask, inmask, andmask);
+	cpumask_and(cpumask, cpumask, cpu_online_mask);
 
 	num_bits_set = cpumask_weight(cpumask);
-	num_bits_set2 = cpumask_weight(andmask);
-	num_bits_set = min(num_bits_set, num_bits_set2);
 	/* Return id to all */
-	if (num_bits_set >= nr_cpu_ids)
-#if defined CONFIG_ES7000_CLUSTERED_APIC
-		return 0xFF;
-#else
-		return cpu_to_logical_apicid(0);
-#endif
+	if (num_bits_set == NR_CPUS)
+		goto exit;
 	/*
 	 * The cpus in the mask must all be on the apic cluster.  If are not
 	 * on the same apicid cluster return default value of TARGET_CPUS.
 	 */
-	cpu = cpumask_first_and(cpumask, andmask);
+	cpu = cpumask_first(cpumask);
 	apicid = cpu_to_logical_apicid(cpu);
-
 	while (cpus_found < num_bits_set) {
-		if (cpumask_test_cpu(cpu, cpumask) &&
-		    cpumask_test_cpu(cpu, andmask)) {
+		if (cpumask_test_cpu(cpu, cpumask)) {
 			int new_apicid = cpu_to_logical_apicid(cpu);
 			if (apicid_cluster(apicid) !=
-					apicid_cluster(new_apicid)) {
-				printk(KERN_WARNING
-					"%s: Not a valid mask!\n", __func__);
-#if defined CONFIG_ES7000_CLUSTERED_APIC
-				return 0xFF;
-#else
+					apicid_cluster(new_apicid)){
+				printk ("%s: Not a valid mask!\n", __func__);
 				return cpu_to_logical_apicid(0);
-#endif
 			}
 			apicid = new_apicid;
 			cpus_found++;
 		}
 		cpu++;
 	}
+exit:
+	free_cpumask_var(cpumask);
 	return apicid;
 }
 
diff --git a/arch/x86/include/asm/mach-default/mach_apic.h b/arch/x86/include/asm/mach-default/mach_apic.h
index 8863d978cb9..cc09cbbee27 100644
--- a/arch/x86/include/asm/mach-default/mach_apic.h
+++ b/arch/x86/include/asm/mach-default/mach_apic.h
@@ -72,8 +72,9 @@ static inline unsigned int cpu_mask_to_apicid_and(const struct cpumask *cpumask,
 {
 	unsigned long mask1 = cpumask_bits(cpumask)[0];
 	unsigned long mask2 = cpumask_bits(andmask)[0];
+	unsigned long mask3 = cpumask_bits(cpu_online_mask)[0];
 
-	return (unsigned int)(mask1 & mask2);
+	return (unsigned int)(mask1 & mask2 & mask3);
 }
 
 static inline u32 phys_pkg_id(u32 cpuid_apic, int index_msb)
diff --git a/arch/x86/include/asm/summit/apic.h b/arch/x86/include/asm/summit/apic.h
index 651a9384934..99327d1be49 100644
--- a/arch/x86/include/asm/summit/apic.h
+++ b/arch/x86/include/asm/summit/apic.h
@@ -170,35 +170,37 @@ static inline unsigned int cpu_mask_to_apicid(const cpumask_t *cpumask)
 	return apicid;
 }
 
-static inline unsigned int cpu_mask_to_apicid_and(const struct cpumask *cpumask,
+static inline unsigned int cpu_mask_to_apicid_and(const struct cpumask *inmask,
 						  const struct cpumask *andmask)
 {
 	int num_bits_set;
-	int num_bits_set2;
 	int cpus_found = 0;
 	int cpu;
-	int apicid = 0;
+	int apicid = 0xFF;
+	cpumask_var_t cpumask;
+
+	if (!alloc_cpumask_var(&cpumask, GFP_ATOMIC))
+		return (int) 0xFF;
+
+	cpumask_and(cpumask, inmask, andmask);
+	cpumask_and(cpumask, cpumask, cpu_online_mask);
 
 	num_bits_set = cpumask_weight(cpumask);
-	num_bits_set2 = cpumask_weight(andmask);
-	num_bits_set = min(num_bits_set, num_bits_set2);
 	/* Return id to all */
-	if (num_bits_set >= nr_cpu_ids)
-		return 0xFF;
+	if (num_bits_set == nr_cpu_ids)
+		goto exit;
 	/*
 	 * The cpus in the mask must all be on the apic cluster.  If are not
 	 * on the same apicid cluster return default value of TARGET_CPUS.
 	 */
-	cpu = cpumask_first_and(cpumask, andmask);
+	cpu = cpumask_first(cpumask);
 	apicid = cpu_to_logical_apicid(cpu);
 	while (cpus_found < num_bits_set) {
-		if (cpumask_test_cpu(cpu, cpumask)
-		    && cpumask_test_cpu(cpu, andmask)) {
+		if (cpumask_test_cpu(cpu, cpumask)) {
 			int new_apicid = cpu_to_logical_apicid(cpu);
 			if (apicid_cluster(apicid) !=
-					apicid_cluster(new_apicid)) {
-				printk(KERN_WARNING
-					"%s: Not a valid mask!\n", __func__);
+					apicid_cluster(new_apicid)){
+				printk ("%s: Not a valid mask!\n", __func__);
 				return 0xFF;
 			}
 			apicid = apicid | new_apicid;
@@ -206,6 +208,8 @@ static inline unsigned int cpu_mask_to_apicid_and(const struct cpumask *cpumask,
 		}
 		cpu++;
 	}
+exit:
+	free_cpumask_var(cpumask);
 	return apicid;
 }
 
diff --git a/arch/x86/kernel/genapic_flat_64.c b/arch/x86/kernel/genapic_flat_64.c
index 7fa5f49c2dd..34185488e4f 100644
--- a/arch/x86/kernel/genapic_flat_64.c
+++ b/arch/x86/kernel/genapic_flat_64.c
@@ -276,7 +276,9 @@ physflat_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
 	 * We're using fixed IRQ delivery, can only return one phys APIC ID.
 	 * May as well be the first.
 	 */
-	cpu = cpumask_any_and(cpumask, andmask);
+	for_each_cpu_and(cpu, cpumask, andmask)
+		if (cpumask_test_cpu(cpu, cpu_online_mask))
+			break;
 	if (cpu < nr_cpu_ids)
 		return per_cpu(x86_cpu_to_apicid, cpu);
 	return BAD_APICID;
diff --git a/arch/x86/kernel/genx2apic_cluster.c b/arch/x86/kernel/genx2apic_cluster.c
index 4716a0c9f93..d451c9b9fdf 100644
--- a/arch/x86/kernel/genx2apic_cluster.c
+++ b/arch/x86/kernel/genx2apic_cluster.c
@@ -133,7 +133,9 @@ static unsigned int x2apic_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
 	 * We're using fixed IRQ delivery, can only return one phys APIC ID.
 	 * May as well be the first.
 	 */
-	cpu = cpumask_any_and(cpumask, andmask);
+	for_each_cpu_and(cpu, cpumask, andmask)
+		if (cpumask_test_cpu(cpu, cpu_online_mask))
+			break;
 	if (cpu < nr_cpu_ids)
 		return per_cpu(x86_cpu_to_apicid, cpu);
 	return BAD_APICID;
diff --git a/arch/x86/kernel/genx2apic_phys.c b/arch/x86/kernel/genx2apic_phys.c
index b255507884f..62895cf315f 100644
--- a/arch/x86/kernel/genx2apic_phys.c
+++ b/arch/x86/kernel/genx2apic_phys.c
@@ -132,7 +132,9 @@ static unsigned int x2apic_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
 	 * We're using fixed IRQ delivery, can only return one phys APIC ID.
 	 * May as well be the first.
 	 */
-	cpu = cpumask_any_and(cpumask, andmask);
+	for_each_cpu_and(cpu, cpumask, andmask)
+		if (cpumask_test_cpu(cpu, cpu_online_mask))
+			break;
 	if (cpu < nr_cpu_ids)
 		return per_cpu(x86_cpu_to_apicid, cpu);
 	return BAD_APICID;
diff --git a/arch/x86/kernel/genx2apic_uv_x.c b/arch/x86/kernel/genx2apic_uv_x.c
index 3984682cd84..0e88be11227 100644
--- a/arch/x86/kernel/genx2apic_uv_x.c
+++ b/arch/x86/kernel/genx2apic_uv_x.c
@@ -188,7 +188,9 @@ static unsigned int uv_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
 	 * We're using fixed IRQ delivery, can only return one phys APIC ID.
 	 * May as well be the first.
 	 */
-	cpu = cpumask_any_and(cpumask, andmask);
+	for_each_cpu_and(cpu, cpumask, andmask)
+		if (cpumask_test_cpu(cpu, cpu_online_mask))
+			break;
 	if (cpu < nr_cpu_ids)
 		return per_cpu(x86_cpu_to_apicid, cpu);
 	return BAD_APICID;
-- 
cgit v1.2.3


From 3b11ce7f542e415c90267b4482d4611410b468e6 Mon Sep 17 00:00:00 2001
From: Mike Travis <travis@sgi.com>
Date: Wed, 17 Dec 2008 15:21:39 -0800
Subject: x86: use possible_cpus=NUM to extend the possible cpus allowed

Impact: add new boot parameter

Use possible_cpus=NUM kernel parameter to extend the number of possible
cpus.

The ability to HOTPLUG ON cpus that are "possible" but not "present" is
dealt with in a later patch.

Signed-off-by: Mike Travis <travis@sgi.com>
---
 arch/x86/kernel/apic.c    | 20 ++++++++++++--------
 arch/x86/kernel/smpboot.c | 25 +++++++++++++++++++++----
 2 files changed, 33 insertions(+), 12 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/apic.c b/arch/x86/kernel/apic.c
index 93cf2d13f33..f7a32a3beb2 100644
--- a/arch/x86/kernel/apic.c
+++ b/arch/x86/kernel/apic.c
@@ -1819,28 +1819,32 @@ void disconnect_bsp_APIC(int virt_wire_setup)
 void __cpuinit generic_processor_info(int apicid, int version)
 {
 	int cpu;
-	cpumask_t tmp_map;
 
 	/*
 	 * Validate version
 	 */
 	if (version == 0x0) {
 		pr_warning("BIOS bug, APIC version is 0 for CPU#%d! "
-			"fixing up to 0x10. (tell your hw vendor)\n",
-			version);
+			   "fixing up to 0x10. (tell your hw vendor)\n",
+				version);
 		version = 0x10;
 	}
 	apic_version[apicid] = version;
 
-	if (num_processors >= NR_CPUS) {
-		pr_warning("WARNING: NR_CPUS limit of %i reached."
-			"  Processor ignored.\n", NR_CPUS);
+	if (num_processors >= nr_cpu_ids) {
+		int max = nr_cpu_ids;
+		int thiscpu = max + disabled_cpus;
+
+		pr_warning(
+			"ACPI: NR_CPUS/possible_cpus limit of %i reached."
+			"  Processor %d/0x%x ignored.\n", max, thiscpu, apicid);
+
+		disabled_cpus++;
 		return;
 	}
 
 	num_processors++;
-	cpus_complement(tmp_map, cpu_present_map);
-	cpu = first_cpu(tmp_map);
+	cpu = cpumask_next_zero(-1, cpu_present_mask);
 
 	physid_set(apicid, phys_cpu_present_map);
 	if (apicid == boot_cpu_physical_apicid) {
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index be946678804..1a9941b1115 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -1252,6 +1252,15 @@ void __init native_smp_cpus_done(unsigned int max_cpus)
 	check_nmi_watchdog();
 }
 
+static int __initdata setup_possible_cpus = -1;
+static int __init _setup_possible_cpus(char *str)
+{
+	get_option(&str, &setup_possible_cpus);
+	return 0;
+}
+early_param("possible_cpus", _setup_possible_cpus);
+
+
 /*
  * cpu_possible_map should be static, it cannot change as cpu's
  * are onlined, or offlined. The reason is per-cpu data-structures
@@ -1264,7 +1273,7 @@ void __init native_smp_cpus_done(unsigned int max_cpus)
  *
  * Three ways to find out the number of additional hotplug CPUs:
  * - If the BIOS specified disabled CPUs in ACPI/mptables use that.
- * - The user can overwrite it with additional_cpus=NUM
+ * - The user can overwrite it with possible_cpus=NUM
  * - Otherwise don't reserve additional CPUs.
  * We do this because additional CPUs waste a lot of memory.
  * -AK
@@ -1277,9 +1286,17 @@ __init void prefill_possible_map(void)
 	if (!num_processors)
 		num_processors = 1;
 
-	possible = num_processors + disabled_cpus;
-	if (possible > NR_CPUS)
-		possible = NR_CPUS;
+	if (setup_possible_cpus == -1)
+		possible = num_processors + disabled_cpus;
+	else
+		possible = setup_possible_cpus;
+
+	if (possible > CONFIG_NR_CPUS) {
+		printk(KERN_WARNING
+			"%d Processors exceeds NR_CPUS limit of %d\n",
+			possible, CONFIG_NR_CPUS);
+		possible = CONFIG_NR_CPUS;
+	}
 
 	printk(KERN_INFO "SMP: Allowing %d CPUs, %d hotplug CPUs\n",
 		possible, max_t(int, possible - num_processors, 0));
-- 
cgit v1.2.3


From a7883dece6ef82097e6bdf19c1d0a20351e06056 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Fri, 19 Dec 2008 00:59:09 +0100
Subject: x86: fix warning in arch/x86/kernel/io_apic.c
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

this warning:

  arch/x86/kernel/io_apic.c: In function ‘ir_set_msi_irq_affinity’:
  arch/x86/kernel/io_apic.c:3373: warning: ‘cfg’ may be used uninitialized in this function

triggers because the variable was truly uninitialized. We'd crash on
entering this code.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/io_apic.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c
index 58938cc4b7d..908c1d00a5c 100644
--- a/arch/x86/kernel/io_apic.c
+++ b/arch/x86/kernel/io_apic.c
@@ -3360,7 +3360,7 @@ static void
 ir_set_msi_irq_affinity(unsigned int irq, const struct cpumask *mask)
 {
 	struct irq_desc *desc = irq_to_desc(irq);
-	struct irq_cfg *cfg;
+	struct irq_cfg *cfg = desc->chip_data;
 	unsigned int dest;
 	struct irte irte;
 
-- 
cgit v1.2.3


From b69edc76539be6a4aa39a22f85365fd4a3b3b9d2 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rjw@sisk.pl>
Date: Fri, 31 Oct 2008 01:02:41 +0100
Subject: x86 hibernate: Mark ACPI NVS memory region at startup

Introduce new initcall for marking the ACPI NVS memory at startup, so
that it can be saved/restored during hibernation/resume.

Based on a patch by Zhang Rui.

Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
Signed-off-by: Len Brown <len.brown@intel.com>
---
 arch/x86/kernel/e820.c | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index 7aafeb5263e..74c6a21fdc8 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -665,6 +665,27 @@ void __init e820_mark_nosave_regions(unsigned long limit_pfn)
 }
 #endif
 
+#ifdef CONFIG_HIBERNATION
+/**
+ * Mark ACPI NVS memory region, so that we can save/restore it during
+ * hibernation and the subsequent resume.
+ */
+static int __init e820_mark_nvs_memory(void)
+{
+	int i;
+
+	for (i = 0; i < e820.nr_map; i++) {
+		struct e820entry *ei = &e820.map[i];
+
+		if (ei->type == E820_NVS)
+			hibernate_nvs_register(ei->addr, ei->size);
+	}
+
+	return 0;
+}
+core_initcall(e820_mark_nvs_memory);
+#endif
+
 /*
  * Early reserved memory areas.
  */
-- 
cgit v1.2.3


From ba84ed9546e91348fdf3ff2bff859b0ee53b407a Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rjw@sisk.pl>
Date: Sun, 26 Oct 2008 20:56:30 +0100
Subject: ACPI hibernate: Introduce new kernel parameter acpi_sleep=s4_nonvs

On some machines it may be necessary to disable the saving/restoring
of the ACPI NVS memory region during hibernation/resume.  For this
purpose, introduce new ACPI kernel command line option
acpi_sleep=s4_nonvs.

Based on a patch by Zhang Rui.

Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
Acked-by: Nigel Cunningham <nigel@tuxonice.net>
Acked-by: Pavel Machek <pavel@suse.cz>
Signed-off-by: Len Brown <len.brown@intel.com>
---
 arch/x86/kernel/acpi/sleep.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c
index 806b4e9051b..707c1f6f95f 100644
--- a/arch/x86/kernel/acpi/sleep.c
+++ b/arch/x86/kernel/acpi/sleep.c
@@ -159,6 +159,8 @@ static int __init acpi_sleep_setup(char *str)
 #endif
 		if (strncmp(str, "old_ordering", 12) == 0)
 			acpi_old_suspend_ordering();
+		if (strncmp(str, "s4_nonvs", 8) == 0)
+			acpi_s4_no_nvs();
 		str = strchr(str, ',');
 		if (str != NULL)
 			str += strspn(str, ", \t");
-- 
cgit v1.2.3


From b909895739427874c089bc0e03dc119f99cab2dd Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yinghai@kernel.org>
Date: Fri, 19 Dec 2008 13:48:34 -0800
Subject: sparseirq: fix numa_migrate_irq_desc dependency and comments

Impact: reduce kconfig variable scope and clean up

Bartlomiej pointed out that the config dependencies and comments are not right.

update it depend to NUMA, and fix some comments

Reported-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Signed-off-by: Yinghai Lu <yinghai@kernel.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/Kconfig          | 2 +-
 arch/x86/kernel/io_apic.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 60a008857a3..5c243826334 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -250,7 +250,7 @@ config SPARSE_IRQ
 
 config NUMA_MIGRATE_IRQ_DESC
 	bool "Move irq desc when changing irq smp_affinity"
-	depends on SPARSE_IRQ && SMP
+	depends on SPARSE_IRQ && NUMA
 	default n
 	help
 	  This enables moving irq_desc to cpu/node that irq will use handled.
diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c
index bfe1245b1a3..a74887b416c 100644
--- a/arch/x86/kernel/io_apic.c
+++ b/arch/x86/kernel/io_apic.c
@@ -2471,7 +2471,7 @@ static void irq_complete_move(struct irq_desc **descp)
 		if (likely(!cfg->move_desc_pending))
 			return;
 
-		/* domain is not change, but affinity is changed */
+		/* domain has not changed, but affinity did */
 		me = smp_processor_id();
 		if (cpu_isset(me, desc->affinity)) {
 			*descp = desc = move_irq_desc(desc, me);
-- 
cgit v1.2.3


From 7b37b5fd9ba32c0c5afc3537eed7e7466f2173e2 Mon Sep 17 00:00:00 2001
From: Len Brown <len.brown@intel.com>
Date: Tue, 23 Dec 2008 01:47:42 -0500
Subject: ACPI: disable MPS when NO APIC-table found

When ACPI is asked to find an MADT (APIC table)
and fails, then ACPI expects to run in PIC mode.

However, if an MP Table is was found, IRQs will be
registered as if an IOAPIC is being used, even
though ACPI is configuring interrupt links links for PIC mode.

In this scenario, disable MPS so that IRQs
are registered in PIC mode, consistent with ACPI.

http://bugzilla.kernel.org/show_bug.cgi?id=12257

Signed-off-by: Len Brown <len.brown@intel.com>
---
 arch/x86/kernel/acpi/boot.c | 11 +++++++++++
 1 file changed, 11 insertions(+)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index 4c51a2f8fd3..de8ce79dd88 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -1359,6 +1359,17 @@ static void __init acpi_process_madt(void)
 			       "Invalid BIOS MADT, disabling ACPI\n");
 			disable_acpi();
 		}
+	} else {
+		/*
+ 		 * ACPI found no MADT, and so ACPI wants UP PIC mode.
+ 		 * In the event an MPS table was found, forget it.
+ 		 * Boot with "acpi=off" to use MPS on such a system.
+ 		 */
+		if (smp_found_config) {
+			printk(KERN_WARNING PREFIX
+				"No APIC-table, disabling MPS\n");
+			smp_found_config = 0;
+		}
 	}
 #endif
 	return;
-- 
cgit v1.2.3


From b77b881f21b29aa7efa668fde69ee3dc0372ae3f Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yinghai@kernel.org>
Date: Fri, 19 Dec 2008 15:23:44 -0800
Subject: x86: fix lguest used_vectors breakage, -v2

Impact: fix lguest, clean up

32-bit lguest used used_vectors to record vectors, but that model of
allocating vectors changed and got broken, after we changed vector
allocation to a per_cpu array.

Try enable that for 64bit, and the array is used for all vectors that
are not managed by vector_irq per_cpu array.

Also kill system_vectors[], that is now a duplication of the
used_vectors bitmap.

[ merged in cpus4096 due to io_apic.c cpumask changes. ]
[ -v2, fix build failure ]

Signed-off-by: Yinghai Lu <yinghai@kernel.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/include/asm/desc.h  | 10 ++++------
 arch/x86/include/asm/irq.h   |  1 +
 arch/x86/kernel/apic.c       |  2 --
 arch/x86/kernel/io_apic.c    |  9 +++------
 arch/x86/kernel/irqinit_32.c | 16 +++++++++++++++-
 arch/x86/kernel/irqinit_64.c | 13 +++++++++++++
 arch/x86/kernel/traps.c      | 12 +++++++-----
 7 files changed, 43 insertions(+), 20 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h
index e6b82b17b07..dc27705f544 100644
--- a/arch/x86/include/asm/desc.h
+++ b/arch/x86/include/asm/desc.h
@@ -320,16 +320,14 @@ static inline void set_intr_gate(unsigned int n, void *addr)
 	_set_gate(n, GATE_INTERRUPT, addr, 0, 0, __KERNEL_CS);
 }
 
-#define SYS_VECTOR_FREE		0
-#define SYS_VECTOR_ALLOCED	1
-
 extern int first_system_vector;
-extern char system_vectors[];
+/* used_vectors is BITMAP for irq is not managed by percpu vector_irq */
+extern unsigned long used_vectors[];
 
 static inline void alloc_system_vector(int vector)
 {
-	if (system_vectors[vector] == SYS_VECTOR_FREE) {
-		system_vectors[vector] = SYS_VECTOR_ALLOCED;
+	if (!test_bit(vector, used_vectors)) {
+		set_bit(vector, used_vectors);
 		if (first_system_vector > vector)
 			first_system_vector = vector;
 	} else
diff --git a/arch/x86/include/asm/irq.h b/arch/x86/include/asm/irq.h
index 8766d30fb74..4bb732e45a8 100644
--- a/arch/x86/include/asm/irq.h
+++ b/arch/x86/include/asm/irq.h
@@ -46,5 +46,6 @@ extern void native_init_IRQ(void);
 
 /* Interrupt vector management */
 extern DECLARE_BITMAP(used_vectors, NR_VECTORS);
+extern int vector_used_by_percpu_irq(unsigned int vector);
 
 #endif /* _ASM_X86_IRQ_H */
diff --git a/arch/x86/kernel/apic.c b/arch/x86/kernel/apic.c
index f7a32a3beb2..b9019271af6 100644
--- a/arch/x86/kernel/apic.c
+++ b/arch/x86/kernel/apic.c
@@ -118,8 +118,6 @@ EXPORT_SYMBOL_GPL(local_apic_timer_c2_ok);
 
 int first_system_vector = 0xfe;
 
-char system_vectors[NR_VECTORS] = { [0 ... NR_VECTORS-1] = SYS_VECTOR_FREE};
-
 /*
  * Debug level, exported for io_apic.c
  */
diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c
index 908c1d00a5c..1cbf7c8d46e 100644
--- a/arch/x86/kernel/io_apic.c
+++ b/arch/x86/kernel/io_apic.c
@@ -1326,13 +1326,10 @@ next:
 		}
 		if (unlikely(current_vector == vector))
 			continue;
-#ifdef CONFIG_X86_64
-		if (vector == IA32_SYSCALL_VECTOR)
-			goto next;
-#else
-		if (vector == SYSCALL_VECTOR)
+
+		if (test_bit(vector, used_vectors))
 			goto next;
-#endif
+
 		for_each_cpu_and(new_cpu, tmp_mask, cpu_online_mask)
 			if (per_cpu(vector_irq, new_cpu)[vector] != -1)
 				goto next;
diff --git a/arch/x86/kernel/irqinit_32.c b/arch/x86/kernel/irqinit_32.c
index 6a92f47c52e..61aa2a1004b 100644
--- a/arch/x86/kernel/irqinit_32.c
+++ b/arch/x86/kernel/irqinit_32.c
@@ -110,6 +110,18 @@ DEFINE_PER_CPU(vector_irq_t, vector_irq) = {
 	[IRQ15_VECTOR + 1 ... NR_VECTORS - 1] = -1
 };
 
+int vector_used_by_percpu_irq(unsigned int vector)
+{
+	int cpu;
+
+	for_each_online_cpu(cpu) {
+		if (per_cpu(vector_irq, cpu)[vector] != -1)
+			return 1;
+	}
+
+	return 0;
+}
+
 /* Overridden in paravirt.c */
 void init_IRQ(void) __attribute__((weak, alias("native_init_IRQ")));
 
@@ -146,10 +158,12 @@ void __init native_init_IRQ(void)
 	alloc_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt);
 
 	/* IPI for single call function */
-	set_intr_gate(CALL_FUNCTION_SINGLE_VECTOR, call_function_single_interrupt);
+	alloc_intr_gate(CALL_FUNCTION_SINGLE_VECTOR,
+				 call_function_single_interrupt);
 
 	/* Low priority IPI to cleanup after moving an irq */
 	set_intr_gate(IRQ_MOVE_CLEANUP_VECTOR, irq_move_cleanup_interrupt);
+	set_bit(IRQ_MOVE_CLEANUP_VECTOR, used_vectors);
 #endif
 
 #ifdef CONFIG_X86_LOCAL_APIC
diff --git a/arch/x86/kernel/irqinit_64.c b/arch/x86/kernel/irqinit_64.c
index 40c1e62ec78..1020919efe1 100644
--- a/arch/x86/kernel/irqinit_64.c
+++ b/arch/x86/kernel/irqinit_64.c
@@ -135,6 +135,18 @@ DEFINE_PER_CPU(vector_irq_t, vector_irq) = {
 	[IRQ15_VECTOR + 1 ... NR_VECTORS - 1] = -1
 };
 
+int vector_used_by_percpu_irq(unsigned int vector)
+{
+	int cpu;
+
+	for_each_online_cpu(cpu) {
+		if (per_cpu(vector_irq, cpu)[vector] != -1)
+			return 1;
+	}
+
+	return 0;
+}
+
 void __init init_ISA_irqs(void)
 {
 	int i;
@@ -187,6 +199,7 @@ static void __init smp_intr_init(void)
 
 	/* Low priority IPI to cleanup after moving an irq */
 	set_intr_gate(IRQ_MOVE_CLEANUP_VECTOR, irq_move_cleanup_interrupt);
+	set_bit(IRQ_MOVE_CLEANUP_VECTOR, used_vectors);
 #endif
 }
 
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 04d242ab016..4a6dff39a47 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -72,9 +72,6 @@
 
 #include "cpu/mcheck/mce.h"
 
-DECLARE_BITMAP(used_vectors, NR_VECTORS);
-EXPORT_SYMBOL_GPL(used_vectors);
-
 asmlinkage int system_call(void);
 
 /* Do we ignore FPU interrupts ? */
@@ -89,6 +86,9 @@ gate_desc idt_table[256]
 	__attribute__((__section__(".data.idt"))) = { { { { 0, 0 } } }, };
 #endif
 
+DECLARE_BITMAP(used_vectors, NR_VECTORS);
+EXPORT_SYMBOL_GPL(used_vectors);
+
 static int ignore_nmis;
 
 static inline void conditional_sti(struct pt_regs *regs)
@@ -949,9 +949,7 @@ dotraplinkage void do_iret_error(struct pt_regs *regs, long error_code)
 
 void __init trap_init(void)
 {
-#ifdef CONFIG_X86_32
 	int i;
-#endif
 
 #ifdef CONFIG_EISA
 	void __iomem *p = early_ioremap(0x0FFFD9, 4);
@@ -1008,11 +1006,15 @@ void __init trap_init(void)
 	}
 
 	set_system_trap_gate(SYSCALL_VECTOR, &system_call);
+#endif
 
 	/* Reserve all the builtin and the syscall vector: */
 	for (i = 0; i < FIRST_EXTERNAL_VECTOR; i++)
 		set_bit(i, used_vectors);
 
+#ifdef CONFIG_X86_64
+	set_bit(IA32_SYSCALL_VECTOR, used_vectors);
+#else
 	set_bit(SYSCALL_VECTOR, used_vectors);
 #endif
 	/*
-- 
cgit v1.2.3


From 7d87d5365556b1c6e8c00abcc632c3ad1fdc58b8 Mon Sep 17 00:00:00 2001
From: Suresh Siddha <suresh.b.siddha@intel.com>
Date: Mon, 22 Dec 2008 17:33:28 -0800
Subject: x86: use logical apicid in x2apic_cluster's
 x2apic_cpu_mask_to_apicid_and()

These commits:

	commit 95d313cf1c1ecedc8bec5727b09bdacbf67dfc45
	Author: Mike Travis <travis@sgi.com>
	Date:   Tue Dec 16 17:33:54 2008 -0800

	    x86: Add cpu_mask_to_apicid_and

and
	commit 6eeb7c5a99434596c5953a95baa17d2f085664e3
	Author: Mike Travis <travis@sgi.com>
	Date:   Tue Dec 16 17:33:55 2008 -0800

	    x86: update add-cpu_mask_to_apicid_and to use struct cpumask*

broke interrupt delivery on x2apic platforms.  As x2apic cluster mode uses
logical delivery mode, we need to use logical apicid instead of physical apicid
in x2apic_cpu_mask_to_apicid_and()

Impact: fixes the broken interrupt delivery issue on generic x2apic platforms.

Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
Acked-by: Mike Travis <travis@sgi.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/genx2apic_cluster.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/genx2apic_cluster.c b/arch/x86/kernel/genx2apic_cluster.c
index d451c9b9fdf..6ce497cc372 100644
--- a/arch/x86/kernel/genx2apic_cluster.c
+++ b/arch/x86/kernel/genx2apic_cluster.c
@@ -114,7 +114,7 @@ static unsigned int x2apic_cpu_mask_to_apicid(const struct cpumask *cpumask)
 	int cpu;
 
 	/*
-	 * We're using fixed IRQ delivery, can only return one phys APIC ID.
+	 * We're using fixed IRQ delivery, can only return one logical APIC ID.
 	 * May as well be the first.
 	 */
 	cpu = cpumask_first(cpumask);
@@ -130,14 +130,14 @@ static unsigned int x2apic_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
 	int cpu;
 
 	/*
-	 * We're using fixed IRQ delivery, can only return one phys APIC ID.
+	 * We're using fixed IRQ delivery, can only return one logical APIC ID.
 	 * May as well be the first.
 	 */
 	for_each_cpu_and(cpu, cpumask, andmask)
 		if (cpumask_test_cpu(cpu, cpu_online_mask))
 			break;
 	if (cpu < nr_cpu_ids)
-		return per_cpu(x86_cpu_to_apicid, cpu);
+		return per_cpu(x86_cpu_to_logical_apicid, cpu);
 	return BAD_APICID;
 }
 
-- 
cgit v1.2.3


From c3d80000e3a812fe5a200d6bde755fbd7fa65481 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Tue, 23 Dec 2008 15:15:17 +0100
Subject: x86: export vector_used_by_percpu_irq

Impact: build fix

lguest can be built as a module and makes use of this new symbol:

ERROR: "vector_used_by_percpu_irq" [drivers/lguest/lg.ko] undefined!

export it.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/irq.c | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
index 3f1d9d18df6..bce53e1352a 100644
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -9,6 +9,7 @@
 #include <asm/apic.h>
 #include <asm/io_apic.h>
 #include <asm/smp.h>
+#include <asm/irq.h>
 
 atomic_t irq_err_count;
 
@@ -190,3 +191,5 @@ u64 arch_irq_stat(void)
 #endif
 	return sum;
 }
+
+EXPORT_SYMBOL_GPL(vector_used_by_percpu_irq);
-- 
cgit v1.2.3


From 4e17fee24a39448f3a20e9cf98887b7665825848 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Thu, 25 Dec 2008 12:04:17 +0100
Subject: x86: turn CONFIG_SPARSE_IRQ off by default

New feature - lets disable it by default first - can flip it around
later.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/Kconfig | 1 -
 1 file changed, 1 deletion(-)

(limited to 'arch/x86')

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 5c243826334..d14a8806227 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -241,7 +241,6 @@ config X86_HAS_BOOT_CPU_ID
 config SPARSE_IRQ
 	bool "Support sparse irq numbering"
 	depends on PCI_MSI || HT_IRQ
-	default y
 	help
 	  This enables support for sparse irq, esp for msi/msi-x. You may need
 	  if you have lots of cards supports msi-x installed.
-- 
cgit v1.2.3


From 973656fe1afb4adf95d7b9ab75d4660cd3821ea1 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Thu, 25 Dec 2008 16:26:47 +0100
Subject: x86, sparseirq: clean up Kconfig entry

Impact: improve help text

Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/Kconfig | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index d14a8806227..e4c038abb71 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -242,10 +242,14 @@ config SPARSE_IRQ
 	bool "Support sparse irq numbering"
 	depends on PCI_MSI || HT_IRQ
 	help
-	  This enables support for sparse irq, esp for msi/msi-x. You may need
-	  if you have lots of cards supports msi-x installed.
+	  This enables support for sparse irqs. This is useful for distro
+	  kernels that want to define a high CONFIG_NR_CPUS value but still
+	  want to have low kernel memory footprint on smaller machines.
 
-	  If you don't know what to do here, say Y.
+	  ( Sparse IRQs can also be beneficial on NUMA boxes, as they spread
+	    out the irq_desc[] array in a more NUMA-friendly way. )
+
+	  If you don't know what to do here, say N.
 
 config NUMA_MIGRATE_IRQ_DESC
 	bool "Move irq desc when changing irq smp_affinity"
-- 
cgit v1.2.3


From a73ad3331fdbf4191cf99b83ea9ac7082b6757ba Mon Sep 17 00:00:00 2001
From: "H. Peter Anvin" <hpa@zytor.com>
Date: Thu, 25 Dec 2008 10:39:01 -0800
Subject: x86: unify the implementation of FPU traps

On 32 bits, we may suffer IRQ 13, or supposedly we might have a buggy
implementation which gives spurious trap 16.  We did not check for
this on 64 bits, but there is no reason we can't make the code the
same in both cases.  Furthermore, this is presumably rare, so do the
spurious check last, instead of first.

Signed-off-by: H. Peter Anvin <hpa@zytor.com>
---
 arch/x86/kernel/traps.c | 11 ++++-------
 1 file changed, 4 insertions(+), 7 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index f37cee75ab5..f5a640ba04b 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -689,12 +689,7 @@ void math_error(void __user *ip)
 	cwd = get_fpu_cwd(task);
 	swd = get_fpu_swd(task);
 
-	err = swd & ~cwd & 0x3f;
-
-#ifdef CONFIG_X86_32
-	if (!err)
-		return;
-#endif
+	err = swd & ~cwd;
 
 	if (err & 0x001) {	/* Invalid op */
 		/*
@@ -712,7 +707,9 @@ void math_error(void __user *ip)
 	} else if (err & 0x020) { /* Precision */
 		info.si_code = FPE_FLTRES;
 	} else {
-		info.si_code = __SI_FAULT|SI_KERNEL; /* WTF? */
+		/* If we're using IRQ 13, or supposedly even some trap 16
+		   implementations, it's possible we get a spurious trap... */
+		return;		/* Spurious trap, no error */
 	}
 	force_sig_info(SIGFPE, &info, task);
 }
-- 
cgit v1.2.3


From 71ab6b245fda6e7597a667a67cce0d26c3c7a14b Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Thu, 25 Dec 2008 17:18:43 +1030
Subject: x86: remove impossible test in mtrr/main.c

Impact: cleanup

enable_mtrr_cleanup is static, and is never set to anything but 0 or 1.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
---
 arch/x86/kernel/cpu/mtrr/main.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c
index c78c04821ea..acd9ac54d47 100644
--- a/arch/x86/kernel/cpu/mtrr/main.c
+++ b/arch/x86/kernel/cpu/mtrr/main.c
@@ -823,16 +823,14 @@ static int enable_mtrr_cleanup __initdata =
 
 static int __init disable_mtrr_cleanup_setup(char *str)
 {
-	if (enable_mtrr_cleanup != -1)
-		enable_mtrr_cleanup = 0;
+	enable_mtrr_cleanup = 0;
 	return 0;
 }
 early_param("disable_mtrr_cleanup", disable_mtrr_cleanup_setup);
 
 static int __init enable_mtrr_cleanup_setup(char *str)
 {
-	if (enable_mtrr_cleanup != -1)
-		enable_mtrr_cleanup = 1;
+	enable_mtrr_cleanup = 1;
 	return 0;
 }
 early_param("enable_mtrr_cleanup", enable_mtrr_cleanup_setup);
-- 
cgit v1.2.3


From bd8b96dfc216eebc72950a6c40da8d3eca8667df Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Fri, 26 Dec 2008 09:20:22 +0100
Subject: x86: clean up comment style in arch/x86/kernel/traps.c

Impact: cleanup

Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/traps.c | 26 +++++++++++++++++---------
 1 file changed, 17 insertions(+), 9 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index f5a640ba04b..dbfb8028992 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -292,8 +292,10 @@ dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code)
 	tsk->thread.error_code = error_code;
 	tsk->thread.trap_no = 8;
 
-	/* This is always a kernel trap and never fixable (and thus must
-	   never return). */
+	/*
+	 * This is always a kernel trap and never fixable (and thus must
+	 * never return).
+	 */
 	for (;;)
 		die(str, regs, error_code);
 }
@@ -524,9 +526,11 @@ dotraplinkage void __kprobes do_int3(struct pt_regs *regs, long error_code)
 }
 
 #ifdef CONFIG_X86_64
-/* Help handler running on IST stack to switch back to user stack
-   for scheduling or signal handling. The actual stack switch is done in
-   entry.S */
+/*
+ * Help handler running on IST stack to switch back to user stack
+ * for scheduling or signal handling. The actual stack switch is done in
+ * entry.S
+ */
 asmlinkage __kprobes struct pt_regs *sync_regs(struct pt_regs *eregs)
 {
 	struct pt_regs *regs = eregs;
@@ -536,8 +540,10 @@ asmlinkage __kprobes struct pt_regs *sync_regs(struct pt_regs *eregs)
 	/* Exception from user space */
 	else if (user_mode(eregs))
 		regs = task_pt_regs(current);
-	/* Exception from kernel and interrupts are enabled. Move to
-	   kernel process stack. */
+	/*
+	 * Exception from kernel and interrupts are enabled. Move to
+	 * kernel process stack.
+	 */
 	else if (eregs->flags & X86_EFLAGS_IF)
 		regs = (struct pt_regs *)(eregs->sp -= sizeof(struct pt_regs));
 	if (eregs != regs)
@@ -707,8 +713,10 @@ void math_error(void __user *ip)
 	} else if (err & 0x020) { /* Precision */
 		info.si_code = FPE_FLTRES;
 	} else {
-		/* If we're using IRQ 13, or supposedly even some trap 16
-		   implementations, it's possible we get a spurious trap... */
+		/*
+		 * If we're using IRQ 13, or supposedly even some trap 16
+		 * implementations, it's possible we get a spurious trap...
+		 */
 		return;		/* Spurious trap, no error */
 	}
 	force_sig_info(SIGFPE, &info, task);
-- 
cgit v1.2.3


From 18eefedfe8ad33e8fc7614c13359e29a9fab4644 Mon Sep 17 00:00:00 2001
From: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Date: Fri, 26 Dec 2008 12:29:48 +0900
Subject: irq: simplify for_each_irq_desc() usage

Impact: cleanup

all for_each_irq_desc() usage point have !desc check.
then its check can move into for_each_irq_desc() macro.

Signed-off-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Acked-by: Yinghai Lu <yinghai@kernel.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/io_apic.c | 10 ----------
 1 file changed, 10 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c
index a74887b416c..2fe543f58ac 100644
--- a/arch/x86/kernel/io_apic.c
+++ b/arch/x86/kernel/io_apic.c
@@ -1345,8 +1345,6 @@ void __setup_vector_irq(int cpu)
 
 	/* Mark the inuse vectors */
 	for_each_irq_desc(irq, desc) {
-		if (!desc)
-			continue;
 		cfg = desc->chip_data;
 		if (!cpu_isset(cpu, cfg->domain))
 			continue;
@@ -1730,8 +1728,6 @@ __apicdebuginit(void) print_IO_APIC(void)
 	for_each_irq_desc(irq, desc) {
 		struct irq_pin_list *entry;
 
-		if (!desc)
-			continue;
 		cfg = desc->chip_data;
 		entry = cfg->irq_2_pin;
 		if (!entry)
@@ -2378,9 +2374,6 @@ static void ir_irq_migration(struct work_struct *work)
 	struct irq_desc *desc;
 
 	for_each_irq_desc(irq, desc) {
-		if (!desc)
-			continue;
-
 		if (desc->status & IRQ_MOVE_PENDING) {
 			unsigned long flags;
 
@@ -2671,9 +2664,6 @@ static inline void init_IO_APIC_traps(void)
 	 * 0x80, because int 0x80 is hm, kind of importantish. ;)
 	 */
 	for_each_irq_desc(irq, desc) {
-		if (!desc)
-			continue;
-
 		cfg = desc->chip_data;
 		if (IO_APIC_IRQ(irq) && cfg && !cfg->vector) {
 			/*
-- 
cgit v1.2.3


From 393d68fb9929817cde7ab31c82d66fcb28ad35fc Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Fri, 26 Dec 2008 22:23:38 +1030
Subject: cpumask: x86: Introduce cpumask_of_{node,pcibus} to replace
 {node,pcibus}_to_cpumask

Impact: New APIs

The old node_to_cpumask/node_to_pcibus returned a cpumask_t: these
return a pointer to a struct cpumask.  Part of removing cpumasks from
the stack.

Also makes __pcibus_to_node take a const pointer.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Acked-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/include/asm/pci.h      | 10 ++++++++--
 arch/x86/include/asm/topology.h | 35 +++++++++++++++++++++++------------
 arch/x86/kernel/setup_percpu.c  |  8 ++++----
 3 files changed, 35 insertions(+), 18 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/pci.h b/arch/x86/include/asm/pci.h
index 875b38edf19..52d80d3d94f 100644
--- a/arch/x86/include/asm/pci.h
+++ b/arch/x86/include/asm/pci.h
@@ -98,9 +98,9 @@ static inline void early_quirks(void) { }
 
 #ifdef CONFIG_NUMA
 /* Returns the node based on pci bus */
-static inline int __pcibus_to_node(struct pci_bus *bus)
+static inline int __pcibus_to_node(const struct pci_bus *bus)
 {
-	struct pci_sysdata *sd = bus->sysdata;
+	const struct pci_sysdata *sd = bus->sysdata;
 
 	return sd->node;
 }
@@ -109,6 +109,12 @@ static inline cpumask_t __pcibus_to_cpumask(struct pci_bus *bus)
 {
 	return node_to_cpumask(__pcibus_to_node(bus));
 }
+
+static inline const struct cpumask *
+cpumask_of_pcibus(const struct pci_bus *bus)
+{
+	return cpumask_of_node(__pcibus_to_node(bus));
+}
 #endif
 
 #endif /* _ASM_X86_PCI_H */
diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h
index ff386ff50ed..45da5dc50fc 100644
--- a/arch/x86/include/asm/topology.h
+++ b/arch/x86/include/asm/topology.h
@@ -61,13 +61,19 @@ static inline int cpu_to_node(int cpu)
  *
  * Side note: this function creates the returned cpumask on the stack
  * so with a high NR_CPUS count, excessive stack space is used.  The
- * node_to_cpumask_ptr function should be used whenever possible.
+ * cpumask_of_node function should be used whenever possible.
  */
 static inline cpumask_t node_to_cpumask(int node)
 {
 	return node_to_cpumask_map[node];
 }
 
+/* Returns a bitmask of CPUs on Node 'node'. */
+static inline const struct cpumask *cpumask_of_node(int node)
+{
+	return &node_to_cpumask_map[node];
+}
+
 #else /* CONFIG_X86_64 */
 
 /* Mappings between node number and cpus on that node. */
@@ -82,7 +88,7 @@ DECLARE_EARLY_PER_CPU(int, x86_cpu_to_node_map);
 #ifdef CONFIG_DEBUG_PER_CPU_MAPS
 extern int cpu_to_node(int cpu);
 extern int early_cpu_to_node(int cpu);
-extern const cpumask_t *_node_to_cpumask_ptr(int node);
+extern const cpumask_t *cpumask_of_node(int node);
 extern cpumask_t node_to_cpumask(int node);
 
 #else	/* !CONFIG_DEBUG_PER_CPU_MAPS */
@@ -103,7 +109,7 @@ static inline int early_cpu_to_node(int cpu)
 }
 
 /* Returns a pointer to the cpumask of CPUs on Node 'node'. */
-static inline const cpumask_t *_node_to_cpumask_ptr(int node)
+static inline const cpumask_t *cpumask_of_node(int node)
 {
 	return &node_to_cpumask_map[node];
 }
@@ -116,12 +122,15 @@ static inline cpumask_t node_to_cpumask(int node)
 
 #endif /* !CONFIG_DEBUG_PER_CPU_MAPS */
 
-/* Replace default node_to_cpumask_ptr with optimized version */
+/*
+ * Replace default node_to_cpumask_ptr with optimized version
+ * Deprecated: use "const struct cpumask *mask = cpumask_of_node(node)"
+ */
 #define node_to_cpumask_ptr(v, node)		\
-		const cpumask_t *v = _node_to_cpumask_ptr(node)
+		const cpumask_t *v = cpumask_of_node(node)
 
 #define node_to_cpumask_ptr_next(v, node)	\
-			   v = _node_to_cpumask_ptr(node)
+			   v = cpumask_of_node(node)
 
 #endif /* CONFIG_X86_64 */
 
@@ -187,7 +196,7 @@ extern int __node_distance(int, int);
 #define	cpu_to_node(cpu)	0
 #define	early_cpu_to_node(cpu)	0
 
-static inline const cpumask_t *_node_to_cpumask_ptr(int node)
+static inline const cpumask_t *cpumask_of_node(int node)
 {
 	return &cpu_online_map;
 }
@@ -200,12 +209,15 @@ static inline int node_to_first_cpu(int node)
 	return first_cpu(cpu_online_map);
 }
 
-/* Replace default node_to_cpumask_ptr with optimized version */
+/*
+ * Replace default node_to_cpumask_ptr with optimized version
+ * Deprecated: use "const struct cpumask *mask = cpumask_of_node(node)"
+ */
 #define node_to_cpumask_ptr(v, node)		\
-		const cpumask_t *v = _node_to_cpumask_ptr(node)
+		const cpumask_t *v = cpumask_of_node(node)
 
 #define node_to_cpumask_ptr_next(v, node)	\
-			   v = _node_to_cpumask_ptr(node)
+			   v = cpumask_of_node(node)
 #endif
 
 #include <asm-generic/topology.h>
@@ -214,8 +226,7 @@ static inline int node_to_first_cpu(int node)
 /* Returns the number of the first CPU on Node 'node'. */
 static inline int node_to_first_cpu(int node)
 {
-	node_to_cpumask_ptr(mask, node);
-	return first_cpu(*mask);
+	return cpumask_first(cpumask_of_node(node));
 }
 #endif
 
diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c
index 1c2084291f9..8e8b1193add 100644
--- a/arch/x86/kernel/setup_percpu.c
+++ b/arch/x86/kernel/setup_percpu.c
@@ -334,25 +334,25 @@ static const cpumask_t cpu_mask_none;
 /*
  * Returns a pointer to the bitmask of CPUs on Node 'node'.
  */
-const cpumask_t *_node_to_cpumask_ptr(int node)
+const cpumask_t *cpumask_of_node(int node)
 {
 	if (node_to_cpumask_map == NULL) {
 		printk(KERN_WARNING
-			"_node_to_cpumask_ptr(%d): no node_to_cpumask_map!\n",
+			"cpumask_of_node(%d): no node_to_cpumask_map!\n",
 			node);
 		dump_stack();
 		return (const cpumask_t *)&cpu_online_map;
 	}
 	if (node >= nr_node_ids) {
 		printk(KERN_WARNING
-			"_node_to_cpumask_ptr(%d): node > nr_node_ids(%d)\n",
+			"cpumask_of_node(%d): node > nr_node_ids(%d)\n",
 			node, nr_node_ids);
 		dump_stack();
 		return &cpu_mask_none;
 	}
 	return &node_to_cpumask_map[node];
 }
-EXPORT_SYMBOL(_node_to_cpumask_ptr);
+EXPORT_SYMBOL(cpumask_of_node);
 
 /*
  * Returns a bitmask of CPUs on Node 'node'.
-- 
cgit v1.2.3


From 030bb203e01db12e3f2866799f4f03a114d06349 Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Fri, 26 Dec 2008 22:23:41 +1030
Subject: cpumask: cpu_coregroup_mask(): x86

Impact: New API

Like cpu_coregroup_map, but returns a (const) pointer.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Mike Travis <travis@sgi.com>
Cc: Ingo Molnar <mingo@redhat.com>
---
 arch/x86/include/asm/topology.h |  1 +
 arch/x86/kernel/smpboot.c       | 11 ++++++++---
 2 files changed, 9 insertions(+), 3 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h
index 45da5dc50fc..168203c0c31 100644
--- a/arch/x86/include/asm/topology.h
+++ b/arch/x86/include/asm/topology.h
@@ -231,6 +231,7 @@ static inline int node_to_first_cpu(int node)
 #endif
 
 extern cpumask_t cpu_coregroup_map(int cpu);
+extern const struct cpumask *cpu_coregroup_mask(int cpu);
 
 #ifdef ENABLE_TOPO_DEFINES
 #define topology_physical_package_id(cpu)	(cpu_data(cpu).phys_proc_id)
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 468c2f9d47a..d5274b6b088 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -497,7 +497,7 @@ void __cpuinit set_cpu_sibling_map(int cpu)
 }
 
 /* maps the cpu to the sched domain representing multi-core */
-cpumask_t cpu_coregroup_map(int cpu)
+const struct cpumask *cpu_coregroup_mask(int cpu)
 {
 	struct cpuinfo_x86 *c = &cpu_data(cpu);
 	/*
@@ -505,9 +505,14 @@ cpumask_t cpu_coregroup_map(int cpu)
 	 * And for power savings, we return cpu_core_map
 	 */
 	if (sched_mc_power_savings || sched_smt_power_savings)
-		return per_cpu(cpu_core_map, cpu);
+		return &per_cpu(cpu_core_map, cpu);
 	else
-		return c->llc_shared_map;
+		return &c->llc_shared_map;
+}
+
+cpumask_t cpu_coregroup_map(int cpu)
+{
+	return *cpu_coregroup_mask(cpu);
 }
 
 static void impress_friends(void)
-- 
cgit v1.2.3


From 0b936bfdeb85784b7df132b2c64fb34ad9b11ffa Mon Sep 17 00:00:00 2001
From: Jaswinder Singh <jaswinder@infradead.org>
Date: Tue, 23 Dec 2008 21:51:28 +0530
Subject: x86: reboot.c declare port_cf9_safe before they get used

Impact: cleanup, avoid sparse warning

Include "../pci/pci.h" for port_cf9_safe

Fixes this sparse warning:

  arch/x86/kernel/reboot.c:43:6: warning: symbol 'port_cf9_safe' was not declared. Should it be static?

Signed-off-by: Jaswinder Singh <jaswinder@infradead.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/reboot.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index 61f718df6ee..b165eb0884e 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -22,6 +22,7 @@
 #endif
 
 #include <mach_ipi.h>
+#include "../pci/pci.h"
 
 
 /*
-- 
cgit v1.2.3


From b6b301aa9fba57b114c3a00f5f43abf672bd4ecd Mon Sep 17 00:00:00 2001
From: Jaswinder Singh <jaswinder@infradead.org>
Date: Tue, 23 Dec 2008 21:52:33 +0530
Subject: x86: apic.c x2apic_preenabled and disable_x2apic should be static

Impact: cleanup, reduce kernel size a bit, avoid sparse warning

Fixes sparse warning:

  arch/x86/kernel/apic.c:103:5: warning: symbol 'disable_x2apic' was not declared. Should it be static?

Signed-off-by: Jaswinder Singh <jaswinder@infradead.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/include/asm/apic.h | 2 +-
 arch/x86/kernel/apic.c      | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
index 25caa0738af..e644bf6f90d 100644
--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h
@@ -93,7 +93,7 @@ static inline u32 native_apic_msr_read(u32 reg)
 }
 
 #ifndef CONFIG_X86_32
-extern int x2apic, x2apic_preenabled;
+extern int x2apic;
 extern void check_x2apic(void);
 extern void enable_x2apic(void);
 extern void enable_IR_x2apic(void);
diff --git a/arch/x86/kernel/apic.c b/arch/x86/kernel/apic.c
index 7397911f847..3a961bd9f61 100644
--- a/arch/x86/kernel/apic.c
+++ b/arch/x86/kernel/apic.c
@@ -97,8 +97,8 @@ __setup("apicpmtimer", setup_apicpmtimer);
 #ifdef HAVE_X2APIC
 int x2apic;
 /* x2apic enabled before OS handover */
-int x2apic_preenabled;
-int disable_x2apic;
+static int x2apic_preenabled;
+static int disable_x2apic;
 static __init int setup_nox2apic(char *str)
 {
 	disable_x2apic = 1;
-- 
cgit v1.2.3


From 13a0c3c269b223f60abfac8a9811d77111a8b4ba Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yinghai@kernel.org>
Date: Fri, 26 Dec 2008 02:05:47 -0800
Subject: sparseirq: work around compiler optimizing away __weak functions

Impact: fix panic on null pointer with sparseirq

Some GCC versions seem to inline the weak global function,
when that function is empty.

Work it around, by making the functions return a (dummy) integer.

Signed-off-by: Yinghai <yinghai@kernel.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/io_apic.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c
index 2fe543f58ac..97603937784 100644
--- a/arch/x86/kernel/io_apic.c
+++ b/arch/x86/kernel/io_apic.c
@@ -170,7 +170,7 @@ static struct irq_cfg irq_cfgx[NR_IRQS] = {
 	[15] = { .domain = CPU_MASK_ALL, .vector = IRQ15_VECTOR, },
 };
 
-void __init arch_early_irq_init(void)
+int __init arch_early_irq_init(void)
 {
 	struct irq_cfg *cfg;
 	struct irq_desc *desc;
@@ -184,6 +184,8 @@ void __init arch_early_irq_init(void)
 		desc = irq_to_desc(i);
 		desc->chip_data = &cfg[i];
 	}
+
+	return 0;
 }
 
 #ifdef CONFIG_SPARSE_IRQ
@@ -212,7 +214,7 @@ static struct irq_cfg *get_one_free_irq_cfg(int cpu)
 	return cfg;
 }
 
-void arch_init_chip_data(struct irq_desc *desc, int cpu)
+int arch_init_chip_data(struct irq_desc *desc, int cpu)
 {
 	struct irq_cfg *cfg;
 
@@ -224,6 +226,8 @@ void arch_init_chip_data(struct irq_desc *desc, int cpu)
 			BUG_ON(1);
 		}
 	}
+
+	return 0;
 }
 
 #ifdef CONFIG_NUMA_MIGRATE_IRQ_DESC
-- 
cgit v1.2.3


From 70a7d3cc1308a55104fbe505d76f2aca8a4cf53e Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Mon, 22 Dec 2008 10:26:05 -0800
Subject: swiotlb: add hwdev to swiotlb_phys_to_bus() / swiotlb_sg_to_bus()

Impact: extend functions with a (yet unused) parameter, update callsites

Some architectures need it - in preparation for highmem swiotlb.

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/pci-swiotlb_64.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/pci-swiotlb_64.c b/arch/x86/kernel/pci-swiotlb_64.c
index 242c3440687..6cf8a816dc2 100644
--- a/arch/x86/kernel/pci-swiotlb_64.c
+++ b/arch/x86/kernel/pci-swiotlb_64.c
@@ -23,7 +23,7 @@ void *swiotlb_alloc(unsigned order, unsigned long nslabs)
 	return (void *)__get_free_pages(GFP_DMA | __GFP_NOWARN, order);
 }
 
-dma_addr_t swiotlb_phys_to_bus(phys_addr_t paddr)
+dma_addr_t swiotlb_phys_to_bus(struct device *hwdev, phys_addr_t paddr)
 {
 	return paddr;
 }
-- 
cgit v1.2.3


From 1da4f9894c243a9c58c505fd8f3e6cc0709a8825 Mon Sep 17 00:00:00 2001
From: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
Date: Sun, 28 Dec 2008 15:02:05 +0900
Subject: swiotlb: replace architecture-specific swiotlb.h with linux/swiotlb.h

Impact: cleanup

This replaces architecture-specific swiotlb.h (X86 and IA64) with
linux/swiotlb.h.

Signed-off-by: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/include/asm/swiotlb.h | 38 +-------------------------------------
 1 file changed, 1 insertion(+), 37 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/swiotlb.h b/arch/x86/include/asm/swiotlb.h
index 51fb2c76ad7..b9e4e20174f 100644
--- a/arch/x86/include/asm/swiotlb.h
+++ b/arch/x86/include/asm/swiotlb.h
@@ -1,46 +1,10 @@
 #ifndef _ASM_X86_SWIOTLB_H
 #define _ASM_X86_SWIOTLB_H
 
-#include <asm/dma-mapping.h>
+#include <linux/swiotlb.h>
 
 /* SWIOTLB interface */
 
-extern dma_addr_t swiotlb_map_single(struct device *hwdev, void *ptr,
-				     size_t size, int dir);
-extern void *swiotlb_alloc_coherent(struct device *hwdev, size_t size,
-				    dma_addr_t *dma_handle, gfp_t flags);
-extern void swiotlb_unmap_single(struct device *hwdev, dma_addr_t dev_addr,
-				 size_t size, int dir);
-extern void swiotlb_sync_single_for_cpu(struct device *hwdev,
-					dma_addr_t dev_addr,
-					size_t size, int dir);
-extern void swiotlb_sync_single_for_device(struct device *hwdev,
-					   dma_addr_t dev_addr,
-					   size_t size, int dir);
-extern void swiotlb_sync_single_range_for_cpu(struct device *hwdev,
-					      dma_addr_t dev_addr,
-					      unsigned long offset,
-					      size_t size, int dir);
-extern void swiotlb_sync_single_range_for_device(struct device *hwdev,
-						 dma_addr_t dev_addr,
-						 unsigned long offset,
-						 size_t size, int dir);
-extern void swiotlb_sync_sg_for_cpu(struct device *hwdev,
-				    struct scatterlist *sg, int nelems,
-				    int dir);
-extern void swiotlb_sync_sg_for_device(struct device *hwdev,
-				       struct scatterlist *sg, int nelems,
-				       int dir);
-extern int swiotlb_map_sg(struct device *hwdev, struct scatterlist *sg,
-			  int nents, int direction);
-extern void swiotlb_unmap_sg(struct device *hwdev, struct scatterlist *sg,
-			     int nents, int direction);
-extern int swiotlb_dma_mapping_error(struct device *hwdev, dma_addr_t dma_addr);
-extern void swiotlb_free_coherent(struct device *hwdev, size_t size,
-				  void *vaddr, dma_addr_t dma_handle);
-extern int swiotlb_dma_supported(struct device *hwdev, u64 mask);
-extern void swiotlb_init(void);
-
 extern int swiotlb_force;
 
 #ifdef CONFIG_SWIOTLB
-- 
cgit v1.2.3


From 6092848a2a23b660150a38bc06f59d75838d70c8 Mon Sep 17 00:00:00 2001
From: Sergio Luis <sergio@larces.uece.br>
Date: Sun, 28 Dec 2008 04:12:26 -0300
Subject: x86: mark get_cpu_leaves() with __cpuinit annotation

Impact: fix section mismatch warning

Commit b2bb85549134c005e997e5a7ed303bda6a1ae738 ("x86: Remove cpumask games
in x86/kernel/cpu/intel_cacheinfo.c") introduced get_cpu_leaves(), which
references __cpuinit cpuid4_cache_lookup().

Mark get_cpu_leaves() with a __cpuinit annotation.

Signed-off-by: Sergio Luis <sergio@larces.uece.br>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/cpu/intel_cacheinfo.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c
index fb7f946cb65..7bd00a56567 100644
--- a/arch/x86/kernel/cpu/intel_cacheinfo.c
+++ b/arch/x86/kernel/cpu/intel_cacheinfo.c
@@ -534,7 +534,7 @@ static void __cpuinit free_cache_attributes(unsigned int cpu)
 	per_cpu(cpuid4_info, cpu) = NULL;
 }
 
-static void get_cpu_leaves(void *_retval)
+static void __cpuinit get_cpu_leaves(void *_retval)
 {
 	int j, *retval = _retval, cpu = smp_processor_id();
 
-- 
cgit v1.2.3


From c805b7300ed20ec4f10ea385988d6d3fa935b26c Mon Sep 17 00:00:00 2001
From: Cyrill Gorcunov <gorcunov@gmail.com>
Date: Sat, 27 Dec 2008 17:10:18 +0300
Subject: x86: mach-default setup.c cleanups

Impact: cleanup

- Break long lines into shorter form.
- Use pr_ macros instead of plain printk.

Signed-off-by: Cyrill Gorcunov <gorcunov@openvz.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/mach-default/setup.c | 15 +++++++--------
 1 file changed, 7 insertions(+), 8 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/mach-default/setup.c b/arch/x86/mach-default/setup.c
index 37b9ae4d44c..df167f26562 100644
--- a/arch/x86/mach-default/setup.c
+++ b/arch/x86/mach-default/setup.c
@@ -133,29 +133,28 @@ void __init time_init_hook(void)
  **/
 void mca_nmi_hook(void)
 {
-	/* If I recall correctly, there's a whole bunch of other things that
+	/*
+	 * If I recall correctly, there's a whole bunch of other things that
 	 * we can do to check for NMI problems, but that's all I know about
 	 * at the moment.
 	 */
-
-	printk("NMI generated from unknown source!\n");
+	pr_warning("NMI generated from unknown source!\n");
 }
 #endif
 
 static __init int no_ipi_broadcast(char *str)
 {
 	get_option(&str, &no_broadcast);
-	printk ("Using %s mode\n", no_broadcast ? "No IPI Broadcast" :
-											"IPI Broadcast");
+	pr_info("Using %s mode\n",
+		no_broadcast ? "No IPI Broadcast" : "IPI Broadcast");
 	return 1;
 }
-
 __setup("no_ipi_broadcast=", no_ipi_broadcast);
 
 static int __init print_ipi_mode(void)
 {
-	printk ("Using IPI %s mode\n", no_broadcast ? "No-Shortcut" :
-											"Shortcut");
+	pr_info("Using IPI %s mode\n",
+		no_broadcast ? "No-Shortcut" : "Shortcut");
 	return 0;
 }
 
-- 
cgit v1.2.3


From 2f06de0671096e19350c9efe21cfdbc0891aab20 Mon Sep 17 00:00:00 2001
From: Jaswinder Singh Rajput <jaswinder@infradead.org>
Date: Sat, 27 Dec 2008 21:37:10 +0530
Subject: x86: introducing asm/sys_ia32.h

Impact: cleanup, avoid 44 sparse warnings, new file asm/sys_ia32.h

Fixes following sparse warnings:

  CHECK   arch/x86/ia32/sys_ia32.c
arch/x86/ia32/sys_ia32.c:53:17: warning: symbol 'sys32_truncate64' was not declared. Should it be static?
arch/x86/ia32/sys_ia32.c:60:17: warning: symbol 'sys32_ftruncate64' was not declared. Should it be static?
arch/x86/ia32/sys_ia32.c:98:17: warning: symbol 'sys32_stat64' was not declared. Should it be static?
arch/x86/ia32/sys_ia32.c:109:17: warning: symbol 'sys32_lstat64' was not declared. Should it be static?
arch/x86/ia32/sys_ia32.c:119:17: warning: symbol 'sys32_fstat64' was not declared. Should it be static?
arch/x86/ia32/sys_ia32.c:128:17: warning: symbol 'sys32_fstatat' was not declared. Should it be static?
arch/x86/ia32/sys_ia32.c:164:17: warning: symbol 'sys32_mmap' was not declared. Should it be static?
arch/x86/ia32/sys_ia32.c:195:17: warning: symbol 'sys32_mprotect' was not declared. Should it be static?
arch/x86/ia32/sys_ia32.c:201:17: warning: symbol 'sys32_pipe' was not declared. Should it be static?
arch/x86/ia32/sys_ia32.c:215:17: warning: symbol 'sys32_rt_sigaction' was not declared. Should it be static?
arch/x86/ia32/sys_ia32.c:291:17: warning: symbol 'sys32_sigaction' was not declared. Should it be static?
arch/x86/ia32/sys_ia32.c:330:17: warning: symbol 'sys32_rt_sigprocmask' was not declared. Should it be static?
arch/x86/ia32/sys_ia32.c:370:17: warning: symbol 'sys32_alarm' was not declared. Should it be static?
arch/x86/ia32/sys_ia32.c:383:17: warning: symbol 'sys32_old_select' was not declared. Should it be static?
arch/x86/ia32/sys_ia32.c:393:17: warning: symbol 'sys32_waitpid' was not declared. Should it be static?
arch/x86/ia32/sys_ia32.c:401:17: warning: symbol 'sys32_sysfs' was not declared. Should it be static?
arch/x86/ia32/sys_ia32.c:406:17: warning: symbol 'sys32_sched_rr_get_interval' was not declared. Should it be static?
arch/x86/ia32/sys_ia32.c:421:17: warning: symbol 'sys32_rt_sigpending' was not declared. Should it be static?
arch/x86/ia32/sys_ia32.c:445:17: warning: symbol 'sys32_rt_sigqueueinfo' was not declared. Should it be static?
arch/x86/ia32/sys_ia32.c:472:17: warning: symbol 'sys32_sysctl' was not declared. Should it be static?
arch/x86/ia32/sys_ia32.c:517:17: warning: symbol 'sys32_pread' was not declared. Should it be static?
arch/x86/ia32/sys_ia32.c:524:17: warning: symbol 'sys32_pwrite' was not declared. Should it be static?
arch/x86/ia32/sys_ia32.c:532:17: warning: symbol 'sys32_personality' was not declared. Should it be static?
arch/x86/ia32/sys_ia32.c:545:17: warning: symbol 'sys32_sendfile' was not declared. Should it be static?
arch/x86/ia32/sys_ia32.c:565:17: warning: symbol 'sys32_mmap2' was not declared. Should it be static?
arch/x86/ia32/sys_ia32.c:589:17: warning: symbol 'sys32_olduname' was not declared. Should it be static?
arch/x86/ia32/sys_ia32.c:626:6: warning: symbol 'sys32_uname' was not declared. Should it be static?
arch/x86/ia32/sys_ia32.c:641:6: warning: symbol 'sys32_ustat' was not declared. Should it be static?
arch/x86/ia32/sys_ia32.c:663:17: warning: symbol 'sys32_execve' was not declared. Should it be static?
arch/x86/ia32/sys_ia32.c:678:17: warning: symbol 'sys32_clone' was not declared. Should it be static?
arch/x86/ia32/sys_ia32.c:693:6: warning: symbol 'sys32_lseek' was not declared. Should it be static?
arch/x86/ia32/sys_ia32.c:698:6: warning: symbol 'sys32_kill' was not declared. Should it be static?
arch/x86/ia32/sys_ia32.c:703:6: warning: symbol 'sys32_fadvise64_64' was not declared. Should it be static?
arch/x86/ia32/sys_ia32.c:712:6: warning: symbol 'sys32_vm86_warning' was not declared. Should it be static?
arch/x86/ia32/sys_ia32.c:726:6: warning: symbol 'sys32_lookup_dcookie' was not declared. Should it be static?
arch/x86/ia32/sys_ia32.c:732:20: warning: symbol 'sys32_readahead' was not declared. Should it be static?
arch/x86/ia32/sys_ia32.c:738:17: warning: symbol 'sys32_sync_file_range' was not declared. Should it be static?
arch/x86/ia32/sys_ia32.c:746:17: warning: symbol 'sys32_fadvise64' was not declared. Should it be static?
arch/x86/ia32/sys_ia32.c:753:17: warning: symbol 'sys32_fallocate' was not declared. Should it be static?
  CHECK   arch/x86/ia32/ia32_signal.c
arch/x86/ia32/ia32_signal.c:126:17: warning: symbol 'sys32_sigsuspend' was not declared. Should it be static?
arch/x86/ia32/ia32_signal.c:141:17: warning: symbol 'sys32_sigaltstack' was not declared. Should it be static?
arch/x86/ia32/ia32_signal.c:249:17: warning: symbol 'sys32_sigreturn' was not declared. Should it be static?
arch/x86/ia32/ia32_signal.c:279:17: warning: symbol 'sys32_rt_sigreturn' was not declared. Should it be static?
  CHECK   arch/x86/ia32/ipc32.c
arch/x86/ia32/ipc32.c:12:17: warning: symbol 'sys32_ipc' was not declared. Should it be static?

Signed-off-by: Jaswinder Singh Rajput <jaswinderrajput@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/ia32/ia32_signal.c     |   3 +-
 arch/x86/ia32/ipc32.c           |   1 +
 arch/x86/ia32/sys_ia32.c        |   2 +-
 arch/x86/include/asm/sys_ia32.h | 101 ++++++++++++++++++++++++++++++++++++++++
 4 files changed, 104 insertions(+), 3 deletions(-)
 create mode 100644 arch/x86/include/asm/sys_ia32.h

(limited to 'arch/x86')

diff --git a/arch/x86/ia32/ia32_signal.c b/arch/x86/ia32/ia32_signal.c
index b195f85526e..9dabd00e980 100644
--- a/arch/x86/ia32/ia32_signal.c
+++ b/arch/x86/ia32/ia32_signal.c
@@ -24,15 +24,14 @@
 #include <asm/ucontext.h>
 #include <asm/uaccess.h>
 #include <asm/i387.h>
-#include <asm/ia32.h>
 #include <asm/ptrace.h>
 #include <asm/ia32_unistd.h>
 #include <asm/user32.h>
 #include <asm/sigcontext32.h>
 #include <asm/proto.h>
 #include <asm/vdso.h>
-
 #include <asm/sigframe.h>
+#include <asm/sys_ia32.h>
 
 #define DEBUG_SIG 0
 
diff --git a/arch/x86/ia32/ipc32.c b/arch/x86/ia32/ipc32.c
index d21991ce606..29cdcd02ead 100644
--- a/arch/x86/ia32/ipc32.c
+++ b/arch/x86/ia32/ipc32.c
@@ -8,6 +8,7 @@
 #include <linux/shm.h>
 #include <linux/ipc.h>
 #include <linux/compat.h>
+#include <asm/sys_ia32.h>
 
 asmlinkage long sys32_ipc(u32 call, int first, int second, int third,
 			  compat_uptr_t ptr, u32 fifth)
diff --git a/arch/x86/ia32/sys_ia32.c b/arch/x86/ia32/sys_ia32.c
index 2e09dcd3c0a..6c0d7f6231a 100644
--- a/arch/x86/ia32/sys_ia32.c
+++ b/arch/x86/ia32/sys_ia32.c
@@ -44,8 +44,8 @@
 #include <asm/types.h>
 #include <asm/uaccess.h>
 #include <asm/atomic.h>
-#include <asm/ia32.h>
 #include <asm/vgtod.h>
+#include <asm/sys_ia32.h>
 
 #define AA(__x)		((unsigned long)(__x))
 
diff --git a/arch/x86/include/asm/sys_ia32.h b/arch/x86/include/asm/sys_ia32.h
new file mode 100644
index 00000000000..ffb08be2a53
--- /dev/null
+++ b/arch/x86/include/asm/sys_ia32.h
@@ -0,0 +1,101 @@
+/*
+ * sys_ia32.h - Linux ia32 syscall interfaces
+ *
+ * Copyright (c) 2008 Jaswinder Singh Rajput
+ *
+ * This file is released under the GPLv2.
+ * See the file COPYING for more details.
+ */
+
+#ifndef _ASM_X86_SYS_IA32_H
+#define _ASM_X86_SYS_IA32_H
+
+#include <linux/compiler.h>
+#include <linux/linkage.h>
+#include <linux/types.h>
+#include <linux/signal.h>
+#include <asm/compat.h>
+#include <asm/ia32.h>
+
+/* ia32/sys_ia32.c */
+asmlinkage long sys32_truncate64(char __user *, unsigned long, unsigned long);
+asmlinkage long sys32_ftruncate64(unsigned int, unsigned long, unsigned long);
+
+asmlinkage long sys32_stat64(char __user *, struct stat64 __user *);
+asmlinkage long sys32_lstat64(char __user *, struct stat64 __user *);
+asmlinkage long sys32_fstat64(unsigned int, struct stat64 __user *);
+asmlinkage long sys32_fstatat(unsigned int, char __user *,
+			      struct stat64 __user *, int);
+struct mmap_arg_struct;
+asmlinkage long sys32_mmap(struct mmap_arg_struct __user *);
+asmlinkage long sys32_mprotect(unsigned long, size_t, unsigned long);
+
+asmlinkage long sys32_pipe(int __user *);
+struct sigaction32;
+struct old_sigaction32;
+asmlinkage long sys32_rt_sigaction(int, struct sigaction32 __user *,
+				   struct sigaction32 __user *, unsigned int);
+asmlinkage long sys32_sigaction(int, struct old_sigaction32 __user *,
+				struct old_sigaction32 __user *);
+asmlinkage long sys32_rt_sigprocmask(int, compat_sigset_t __user *,
+				     compat_sigset_t __user *, unsigned int);
+asmlinkage long sys32_alarm(unsigned int);
+
+struct sel_arg_struct;
+asmlinkage long sys32_old_select(struct sel_arg_struct __user *);
+asmlinkage long sys32_waitpid(compat_pid_t, unsigned int *, int);
+asmlinkage long sys32_sysfs(int, u32, u32);
+
+asmlinkage long sys32_sched_rr_get_interval(compat_pid_t,
+					    struct compat_timespec __user *);
+asmlinkage long sys32_rt_sigpending(compat_sigset_t __user *, compat_size_t);
+asmlinkage long sys32_rt_sigqueueinfo(int, int, compat_siginfo_t __user *);
+
+#ifdef CONFIG_SYSCTL_SYSCALL
+struct sysctl_ia32;
+asmlinkage long sys32_sysctl(struct sysctl_ia32 __user *);
+#endif
+
+asmlinkage long sys32_pread(unsigned int, char __user *, u32, u32, u32);
+asmlinkage long sys32_pwrite(unsigned int, char __user *, u32, u32, u32);
+
+asmlinkage long sys32_personality(unsigned long);
+asmlinkage long sys32_sendfile(int, int, compat_off_t __user *, s32);
+
+asmlinkage long sys32_mmap2(unsigned long, unsigned long, unsigned long,
+			    unsigned long, unsigned long, unsigned long);
+
+struct oldold_utsname;
+struct old_utsname;
+asmlinkage long sys32_olduname(struct oldold_utsname __user *);
+long sys32_uname(struct old_utsname __user *);
+
+long sys32_ustat(unsigned, struct ustat32 __user *);
+
+asmlinkage long sys32_execve(char __user *, compat_uptr_t __user *,
+			     compat_uptr_t __user *, struct pt_regs *);
+asmlinkage long sys32_clone(unsigned int, unsigned int, struct pt_regs *);
+
+long sys32_lseek(unsigned int, int, unsigned int);
+long sys32_kill(int, int);
+long sys32_fadvise64_64(int, __u32, __u32, __u32, __u32, int);
+long sys32_vm86_warning(void);
+long sys32_lookup_dcookie(u32, u32, char __user *, size_t);
+
+asmlinkage ssize_t sys32_readahead(int, unsigned, unsigned, size_t);
+asmlinkage long sys32_sync_file_range(int, unsigned, unsigned,
+				      unsigned, unsigned, int);
+asmlinkage long sys32_fadvise64(int, unsigned, unsigned, size_t, int);
+asmlinkage long sys32_fallocate(int, int, unsigned,
+				unsigned, unsigned, unsigned);
+
+/* ia32/ia32_signal.c */
+asmlinkage long sys32_sigsuspend(int, int, old_sigset_t);
+asmlinkage long sys32_sigaltstack(const stack_ia32_t __user *,
+				  stack_ia32_t __user *, struct pt_regs *);
+asmlinkage long sys32_sigreturn(struct pt_regs *);
+asmlinkage long sys32_rt_sigreturn(struct pt_regs *);
+
+/* ia32/ipc32.c */
+asmlinkage long sys32_ipc(u32, int, int, int, compat_uptr_t, u32);
+#endif /* _ASM_X86_SYS_IA32_H */
-- 
cgit v1.2.3


From 83bd9243956f30d91851b272988a237999b35b10 Mon Sep 17 00:00:00 2001
From: Robert Richter <robert.richter@amd.com>
Date: Mon, 15 Dec 2008 15:09:50 +0100
Subject: x86/oprofile: fix pci_dev use count for AMD northbridge devices

This patch fixes the PCI device use count for AMD northbridge
devices. In case of an IBS LVT initialization failure, the PCI device
is released now by calling pci_dev_put().

If there are no initialization errors, the devices are released in
pci_get_device() while iterating.

Signed-off-by: Robert Richter <robert.richter@amd.com>
---
 arch/x86/oprofile/op_model_amd.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'arch/x86')

diff --git a/arch/x86/oprofile/op_model_amd.c b/arch/x86/oprofile/op_model_amd.c
index 98658f25f54..c5b5c7fb3ce 100644
--- a/arch/x86/oprofile/op_model_amd.c
+++ b/arch/x86/oprofile/op_model_amd.c
@@ -409,6 +409,7 @@ static int init_ibs_nmi(void)
 				       | IBSCTL_LVTOFFSETVAL);
 		pci_read_config_dword(cpu_cfg, IBSCTL, &value);
 		if (value != (ibs_eilvt_off | IBSCTL_LVTOFFSETVAL)) {
+			pci_dev_put(cpu_cfg);
 			printk(KERN_DEBUG "Failed to setup IBS LVT offset, "
 				"IBSCTL = 0x%08x", value);
 			return 1;
-- 
cgit v1.2.3


From a1ae299dfb6ef219b296b61d1f222732391973b5 Mon Sep 17 00:00:00 2001
From: Jaswinder Singh Rajput <jaswinder@infradead.org>
Date: Mon, 29 Dec 2008 20:32:52 +0530
Subject: x86: apic.c declare pic_mode before they get used

Impact: cleanup, avoid sparse warning

In asm/mpspec.h moved out pic_mode from CONFIG_X86_32 as it is common
for both 32 and 64 bit.

Fixes this sparse warning for x86_64:

  arch/x86/kernel/apic.c:128:5: warning: symbol 'pic_mode' was not declared. Should it be static?

Signed-off-by: Jaswinder Singh Rajput <jaswinderrajput@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/include/asm/mpspec.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/mpspec.h b/arch/x86/include/asm/mpspec.h
index 91885c28f66..62d14ce3cd0 100644
--- a/arch/x86/include/asm/mpspec.h
+++ b/arch/x86/include/asm/mpspec.h
@@ -6,13 +6,13 @@
 #include <asm/mpspec_def.h>
 
 extern int apic_version[MAX_APICS];
+extern int pic_mode;
 
 #ifdef CONFIG_X86_32
 #include <mach_mpspec.h>
 
 extern unsigned int def_to_bigsmp;
 extern u8 apicid_2_node[];
-extern int pic_mode;
 
 #ifdef CONFIG_X86_NUMAQ
 extern int mp_bus_id_to_node[MAX_MP_BUSSES];
-- 
cgit v1.2.3


From 7f3e632f9d8d234819bcdef7a68fc8b84f7d3d3d Mon Sep 17 00:00:00 2001
From: Jaswinder Singh Rajput <jaswinder@infradead.org>
Date: Mon, 29 Dec 2008 20:34:35 +0530
Subject: x86: io_apic.c io_apic_sync should be static

Impact: cleanup, reduce kernel size a bit, avoid sparse warning

Fixes sparse warning:

  arch/x86/kernel/io_apic.c:709:6: warning: symbol 'io_apic_sync' was not declared. Should it be static?

Signed-off-by: Jaswinder Singh Rajput <jaswinderrajput@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/io_apic.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c
index 679e7bbbbcd..b8c8a8e9934 100644
--- a/arch/x86/kernel/io_apic.c
+++ b/arch/x86/kernel/io_apic.c
@@ -481,7 +481,7 @@ static void __unmask_IO_APIC_irq(unsigned int irq)
 }
 
 #ifdef CONFIG_X86_64
-void io_apic_sync(struct irq_pin_list *entry)
+static void io_apic_sync(struct irq_pin_list *entry)
 {
 	/*
 	 * Synchronize the IO-APIC and the CPU by doing
-- 
cgit v1.2.3


From cbafbc826bf645f7fbbfbb2ff20138e5ccb4700e Mon Sep 17 00:00:00 2001
From: Jaswinder Singh Rajput <jaswinder@infradead.org>
Date: Mon, 29 Dec 2008 20:36:40 +0530
Subject: x86: efi.c declare add_efi_memmap before they get used

Impact: cleanup, avoid sparse warning

Fixes this sparse warning:

  arch/x86/kernel/efi.c:67:5: warning: symbol 'add_efi_memmap' was not declared. Should it be static?

Signed-off-by: Jaswinder Singh Rajput <jaswinderrajput@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/include/asm/efi.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h
index a2e545c91c3..ca5ffb2856b 100644
--- a/arch/x86/include/asm/efi.h
+++ b/arch/x86/include/asm/efi.h
@@ -90,6 +90,7 @@ extern void __iomem *efi_ioremap(unsigned long addr, unsigned long size);
 
 #endif /* CONFIG_X86_32 */
 
+extern int add_efi_memmap;
 extern void efi_reserve_early(void);
 extern void efi_call_phys_prelog(void);
 extern void efi_call_phys_epilog(void);
-- 
cgit v1.2.3


From c854c91979e0717c619bc55e124d41d60d5eb3d6 Mon Sep 17 00:00:00 2001
From: Jaswinder Singh Rajput <jaswinder@infradead.org>
Date: Mon, 29 Dec 2008 20:38:09 +0530
Subject: x86_64: pci-gart_64.c iommu_fullflush should be static

Impact: cleanup, reduce kernel size a bit, avoid sparse warning

Fixes sparse warning:

  arch/x86/kernel/pci-gart_64.c:55:5: warning: symbol 'iommu_fullflush' was not declared. Should it be static?

Signed-off-by: Jaswinder Singh Rajput <jaswinderrajput@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/pci-gart_64.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c
index a35eaa379ff..00c2bcd4146 100644
--- a/arch/x86/kernel/pci-gart_64.c
+++ b/arch/x86/kernel/pci-gart_64.c
@@ -52,7 +52,7 @@ static u32 *iommu_gatt_base;		/* Remapping table */
  * to trigger bugs with some popular PCI cards, in particular 3ware (but
  * has been also also seen with Qlogic at least).
  */
-int iommu_fullflush = 1;
+static int iommu_fullflush = 1;
 
 /* Allocation bitmap for the remapping area: */
 static DEFINE_SPINLOCK(iommu_bitmap_lock);
-- 
cgit v1.2.3


From 824877111cd7f2b4fd2fe6947c5c5cbbb3ac5bd8 Mon Sep 17 00:00:00 2001
From: Jaswinder Singh Rajput <jaswinder@infradead.org>
Date: Sat, 27 Dec 2008 18:32:28 +0530
Subject: x86, pci: move arch/x86/pci/pci.h to arch/x86/include/asm/pci_x86.h

Impact: cleanup

Now that arch/x86/pci/pci.h is used in a number of other places as well,
move the lowlevel x86 pci definitions into the architecture include files.
(not to be confused with the existing arch/x86/include/asm/pci.h file,
which provides public details about x86 PCI)

Tested on: X86_32_UP, X86_32_SMP and X86_64_SMP

Signed-off-by: Jaswinder Singh Rajput <jaswinderrajput@gmail.com>
Acked-by: Jesse Barnes <jbarnes@virtuousgeek.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/include/asm/pci_x86.h     | 165 +++++++++++++++++++++++++++++++++++++
 arch/x86/kernel/mmconf-fam10h_64.c |   3 +-
 arch/x86/kernel/reboot.c           |   3 +-
 arch/x86/pci/acpi.c                |   2 +-
 arch/x86/pci/amd_bus.c             |   2 +-
 arch/x86/pci/common.c              |   3 +-
 arch/x86/pci/direct.c              |   2 +-
 arch/x86/pci/early.c               |   2 +-
 arch/x86/pci/fixup.c               |   3 +-
 arch/x86/pci/i386.c                |   2 +-
 arch/x86/pci/init.c                |   2 +-
 arch/x86/pci/irq.c                 |   3 +-
 arch/x86/pci/legacy.c              |   2 +-
 arch/x86/pci/mmconfig-shared.c     |   3 +-
 arch/x86/pci/mmconfig_32.c         |   2 +-
 arch/x86/pci/mmconfig_64.c         |   3 +-
 arch/x86/pci/numaq_32.c            |   2 +-
 arch/x86/pci/olpc.c                |   2 +-
 arch/x86/pci/pcbios.c              |   5 +-
 arch/x86/pci/pci.h                 | 162 ------------------------------------
 arch/x86/pci/visws.c               |   3 +-
 21 files changed, 185 insertions(+), 191 deletions(-)
 create mode 100644 arch/x86/include/asm/pci_x86.h
 delete mode 100644 arch/x86/pci/pci.h

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/pci_x86.h b/arch/x86/include/asm/pci_x86.h
new file mode 100644
index 00000000000..e60fd3e14bd
--- /dev/null
+++ b/arch/x86/include/asm/pci_x86.h
@@ -0,0 +1,165 @@
+/*
+ *	Low-Level PCI Access for i386 machines.
+ *
+ *	(c) 1999 Martin Mares <mj@ucw.cz>
+ */
+
+#undef DEBUG
+
+#ifdef DEBUG
+#define DBG(x...) printk(x)
+#else
+#define DBG(x...)
+#endif
+
+#define PCI_PROBE_BIOS		0x0001
+#define PCI_PROBE_CONF1		0x0002
+#define PCI_PROBE_CONF2		0x0004
+#define PCI_PROBE_MMCONF	0x0008
+#define PCI_PROBE_MASK		0x000f
+#define PCI_PROBE_NOEARLY	0x0010
+
+#define PCI_NO_CHECKS		0x0400
+#define PCI_USE_PIRQ_MASK	0x0800
+#define PCI_ASSIGN_ROMS		0x1000
+#define PCI_BIOS_IRQ_SCAN	0x2000
+#define PCI_ASSIGN_ALL_BUSSES	0x4000
+#define PCI_CAN_SKIP_ISA_ALIGN	0x8000
+#define PCI_USE__CRS		0x10000
+#define PCI_CHECK_ENABLE_AMD_MMCONF	0x20000
+#define PCI_HAS_IO_ECS		0x40000
+#define PCI_NOASSIGN_ROMS	0x80000
+
+extern unsigned int pci_probe;
+extern unsigned long pirq_table_addr;
+
+enum pci_bf_sort_state {
+	pci_bf_sort_default,
+	pci_force_nobf,
+	pci_force_bf,
+	pci_dmi_bf,
+};
+
+/* pci-i386.c */
+
+extern unsigned int pcibios_max_latency;
+
+void pcibios_resource_survey(void);
+
+/* pci-pc.c */
+
+extern int pcibios_last_bus;
+extern struct pci_bus *pci_root_bus;
+extern struct pci_ops pci_root_ops;
+
+/* pci-irq.c */
+
+struct irq_info {
+	u8 bus, devfn;			/* Bus, device and function */
+	struct {
+		u8 link;		/* IRQ line ID, chipset dependent,
+					   0 = not routed */
+		u16 bitmap;		/* Available IRQs */
+	} __attribute__((packed)) irq[4];
+	u8 slot;			/* Slot number, 0=onboard */
+	u8 rfu;
+} __attribute__((packed));
+
+struct irq_routing_table {
+	u32 signature;			/* PIRQ_SIGNATURE should be here */
+	u16 version;			/* PIRQ_VERSION */
+	u16 size;			/* Table size in bytes */
+	u8 rtr_bus, rtr_devfn;		/* Where the interrupt router lies */
+	u16 exclusive_irqs;		/* IRQs devoted exclusively to
+					   PCI usage */
+	u16 rtr_vendor, rtr_device;	/* Vendor and device ID of
+					   interrupt router */
+	u32 miniport_data;		/* Crap */
+	u8 rfu[11];
+	u8 checksum;			/* Modulo 256 checksum must give 0 */
+	struct irq_info slots[0];
+} __attribute__((packed));
+
+extern unsigned int pcibios_irq_mask;
+
+extern int pcibios_scanned;
+extern spinlock_t pci_config_lock;
+
+extern int (*pcibios_enable_irq)(struct pci_dev *dev);
+extern void (*pcibios_disable_irq)(struct pci_dev *dev);
+
+struct pci_raw_ops {
+	int (*read)(unsigned int domain, unsigned int bus, unsigned int devfn,
+						int reg, int len, u32 *val);
+	int (*write)(unsigned int domain, unsigned int bus, unsigned int devfn,
+						int reg, int len, u32 val);
+};
+
+extern struct pci_raw_ops *raw_pci_ops;
+extern struct pci_raw_ops *raw_pci_ext_ops;
+
+extern struct pci_raw_ops pci_direct_conf1;
+extern bool port_cf9_safe;
+
+/* arch_initcall level */
+extern int pci_direct_probe(void);
+extern void pci_direct_init(int type);
+extern void pci_pcbios_init(void);
+extern int pci_olpc_init(void);
+extern void __init dmi_check_pciprobe(void);
+extern void __init dmi_check_skip_isa_align(void);
+
+/* some common used subsys_initcalls */
+extern int __init pci_acpi_init(void);
+extern int __init pcibios_irq_init(void);
+extern int __init pci_visws_init(void);
+extern int __init pci_numaq_init(void);
+extern int __init pcibios_init(void);
+
+/* pci-mmconfig.c */
+
+extern int __init pci_mmcfg_arch_init(void);
+extern void __init pci_mmcfg_arch_free(void);
+
+/*
+ * AMD Fam10h CPUs are buggy, and cannot access MMIO config space
+ * on their northbrige except through the * %eax register. As such, you MUST
+ * NOT use normal IOMEM accesses, you need to only use the magic mmio-config
+ * accessor functions.
+ * In fact just use pci_config_*, nothing else please.
+ */
+static inline unsigned char mmio_config_readb(void __iomem *pos)
+{
+	u8 val;
+	asm volatile("movb (%1),%%al" : "=a" (val) : "r" (pos));
+	return val;
+}
+
+static inline unsigned short mmio_config_readw(void __iomem *pos)
+{
+	u16 val;
+	asm volatile("movw (%1),%%ax" : "=a" (val) : "r" (pos));
+	return val;
+}
+
+static inline unsigned int mmio_config_readl(void __iomem *pos)
+{
+	u32 val;
+	asm volatile("movl (%1),%%eax" : "=a" (val) : "r" (pos));
+	return val;
+}
+
+static inline void mmio_config_writeb(void __iomem *pos, u8 val)
+{
+	asm volatile("movb %%al,(%1)" : : "a" (val), "r" (pos) : "memory");
+}
+
+static inline void mmio_config_writew(void __iomem *pos, u16 val)
+{
+	asm volatile("movw %%ax,(%1)" : : "a" (val), "r" (pos) : "memory");
+}
+
+static inline void mmio_config_writel(void __iomem *pos, u32 val)
+{
+	asm volatile("movl %%eax,(%1)" : : "a" (val), "r" (pos) : "memory");
+}
diff --git a/arch/x86/kernel/mmconf-fam10h_64.c b/arch/x86/kernel/mmconf-fam10h_64.c
index efc2f361fe8..666e43df51f 100644
--- a/arch/x86/kernel/mmconf-fam10h_64.c
+++ b/arch/x86/kernel/mmconf-fam10h_64.c
@@ -13,8 +13,7 @@
 #include <asm/msr.h>
 #include <asm/acpi.h>
 #include <asm/mmconfig.h>
-
-#include "../pci/pci.h"
+#include <asm/pci_x86.h>
 
 struct pci_hostbridge_probe {
 	u32 bus;
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index b165eb0884e..a90913cccfb 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -12,6 +12,7 @@
 #include <asm/proto.h>
 #include <asm/reboot_fixups.h>
 #include <asm/reboot.h>
+#include <asm/pci_x86.h>
 
 #ifdef CONFIG_X86_32
 # include <linux/dmi.h>
@@ -22,8 +23,6 @@
 #endif
 
 #include <mach_ipi.h>
-#include "../pci/pci.h"
-
 
 /*
  * Power off function, if any
diff --git a/arch/x86/pci/acpi.c b/arch/x86/pci/acpi.c
index 1d88d2b3977..9e5752fe4d1 100644
--- a/arch/x86/pci/acpi.c
+++ b/arch/x86/pci/acpi.c
@@ -4,7 +4,7 @@
 #include <linux/irq.h>
 #include <linux/dmi.h>
 #include <asm/numa.h>
-#include "pci.h"
+#include <asm/pci_x86.h>
 
 struct pci_root_info {
 	char *name;
diff --git a/arch/x86/pci/amd_bus.c b/arch/x86/pci/amd_bus.c
index 22e057665e5..9bb09823b36 100644
--- a/arch/x86/pci/amd_bus.c
+++ b/arch/x86/pci/amd_bus.c
@@ -2,7 +2,7 @@
 #include <linux/pci.h>
 #include <linux/topology.h>
 #include <linux/cpu.h>
-#include "pci.h"
+#include <asm/pci_x86.h>
 
 #ifdef CONFIG_X86_64
 #include <asm/pci-direct.h>
diff --git a/arch/x86/pci/common.c b/arch/x86/pci/common.c
index bb1a01f089e..62ddb73e09e 100644
--- a/arch/x86/pci/common.c
+++ b/arch/x86/pci/common.c
@@ -14,8 +14,7 @@
 #include <asm/segment.h>
 #include <asm/io.h>
 #include <asm/smp.h>
-
-#include "pci.h"
+#include <asm/pci_x86.h>
 
 unsigned int pci_probe = PCI_PROBE_BIOS | PCI_PROBE_CONF1 | PCI_PROBE_CONF2 |
 				PCI_PROBE_MMCONF;
diff --git a/arch/x86/pci/direct.c b/arch/x86/pci/direct.c
index 9a5af6c8fbe..bd13c3e4c6d 100644
--- a/arch/x86/pci/direct.c
+++ b/arch/x86/pci/direct.c
@@ -5,7 +5,7 @@
 #include <linux/pci.h>
 #include <linux/init.h>
 #include <linux/dmi.h>
-#include "pci.h"
+#include <asm/pci_x86.h>
 
 /*
  * Functions for accessing PCI base (first 256 bytes) and extended
diff --git a/arch/x86/pci/early.c b/arch/x86/pci/early.c
index 86631ccbc25..f6adf2c6d75 100644
--- a/arch/x86/pci/early.c
+++ b/arch/x86/pci/early.c
@@ -2,7 +2,7 @@
 #include <linux/pci.h>
 #include <asm/pci-direct.h>
 #include <asm/io.h>
-#include "pci.h"
+#include <asm/pci_x86.h>
 
 /* Direct PCI access. This is used for PCI accesses in early boot before
    the PCI subsystem works. */
diff --git a/arch/x86/pci/fixup.c b/arch/x86/pci/fixup.c
index 2051dc96b8e..7d388d5cf54 100644
--- a/arch/x86/pci/fixup.c
+++ b/arch/x86/pci/fixup.c
@@ -6,8 +6,7 @@
 #include <linux/dmi.h>
 #include <linux/pci.h>
 #include <linux/init.h>
-#include "pci.h"
-
+#include <asm/pci_x86.h>
 
 static void __devinit pci_fixup_i450nx(struct pci_dev *d)
 {
diff --git a/arch/x86/pci/i386.c b/arch/x86/pci/i386.c
index 844df0cbbd3..e51bf2cda4b 100644
--- a/arch/x86/pci/i386.c
+++ b/arch/x86/pci/i386.c
@@ -34,8 +34,8 @@
 
 #include <asm/pat.h>
 #include <asm/e820.h>
+#include <asm/pci_x86.h>
 
-#include "pci.h"
 
 static int
 skip_isa_ioresource_align(struct pci_dev *dev) {
diff --git a/arch/x86/pci/init.c b/arch/x86/pci/init.c
index d6c950f8185..bec3b048e72 100644
--- a/arch/x86/pci/init.c
+++ b/arch/x86/pci/init.c
@@ -1,6 +1,6 @@
 #include <linux/pci.h>
 #include <linux/init.h>
-#include "pci.h"
+#include <asm/pci_x86.h>
 
 /* arch_initcall has too random ordering, so call the initializers
    in the right sequence from here. */
diff --git a/arch/x86/pci/irq.c b/arch/x86/pci/irq.c
index bf69dbe08bf..373b9afe6d4 100644
--- a/arch/x86/pci/irq.c
+++ b/arch/x86/pci/irq.c
@@ -16,8 +16,7 @@
 #include <asm/io_apic.h>
 #include <linux/irq.h>
 #include <linux/acpi.h>
-
-#include "pci.h"
+#include <asm/pci_x86.h>
 
 #define PIRQ_SIGNATURE	(('$' << 0) + ('P' << 8) + ('I' << 16) + ('R' << 24))
 #define PIRQ_VERSION 0x0100
diff --git a/arch/x86/pci/legacy.c b/arch/x86/pci/legacy.c
index b722dd481b3..f1065b129e9 100644
--- a/arch/x86/pci/legacy.c
+++ b/arch/x86/pci/legacy.c
@@ -3,7 +3,7 @@
  */
 #include <linux/init.h>
 #include <linux/pci.h>
-#include "pci.h"
+#include <asm/pci_x86.h>
 
 /*
  * Discover remaining PCI buses in case there are peer host bridges.
diff --git a/arch/x86/pci/mmconfig-shared.c b/arch/x86/pci/mmconfig-shared.c
index 654a2234f8f..89bf9242c80 100644
--- a/arch/x86/pci/mmconfig-shared.c
+++ b/arch/x86/pci/mmconfig-shared.c
@@ -15,8 +15,7 @@
 #include <linux/acpi.h>
 #include <linux/bitmap.h>
 #include <asm/e820.h>
-
-#include "pci.h"
+#include <asm/pci_x86.h>
 
 /* aperture is up to 256MB but BIOS may reserve less */
 #define MMCONFIG_APER_MIN	(2 * 1024*1024)
diff --git a/arch/x86/pci/mmconfig_32.c b/arch/x86/pci/mmconfig_32.c
index f3c761dce69..8b2d561046a 100644
--- a/arch/x86/pci/mmconfig_32.c
+++ b/arch/x86/pci/mmconfig_32.c
@@ -13,7 +13,7 @@
 #include <linux/init.h>
 #include <linux/acpi.h>
 #include <asm/e820.h>
-#include "pci.h"
+#include <asm/pci_x86.h>
 
 /* Assume systems with more busses have correct MCFG */
 #define mmcfg_virt_addr ((void __iomem *) fix_to_virt(FIX_PCIE_MCFG))
diff --git a/arch/x86/pci/mmconfig_64.c b/arch/x86/pci/mmconfig_64.c
index a1994163c99..30007ffc8e1 100644
--- a/arch/x86/pci/mmconfig_64.c
+++ b/arch/x86/pci/mmconfig_64.c
@@ -10,8 +10,7 @@
 #include <linux/acpi.h>
 #include <linux/bitmap.h>
 #include <asm/e820.h>
-
-#include "pci.h"
+#include <asm/pci_x86.h>
 
 /* Static virtual mapping of the MMCONFIG aperture */
 struct mmcfg_virt {
diff --git a/arch/x86/pci/numaq_32.c b/arch/x86/pci/numaq_32.c
index 1177845d318..2089354968a 100644
--- a/arch/x86/pci/numaq_32.c
+++ b/arch/x86/pci/numaq_32.c
@@ -7,7 +7,7 @@
 #include <linux/nodemask.h>
 #include <mach_apic.h>
 #include <asm/mpspec.h>
-#include "pci.h"
+#include <asm/pci_x86.h>
 
 #define XQUAD_PORTIO_BASE 0xfe400000
 #define XQUAD_PORTIO_QUAD 0x40000  /* 256k per quad. */
diff --git a/arch/x86/pci/olpc.c b/arch/x86/pci/olpc.c
index e11e9e803d5..b889d824f7c 100644
--- a/arch/x86/pci/olpc.c
+++ b/arch/x86/pci/olpc.c
@@ -29,7 +29,7 @@
 #include <linux/init.h>
 #include <asm/olpc.h>
 #include <asm/geode.h>
-#include "pci.h"
+#include <asm/pci_x86.h>
 
 /*
  * In the tables below, the first two line (8 longwords) are the
diff --git a/arch/x86/pci/pcbios.c b/arch/x86/pci/pcbios.c
index 37472fc6f72..b82cae970df 100644
--- a/arch/x86/pci/pcbios.c
+++ b/arch/x86/pci/pcbios.c
@@ -6,9 +6,8 @@
 #include <linux/init.h>
 #include <linux/module.h>
 #include <linux/uaccess.h>
-#include "pci.h"
-#include "pci-functions.h"
-
+#include <asm/pci_x86.h>
+#include <asm/mach-default/pci-functions.h>
 
 /* BIOS32 signature: "_32_" */
 #define BIOS32_SIGNATURE	(('_' << 0) + ('3' << 8) + ('2' << 16) + ('_' << 24))
diff --git a/arch/x86/pci/pci.h b/arch/x86/pci/pci.h
deleted file mode 100644
index 1959018aac0..00000000000
--- a/arch/x86/pci/pci.h
+++ /dev/null
@@ -1,162 +0,0 @@
-/*
- *	Low-Level PCI Access for i386 machines.
- *
- *	(c) 1999 Martin Mares <mj@ucw.cz>
- */
-
-#undef DEBUG
-
-#ifdef DEBUG
-#define DBG(x...) printk(x)
-#else
-#define DBG(x...)
-#endif
-
-#define PCI_PROBE_BIOS		0x0001
-#define PCI_PROBE_CONF1		0x0002
-#define PCI_PROBE_CONF2		0x0004
-#define PCI_PROBE_MMCONF	0x0008
-#define PCI_PROBE_MASK		0x000f
-#define PCI_PROBE_NOEARLY	0x0010
-
-#define PCI_NO_CHECKS		0x0400
-#define PCI_USE_PIRQ_MASK	0x0800
-#define PCI_ASSIGN_ROMS		0x1000
-#define PCI_BIOS_IRQ_SCAN	0x2000
-#define PCI_ASSIGN_ALL_BUSSES	0x4000
-#define PCI_CAN_SKIP_ISA_ALIGN	0x8000
-#define PCI_USE__CRS		0x10000
-#define PCI_CHECK_ENABLE_AMD_MMCONF	0x20000
-#define PCI_HAS_IO_ECS		0x40000
-#define PCI_NOASSIGN_ROMS	0x80000
-
-extern unsigned int pci_probe;
-extern unsigned long pirq_table_addr;
-
-enum pci_bf_sort_state {
-	pci_bf_sort_default,
-	pci_force_nobf,
-	pci_force_bf,
-	pci_dmi_bf,
-};
-
-/* pci-i386.c */
-
-extern unsigned int pcibios_max_latency;
-
-void pcibios_resource_survey(void);
-
-/* pci-pc.c */
-
-extern int pcibios_last_bus;
-extern struct pci_bus *pci_root_bus;
-extern struct pci_ops pci_root_ops;
-
-/* pci-irq.c */
-
-struct irq_info {
-	u8 bus, devfn;			/* Bus, device and function */
-	struct {
-		u8 link;		/* IRQ line ID, chipset dependent, 0=not routed */
-		u16 bitmap;		/* Available IRQs */
-	} __attribute__((packed)) irq[4];
-	u8 slot;			/* Slot number, 0=onboard */
-	u8 rfu;
-} __attribute__((packed));
-
-struct irq_routing_table {
-	u32 signature;			/* PIRQ_SIGNATURE should be here */
-	u16 version;			/* PIRQ_VERSION */
-	u16 size;			/* Table size in bytes */
-	u8 rtr_bus, rtr_devfn;		/* Where the interrupt router lies */
-	u16 exclusive_irqs;		/* IRQs devoted exclusively to PCI usage */
-	u16 rtr_vendor, rtr_device;	/* Vendor and device ID of interrupt router */
-	u32 miniport_data;		/* Crap */
-	u8 rfu[11];
-	u8 checksum;			/* Modulo 256 checksum must give zero */
-	struct irq_info slots[0];
-} __attribute__((packed));
-
-extern unsigned int pcibios_irq_mask;
-
-extern int pcibios_scanned;
-extern spinlock_t pci_config_lock;
-
-extern int (*pcibios_enable_irq)(struct pci_dev *dev);
-extern void (*pcibios_disable_irq)(struct pci_dev *dev);
-
-struct pci_raw_ops {
-	int (*read)(unsigned int domain, unsigned int bus, unsigned int devfn,
-						int reg, int len, u32 *val);
-	int (*write)(unsigned int domain, unsigned int bus, unsigned int devfn,
-						int reg, int len, u32 val);
-};
-
-extern struct pci_raw_ops *raw_pci_ops;
-extern struct pci_raw_ops *raw_pci_ext_ops;
-
-extern struct pci_raw_ops pci_direct_conf1;
-extern bool port_cf9_safe;
-
-/* arch_initcall level */
-extern int pci_direct_probe(void);
-extern void pci_direct_init(int type);
-extern void pci_pcbios_init(void);
-extern int pci_olpc_init(void);
-extern void __init dmi_check_pciprobe(void);
-extern void __init dmi_check_skip_isa_align(void);
-
-/* some common used subsys_initcalls */
-extern int __init pci_acpi_init(void);
-extern int __init pcibios_irq_init(void);
-extern int __init pci_visws_init(void);
-extern int __init pci_numaq_init(void);
-extern int __init pcibios_init(void);
-
-/* pci-mmconfig.c */
-
-extern int __init pci_mmcfg_arch_init(void);
-extern void __init pci_mmcfg_arch_free(void);
-
-/*
- * AMD Fam10h CPUs are buggy, and cannot access MMIO config space
- * on their northbrige except through the * %eax register. As such, you MUST
- * NOT use normal IOMEM accesses, you need to only use the magic mmio-config
- * accessor functions.
- * In fact just use pci_config_*, nothing else please.
- */
-static inline unsigned char mmio_config_readb(void __iomem *pos)
-{
-	u8 val;
-	asm volatile("movb (%1),%%al" : "=a" (val) : "r" (pos));
-	return val;
-}
-
-static inline unsigned short mmio_config_readw(void __iomem *pos)
-{
-	u16 val;
-	asm volatile("movw (%1),%%ax" : "=a" (val) : "r" (pos));
-	return val;
-}
-
-static inline unsigned int mmio_config_readl(void __iomem *pos)
-{
-	u32 val;
-	asm volatile("movl (%1),%%eax" : "=a" (val) : "r" (pos));
-	return val;
-}
-
-static inline void mmio_config_writeb(void __iomem *pos, u8 val)
-{
-	asm volatile("movb %%al,(%1)" :: "a" (val), "r" (pos) : "memory");
-}
-
-static inline void mmio_config_writew(void __iomem *pos, u16 val)
-{
-	asm volatile("movw %%ax,(%1)" :: "a" (val), "r" (pos) : "memory");
-}
-
-static inline void mmio_config_writel(void __iomem *pos, u32 val)
-{
-	asm volatile("movl %%eax,(%1)" :: "a" (val), "r" (pos) : "memory");
-}
diff --git a/arch/x86/pci/visws.c b/arch/x86/pci/visws.c
index 42f4cb19fac..16d0c0eb0d1 100644
--- a/arch/x86/pci/visws.c
+++ b/arch/x86/pci/visws.c
@@ -9,11 +9,10 @@
 #include <linux/init.h>
 
 #include <asm/setup.h>
+#include <asm/pci_x86.h>
 #include <asm/visws/cobalt.h>
 #include <asm/visws/lithium.h>
 
-#include "pci.h"
-
 static int pci_visws_enable_irq(struct pci_dev *dev) { return 0; }
 static void pci_visws_disable_irq(struct pci_dev *dev) { }
 
-- 
cgit v1.2.3


From 58a24566449892dda409b9ad92c2e56c76c5670c Mon Sep 17 00:00:00 2001
From: Matias Zabaljauregui <zabaljauregui@gmail.com>
Date: Mon, 29 Sep 2008 01:40:07 -0300
Subject: lguest: move the initial guest page table creation code to the host

This patch moves the initial guest page table creation code to the host,
so the launcher keeps working with PAE enabled configs.

Signed-off-by: Matias Zabaljauregui <zabaljauregui@gmail.com>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
---
 arch/x86/lguest/i386_head.S | 15 ---------------
 1 file changed, 15 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/lguest/i386_head.S b/arch/x86/lguest/i386_head.S
index 5c7cef34c9e..10b9bd35a8f 100644
--- a/arch/x86/lguest/i386_head.S
+++ b/arch/x86/lguest/i386_head.S
@@ -30,21 +30,6 @@ ENTRY(lguest_entry)
 	movl $lguest_data - __PAGE_OFFSET, %edx
 	int $LGUEST_TRAP_ENTRY
 
-	/* The Host put the toplevel pagetable in lguest_data.pgdir.  The movsl
-	 * instruction uses %esi implicitly as the source for the copy we're
-	 * about to do. */
-	movl lguest_data - __PAGE_OFFSET + LGUEST_DATA_pgdir, %esi
-
-	/* Copy first 32 entries of page directory to __PAGE_OFFSET entries.
-	 * This means the first 128M of kernel memory will be mapped at
-	 * PAGE_OFFSET where the kernel expects to run.  This will get it far
-	 * enough through boot to switch to its own pagetables. */
-	movl $32, %ecx
-	movl %esi, %edi
-	addl $((__PAGE_OFFSET >> 22) * 4), %edi
-	rep
-	movsl
-
 	/* Set up the initial stack so we can run C code. */
 	movl $(init_thread_union+THREAD_SIZE),%esp
 
-- 
cgit v1.2.3


From 412a1be265b894a45cebbfc2b57eb7a593bf34b2 Mon Sep 17 00:00:00 2001
From: Jaswinder Singh Rajput <jaswinder@infradead.org>
Date: Mon, 29 Dec 2008 21:44:12 +0530
Subject: x86: amd_iommu_init.c: iommu_enable and iommu_enable_event_logging
 should be static

Impact: cleanup, reduce kernel size a bit, avoid sparse warning

Fixes sparse warning:
arch/x86/kernel/amd_iommu_init.c:246:13: warning: symbol 'iommu_enable' was not declared. Should it be static?
arch/x86/kernel/amd_iommu_init.c:259:13: warning: symbol 'iommu_enable_event_logging' was not declared. Should it be static?

Signed-off-by: Jaswinder Singh Rajput <jaswinderrajput@gmail.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
---
 arch/x86/kernel/amd_iommu_init.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c
index c625800c55c..fb85e8d466c 100644
--- a/arch/x86/kernel/amd_iommu_init.c
+++ b/arch/x86/kernel/amd_iommu_init.c
@@ -243,7 +243,7 @@ static void __init iommu_feature_disable(struct amd_iommu *iommu, u8 bit)
 }
 
 /* Function to enable the hardware */
-void __init iommu_enable(struct amd_iommu *iommu)
+static void __init iommu_enable(struct amd_iommu *iommu)
 {
 	printk(KERN_INFO "AMD IOMMU: Enabling IOMMU "
 	       "at %02x:%02x.%x cap 0x%hx\n",
@@ -256,7 +256,7 @@ void __init iommu_enable(struct amd_iommu *iommu)
 }
 
 /* Function to enable IOMMU event logging and event interrupts */
-void __init iommu_enable_event_logging(struct amd_iommu *iommu)
+static void __init iommu_enable_event_logging(struct amd_iommu *iommu)
 {
 	iommu_feature_enable(iommu, CONTROL_EVT_LOG_EN);
 	iommu_feature_enable(iommu, CONTROL_EVT_INT_EN);
-- 
cgit v1.2.3


From 557f687c87ddb8adb094b2dad4e1c83c7717982d Mon Sep 17 00:00:00 2001
From: Jaswinder Singh Rajput <jaswinder@infradead.org>
Date: Mon, 29 Dec 2008 21:45:22 +0530
Subject: x86: amd_iommu.c: prealloc_protection_domains should be static

Impact: cleanup, reduce kernel size a bit, avoid sparse warning

Fixes sparse warning:
arch/x86/kernel/amd_iommu.c:1299:6: warning: symbol 'prealloc_protection_domains' was not declared. Should it be static?

Signed-off-by: Jaswinder Singh Rajput <jaswinderrajput@gmail.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
---
 arch/x86/kernel/amd_iommu.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
index 2e2da717b35..658e29e0f49 100644
--- a/arch/x86/kernel/amd_iommu.c
+++ b/arch/x86/kernel/amd_iommu.c
@@ -1296,7 +1296,7 @@ static int amd_iommu_dma_supported(struct device *dev, u64 mask)
  * we don't need to preallocate the protection domains anymore.
  * For now we have to.
  */
-void prealloc_protection_domains(void)
+static void prealloc_protection_domains(void)
 {
 	struct pci_dev *dev = NULL;
 	struct dma_ops_domain *dma_dom;
-- 
cgit v1.2.3


From 4d08d97f5262dab4482af5bc91b30af4ca02269e Mon Sep 17 00:00:00 2001
From: Jaswinder Singh Rajput <jaswinder@infradead.org>
Date: Mon, 29 Dec 2008 22:11:40 +0530
Subject: x86: genx2apic_phys.c: x2apic_send_IPI_self and init_x2apic_ldr
 should be static

Impact: cleanup, reduce kernel size a bit, avoid sparse warnings

Fixes sparse warnings:
arch/x86/kernel/genx2apic_phys.c:164:6: warning: symbol 'x2apic_send_IPI_self' was not declared. Should it be static?
arch/x86/kernel/genx2apic_phys.c:169:6: warning: symbol 'init_x2apic_ldr' was not declared. Should it be static?

Signed-off-by: Jaswinder Singh Rajput <jaswinderrajput@gmail.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
---
 arch/x86/kernel/genx2apic_phys.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/genx2apic_phys.c b/arch/x86/kernel/genx2apic_phys.c
index d042211768b..a177c7880ab 100644
--- a/arch/x86/kernel/genx2apic_phys.c
+++ b/arch/x86/kernel/genx2apic_phys.c
@@ -123,12 +123,12 @@ static unsigned int phys_pkg_id(int index_msb)
 	return current_cpu_data.initial_apicid >> index_msb;
 }
 
-void x2apic_send_IPI_self(int vector)
+static void x2apic_send_IPI_self(int vector)
 {
 	apic_write(APIC_SELF_IPI, vector);
 }
 
-void init_x2apic_ldr(void)
+static void init_x2apic_ldr(void)
 {
 	return;
 }
-- 
cgit v1.2.3


From c62e9d56ea90ef94f9708ce3f11860c20fa5e135 Mon Sep 17 00:00:00 2001
From: Jaswinder Singh Rajput <jaswinder@infradead.org>
Date: Mon, 29 Dec 2008 22:12:50 +0530
Subject: x86: bios_uv.c: uv_systab should be static

Impact: cleanup, reduce kernel size a bit, avoid sparse warning

Fixes sparse warning:
arch/x86/kernel/bios_uv.c:28:18: warning: symbol 'uv_systab' was not declared. Should it be static?

Signed-off-by: Jaswinder Singh Rajput <jaswinderrajput@gmail.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
---
 arch/x86/kernel/bios_uv.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/bios_uv.c b/arch/x86/kernel/bios_uv.c
index 2a0a2a3cac2..f63882728d9 100644
--- a/arch/x86/kernel/bios_uv.c
+++ b/arch/x86/kernel/bios_uv.c
@@ -25,7 +25,7 @@
 #include <asm/uv/bios.h>
 #include <asm/uv/uv_hub.h>
 
-struct uv_systab uv_systab;
+static struct uv_systab uv_systab;
 
 s64 uv_bios_call(enum uv_bios_cmd which, u64 a1, u64 a2, u64 a3, u64 a4, u64 a5)
 {
-- 
cgit v1.2.3


From ec8c842a524888fdcccece337d91798e3e8af880 Mon Sep 17 00:00:00 2001
From: Jaswinder Singh Rajput <jaswinder@infradead.org>
Date: Tue, 30 Dec 2008 22:46:36 +0530
Subject: x86: apic.c: xapic_icr_read and x2apic_icr_read should be static

Impact: cleanup, reduce kernel size a bit, avoid sparse warning

Fixes sparse warning:
arch/x86/kernel/apic.c:270:5: warning: symbol 'x2apic_icr_read' was not declared. Should it be static?

Signed-off-by: Jaswinder Singh Rajput <jaswinderrajput@gmail.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
---
 arch/x86/include/asm/apic.h | 1 -
 arch/x86/kernel/apic.c      | 4 ++--
 2 files changed, 2 insertions(+), 3 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
index e644bf6f90d..ab1d51a8855 100644
--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h
@@ -54,7 +54,6 @@ extern int disable_apic;
 extern int is_vsmp_box(void);
 extern void xapic_wait_icr_idle(void);
 extern u32 safe_xapic_wait_icr_idle(void);
-extern u64 xapic_icr_read(void);
 extern void xapic_icr_write(u32, u32);
 extern int setup_profiling_timer(unsigned int);
 
diff --git a/arch/x86/kernel/apic.c b/arch/x86/kernel/apic.c
index c67722f010b..66198cbe464 100644
--- a/arch/x86/kernel/apic.c
+++ b/arch/x86/kernel/apic.c
@@ -228,7 +228,7 @@ void xapic_icr_write(u32 low, u32 id)
 	apic_write(APIC_ICR, low);
 }
 
-u64 xapic_icr_read(void)
+static u64 xapic_icr_read(void)
 {
 	u32 icr1, icr2;
 
@@ -268,7 +268,7 @@ void x2apic_icr_write(u32 low, u32 id)
 	wrmsrl(APIC_BASE_MSR + (APIC_ICR >> 4), ((__u64) id) << 32 | low);
 }
 
-u64 x2apic_icr_read(void)
+static u64 x2apic_icr_read(void)
 {
 	unsigned long val;
 
-- 
cgit v1.2.3


From fa95826fe0ddbc2a55373134d8d1a21b49d13434 Mon Sep 17 00:00:00 2001
From: Jaswinder Singh Rajput <jaswinder@infradead.org>
Date: Tue, 30 Dec 2008 20:13:49 +0530
Subject: x86: uv_bau.h: fix dubious bitfield

Impact: cleanup, avoid sparse warnings

declare bitfield as unsigned to avoid dubious bitfield issue

 CHECK   arch/x86/kernel/tlb_64.c
arch/x86/include/asm/uv/uv_bau.h:136:22: warning: dubious bitfield without explicit `signed' or `unsigned'
arch/x86/include/asm/uv/uv_bau.h:138:25: warning: dubious bitfield without explicit `signed' or `unsigned'
arch/x86/include/asm/uv/uv_bau.h:140:15: warning: dubious bitfield without explicit `signed' or `unsigned'
arch/x86/include/asm/uv/uv_bau.h:143:14: warning: dubious bitfield without explicit `signed' or `unsigned'
arch/x86/include/asm/uv/uv_bau.h:146:14: warning: dubious bitfield without explicit `signed' or `unsigned'
arch/x86/include/asm/uv/uv_bau.h:149:18: warning: dubious bitfield without explicit `signed' or `unsigned'
arch/x86/include/asm/uv/uv_bau.h:151:18: warning: dubious bitfield without explicit `signed' or `unsigned'
arch/x86/include/asm/uv/uv_bau.h:155:14: error: dubious one-bit signed bitfield
arch/x86/include/asm/uv/uv_bau.h:159:18: error: dubious one-bit signed bitfield
arch/x86/include/asm/uv/uv_bau.h:173:19: error: dubious one-bit signed bitfield
arch/x86/include/asm/uv/uv_bau.h:181:16: error: dubious one-bit signed bitfield
arch/x86/include/asm/uv/uv_bau.h:185:18: error: dubious one-bit signed bitfield
arch/x86/include/asm/uv/uv_bau.h:188:16: error: dubious one-bit signed bitfield

 CHECK   arch/x86/kernel/tlb_uv.c
arch/x86/include/asm/uv/uv_bau.h:136:22: warning: dubious bitfield without explicit `signed' or `unsigned'
arch/x86/include/asm/uv/uv_bau.h:138:25: warning: dubious bitfield without explicit `signed' or `unsigned'
arch/x86/include/asm/uv/uv_bau.h:140:15: warning: dubious bitfield without explicit `signed' or `unsigned'
arch/x86/include/asm/uv/uv_bau.h:143:14: warning: dubious bitfield without explicit `signed' or `unsigned'
arch/x86/include/asm/uv/uv_bau.h:146:14: warning: dubious bitfield without explicit `signed' or `unsigned'
arch/x86/include/asm/uv/uv_bau.h:149:18: warning: dubious bitfield without explicit `signed' or `unsigned'
arch/x86/include/asm/uv/uv_bau.h:151:18: warning: dubious bitfield without explicit `signed' or `unsigned'
arch/x86/include/asm/uv/uv_bau.h:155:14: error: dubious one-bit signed bitfield
arch/x86/include/asm/uv/uv_bau.h:159:18: error: dubious one-bit signed bitfield
arch/x86/include/asm/uv/uv_bau.h:173:19: error: dubious one-bit signed bitfield
arch/x86/include/asm/uv/uv_bau.h:181:16: error: dubious one-bit signed bitfield
arch/x86/include/asm/uv/uv_bau.h:185:18: error: dubious one-bit signed bitfield
arch/x86/include/asm/uv/uv_bau.h:188:16: error: dubious one-bit signed bitfield

Signed-off-by: Jaswinder Singh Rajput <jaswinderrajput@gmail.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
---
 arch/x86/include/asm/uv/uv_bau.h | 46 ++++++++++++++++++++--------------------
 1 file changed, 23 insertions(+), 23 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/uv/uv_bau.h b/arch/x86/include/asm/uv/uv_bau.h
index e2363253bbb..50423c7b56b 100644
--- a/arch/x86/include/asm/uv/uv_bau.h
+++ b/arch/x86/include/asm/uv/uv_bau.h
@@ -133,61 +133,61 @@ struct bau_msg_payload {
  * see table 4.2.3.0.1 in broacast_assist spec.
  */
 struct bau_msg_header {
-	int dest_subnodeid:6;	/* must be zero */
+	unsigned int dest_subnodeid:6;	/* must be zero */
 	/* bits 5:0 */
-	int base_dest_nodeid:15; /* nasid>>1 (pnode) of first bit in node_map */
-	/* bits 20:6 */
-	int command:8;		/* message type */
+	unsigned int base_dest_nodeid:15; /* nasid>>1 (pnode) of */
+	/* bits 20:6 */			  /* first bit in node_map */
+	unsigned int command:8;	/* message type */
 	/* bits 28:21 */
 				/* 0x38: SN3net EndPoint Message */
-	int rsvd_1:3;		/* must be zero */
+	unsigned int rsvd_1:3;	/* must be zero */
 	/* bits 31:29 */
 				/* int will align on 32 bits */
-	int rsvd_2:9;		/* must be zero */
+	unsigned int rsvd_2:9;	/* must be zero */
 	/* bits 40:32 */
 				/* Suppl_A is 56-41 */
-	int payload_2a:8;	/* becomes byte 16 of msg */
+	unsigned int payload_2a:8;/* becomes byte 16 of msg */
 	/* bits 48:41 */	/* not currently using */
-	int payload_2b:8;	/* becomes byte 17 of msg */
+	unsigned int payload_2b:8;/* becomes byte 17 of msg */
 	/* bits 56:49 */	/* not currently using */
 				/* Address field (96:57) is never used as an
 				   address (these are address bits 42:3) */
-	int rsvd_3:1;		/* must be zero */
+	unsigned int rsvd_3:1;	/* must be zero */
 	/* bit 57 */
 				/* address bits 27:4 are payload */
 				/* these 24 bits become bytes 12-14 of msg */
-	int replied_to:1;	/* sent as 0 by the source to byte 12 */
+	unsigned int replied_to:1;/* sent as 0 by the source to byte 12 */
 	/* bit 58 */
 
-	int payload_1a:5;	/* not currently used */
+	unsigned int payload_1a:5;/* not currently used */
 	/* bits 63:59 */
-	int payload_1b:8;	/* not currently used */
+	unsigned int payload_1b:8;/* not currently used */
 	/* bits 71:64 */
-	int payload_1c:8;	/* not currently used */
+	unsigned int payload_1c:8;/* not currently used */
 	/* bits 79:72 */
-	int payload_1d:2;	/* not currently used */
+	unsigned int payload_1d:2;/* not currently used */
 	/* bits 81:80 */
 
-	int rsvd_4:7;		/* must be zero */
+	unsigned int rsvd_4:7;	/* must be zero */
 	/* bits 88:82 */
-	int sw_ack_flag:1;	/* software acknowledge flag */
+	unsigned int sw_ack_flag:1;/* software acknowledge flag */
 	/* bit 89 */
 				/* INTD trasactions at destination are to
 				   wait for software acknowledge */
-	int rsvd_5:6;		/* must be zero */
+	unsigned int rsvd_5:6;	/* must be zero */
 	/* bits 95:90 */
-	int rsvd_6:5;		/* must be zero */
+	unsigned int rsvd_6:5;	/* must be zero */
 	/* bits 100:96 */
-	int int_both:1;		/* if 1, interrupt both sockets on the blade */
+	unsigned int int_both:1;/* if 1, interrupt both sockets on the blade */
 	/* bit 101*/
-	int fairness:3;		/* usually zero */
+	unsigned int fairness:3;/* usually zero */
 	/* bits 104:102 */
-	int multilevel:1;	/* multi-level multicast format */
+	unsigned int multilevel:1;	/* multi-level multicast format */
 	/* bit 105 */
 				/* 0 for TLB: endpoint multi-unicast messages */
-	int chaining:1;		/* next descriptor is part of this activation*/
+	unsigned int chaining:1;/* next descriptor is part of this activation*/
 	/* bit 106 */
-	int rsvd_7:21;		/* must be zero */
+	unsigned int rsvd_7:21;	/* must be zero */
 	/* bits 127:107 */
 };
 
-- 
cgit v1.2.3


From 7820b75643a763abf595c99fab963000ffc8b5f0 Mon Sep 17 00:00:00 2001
From: Jaswinder Singh Rajput <jaswinder@infradead.org>
Date: Tue, 30 Dec 2008 22:05:55 +0530
Subject: x86: xsave.c: restore_user_xstate should be static

Impact: cleanup, reduce kernel size a bit, avoid sparse warning

Fixes sparse warning:
arch/x86/kernel/xsave.c:162:5: warning: symbol 'restore_user_xstate' was not declared. Should it be static?

Signed-off-by: Jaswinder Singh Rajput <jaswinderrajput@gmail.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
---
 arch/x86/kernel/xsave.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/xsave.c b/arch/x86/kernel/xsave.c
index 15c3e699918..2b54fe002e9 100644
--- a/arch/x86/kernel/xsave.c
+++ b/arch/x86/kernel/xsave.c
@@ -159,7 +159,7 @@ int save_i387_xstate(void __user *buf)
  * Restore the extended state if present. Otherwise, restore the FP/SSE
  * state.
  */
-int restore_user_xstate(void __user *buf)
+static int restore_user_xstate(void __user *buf)
 {
 	struct _fpx_sw_bytes fx_sw_user;
 	u64 mask;
-- 
cgit v1.2.3


From 79741dd35713ff4f6fd0eafd59fa94e8a4ba922d Mon Sep 17 00:00:00 2001
From: Martin Schwidefsky <schwidefsky@de.ibm.com>
Date: Wed, 31 Dec 2008 15:11:38 +0100
Subject: [PATCH] idle cputime accounting

The cpu time spent by the idle process actually doing something is
currently accounted as idle time. This is plain wrong, the architectures
that support VIRT_CPU_ACCOUNTING=y can do better: distinguish between the
time spent doing nothing and the time spent by idle doing work. The first
is accounted with account_idle_time and the second with account_system_time.
The architectures that use the account_xxx_time interface directly and not
the account_xxx_ticks interface now need to do the check for the idle
process in their arch code. In particular to improve the system vs true
idle time accounting the arch code needs to measure the true idle time
instead of just testing for the idle process.
To improve the tick based accounting as well we would need an architecture
primitive that can tell us if the pt_regs of the interrupted context
points to the magic instruction that halts the cpu.

In addition idle time is no more added to the stime of the idle process.
This field now contains the system time of the idle process as it should
be. On systems without VIRT_CPU_ACCOUNTING this will always be zero as
every tick that occurs while idle is running will be accounted as idle
time.

This patch contains the necessary common code changes to be able to
distinguish idle system time and true idle time. The architectures with
support for VIRT_CPU_ACCOUNTING need some changes to exploit this.

Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/x86/xen/time.c | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c
index c9f7cda48ed..732e52dc991 100644
--- a/arch/x86/xen/time.c
+++ b/arch/x86/xen/time.c
@@ -132,8 +132,7 @@ static void do_stolen_accounting(void)
 	*snap = state;
 
 	/* Add the appropriate number of ticks of stolen time,
-	   including any left-overs from last time.  Passing NULL to
-	   account_steal_time accounts the time as stolen. */
+	   including any left-overs from last time. */
 	stolen = runnable + offline + __get_cpu_var(residual_stolen);
 
 	if (stolen < 0)
@@ -141,11 +140,10 @@ static void do_stolen_accounting(void)
 
 	ticks = iter_div_u64_rem(stolen, NS_PER_TICK, &stolen);
 	__get_cpu_var(residual_stolen) = stolen;
-	account_steal_time(NULL, ticks);
+	account_steal_ticks(ticks);
 
 	/* Add the appropriate number of ticks of blocked time,
-	   including any left-overs from last time.  Passing idle to
-	   account_steal_time accounts the time as idle/wait. */
+	   including any left-overs from last time. */
 	blocked += __get_cpu_var(residual_blocked);
 
 	if (blocked < 0)
@@ -153,7 +151,7 @@ static void do_stolen_accounting(void)
 
 	ticks = iter_div_u64_rem(blocked, NS_PER_TICK, &blocked);
 	__get_cpu_var(residual_blocked) = blocked;
-	account_steal_time(idle_task(smp_processor_id()), ticks);
+	account_idle_ticks(ticks);
 }
 
 /*
-- 
cgit v1.2.3


From 2786b014ec893c301ea52ef9962e7cc60f89f9b3 Mon Sep 17 00:00:00 2001
From: Guillaume Thouvenin <guillaume.thouvenin@ext.bull.net>
Date: Mon, 22 Sep 2008 16:08:06 +0200
Subject: KVM: x86 emulator: consolidate push reg

This patch consolidate the emulation of push reg instruction.

Signed-off-by: Guillaume Thouvenin <guillaume.thouvenin@bull.net>
Signed-off-by: Laurent Vivier <Laurent.Vivier@bull.net>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/kvm/x86_emulate.c | 8 +-------
 1 file changed, 1 insertion(+), 7 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/x86_emulate.c b/arch/x86/kvm/x86_emulate.c
index ea051173b0d..a391e213fe6 100644
--- a/arch/x86/kvm/x86_emulate.c
+++ b/arch/x86/kvm/x86_emulate.c
@@ -1415,13 +1415,7 @@ special_insn:
 		emulate_1op("dec", c->dst, ctxt->eflags);
 		break;
 	case 0x50 ... 0x57:  /* push reg */
-		c->dst.type  = OP_MEM;
-		c->dst.bytes = c->op_bytes;
-		c->dst.val = c->src.val;
-		register_address_increment(c, &c->regs[VCPU_REGS_RSP],
-					   -c->op_bytes);
-		c->dst.ptr = (void *) register_address(
-			c, ss_base(ctxt), c->regs[VCPU_REGS_RSP]);
+		emulate_push(ctxt);
 		break;
 	case 0x58 ... 0x5f: /* pop reg */
 	pop_instruction:
-- 
cgit v1.2.3


From a26bf12afb608eb5a96192eaee35fc08ffbf85aa Mon Sep 17 00:00:00 2001
From: Jan Kiszka <jan.kiszka@siemens.com>
Date: Fri, 26 Sep 2008 09:30:45 +0200
Subject: KVM: VMX: include all IRQ window exits in statistics

irq_window_exits only tracks IRQ window exits due to user space
requests, nmi_window_exits include all exits. The latter makes more
sense, so let's adjust irq_window_exits accounting.

Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/kvm/vmx.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index a4018b01e1f..ac3453799c1 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -2767,6 +2767,7 @@ static int handle_interrupt_window(struct kvm_vcpu *vcpu,
 	vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control);
 
 	KVMTRACE_0D(PEND_INTR, vcpu, handler);
+	++vcpu->stat.irq_window_exits;
 
 	/*
 	 * If the user space waits to inject interrupts, exit as soon as
@@ -2775,7 +2776,6 @@ static int handle_interrupt_window(struct kvm_vcpu *vcpu,
 	if (kvm_run->request_interrupt_window &&
 	    !vcpu->arch.irq_summary) {
 		kvm_run->exit_reason = KVM_EXIT_IRQ_WINDOW_OPEN;
-		++vcpu->stat.irq_window_exits;
 		return 0;
 	}
 	return 1;
-- 
cgit v1.2.3


From e4a41889ece6c95f390a7fa3a94255ab62470968 Mon Sep 17 00:00:00 2001
From: Jan Kiszka <jan.kiszka@siemens.com>
Date: Fri, 26 Sep 2008 09:30:46 +0200
Subject: KVM: VMX: Use INTR_TYPE_NMI_INTR instead of magic value

Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/kvm/vmx.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index ac3453799c1..81cf12b8d12 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -2492,7 +2492,7 @@ static int handle_exception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 		set_bit(irq / BITS_PER_LONG, &vcpu->arch.irq_summary);
 	}
 
-	if ((intr_info & INTR_INFO_INTR_TYPE_MASK) == 0x200) /* nmi */
+	if ((intr_info & INTR_INFO_INTR_TYPE_MASK) == INTR_TYPE_NMI_INTR)
 		return 1;  /* already handled by vmx_vcpu_run() */
 
 	if (is_no_device(intr_info)) {
@@ -3337,7 +3337,7 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 	intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
 
 	/* We need to handle NMIs before interrupts are enabled */
-	if ((intr_info & INTR_INFO_INTR_TYPE_MASK) == 0x200 &&
+	if ((intr_info & INTR_INFO_INTR_TYPE_MASK) == INTR_TYPE_NMI_INTR &&
 	    (intr_info & INTR_INFO_VALID_MASK)) {
 		KVMTRACE_0D(NMI, vcpu, handler);
 		asm("int $2");
-- 
cgit v1.2.3


From 60637aacfd95c368e1fbc2157275d1b621b5dcdd Mon Sep 17 00:00:00 2001
From: Jan Kiszka <jan.kiszka@siemens.com>
Date: Fri, 26 Sep 2008 09:30:47 +0200
Subject: KVM: VMX: Support for NMI task gates

Properly set GUEST_INTR_STATE_NMI and reset nmi_injected when a
task-switch vmexit happened due to a task gate being used for handling
NMIs. Also avoid the false warning about valid vectoring info in
kvm_handle_exit.

Based on original patch by Gleb Natapov.

Signed-off-by: Gleb Natapov <gleb@redhat.com>
Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/kvm/vmx.c | 18 +++++++++++++++---
 1 file changed, 15 insertions(+), 3 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 81cf12b8d12..8d0fc68fd4e 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -2832,6 +2832,7 @@ static int handle_apic_access(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 
 static int handle_task_switch(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 {
+	struct vcpu_vmx *vmx = to_vmx(vcpu);
 	unsigned long exit_qualification;
 	u16 tss_selector;
 	int reason;
@@ -2839,6 +2840,15 @@ static int handle_task_switch(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 	exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
 
 	reason = (u32)exit_qualification >> 30;
+	if (reason == TASK_SWITCH_GATE && vmx->vcpu.arch.nmi_injected &&
+	    (vmx->idt_vectoring_info & VECTORING_INFO_VALID_MASK) &&
+	    (vmx->idt_vectoring_info & VECTORING_INFO_TYPE_MASK)
+	    == INTR_TYPE_NMI_INTR) {
+		vcpu->arch.nmi_injected = false;
+		if (cpu_has_virtual_nmis())
+			vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO,
+				      GUEST_INTR_STATE_NMI);
+	}
 	tss_selector = exit_qualification;
 
 	return kvm_task_switch(vcpu, tss_selector, reason);
@@ -3012,9 +3022,11 @@ static int kvm_handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
 
 	if ((vectoring_info & VECTORING_INFO_VALID_MASK) &&
 			(exit_reason != EXIT_REASON_EXCEPTION_NMI &&
-			exit_reason != EXIT_REASON_EPT_VIOLATION))
-		printk(KERN_WARNING "%s: unexpected, valid vectoring info and "
-		       "exit reason is 0x%x\n", __func__, exit_reason);
+			exit_reason != EXIT_REASON_EPT_VIOLATION &&
+			exit_reason != EXIT_REASON_TASK_SWITCH))
+		printk(KERN_WARNING "%s: unexpected, valid vectoring info "
+		       "(0x%x) and exit reason is 0x%x\n",
+		       __func__, vectoring_info, exit_reason);
 	if (exit_reason < kvm_vmx_max_exit_handlers
 	    && kvm_vmx_exit_handlers[exit_reason])
 		return kvm_vmx_exit_handlers[exit_reason](vcpu, kvm_run);
-- 
cgit v1.2.3


From 448fa4a9c5dbc6941dd19ed09692c588d815bb06 Mon Sep 17 00:00:00 2001
From: Jan Kiszka <jan.kiszka@siemens.com>
Date: Fri, 26 Sep 2008 09:30:48 +0200
Subject: KVM: x86: Reset pending/inject NMI state on CPU reset

CPU reset invalidates pending or already injected NMIs, therefore reset
the related state variables.

Based on original patch by Gleb Natapov.

Signed-off-by: Gleb Natapov <gleb@redhat.com>
Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/kvm/x86.c | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index f1f8ff2f1fa..1a71f673559 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -3925,6 +3925,9 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
 
 int kvm_arch_vcpu_reset(struct kvm_vcpu *vcpu)
 {
+	vcpu->arch.nmi_pending = false;
+	vcpu->arch.nmi_injected = false;
+
 	return kvm_x86_ops->vcpu_reset(vcpu);
 }
 
-- 
cgit v1.2.3


From 33f089ca5a61f7aead26e8e1866dfc961dd88a9e Mon Sep 17 00:00:00 2001
From: Jan Kiszka <jan.kiszka@siemens.com>
Date: Fri, 26 Sep 2008 09:30:49 +0200
Subject: KVM: VMX: refactor/fix IRQ and NMI injectability determination

There are currently two ways in VMX to check if an IRQ or NMI can be
injected:
 - vmx_{nmi|irq}_enabled and
 - vcpu.arch.{nmi|interrupt}_window_open.
Even worse, one test (at the end of vmx_vcpu_run) uses an inconsistent,
likely incorrect logic.

This patch consolidates and unifies the tests over
{nmi|interrupt}_window_open as cache + vmx_update_window_states
for updating the cache content.

Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/include/asm/kvm_host.h |  1 +
 arch/x86/kvm/vmx.c              | 46 +++++++++++++++++++----------------------
 2 files changed, 22 insertions(+), 25 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 8346be87cfa..bfbbdea869b 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -327,6 +327,7 @@ struct kvm_vcpu_arch {
 
 	bool nmi_pending;
 	bool nmi_injected;
+	bool nmi_window_open;
 
 	u64 mtrr[0x100];
 };
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 8d0fc68fd4e..f0866e1d20e 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -2362,6 +2362,21 @@ static void vmx_inject_nmi(struct kvm_vcpu *vcpu)
 			INTR_TYPE_NMI_INTR | INTR_INFO_VALID_MASK | NMI_VECTOR);
 }
 
+static void vmx_update_window_states(struct kvm_vcpu *vcpu)
+{
+	u32 guest_intr = vmcs_read32(GUEST_INTERRUPTIBILITY_INFO);
+
+	vcpu->arch.nmi_window_open =
+		!(guest_intr & (GUEST_INTR_STATE_STI |
+				GUEST_INTR_STATE_MOV_SS |
+				GUEST_INTR_STATE_NMI));
+
+	vcpu->arch.interrupt_window_open =
+		((vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF) &&
+		 !(guest_intr & (GUEST_INTR_STATE_STI |
+				 GUEST_INTR_STATE_MOV_SS)));
+}
+
 static void kvm_do_inject_irq(struct kvm_vcpu *vcpu)
 {
 	int word_index = __ffs(vcpu->arch.irq_summary);
@@ -2374,15 +2389,12 @@ static void kvm_do_inject_irq(struct kvm_vcpu *vcpu)
 	kvm_queue_interrupt(vcpu, irq);
 }
 
-
 static void do_interrupt_requests(struct kvm_vcpu *vcpu,
 				       struct kvm_run *kvm_run)
 {
 	u32 cpu_based_vm_exec_control;
 
-	vcpu->arch.interrupt_window_open =
-		((vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF) &&
-		 (vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & 3) == 0);
+	vmx_update_window_states(vcpu);
 
 	if (vcpu->arch.interrupt_window_open &&
 	    vcpu->arch.irq_summary && !vcpu->arch.interrupt.pending)
@@ -3075,22 +3087,6 @@ static void enable_nmi_window(struct kvm_vcpu *vcpu)
 	vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control);
 }
 
-static int vmx_nmi_enabled(struct kvm_vcpu *vcpu)
-{
-	u32 guest_intr = vmcs_read32(GUEST_INTERRUPTIBILITY_INFO);
-	return !(guest_intr & (GUEST_INTR_STATE_NMI |
-			       GUEST_INTR_STATE_MOV_SS |
-			       GUEST_INTR_STATE_STI));
-}
-
-static int vmx_irq_enabled(struct kvm_vcpu *vcpu)
-{
-	u32 guest_intr = vmcs_read32(GUEST_INTERRUPTIBILITY_INFO);
-	return (!(guest_intr & (GUEST_INTR_STATE_MOV_SS |
-			       GUEST_INTR_STATE_STI)) &&
-		(vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF));
-}
-
 static void enable_intr_window(struct kvm_vcpu *vcpu)
 {
 	if (vcpu->arch.nmi_pending)
@@ -3159,11 +3155,13 @@ static void vmx_intr_assist(struct kvm_vcpu *vcpu)
 {
 	update_tpr_threshold(vcpu);
 
+	vmx_update_window_states(vcpu);
+
 	if (cpu_has_virtual_nmis()) {
 		if (vcpu->arch.nmi_pending && !vcpu->arch.nmi_injected) {
 			if (vcpu->arch.interrupt.pending) {
 				enable_nmi_window(vcpu);
-			} else if (vmx_nmi_enabled(vcpu)) {
+			} else if (vcpu->arch.nmi_window_open) {
 				vcpu->arch.nmi_pending = false;
 				vcpu->arch.nmi_injected = true;
 			} else {
@@ -3178,7 +3176,7 @@ static void vmx_intr_assist(struct kvm_vcpu *vcpu)
 		}
 	}
 	if (!vcpu->arch.interrupt.pending && kvm_cpu_has_interrupt(vcpu)) {
-		if (vmx_irq_enabled(vcpu))
+		if (vcpu->arch.interrupt_window_open)
 			kvm_queue_interrupt(vcpu, kvm_cpu_get_interrupt(vcpu));
 		else
 			enable_irq_window(vcpu);
@@ -3339,9 +3337,7 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 	if (vmx->rmode.irq.pending)
 		fixup_rmode_irq(vmx);
 
-	vcpu->arch.interrupt_window_open =
-		(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) &
-		 (GUEST_INTR_STATE_STI | GUEST_INTR_STATE_MOV_SS)) == 0;
+	vmx_update_window_states(vcpu);
 
 	asm("mov %0, %%ds; mov %0, %%es" : : "r"(__USER_DS));
 	vmx->launched = 1;
-- 
cgit v1.2.3


From f460ee43e250b675376246b1c4c9fe9b9af4ab16 Mon Sep 17 00:00:00 2001
From: Jan Kiszka <jan.kiszka@siemens.com>
Date: Fri, 26 Sep 2008 09:30:50 +0200
Subject: KVM: VMX: refactor IRQ and NMI window enabling

do_interrupt_requests and vmx_intr_assist go different way for
achieving the same: enabling the nmi/irq window start notification.
Unify their code over enable_{irq|nmi}_window, get rid of a redundant
call to enable_intr_window instead of direct enable_nmi_window
invocation and unroll enable_intr_window for both in-kernel and user
space irq injection accordingly.

Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/kvm/vmx.c | 78 ++++++++++++++++++++++--------------------------------
 1 file changed, 32 insertions(+), 46 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index f0866e1d20e..440f56cd4bd 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -2389,30 +2389,42 @@ static void kvm_do_inject_irq(struct kvm_vcpu *vcpu)
 	kvm_queue_interrupt(vcpu, irq);
 }
 
-static void do_interrupt_requests(struct kvm_vcpu *vcpu,
-				       struct kvm_run *kvm_run)
+static void enable_irq_window(struct kvm_vcpu *vcpu)
 {
 	u32 cpu_based_vm_exec_control;
 
-	vmx_update_window_states(vcpu);
+	cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL);
+	cpu_based_vm_exec_control |= CPU_BASED_VIRTUAL_INTR_PENDING;
+	vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control);
+}
 
-	if (vcpu->arch.interrupt_window_open &&
-	    vcpu->arch.irq_summary && !vcpu->arch.interrupt.pending)
-		kvm_do_inject_irq(vcpu);
+static void enable_nmi_window(struct kvm_vcpu *vcpu)
+{
+	u32 cpu_based_vm_exec_control;
 
-	if (vcpu->arch.interrupt_window_open && vcpu->arch.interrupt.pending)
-		vmx_inject_irq(vcpu, vcpu->arch.interrupt.nr);
+	if (!cpu_has_virtual_nmis())
+		return;
 
 	cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL);
+	cpu_based_vm_exec_control |= CPU_BASED_VIRTUAL_NMI_PENDING;
+	vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control);
+}
+
+static void do_interrupt_requests(struct kvm_vcpu *vcpu,
+				       struct kvm_run *kvm_run)
+{
+	vmx_update_window_states(vcpu);
+
+	if (vcpu->arch.interrupt_window_open) {
+		if (vcpu->arch.irq_summary && !vcpu->arch.interrupt.pending)
+			kvm_do_inject_irq(vcpu);
+
+		if (vcpu->arch.interrupt.pending)
+			vmx_inject_irq(vcpu, vcpu->arch.interrupt.nr);
+	}
 	if (!vcpu->arch.interrupt_window_open &&
 	    (vcpu->arch.irq_summary || kvm_run->request_interrupt_window))
-		/*
-		 * Interrupts blocked.  Wait for unblock.
-		 */
-		cpu_based_vm_exec_control |= CPU_BASED_VIRTUAL_INTR_PENDING;
-	else
-		cpu_based_vm_exec_control &= ~CPU_BASED_VIRTUAL_INTR_PENDING;
-	vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control);
+		enable_irq_window(vcpu);
 }
 
 static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr)
@@ -3066,35 +3078,6 @@ static void update_tpr_threshold(struct kvm_vcpu *vcpu)
 	vmcs_write32(TPR_THRESHOLD, (max_irr > tpr) ? tpr >> 4 : max_irr >> 4);
 }
 
-static void enable_irq_window(struct kvm_vcpu *vcpu)
-{
-	u32 cpu_based_vm_exec_control;
-
-	cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL);
-	cpu_based_vm_exec_control |= CPU_BASED_VIRTUAL_INTR_PENDING;
-	vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control);
-}
-
-static void enable_nmi_window(struct kvm_vcpu *vcpu)
-{
-	u32 cpu_based_vm_exec_control;
-
-	if (!cpu_has_virtual_nmis())
-		return;
-
-	cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL);
-	cpu_based_vm_exec_control |= CPU_BASED_VIRTUAL_NMI_PENDING;
-	vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control);
-}
-
-static void enable_intr_window(struct kvm_vcpu *vcpu)
-{
-	if (vcpu->arch.nmi_pending)
-		enable_nmi_window(vcpu);
-	else if (kvm_cpu_has_interrupt(vcpu))
-		enable_irq_window(vcpu);
-}
-
 static void vmx_complete_interrupts(struct vcpu_vmx *vmx)
 {
 	u32 exit_intr_info;
@@ -3165,13 +3148,16 @@ static void vmx_intr_assist(struct kvm_vcpu *vcpu)
 				vcpu->arch.nmi_pending = false;
 				vcpu->arch.nmi_injected = true;
 			} else {
-				enable_intr_window(vcpu);
+				enable_nmi_window(vcpu);
 				return;
 			}
 		}
 		if (vcpu->arch.nmi_injected) {
 			vmx_inject_nmi(vcpu);
-			enable_intr_window(vcpu);
+			if (vcpu->arch.nmi_pending)
+				enable_nmi_window(vcpu);
+			else if (kvm_cpu_has_interrupt(vcpu))
+				enable_irq_window(vcpu);
 			return;
 		}
 	}
-- 
cgit v1.2.3


From 66a5a347c2690db4c0756524a8eb5a05e0437aa8 Mon Sep 17 00:00:00 2001
From: Jan Kiszka <jan.kiszka@siemens.com>
Date: Fri, 26 Sep 2008 09:30:51 +0200
Subject: KVM: VMX: fix real-mode NMI support

Fix NMI injection in real-mode with the same pattern we perform IRQ
injection.

Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/kvm/vmx.c | 13 +++++++++++++
 1 file changed, 13 insertions(+)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 440f56cd4bd..38d13856661 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -2358,6 +2358,19 @@ static void vmx_inject_irq(struct kvm_vcpu *vcpu, int irq)
 
 static void vmx_inject_nmi(struct kvm_vcpu *vcpu)
 {
+	struct vcpu_vmx *vmx = to_vmx(vcpu);
+
+	if (vcpu->arch.rmode.active) {
+		vmx->rmode.irq.pending = true;
+		vmx->rmode.irq.vector = NMI_VECTOR;
+		vmx->rmode.irq.rip = kvm_rip_read(vcpu);
+		vmcs_write32(VM_ENTRY_INTR_INFO_FIELD,
+			     NMI_VECTOR | INTR_TYPE_SOFT_INTR |
+			     INTR_INFO_VALID_MASK);
+		vmcs_write32(VM_ENTRY_INSTRUCTION_LEN, 1);
+		kvm_rip_write(vcpu, vmx->rmode.irq.rip - 1);
+		return;
+	}
 	vmcs_write32(VM_ENTRY_INTR_INFO_FIELD,
 			INTR_TYPE_NMI_INTR | INTR_INFO_VALID_MASK | NMI_VECTOR);
 }
-- 
cgit v1.2.3


From 23930f9521c9c4d4aa96cdb9d1e1703f3782bb94 Mon Sep 17 00:00:00 2001
From: Jan Kiszka <jan.kiszka@siemens.com>
Date: Fri, 26 Sep 2008 09:30:52 +0200
Subject: KVM: x86: Enable NMI Watchdog via in-kernel PIT source

LINT0 of the LAPIC can be used to route PIT events as NMI watchdog ticks
into the guest. This patch aligns the in-kernel irqchip emulation with
the user space irqchip with already supports this feature. The trick is
to route PIT interrupts to all LAPIC's LVT0 lines.

Rebased and slightly polished patch originally posted by Sheng Yang.

Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
Signed-off-by: Sheng Yang <sheng.yang@intel.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/kvm/i8254.c | 15 +++++++++++++++
 arch/x86/kvm/irq.h   |  1 +
 arch/x86/kvm/lapic.c | 34 +++++++++++++++++++++++++++++-----
 3 files changed, 45 insertions(+), 5 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c
index 59ebd37ad79..580cc1d01c7 100644
--- a/arch/x86/kvm/i8254.c
+++ b/arch/x86/kvm/i8254.c
@@ -603,10 +603,25 @@ void kvm_free_pit(struct kvm *kvm)
 
 static void __inject_pit_timer_intr(struct kvm *kvm)
 {
+	struct kvm_vcpu *vcpu;
+	int i;
+
 	mutex_lock(&kvm->lock);
 	kvm_set_irq(kvm, kvm->arch.vpit->irq_source_id, 0, 1);
 	kvm_set_irq(kvm, kvm->arch.vpit->irq_source_id, 0, 0);
 	mutex_unlock(&kvm->lock);
+
+	/*
+	 * Provides NMI watchdog support in IOAPIC mode.
+	 * The route is: PIT -> PIC -> LVT0 in NMI mode,
+	 * timer IRQs will continue to flow through the IOAPIC.
+	 */
+	for (i = 0; i < KVM_MAX_VCPUS; ++i) {
+		vcpu = kvm->vcpus[i];
+		if (!vcpu)
+			continue;
+		kvm_apic_local_deliver(vcpu, APIC_LVT0);
+	}
 }
 
 void kvm_inject_pit_timer_irqs(struct kvm_vcpu *vcpu)
diff --git a/arch/x86/kvm/irq.h b/arch/x86/kvm/irq.h
index f17c8f5bbf3..71e37a530cf 100644
--- a/arch/x86/kvm/irq.h
+++ b/arch/x86/kvm/irq.h
@@ -87,6 +87,7 @@ void kvm_pic_reset(struct kvm_kpic_state *s);
 void kvm_timer_intr_post(struct kvm_vcpu *vcpu, int vec);
 void kvm_inject_pending_timer_irqs(struct kvm_vcpu *vcpu);
 void kvm_inject_apic_timer_irqs(struct kvm_vcpu *vcpu);
+int kvm_apic_local_deliver(struct kvm_vcpu *vcpu, int lvt_type);
 void __kvm_migrate_apic_timer(struct kvm_vcpu *vcpu);
 void __kvm_migrate_pit_timer(struct kvm_vcpu *vcpu);
 void __kvm_migrate_timers(struct kvm_vcpu *vcpu);
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 0fc3cab4894..206cc11a1c9 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -380,6 +380,14 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
 		}
 		break;
 
+	case APIC_DM_EXTINT:
+		/*
+		 * Should only be called by kvm_apic_local_deliver() with LVT0,
+		 * before NMI watchdog was enabled. Already handled by
+		 * kvm_apic_accept_pic_intr().
+		 */
+		break;
+
 	default:
 		printk(KERN_ERR "TODO: unsupported delivery mode %x\n",
 		       delivery_mode);
@@ -743,10 +751,13 @@ static void apic_mmio_write(struct kvm_io_device *this,
 		apic_set_reg(apic, APIC_ICR2, val & 0xff000000);
 		break;
 
+	case APIC_LVT0:
+		if (val == APIC_DM_NMI)
+			apic_debug("Receive NMI setting on APIC_LVT0 "
+				"for cpu %d\n", apic->vcpu->vcpu_id);
 	case APIC_LVTT:
 	case APIC_LVTTHMR:
 	case APIC_LVTPC:
-	case APIC_LVT0:
 	case APIC_LVT1:
 	case APIC_LVTERR:
 		/* TODO: Check vector */
@@ -961,12 +972,25 @@ int apic_has_pending_timer(struct kvm_vcpu *vcpu)
 	return 0;
 }
 
-static int __inject_apic_timer_irq(struct kvm_lapic *apic)
+int kvm_apic_local_deliver(struct kvm_vcpu *vcpu, int lvt_type)
 {
-	int vector;
+	struct kvm_lapic *apic = vcpu->arch.apic;
+	int vector, mode, trig_mode;
+	u32 reg;
+
+	if (apic && apic_enabled(apic)) {
+		reg = apic_get_reg(apic, lvt_type);
+		vector = reg & APIC_VECTOR_MASK;
+		mode = reg & APIC_MODE_MASK;
+		trig_mode = reg & APIC_LVT_LEVEL_TRIGGER;
+		return __apic_accept_irq(apic, mode, vector, 1, trig_mode);
+	}
+	return 0;
+}
 
-	vector = apic_lvt_vector(apic, APIC_LVTT);
-	return __apic_accept_irq(apic, APIC_DM_FIXED, vector, 1, 0);
+static inline int __inject_apic_timer_irq(struct kvm_lapic *apic)
+{
+	return kvm_apic_local_deliver(apic->vcpu, APIC_LVTT);
 }
 
 static enum hrtimer_restart apic_timer_fn(struct hrtimer *data)
-- 
cgit v1.2.3


From 0496fbb973ccc9477082e859ed0faab5acb805ba Mon Sep 17 00:00:00 2001
From: Jan Kiszka <jan.kiszka@siemens.com>
Date: Fri, 26 Sep 2008 09:30:53 +0200
Subject: KVM: x86: VCPU with pending NMI is runnabled

Ensure that a VCPU with pending NMIs is considered runnable.

Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/kvm/x86.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 1a71f673559..1fa9a6db633 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -4130,7 +4130,8 @@ void kvm_arch_flush_shadow(struct kvm *kvm)
 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
 {
 	return vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE
-	       || vcpu->arch.mp_state == KVM_MP_STATE_SIPI_RECEIVED;
+	       || vcpu->arch.mp_state == KVM_MP_STATE_SIPI_RECEIVED
+	       || vcpu->arch.nmi_pending;
 }
 
 static void vcpu_kick_intr(void *info)
-- 
cgit v1.2.3


From 26df99c6c5807115f06d4e1abae397b7f5f3e00c Mon Sep 17 00:00:00 2001
From: Jan Kiszka <jan.kiszka@siemens.com>
Date: Fri, 26 Sep 2008 09:30:54 +0200
Subject: KVM: Kick NMI receiving VCPU

Kick the NMI receiving VCPU in case the triggering caller runs in a
different context.

Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/kvm/lapic.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 206cc11a1c9..304f9ddbdd5 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -354,6 +354,7 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
 
 	case APIC_DM_NMI:
 		kvm_inject_nmi(vcpu);
+		kvm_vcpu_kick(vcpu);
 		break;
 
 	case APIC_DM_INIT:
-- 
cgit v1.2.3


From c4abb7c9cde24b7351a47328ef866e6a2bbb1ad0 Mon Sep 17 00:00:00 2001
From: Jan Kiszka <jan.kiszka@siemens.com>
Date: Fri, 26 Sep 2008 09:30:55 +0200
Subject: KVM: x86: Support for user space injected NMIs

Introduces the KVM_NMI IOCTL to the generic x86 part of KVM for
injecting NMIs from user space and also extends the statistic report
accordingly.

Based on the original patch by Sheng Yang.

Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
Signed-off-by: Sheng Yang <sheng.yang@intel.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/include/asm/kvm_host.h |  2 ++
 arch/x86/kvm/x86.c              | 46 +++++++++++++++++++++++++++++++++++++++--
 2 files changed, 46 insertions(+), 2 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index bfbbdea869b..a40fa847892 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -398,6 +398,7 @@ struct kvm_vcpu_stat {
 	u32 halt_exits;
 	u32 halt_wakeup;
 	u32 request_irq_exits;
+	u32 request_nmi_exits;
 	u32 irq_exits;
 	u32 host_state_reload;
 	u32 efer_reload;
@@ -406,6 +407,7 @@ struct kvm_vcpu_stat {
 	u32 insn_emulation_fail;
 	u32 hypercalls;
 	u32 irq_injections;
+	u32 nmi_injections;
 };
 
 struct descriptor_table {
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 1fa9a6db633..07971451b94 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -86,6 +86,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
 	{ "halt_wakeup", VCPU_STAT(halt_wakeup) },
 	{ "hypercalls", VCPU_STAT(hypercalls) },
 	{ "request_irq", VCPU_STAT(request_irq_exits) },
+	{ "request_nmi", VCPU_STAT(request_nmi_exits) },
 	{ "irq_exits", VCPU_STAT(irq_exits) },
 	{ "host_state_reload", VCPU_STAT(host_state_reload) },
 	{ "efer_reload", VCPU_STAT(efer_reload) },
@@ -93,6 +94,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
 	{ "insn_emulation", VCPU_STAT(insn_emulation) },
 	{ "insn_emulation_fail", VCPU_STAT(insn_emulation_fail) },
 	{ "irq_injections", VCPU_STAT(irq_injections) },
+	{ "nmi_injections", VCPU_STAT(nmi_injections) },
 	{ "mmu_shadow_zapped", VM_STAT(mmu_shadow_zapped) },
 	{ "mmu_pte_write", VM_STAT(mmu_pte_write) },
 	{ "mmu_pte_updated", VM_STAT(mmu_pte_updated) },
@@ -1318,6 +1320,15 @@ static int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu,
 	return 0;
 }
 
+static int kvm_vcpu_ioctl_nmi(struct kvm_vcpu *vcpu)
+{
+	vcpu_load(vcpu);
+	kvm_inject_nmi(vcpu);
+	vcpu_put(vcpu);
+
+	return 0;
+}
+
 static int vcpu_ioctl_tpr_access_reporting(struct kvm_vcpu *vcpu,
 					   struct kvm_tpr_access_ctl *tac)
 {
@@ -1377,6 +1388,13 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
 		r = 0;
 		break;
 	}
+	case KVM_NMI: {
+		r = kvm_vcpu_ioctl_nmi(vcpu);
+		if (r)
+			goto out;
+		r = 0;
+		break;
+	}
 	case KVM_SET_CPUID: {
 		struct kvm_cpuid __user *cpuid_arg = argp;
 		struct kvm_cpuid cpuid;
@@ -2812,18 +2830,37 @@ static int dm_request_for_irq_injection(struct kvm_vcpu *vcpu,
 		(kvm_x86_ops->get_rflags(vcpu) & X86_EFLAGS_IF));
 }
 
+/*
+ * Check if userspace requested a NMI window, and that the NMI window
+ * is open.
+ *
+ * No need to exit to userspace if we already have a NMI queued.
+ */
+static int dm_request_for_nmi_injection(struct kvm_vcpu *vcpu,
+					struct kvm_run *kvm_run)
+{
+	return (!vcpu->arch.nmi_pending &&
+		kvm_run->request_nmi_window &&
+		vcpu->arch.nmi_window_open);
+}
+
 static void post_kvm_run_save(struct kvm_vcpu *vcpu,
 			      struct kvm_run *kvm_run)
 {
 	kvm_run->if_flag = (kvm_x86_ops->get_rflags(vcpu) & X86_EFLAGS_IF) != 0;
 	kvm_run->cr8 = kvm_get_cr8(vcpu);
 	kvm_run->apic_base = kvm_get_apic_base(vcpu);
-	if (irqchip_in_kernel(vcpu->kvm))
+	if (irqchip_in_kernel(vcpu->kvm)) {
 		kvm_run->ready_for_interrupt_injection = 1;
-	else
+		kvm_run->ready_for_nmi_injection = 1;
+	} else {
 		kvm_run->ready_for_interrupt_injection =
 					(vcpu->arch.interrupt_window_open &&
 					 vcpu->arch.irq_summary == 0);
+		kvm_run->ready_for_nmi_injection =
+					(vcpu->arch.nmi_window_open &&
+					 vcpu->arch.nmi_pending == 0);
+	}
 }
 
 static void vapic_enter(struct kvm_vcpu *vcpu)
@@ -2999,6 +3036,11 @@ static int __vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 		}
 
 		if (r > 0) {
+			if (dm_request_for_nmi_injection(vcpu, kvm_run)) {
+				r = -EINTR;
+				kvm_run->exit_reason = KVM_EXIT_NMI;
+				++vcpu->stat.request_nmi_exits;
+			}
 			if (dm_request_for_irq_injection(vcpu, kvm_run)) {
 				r = -EINTR;
 				kvm_run->exit_reason = KVM_EXIT_INTR;
-- 
cgit v1.2.3


From 487b391d6ea9b1d0e2e0440466fb3130e78c98d9 Mon Sep 17 00:00:00 2001
From: Jan Kiszka <jan.kiszka@siemens.com>
Date: Fri, 26 Sep 2008 09:30:56 +0200
Subject: KVM: VMX: Provide support for user space injected NMIs

This patch adds the required bits to the VMX side for user space
injected NMIs. As with the preexisting in-kernel irqchip support, the
CPU must provide the "virtual NMI" feature for proper tracking of the
NMI blocking state.

Based on the original patch by Sheng Yang.

Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
Signed-off-by: Sheng Yang <sheng.yang@intel.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/kvm/vmx.c | 33 +++++++++++++++++++++++++++++++++
 1 file changed, 33 insertions(+)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 38d13856661..f16a62c7926 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -2360,6 +2360,7 @@ static void vmx_inject_nmi(struct kvm_vcpu *vcpu)
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
 
+	++vcpu->stat.nmi_injections;
 	if (vcpu->arch.rmode.active) {
 		vmx->rmode.irq.pending = true;
 		vmx->rmode.irq.vector = NMI_VECTOR;
@@ -2428,6 +2429,30 @@ static void do_interrupt_requests(struct kvm_vcpu *vcpu,
 {
 	vmx_update_window_states(vcpu);
 
+	if (cpu_has_virtual_nmis()) {
+		if (vcpu->arch.nmi_pending && !vcpu->arch.nmi_injected) {
+			if (vcpu->arch.nmi_window_open) {
+				vcpu->arch.nmi_pending = false;
+				vcpu->arch.nmi_injected = true;
+			} else {
+				enable_nmi_window(vcpu);
+				return;
+			}
+		}
+		if (vcpu->arch.nmi_injected) {
+			vmx_inject_nmi(vcpu);
+			if (vcpu->arch.nmi_pending
+			    || kvm_run->request_nmi_window)
+				enable_nmi_window(vcpu);
+			else if (vcpu->arch.irq_summary
+				 || kvm_run->request_interrupt_window)
+				enable_irq_window(vcpu);
+			return;
+		}
+		if (!vcpu->arch.nmi_window_open || kvm_run->request_nmi_window)
+			enable_nmi_window(vcpu);
+	}
+
 	if (vcpu->arch.interrupt_window_open) {
 		if (vcpu->arch.irq_summary && !vcpu->arch.interrupt.pending)
 			kvm_do_inject_irq(vcpu);
@@ -2959,6 +2984,14 @@ static int handle_nmi_window(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 	vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control);
 	++vcpu->stat.nmi_window_exits;
 
+	/*
+	 * If the user space waits to inject a NMI, exit as soon as possible
+	 */
+	if (kvm_run->request_nmi_window && !vcpu->arch.nmi_pending) {
+		kvm_run->exit_reason = KVM_EXIT_NMI_WINDOW_OPEN;
+		return 0;
+	}
+
 	return 1;
 }
 
-- 
cgit v1.2.3


From 3b86cd9967242f3f3d775ee015fb814a349ed5e6 Mon Sep 17 00:00:00 2001
From: Jan Kiszka <jan.kiszka@siemens.com>
Date: Fri, 26 Sep 2008 09:30:57 +0200
Subject: KVM: VMX: work around lacking VNMI support

Older VMX supporting CPUs do not provide the "Virtual NMI" feature for
tracking the NMI-blocked state after injecting such events. For now
KVM is unable to inject NMIs on those CPUs.

Derived from Sheng Yang's suggestion to use the IRQ window notification
for detecting the end of NMI handlers, this patch implements virtual
NMI support without impact on the host's ability to receive real NMIs.
The downside is that the given approach requires some heuristics that
can cause NMI nesting in vary rare corner cases.

The approach works as follows:
 - inject NMI and set a software-based NMI-blocked flag
 - arm the IRQ window start notification whenever an NMI window is
   requested
 - if the guest exits due to an opening IRQ window, clear the emulated
   NMI-blocked flag
 - if the guest net execution time with NMI-blocked but without an IRQ
   window exceeds 1 second, force NMI-blocked reset and inject anyway

This approach covers most practical scenarios:
 - succeeding NMIs are seperated by at least one open IRQ window
 - the guest may spin with IRQs disabled (e.g. due to a bug), but
   leaving the NMI handler takes much less time than one second
 - the guest does not rely on strict ordering or timing of NMIs
   (would be problematic in virtualized environments anyway)

Successfully tested with the 'nmi n' monitor command, the kgdbts
testsuite on smp guests (additional patches required to add debug
register support to kvm) + the kernel's nmi_watchdog=1, and a Siemens-
specific board emulation (+ guest) that comes with its own NMI
watchdog mechanism.

Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/kvm/vmx.c | 174 +++++++++++++++++++++++++++++++++++------------------
 1 file changed, 115 insertions(+), 59 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index f16a62c7926..2180109d794 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -90,6 +90,11 @@ struct vcpu_vmx {
 	} rmode;
 	int vpid;
 	bool emulation_required;
+
+	/* Support for vnmi-less CPUs */
+	int soft_vnmi_blocked;
+	ktime_t entry_time;
+	s64 vnmi_blocked_time;
 };
 
 static inline struct vcpu_vmx *to_vmx(struct kvm_vcpu *vcpu)
@@ -2230,6 +2235,8 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu)
 
 	vmx->vcpu.arch.rmode.active = 0;
 
+	vmx->soft_vnmi_blocked = 0;
+
 	vmx->vcpu.arch.regs[VCPU_REGS_RDX] = get_rdx_init_val();
 	kvm_set_cr8(&vmx->vcpu, 0);
 	msr = 0xfee00000 | MSR_IA32_APICBASE_ENABLE;
@@ -2335,6 +2342,29 @@ out:
 	return ret;
 }
 
+static void enable_irq_window(struct kvm_vcpu *vcpu)
+{
+	u32 cpu_based_vm_exec_control;
+
+	cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL);
+	cpu_based_vm_exec_control |= CPU_BASED_VIRTUAL_INTR_PENDING;
+	vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control);
+}
+
+static void enable_nmi_window(struct kvm_vcpu *vcpu)
+{
+	u32 cpu_based_vm_exec_control;
+
+	if (!cpu_has_virtual_nmis()) {
+		enable_irq_window(vcpu);
+		return;
+	}
+
+	cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL);
+	cpu_based_vm_exec_control |= CPU_BASED_VIRTUAL_NMI_PENDING;
+	vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control);
+}
+
 static void vmx_inject_irq(struct kvm_vcpu *vcpu, int irq)
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
@@ -2360,6 +2390,19 @@ static void vmx_inject_nmi(struct kvm_vcpu *vcpu)
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
 
+	if (!cpu_has_virtual_nmis()) {
+		/*
+		 * Tracking the NMI-blocked state in software is built upon
+		 * finding the next open IRQ window. This, in turn, depends on
+		 * well-behaving guests: They have to keep IRQs disabled at
+		 * least as long as the NMI handler runs. Otherwise we may
+		 * cause NMI nesting, maybe breaking the guest. But as this is
+		 * highly unlikely, we can live with the residual risk.
+		 */
+		vmx->soft_vnmi_blocked = 1;
+		vmx->vnmi_blocked_time = 0;
+	}
+
 	++vcpu->stat.nmi_injections;
 	if (vcpu->arch.rmode.active) {
 		vmx->rmode.irq.pending = true;
@@ -2384,6 +2427,8 @@ static void vmx_update_window_states(struct kvm_vcpu *vcpu)
 		!(guest_intr & (GUEST_INTR_STATE_STI |
 				GUEST_INTR_STATE_MOV_SS |
 				GUEST_INTR_STATE_NMI));
+	if (!cpu_has_virtual_nmis() && to_vmx(vcpu)->soft_vnmi_blocked)
+		vcpu->arch.nmi_window_open = 0;
 
 	vcpu->arch.interrupt_window_open =
 		((vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF) &&
@@ -2403,55 +2448,31 @@ static void kvm_do_inject_irq(struct kvm_vcpu *vcpu)
 	kvm_queue_interrupt(vcpu, irq);
 }
 
-static void enable_irq_window(struct kvm_vcpu *vcpu)
-{
-	u32 cpu_based_vm_exec_control;
-
-	cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL);
-	cpu_based_vm_exec_control |= CPU_BASED_VIRTUAL_INTR_PENDING;
-	vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control);
-}
-
-static void enable_nmi_window(struct kvm_vcpu *vcpu)
-{
-	u32 cpu_based_vm_exec_control;
-
-	if (!cpu_has_virtual_nmis())
-		return;
-
-	cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL);
-	cpu_based_vm_exec_control |= CPU_BASED_VIRTUAL_NMI_PENDING;
-	vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control);
-}
-
 static void do_interrupt_requests(struct kvm_vcpu *vcpu,
 				       struct kvm_run *kvm_run)
 {
 	vmx_update_window_states(vcpu);
 
-	if (cpu_has_virtual_nmis()) {
-		if (vcpu->arch.nmi_pending && !vcpu->arch.nmi_injected) {
-			if (vcpu->arch.nmi_window_open) {
-				vcpu->arch.nmi_pending = false;
-				vcpu->arch.nmi_injected = true;
-			} else {
-				enable_nmi_window(vcpu);
-				return;
-			}
-		}
-		if (vcpu->arch.nmi_injected) {
-			vmx_inject_nmi(vcpu);
-			if (vcpu->arch.nmi_pending
-			    || kvm_run->request_nmi_window)
-				enable_nmi_window(vcpu);
-			else if (vcpu->arch.irq_summary
-				 || kvm_run->request_interrupt_window)
-				enable_irq_window(vcpu);
+	if (vcpu->arch.nmi_pending && !vcpu->arch.nmi_injected) {
+		if (vcpu->arch.nmi_window_open) {
+			vcpu->arch.nmi_pending = false;
+			vcpu->arch.nmi_injected = true;
+		} else {
+			enable_nmi_window(vcpu);
 			return;
 		}
-		if (!vcpu->arch.nmi_window_open || kvm_run->request_nmi_window)
+	}
+	if (vcpu->arch.nmi_injected) {
+		vmx_inject_nmi(vcpu);
+		if (vcpu->arch.nmi_pending || kvm_run->request_nmi_window)
 			enable_nmi_window(vcpu);
+		else if (vcpu->arch.irq_summary
+			 || kvm_run->request_interrupt_window)
+			enable_irq_window(vcpu);
+		return;
 	}
+	if (!vcpu->arch.nmi_window_open || kvm_run->request_nmi_window)
+		enable_nmi_window(vcpu);
 
 	if (vcpu->arch.interrupt_window_open) {
 		if (vcpu->arch.irq_summary && !vcpu->arch.interrupt.pending)
@@ -3097,6 +3118,37 @@ static int kvm_handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
 		printk(KERN_WARNING "%s: unexpected, valid vectoring info "
 		       "(0x%x) and exit reason is 0x%x\n",
 		       __func__, vectoring_info, exit_reason);
+
+	if (unlikely(!cpu_has_virtual_nmis() && vmx->soft_vnmi_blocked)) {
+		if (vcpu->arch.interrupt_window_open) {
+			vmx->soft_vnmi_blocked = 0;
+			vcpu->arch.nmi_window_open = 1;
+		} else if (vmx->vnmi_blocked_time > 1000000000LL &&
+		    (kvm_run->request_nmi_window || vcpu->arch.nmi_pending)) {
+			/*
+			 * This CPU don't support us in finding the end of an
+			 * NMI-blocked window if the guest runs with IRQs
+			 * disabled. So we pull the trigger after 1 s of
+			 * futile waiting, but inform the user about this.
+			 */
+			printk(KERN_WARNING "%s: Breaking out of NMI-blocked "
+			       "state on VCPU %d after 1 s timeout\n",
+			       __func__, vcpu->vcpu_id);
+			vmx->soft_vnmi_blocked = 0;
+			vmx->vcpu.arch.nmi_window_open = 1;
+		}
+
+		/*
+		 * If the user space waits to inject an NNI, exit ASAP
+		 */
+		if (vcpu->arch.nmi_window_open && kvm_run->request_nmi_window
+		    && !vcpu->arch.nmi_pending) {
+			kvm_run->exit_reason = KVM_EXIT_NMI_WINDOW_OPEN;
+			++vcpu->stat.nmi_window_exits;
+			return 0;
+		}
+	}
+
 	if (exit_reason < kvm_vmx_max_exit_handlers
 	    && kvm_vmx_exit_handlers[exit_reason])
 		return kvm_vmx_exit_handlers[exit_reason](vcpu, kvm_run);
@@ -3146,7 +3198,9 @@ static void vmx_complete_interrupts(struct vcpu_vmx *vmx)
 		if (unblock_nmi && vector != DF_VECTOR)
 			vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO,
 				      GUEST_INTR_STATE_NMI);
-	}
+	} else if (unlikely(vmx->soft_vnmi_blocked))
+		vmx->vnmi_blocked_time +=
+			ktime_to_ns(ktime_sub(ktime_get(), vmx->entry_time));
 
 	idt_vectoring_info = vmx->idt_vectoring_info;
 	idtv_info_valid = idt_vectoring_info & VECTORING_INFO_VALID_MASK;
@@ -3186,27 +3240,25 @@ static void vmx_intr_assist(struct kvm_vcpu *vcpu)
 
 	vmx_update_window_states(vcpu);
 
-	if (cpu_has_virtual_nmis()) {
-		if (vcpu->arch.nmi_pending && !vcpu->arch.nmi_injected) {
-			if (vcpu->arch.interrupt.pending) {
-				enable_nmi_window(vcpu);
-			} else if (vcpu->arch.nmi_window_open) {
-				vcpu->arch.nmi_pending = false;
-				vcpu->arch.nmi_injected = true;
-			} else {
-				enable_nmi_window(vcpu);
-				return;
-			}
-		}
-		if (vcpu->arch.nmi_injected) {
-			vmx_inject_nmi(vcpu);
-			if (vcpu->arch.nmi_pending)
-				enable_nmi_window(vcpu);
-			else if (kvm_cpu_has_interrupt(vcpu))
-				enable_irq_window(vcpu);
+	if (vcpu->arch.nmi_pending && !vcpu->arch.nmi_injected) {
+		if (vcpu->arch.interrupt.pending) {
+			enable_nmi_window(vcpu);
+		} else if (vcpu->arch.nmi_window_open) {
+			vcpu->arch.nmi_pending = false;
+			vcpu->arch.nmi_injected = true;
+		} else {
+			enable_nmi_window(vcpu);
 			return;
 		}
 	}
+	if (vcpu->arch.nmi_injected) {
+		vmx_inject_nmi(vcpu);
+		if (vcpu->arch.nmi_pending)
+			enable_nmi_window(vcpu);
+		else if (kvm_cpu_has_interrupt(vcpu))
+			enable_irq_window(vcpu);
+		return;
+	}
 	if (!vcpu->arch.interrupt.pending && kvm_cpu_has_interrupt(vcpu)) {
 		if (vcpu->arch.interrupt_window_open)
 			kvm_queue_interrupt(vcpu, kvm_cpu_get_interrupt(vcpu));
@@ -3255,6 +3307,10 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
 	u32 intr_info;
 
+	/* Record the guest's net vcpu time for enforced NMI injections. */
+	if (unlikely(!cpu_has_virtual_nmis() && vmx->soft_vnmi_blocked))
+		vmx->entry_time = ktime_get();
+
 	/* Handle invalid guest state instead of entering VMX */
 	if (vmx->emulation_required && emulate_invalid_guest_state) {
 		handle_invalid_guest_state(vcpu, kvm_run);
-- 
cgit v1.2.3


From 5f179287fa02723215eecf681d812b303c243973 Mon Sep 17 00:00:00 2001
From: Gleb Natapov <gleb@redhat.com>
Date: Tue, 7 Oct 2008 15:42:33 +0200
Subject: KVM: call kvm_arch_vcpu_reset() instead of the kvm_x86_ops callback

Call kvm_arch_vcpu_reset() instead of directly using arch callback.
The function does additional things.

Signed-off-by: Gleb Natapov <gleb@redhat.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/kvm/x86.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 07971451b94..a2c4b559455 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -3010,7 +3010,7 @@ static int __vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 		pr_debug("vcpu %d received sipi with vector # %x\n",
 			 vcpu->vcpu_id, vcpu->arch.sipi_vector);
 		kvm_lapic_reset(vcpu);
-		r = kvm_x86_ops->vcpu_reset(vcpu);
+		r = kvm_arch_vcpu_reset(vcpu);
 		if (r)
 			return r;
 		vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
-- 
cgit v1.2.3


From b558bc0a25c82ef2a9d2683b0beb3e4b87cea20b Mon Sep 17 00:00:00 2001
From: Sheng Yang <sheng@linux.intel.com>
Date: Thu, 9 Oct 2008 16:01:52 +0800
Subject: x86: Rename mtrr_state struct and macro names

Prepare for exporting them.

Signed-off-by: Sheng Yang <sheng@linux.intel.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/kernel/cpu/mtrr/generic.c | 8 ++++----
 arch/x86/kernel/cpu/mtrr/main.c    | 4 ++--
 arch/x86/kernel/cpu/mtrr/mtrr.h    | 7 ++++---
 3 files changed, 10 insertions(+), 9 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c
index 4e8d77f01ee..90db91e1593 100644
--- a/arch/x86/kernel/cpu/mtrr/generic.c
+++ b/arch/x86/kernel/cpu/mtrr/generic.c
@@ -14,9 +14,9 @@
 #include <asm/pat.h>
 #include "mtrr.h"
 
-struct mtrr_state {
-	struct mtrr_var_range var_ranges[MAX_VAR_RANGES];
-	mtrr_type fixed_ranges[NUM_FIXED_RANGES];
+struct mtrr_state_type {
+	struct mtrr_var_range var_ranges[MTRR_MAX_VAR_RANGES];
+	mtrr_type fixed_ranges[MTRR_NUM_FIXED_RANGES];
 	unsigned char enabled;
 	unsigned char have_fixed;
 	mtrr_type def_type;
@@ -35,7 +35,7 @@ static struct fixed_range_block fixed_range_blocks[] = {
 };
 
 static unsigned long smp_changes_mask;
-static struct mtrr_state mtrr_state = {};
+static struct mtrr_state_type mtrr_state = {};
 static int mtrr_state_set;
 u64 mtrr_tom2;
 
diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c
index 1159e269e59..d6ec7ec3027 100644
--- a/arch/x86/kernel/cpu/mtrr/main.c
+++ b/arch/x86/kernel/cpu/mtrr/main.c
@@ -49,7 +49,7 @@
 
 u32 num_var_ranges = 0;
 
-unsigned int mtrr_usage_table[MAX_VAR_RANGES];
+unsigned int mtrr_usage_table[MTRR_MAX_VAR_RANGES];
 static DEFINE_MUTEX(mtrr_mutex);
 
 u64 size_or_mask, size_and_mask;
@@ -574,7 +574,7 @@ struct mtrr_value {
 	unsigned long	lsize;
 };
 
-static struct mtrr_value mtrr_state[MAX_VAR_RANGES];
+static struct mtrr_value mtrr_state[MTRR_MAX_VAR_RANGES];
 
 static int mtrr_save(struct sys_device * sysdev, pm_message_t state)
 {
diff --git a/arch/x86/kernel/cpu/mtrr/mtrr.h b/arch/x86/kernel/cpu/mtrr/mtrr.h
index 2dc4ec656b2..988538202dd 100644
--- a/arch/x86/kernel/cpu/mtrr/mtrr.h
+++ b/arch/x86/kernel/cpu/mtrr/mtrr.h
@@ -11,8 +11,9 @@
 #define MTRRphysBase_MSR(reg) (0x200 + 2 * (reg))
 #define MTRRphysMask_MSR(reg) (0x200 + 2 * (reg) + 1)
 
-#define NUM_FIXED_RANGES 88
-#define MAX_VAR_RANGES 256
+#define MTRR_NUM_FIXED_RANGES 88
+#define MTRR_MAX_VAR_RANGES 256
+
 #define MTRRfix64K_00000_MSR 0x250
 #define MTRRfix16K_80000_MSR 0x258
 #define MTRRfix16K_A0000_MSR 0x259
@@ -33,7 +34,7 @@
    an 8 bit field: */
 typedef u8 mtrr_type;
 
-extern unsigned int mtrr_usage_table[MAX_VAR_RANGES];
+extern unsigned int mtrr_usage_table[MTRR_MAX_VAR_RANGES];
 
 struct mtrr_ops {
 	u32	vendor;
-- 
cgit v1.2.3


From 932d27a7913fc6b3c64c6e6082628b0a1561dec9 Mon Sep 17 00:00:00 2001
From: Sheng Yang <sheng@linux.intel.com>
Date: Thu, 9 Oct 2008 16:01:53 +0800
Subject: x86: Export some definition of MTRR

For KVM can reuse the type define, and need them to support shadow MTRR.

Signed-off-by: Sheng Yang <sheng@linux.intel.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/include/asm/mtrr.h        | 25 +++++++++++++++++++++++++
 arch/x86/kernel/cpu/mtrr/generic.c | 12 +++---------
 arch/x86/kernel/cpu/mtrr/mtrr.h    | 17 -----------------
 3 files changed, 28 insertions(+), 26 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/mtrr.h b/arch/x86/include/asm/mtrr.h
index 7c1e4258b31..cb988aab716 100644
--- a/arch/x86/include/asm/mtrr.h
+++ b/arch/x86/include/asm/mtrr.h
@@ -57,6 +57,31 @@ struct mtrr_gentry {
 };
 #endif /* !__i386__ */
 
+struct mtrr_var_range {
+	u32 base_lo;
+	u32 base_hi;
+	u32 mask_lo;
+	u32 mask_hi;
+};
+
+/* In the Intel processor's MTRR interface, the MTRR type is always held in
+   an 8 bit field: */
+typedef u8 mtrr_type;
+
+#define MTRR_NUM_FIXED_RANGES 88
+#define MTRR_MAX_VAR_RANGES 256
+
+struct mtrr_state_type {
+	struct mtrr_var_range var_ranges[MTRR_MAX_VAR_RANGES];
+	mtrr_type fixed_ranges[MTRR_NUM_FIXED_RANGES];
+	unsigned char enabled;
+	unsigned char have_fixed;
+	mtrr_type def_type;
+};
+
+#define MTRRphysBase_MSR(reg) (0x200 + 2 * (reg))
+#define MTRRphysMask_MSR(reg) (0x200 + 2 * (reg) + 1)
+
 /*  These are the various ioctls  */
 #define MTRRIOC_ADD_ENTRY        _IOW(MTRR_IOCTL_BASE,  0, struct mtrr_sentry)
 #define MTRRIOC_SET_ENTRY        _IOW(MTRR_IOCTL_BASE,  1, struct mtrr_sentry)
diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c
index 90db91e1593..b59ddcc88cd 100644
--- a/arch/x86/kernel/cpu/mtrr/generic.c
+++ b/arch/x86/kernel/cpu/mtrr/generic.c
@@ -14,14 +14,6 @@
 #include <asm/pat.h>
 #include "mtrr.h"
 
-struct mtrr_state_type {
-	struct mtrr_var_range var_ranges[MTRR_MAX_VAR_RANGES];
-	mtrr_type fixed_ranges[MTRR_NUM_FIXED_RANGES];
-	unsigned char enabled;
-	unsigned char have_fixed;
-	mtrr_type def_type;
-};
-
 struct fixed_range_block {
 	int base_msr; /* start address of an MTRR block */
 	int ranges;   /* number of MTRRs in this block  */
@@ -35,10 +27,12 @@ static struct fixed_range_block fixed_range_blocks[] = {
 };
 
 static unsigned long smp_changes_mask;
-static struct mtrr_state_type mtrr_state = {};
 static int mtrr_state_set;
 u64 mtrr_tom2;
 
+struct mtrr_state_type mtrr_state = {};
+EXPORT_SYMBOL_GPL(mtrr_state);
+
 #undef MODULE_PARAM_PREFIX
 #define MODULE_PARAM_PREFIX "mtrr."
 
diff --git a/arch/x86/kernel/cpu/mtrr/mtrr.h b/arch/x86/kernel/cpu/mtrr/mtrr.h
index 988538202dd..ffd60409cc6 100644
--- a/arch/x86/kernel/cpu/mtrr/mtrr.h
+++ b/arch/x86/kernel/cpu/mtrr/mtrr.h
@@ -8,12 +8,6 @@
 #define MTRRcap_MSR     0x0fe
 #define MTRRdefType_MSR 0x2ff
 
-#define MTRRphysBase_MSR(reg) (0x200 + 2 * (reg))
-#define MTRRphysMask_MSR(reg) (0x200 + 2 * (reg) + 1)
-
-#define MTRR_NUM_FIXED_RANGES 88
-#define MTRR_MAX_VAR_RANGES 256
-
 #define MTRRfix64K_00000_MSR 0x250
 #define MTRRfix16K_80000_MSR 0x258
 #define MTRRfix16K_A0000_MSR 0x259
@@ -30,10 +24,6 @@
 #define MTRR_CHANGE_MASK_VARIABLE  0x02
 #define MTRR_CHANGE_MASK_DEFTYPE   0x04
 
-/* In the Intel processor's MTRR interface, the MTRR type is always held in
-   an 8 bit field: */
-typedef u8 mtrr_type;
-
 extern unsigned int mtrr_usage_table[MTRR_MAX_VAR_RANGES];
 
 struct mtrr_ops {
@@ -71,13 +61,6 @@ struct set_mtrr_context {
 	u32 ccr3;
 };
 
-struct mtrr_var_range {
-	u32 base_lo;
-	u32 base_hi;
-	u32 mask_lo;
-	u32 mask_hi;
-};
-
 void set_mtrr_done(struct set_mtrr_context *ctxt);
 void set_mtrr_cache_disable(struct set_mtrr_context *ctxt);
 void set_mtrr_prepare_save(struct set_mtrr_context *ctxt);
-- 
cgit v1.2.3


From 0bed3b568b68e5835ef5da888a372b9beabf7544 Mon Sep 17 00:00:00 2001
From: Sheng Yang <sheng@linux.intel.com>
Date: Thu, 9 Oct 2008 16:01:54 +0800
Subject: KVM: Improve MTRR structure

As well as reset mmu context when set MTRR.

Signed-off-by: Sheng Yang <sheng@linux.intel.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/include/asm/kvm_host.h |  5 +++-
 arch/x86/kvm/x86.c              | 61 +++++++++++++++++++++++++++++++++++++++--
 2 files changed, 63 insertions(+), 3 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index a40fa847892..8082e87f628 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -21,6 +21,7 @@
 
 #include <asm/pvclock-abi.h>
 #include <asm/desc.h>
+#include <asm/mtrr.h>
 
 #define KVM_MAX_VCPUS 16
 #define KVM_MEMORY_SLOTS 32
@@ -86,6 +87,7 @@
 #define KVM_MIN_FREE_MMU_PAGES 5
 #define KVM_REFILL_PAGES 25
 #define KVM_MAX_CPUID_ENTRIES 40
+#define KVM_NR_FIXED_MTRR_REGION 88
 #define KVM_NR_VAR_MTRR 8
 
 extern spinlock_t kvm_lock;
@@ -329,7 +331,8 @@ struct kvm_vcpu_arch {
 	bool nmi_injected;
 	bool nmi_window_open;
 
-	u64 mtrr[0x100];
+	struct mtrr_state_type mtrr_state;
+	u32 pat;
 };
 
 struct kvm_mem_alias {
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index a2c4b559455..f5b2334c6bd 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -39,6 +39,7 @@
 #include <asm/uaccess.h>
 #include <asm/msr.h>
 #include <asm/desc.h>
+#include <asm/mtrr.h>
 
 #define MAX_IO_MSRS 256
 #define CR0_RESERVED_BITS						\
@@ -650,10 +651,38 @@ static bool msr_mtrr_valid(unsigned msr)
 
 static int set_msr_mtrr(struct kvm_vcpu *vcpu, u32 msr, u64 data)
 {
+	u64 *p = (u64 *)&vcpu->arch.mtrr_state.fixed_ranges;
+
 	if (!msr_mtrr_valid(msr))
 		return 1;
 
-	vcpu->arch.mtrr[msr - 0x200] = data;
+	if (msr == MSR_MTRRdefType) {
+		vcpu->arch.mtrr_state.def_type = data;
+		vcpu->arch.mtrr_state.enabled = (data & 0xc00) >> 10;
+	} else if (msr == MSR_MTRRfix64K_00000)
+		p[0] = data;
+	else if (msr == MSR_MTRRfix16K_80000 || msr == MSR_MTRRfix16K_A0000)
+		p[1 + msr - MSR_MTRRfix16K_80000] = data;
+	else if (msr >= MSR_MTRRfix4K_C0000 && msr <= MSR_MTRRfix4K_F8000)
+		p[3 + msr - MSR_MTRRfix4K_C0000] = data;
+	else if (msr == MSR_IA32_CR_PAT)
+		vcpu->arch.pat = data;
+	else {	/* Variable MTRRs */
+		int idx, is_mtrr_mask;
+		u64 *pt;
+
+		idx = (msr - 0x200) / 2;
+		is_mtrr_mask = msr - 0x200 - 2 * idx;
+		if (!is_mtrr_mask)
+			pt =
+			  (u64 *)&vcpu->arch.mtrr_state.var_ranges[idx].base_lo;
+		else
+			pt =
+			  (u64 *)&vcpu->arch.mtrr_state.var_ranges[idx].mask_lo;
+		*pt = data;
+	}
+
+	kvm_mmu_reset_context(vcpu);
 	return 0;
 }
 
@@ -749,10 +778,37 @@ int kvm_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata)
 
 static int get_msr_mtrr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
 {
+	u64 *p = (u64 *)&vcpu->arch.mtrr_state.fixed_ranges;
+
 	if (!msr_mtrr_valid(msr))
 		return 1;
 
-	*pdata = vcpu->arch.mtrr[msr - 0x200];
+	if (msr == MSR_MTRRdefType)
+		*pdata = vcpu->arch.mtrr_state.def_type +
+			 (vcpu->arch.mtrr_state.enabled << 10);
+	else if (msr == MSR_MTRRfix64K_00000)
+		*pdata = p[0];
+	else if (msr == MSR_MTRRfix16K_80000 || msr == MSR_MTRRfix16K_A0000)
+		*pdata = p[1 + msr - MSR_MTRRfix16K_80000];
+	else if (msr >= MSR_MTRRfix4K_C0000 && msr <= MSR_MTRRfix4K_F8000)
+		*pdata = p[3 + msr - MSR_MTRRfix4K_C0000];
+	else if (msr == MSR_IA32_CR_PAT)
+		*pdata = vcpu->arch.pat;
+	else {	/* Variable MTRRs */
+		int idx, is_mtrr_mask;
+		u64 *pt;
+
+		idx = (msr - 0x200) / 2;
+		is_mtrr_mask = msr - 0x200 - 2 * idx;
+		if (!is_mtrr_mask)
+			pt =
+			  (u64 *)&vcpu->arch.mtrr_state.var_ranges[idx].base_lo;
+		else
+			pt =
+			  (u64 *)&vcpu->arch.mtrr_state.var_ranges[idx].mask_lo;
+		*pdata = *pt;
+	}
+
 	return 0;
 }
 
@@ -3942,6 +3998,7 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
 	/* We do fxsave: this must be aligned. */
 	BUG_ON((unsigned long)&vcpu->arch.host_fx_image & 0xF);
 
+	vcpu->arch.mtrr_state.have_fixed = 1;
 	vcpu_load(vcpu);
 	r = kvm_arch_vcpu_reset(vcpu);
 	if (r == 0)
-- 
cgit v1.2.3


From 468d472f3f65100d5fb88c8d45043c85b874c294 Mon Sep 17 00:00:00 2001
From: Sheng Yang <sheng@linux.intel.com>
Date: Thu, 9 Oct 2008 16:01:55 +0800
Subject: KVM: VMX: Add PAT support for EPT

GUEST_PAT support is a new feature introduced by Intel Core i7 architecture.
With this, cpu would save/load guest and host PAT automatically, for EPT memory
type in guest depends on MSR_IA32_CR_PAT.

Also add save/restore for MSR_IA32_CR_PAT.

Signed-off-by: Sheng Yang <sheng@linux.intel.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/kvm/vmx.c | 29 ++++++++++++++++++++++++++---
 arch/x86/kvm/vmx.h |  7 +++++++
 arch/x86/kvm/x86.c |  2 +-
 3 files changed, 34 insertions(+), 4 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 2180109d794..b4c95a501cc 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -962,6 +962,13 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data)
 		pr_unimpl(vcpu, "unimplemented perfctr wrmsr: 0x%x data 0x%llx\n", msr_index, data);
 
 		break;
+	case MSR_IA32_CR_PAT:
+		if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT) {
+			vmcs_write64(GUEST_IA32_PAT, data);
+			vcpu->arch.pat = data;
+			break;
+		}
+		/* Otherwise falls through to kvm_set_msr_common */
 	default:
 		vmx_load_host_state(vmx);
 		msr = find_msr_entry(vmx, msr_index);
@@ -1181,12 +1188,13 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
 #ifdef CONFIG_X86_64
 	min |= VM_EXIT_HOST_ADDR_SPACE_SIZE;
 #endif
-	opt = 0;
+	opt = VM_EXIT_SAVE_IA32_PAT | VM_EXIT_LOAD_IA32_PAT;
 	if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_EXIT_CTLS,
 				&_vmexit_control) < 0)
 		return -EIO;
 
-	min = opt = 0;
+	min = 0;
+	opt = VM_ENTRY_LOAD_IA32_PAT;
 	if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_ENTRY_CTLS,
 				&_vmentry_control) < 0)
 		return -EIO;
@@ -2092,8 +2100,9 @@ static void vmx_disable_intercept_for_msr(struct page *msr_bitmap, u32 msr)
  */
 static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
 {
-	u32 host_sysenter_cs;
+	u32 host_sysenter_cs, msr_low, msr_high;
 	u32 junk;
+	u64 host_pat;
 	unsigned long a;
 	struct descriptor_table dt;
 	int i;
@@ -2181,6 +2190,20 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
 	rdmsrl(MSR_IA32_SYSENTER_EIP, a);
 	vmcs_writel(HOST_IA32_SYSENTER_EIP, a);   /* 22.2.3 */
 
+	if (vmcs_config.vmexit_ctrl & VM_EXIT_LOAD_IA32_PAT) {
+		rdmsr(MSR_IA32_CR_PAT, msr_low, msr_high);
+		host_pat = msr_low | ((u64) msr_high << 32);
+		vmcs_write64(HOST_IA32_PAT, host_pat);
+	}
+	if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT) {
+		rdmsr(MSR_IA32_CR_PAT, msr_low, msr_high);
+		host_pat = msr_low | ((u64) msr_high << 32);
+		/* Write the default value follow host pat */
+		vmcs_write64(GUEST_IA32_PAT, host_pat);
+		/* Keep arch.pat sync with GUEST_IA32_PAT */
+		vmx->vcpu.arch.pat = host_pat;
+	}
+
 	for (i = 0; i < NR_VMX_MSR; ++i) {
 		u32 index = vmx_msr_index[i];
 		u32 data_low, data_high;
diff --git a/arch/x86/kvm/vmx.h b/arch/x86/kvm/vmx.h
index ec5edc339da..18598afe52e 100644
--- a/arch/x86/kvm/vmx.h
+++ b/arch/x86/kvm/vmx.h
@@ -63,10 +63,13 @@
 
 #define VM_EXIT_HOST_ADDR_SPACE_SIZE            0x00000200
 #define VM_EXIT_ACK_INTR_ON_EXIT                0x00008000
+#define VM_EXIT_SAVE_IA32_PAT			0x00040000
+#define VM_EXIT_LOAD_IA32_PAT			0x00080000
 
 #define VM_ENTRY_IA32E_MODE                     0x00000200
 #define VM_ENTRY_SMM                            0x00000400
 #define VM_ENTRY_DEACT_DUAL_MONITOR             0x00000800
+#define VM_ENTRY_LOAD_IA32_PAT			0x00004000
 
 /* VMCS Encodings */
 enum vmcs_field {
@@ -112,6 +115,8 @@ enum vmcs_field {
 	VMCS_LINK_POINTER_HIGH          = 0x00002801,
 	GUEST_IA32_DEBUGCTL             = 0x00002802,
 	GUEST_IA32_DEBUGCTL_HIGH        = 0x00002803,
+	GUEST_IA32_PAT			= 0x00002804,
+	GUEST_IA32_PAT_HIGH		= 0x00002805,
 	GUEST_PDPTR0                    = 0x0000280a,
 	GUEST_PDPTR0_HIGH               = 0x0000280b,
 	GUEST_PDPTR1                    = 0x0000280c,
@@ -120,6 +125,8 @@ enum vmcs_field {
 	GUEST_PDPTR2_HIGH               = 0x0000280f,
 	GUEST_PDPTR3                    = 0x00002810,
 	GUEST_PDPTR3_HIGH               = 0x00002811,
+	HOST_IA32_PAT			= 0x00002c00,
+	HOST_IA32_PAT_HIGH		= 0x00002c01,
 	PIN_BASED_VM_EXEC_CONTROL       = 0x00004000,
 	CPU_BASED_VM_EXEC_CONTROL       = 0x00004002,
 	EXCEPTION_BITMAP                = 0x00004004,
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index f5b2334c6bd..0edf75339f3 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -452,7 +452,7 @@ static u32 msrs_to_save[] = {
 	MSR_CSTAR, MSR_KERNEL_GS_BASE, MSR_SYSCALL_MASK, MSR_LSTAR,
 #endif
 	MSR_IA32_TIME_STAMP_COUNTER, MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK,
-	MSR_IA32_PERF_STATUS,
+	MSR_IA32_PERF_STATUS, MSR_IA32_CR_PAT
 };
 
 static unsigned num_msrs_to_save;
-- 
cgit v1.2.3


From 74be52e3e6285fc6e872a2a7baea544106f399ea Mon Sep 17 00:00:00 2001
From: Sheng Yang <sheng@linux.intel.com>
Date: Thu, 9 Oct 2008 16:01:56 +0800
Subject: KVM: Add local get_mtrr_type() to support MTRR

For EPT memory type support.

Signed-off-by: Sheng Yang <sheng@linux.intel.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/kvm/mmu.c | 104 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 104 insertions(+)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 410ddbc1aa2..ac2304fd173 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -1393,6 +1393,110 @@ struct page *gva_to_page(struct kvm_vcpu *vcpu, gva_t gva)
 	return page;
 }
 
+/*
+ * The function is based on mtrr_type_lookup() in
+ * arch/x86/kernel/cpu/mtrr/generic.c
+ */
+static int get_mtrr_type(struct mtrr_state_type *mtrr_state,
+			 u64 start, u64 end)
+{
+	int i;
+	u64 base, mask;
+	u8 prev_match, curr_match;
+	int num_var_ranges = KVM_NR_VAR_MTRR;
+
+	if (!mtrr_state->enabled)
+		return 0xFF;
+
+	/* Make end inclusive end, instead of exclusive */
+	end--;
+
+	/* Look in fixed ranges. Just return the type as per start */
+	if (mtrr_state->have_fixed && (start < 0x100000)) {
+		int idx;
+
+		if (start < 0x80000) {
+			idx = 0;
+			idx += (start >> 16);
+			return mtrr_state->fixed_ranges[idx];
+		} else if (start < 0xC0000) {
+			idx = 1 * 8;
+			idx += ((start - 0x80000) >> 14);
+			return mtrr_state->fixed_ranges[idx];
+		} else if (start < 0x1000000) {
+			idx = 3 * 8;
+			idx += ((start - 0xC0000) >> 12);
+			return mtrr_state->fixed_ranges[idx];
+		}
+	}
+
+	/*
+	 * Look in variable ranges
+	 * Look of multiple ranges matching this address and pick type
+	 * as per MTRR precedence
+	 */
+	if (!(mtrr_state->enabled & 2))
+		return mtrr_state->def_type;
+
+	prev_match = 0xFF;
+	for (i = 0; i < num_var_ranges; ++i) {
+		unsigned short start_state, end_state;
+
+		if (!(mtrr_state->var_ranges[i].mask_lo & (1 << 11)))
+			continue;
+
+		base = (((u64)mtrr_state->var_ranges[i].base_hi) << 32) +
+		       (mtrr_state->var_ranges[i].base_lo & PAGE_MASK);
+		mask = (((u64)mtrr_state->var_ranges[i].mask_hi) << 32) +
+		       (mtrr_state->var_ranges[i].mask_lo & PAGE_MASK);
+
+		start_state = ((start & mask) == (base & mask));
+		end_state = ((end & mask) == (base & mask));
+		if (start_state != end_state)
+			return 0xFE;
+
+		if ((start & mask) != (base & mask))
+			continue;
+
+		curr_match = mtrr_state->var_ranges[i].base_lo & 0xff;
+		if (prev_match == 0xFF) {
+			prev_match = curr_match;
+			continue;
+		}
+
+		if (prev_match == MTRR_TYPE_UNCACHABLE ||
+		    curr_match == MTRR_TYPE_UNCACHABLE)
+			return MTRR_TYPE_UNCACHABLE;
+
+		if ((prev_match == MTRR_TYPE_WRBACK &&
+		     curr_match == MTRR_TYPE_WRTHROUGH) ||
+		    (prev_match == MTRR_TYPE_WRTHROUGH &&
+		     curr_match == MTRR_TYPE_WRBACK)) {
+			prev_match = MTRR_TYPE_WRTHROUGH;
+			curr_match = MTRR_TYPE_WRTHROUGH;
+		}
+
+		if (prev_match != curr_match)
+			return MTRR_TYPE_UNCACHABLE;
+	}
+
+	if (prev_match != 0xFF)
+		return prev_match;
+
+	return mtrr_state->def_type;
+}
+
+static u8 get_memory_type(struct kvm_vcpu *vcpu, gfn_t gfn)
+{
+	u8 mtrr;
+
+	mtrr = get_mtrr_type(&vcpu->arch.mtrr_state, gfn << PAGE_SHIFT,
+			     (gfn << PAGE_SHIFT) + PAGE_SIZE);
+	if (mtrr == 0xfe || mtrr == 0xff)
+		mtrr = MTRR_TYPE_WRBACK;
+	return mtrr;
+}
+
 static int kvm_unsync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
 {
 	unsigned index;
-- 
cgit v1.2.3


From 64d4d521757117aa5c1cfe79d3baa6cf57703f81 Mon Sep 17 00:00:00 2001
From: Sheng Yang <sheng@linux.intel.com>
Date: Thu, 9 Oct 2008 16:01:57 +0800
Subject: KVM: Enable MTRR for EPT

The effective memory type of EPT is the mixture of MSR_IA32_CR_PAT and memory
type field of EPT entry.

Signed-off-by: Sheng Yang <sheng@linux.intel.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/include/asm/kvm_host.h |  3 ++-
 arch/x86/kvm/mmu.c              | 11 ++++++++++-
 arch/x86/kvm/svm.c              |  6 ++++++
 arch/x86/kvm/vmx.c              | 10 ++++++++--
 arch/x86/kvm/x86.c              |  2 +-
 5 files changed, 27 insertions(+), 5 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 8082e87f628..93040b5eed9 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -483,6 +483,7 @@ struct kvm_x86_ops {
 
 	int (*set_tss_addr)(struct kvm *kvm, unsigned int addr);
 	int (*get_tdp_level)(void);
+	int (*get_mt_mask_shift)(void);
 };
 
 extern struct kvm_x86_ops *kvm_x86_ops;
@@ -496,7 +497,7 @@ int kvm_mmu_setup(struct kvm_vcpu *vcpu);
 void kvm_mmu_set_nonpresent_ptes(u64 trap_pte, u64 notrap_pte);
 void kvm_mmu_set_base_ptes(u64 base_pte);
 void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask,
-		u64 dirty_mask, u64 nx_mask, u64 x_mask);
+		u64 dirty_mask, u64 nx_mask, u64 x_mask, u64 mt_mask);
 
 int kvm_mmu_reset_context(struct kvm_vcpu *vcpu);
 void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot);
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index ac2304fd173..09d05f57bf6 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -168,6 +168,7 @@ static u64 __read_mostly shadow_x_mask;	/* mutual exclusive with nx_mask */
 static u64 __read_mostly shadow_user_mask;
 static u64 __read_mostly shadow_accessed_mask;
 static u64 __read_mostly shadow_dirty_mask;
+static u64 __read_mostly shadow_mt_mask;
 
 void kvm_mmu_set_nonpresent_ptes(u64 trap_pte, u64 notrap_pte)
 {
@@ -183,13 +184,14 @@ void kvm_mmu_set_base_ptes(u64 base_pte)
 EXPORT_SYMBOL_GPL(kvm_mmu_set_base_ptes);
 
 void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask,
-		u64 dirty_mask, u64 nx_mask, u64 x_mask)
+		u64 dirty_mask, u64 nx_mask, u64 x_mask, u64 mt_mask)
 {
 	shadow_user_mask = user_mask;
 	shadow_accessed_mask = accessed_mask;
 	shadow_dirty_mask = dirty_mask;
 	shadow_nx_mask = nx_mask;
 	shadow_x_mask = x_mask;
+	shadow_mt_mask = mt_mask;
 }
 EXPORT_SYMBOL_GPL(kvm_mmu_set_mask_ptes);
 
@@ -1546,6 +1548,8 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte,
 {
 	u64 spte;
 	int ret = 0;
+	u64 mt_mask = shadow_mt_mask;
+
 	/*
 	 * We don't set the accessed bit, since we sometimes want to see
 	 * whether the guest actually used the pte (in order to detect
@@ -1564,6 +1568,11 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte,
 		spte |= shadow_user_mask;
 	if (largepage)
 		spte |= PT_PAGE_SIZE_MASK;
+	if (mt_mask) {
+		mt_mask = get_memory_type(vcpu, gfn) <<
+			  kvm_x86_ops->get_mt_mask_shift();
+		spte |= mt_mask;
+	}
 
 	spte |= (u64)pfn << PAGE_SHIFT;
 
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 9c4ce657d96..05efc4ef75a 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -1912,6 +1912,11 @@ static int get_npt_level(void)
 #endif
 }
 
+static int svm_get_mt_mask_shift(void)
+{
+	return 0;
+}
+
 static struct kvm_x86_ops svm_x86_ops = {
 	.cpu_has_kvm_support = has_svm,
 	.disabled_by_bios = is_disabled,
@@ -1967,6 +1972,7 @@ static struct kvm_x86_ops svm_x86_ops = {
 
 	.set_tss_addr = svm_set_tss_addr,
 	.get_tdp_level = get_npt_level,
+	.get_mt_mask_shift = svm_get_mt_mask_shift,
 };
 
 static int __init svm_init(void)
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index b4c95a501cc..dae134fa09e 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -3574,6 +3574,11 @@ static int get_ept_level(void)
 	return VMX_EPT_DEFAULT_GAW + 1;
 }
 
+static int vmx_get_mt_mask_shift(void)
+{
+	return VMX_EPT_MT_EPTE_SHIFT;
+}
+
 static struct kvm_x86_ops vmx_x86_ops = {
 	.cpu_has_kvm_support = cpu_has_kvm_support,
 	.disabled_by_bios = vmx_disabled_by_bios,
@@ -3629,6 +3634,7 @@ static struct kvm_x86_ops vmx_x86_ops = {
 
 	.set_tss_addr = vmx_set_tss_addr,
 	.get_tdp_level = get_ept_level,
+	.get_mt_mask_shift = vmx_get_mt_mask_shift,
 };
 
 static int __init vmx_init(void)
@@ -3685,10 +3691,10 @@ static int __init vmx_init(void)
 		bypass_guest_pf = 0;
 		kvm_mmu_set_base_ptes(VMX_EPT_READABLE_MASK |
 			VMX_EPT_WRITABLE_MASK |
-			VMX_EPT_DEFAULT_MT << VMX_EPT_MT_EPTE_SHIFT |
 			VMX_EPT_IGMT_BIT);
 		kvm_mmu_set_mask_ptes(0ull, 0ull, 0ull, 0ull,
-				VMX_EPT_EXECUTABLE_MASK);
+				VMX_EPT_EXECUTABLE_MASK,
+				VMX_EPT_DEFAULT_MT << VMX_EPT_MT_EPTE_SHIFT);
 		kvm_enable_tdp();
 	} else
 		kvm_disable_tdp();
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 0edf75339f3..f175b796c2a 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -2615,7 +2615,7 @@ int kvm_arch_init(void *opaque)
 	kvm_mmu_set_nonpresent_ptes(0ull, 0ull);
 	kvm_mmu_set_base_ptes(PT_PRESENT_MASK);
 	kvm_mmu_set_mask_ptes(PT_USER_MASK, PT_ACCESSED_MASK,
-			PT_DIRTY_MASK, PT64_NX_MASK, 0);
+			PT_DIRTY_MASK, PT64_NX_MASK, 0, 0);
 	return 0;
 
 out:
-- 
cgit v1.2.3


From d73fa29a9b75b2af7f69dae276d2c602a23b329b Mon Sep 17 00:00:00 2001
From: Sheng Yang <sheng@linux.intel.com>
Date: Tue, 14 Oct 2008 15:59:10 +0800
Subject: KVM: Clean up kvm_x86_emulate.h

Remove one left improper comment of removed CR2.

Signed-off-by: Sheng Yang <sheng@linux.intel.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/include/asm/kvm_x86_emulate.h | 10 +++-------
 1 file changed, 3 insertions(+), 7 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/kvm_x86_emulate.h b/arch/x86/include/asm/kvm_x86_emulate.h
index 25179a29f20..16a002655f3 100644
--- a/arch/x86/include/asm/kvm_x86_emulate.h
+++ b/arch/x86/include/asm/kvm_x86_emulate.h
@@ -146,22 +146,18 @@ struct x86_emulate_ctxt {
 	/* Register state before/after emulation. */
 	struct kvm_vcpu *vcpu;
 
-	/* Linear faulting address (if emulating a page-faulting instruction) */
 	unsigned long eflags;
-
 	/* Emulated execution mode, represented by an X86EMUL_MODE value. */
 	int mode;
-
 	u32 cs_base;
 
 	/* decode cache */
-
 	struct decode_cache decode;
 };
 
 /* Repeat String Operation Prefix */
-#define REPE_PREFIX  1
-#define REPNE_PREFIX    2
+#define REPE_PREFIX	1
+#define REPNE_PREFIX	2
 
 /* Execution mode, passed to the emulator. */
 #define X86EMUL_MODE_REAL     0	/* Real mode.             */
@@ -170,7 +166,7 @@ struct x86_emulate_ctxt {
 #define X86EMUL_MODE_PROT64   8	/* 64-bit (long) mode.    */
 
 /* Host execution mode. */
-#if defined(__i386__)
+#if defined(CONFIG_X86_32)
 #define X86EMUL_MODE_HOST X86EMUL_MODE_PROT32
 #elif defined(CONFIG_X86_64)
 #define X86EMUL_MODE_HOST X86EMUL_MODE_PROT64
-- 
cgit v1.2.3


From 291f26bc0f89518ad7ee3207c09eb8a743ac8fcc Mon Sep 17 00:00:00 2001
From: Sheng Yang <sheng@linux.intel.com>
Date: Thu, 16 Oct 2008 17:30:57 +0800
Subject: KVM: MMU: Extend kvm_mmu_page->slot_bitmap size

Otherwise set_bit() for private memory slot(above KVM_MEMORY_SLOTS) would
corrupted memory in 32bit host.

Signed-off-by: Sheng Yang <sheng@linux.intel.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/include/asm/kvm_host.h | 8 +++++---
 arch/x86/kvm/mmu.c              | 6 +++---
 2 files changed, 8 insertions(+), 6 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 93040b5eed9..59c3ae10de6 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -192,9 +192,11 @@ struct kvm_mmu_page {
 	u64 *spt;
 	/* hold the gfn of each spte inside spt */
 	gfn_t *gfns;
-	unsigned long slot_bitmap; /* One bit set per slot which has memory
-				    * in this shadow page.
-				    */
+	/*
+	 * One bit set per slot which has memory
+	 * in this shadow page.
+	 */
+	DECLARE_BITMAP(slot_bitmap, KVM_MEMORY_SLOTS + KVM_PRIVATE_MEM_SLOTS);
 	int multimapped;         /* More than one parent_pte? */
 	int root_count;          /* Currently serving as active root */
 	bool unsync;
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 09d05f57bf6..8687758b529 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -789,7 +789,7 @@ static struct kvm_mmu_page *kvm_mmu_alloc_page(struct kvm_vcpu *vcpu,
 	set_page_private(virt_to_page(sp->spt), (unsigned long)sp);
 	list_add(&sp->link, &vcpu->kvm->arch.active_mmu_pages);
 	ASSERT(is_empty_shadow_page(sp->spt));
-	sp->slot_bitmap = 0;
+	bitmap_zero(sp->slot_bitmap, KVM_MEMORY_SLOTS + KVM_PRIVATE_MEM_SLOTS);
 	sp->multimapped = 0;
 	sp->parent_pte = parent_pte;
 	--vcpu->kvm->arch.n_free_mmu_pages;
@@ -1364,7 +1364,7 @@ static void page_header_update_slot(struct kvm *kvm, void *pte, gfn_t gfn)
 	int slot = memslot_id(kvm, gfn_to_memslot(kvm, gfn));
 	struct kvm_mmu_page *sp = page_header(__pa(pte));
 
-	__set_bit(slot, &sp->slot_bitmap);
+	__set_bit(slot, sp->slot_bitmap);
 }
 
 static void mmu_convert_notrap(struct kvm_mmu_page *sp)
@@ -2564,7 +2564,7 @@ void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot)
 		int i;
 		u64 *pt;
 
-		if (!test_bit(slot, &sp->slot_bitmap))
+		if (!test_bit(slot, sp->slot_bitmap))
 			continue;
 
 		pt = sp->spt;
-- 
cgit v1.2.3


From 6fe639792c7b8e462baeaac39ecc33541fd5da6e Mon Sep 17 00:00:00 2001
From: Sheng Yang <sheng@linux.intel.com>
Date: Thu, 16 Oct 2008 17:30:58 +0800
Subject: KVM: VMX: Move private memory slot position

PCI device assignment would map guest MMIO spaces as separate slot, so it is
possible that the device has more than 2 MMIO spaces and overwrite current
private memslot.

The patch move private memory slot to the top of userspace visible memory slots.

Signed-off-by: Sheng Yang <sheng@linux.intel.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/kvm/vmx.c | 2 +-
 arch/x86/kvm/vmx.h | 5 +++--
 2 files changed, 4 insertions(+), 3 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index dae134fa09e..7623eb7b68d 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -2513,7 +2513,7 @@ static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr)
 {
 	int ret;
 	struct kvm_userspace_memory_region tss_mem = {
-		.slot = 8,
+		.slot = TSS_PRIVATE_MEMSLOT,
 		.guest_phys_addr = addr,
 		.memory_size = PAGE_SIZE * 3,
 		.flags = 0,
diff --git a/arch/x86/kvm/vmx.h b/arch/x86/kvm/vmx.h
index 18598afe52e..3db236c26fa 100644
--- a/arch/x86/kvm/vmx.h
+++ b/arch/x86/kvm/vmx.h
@@ -338,8 +338,9 @@ enum vmcs_field {
 
 #define AR_RESERVD_MASK 0xfffe0f00
 
-#define APIC_ACCESS_PAGE_PRIVATE_MEMSLOT	9
-#define IDENTITY_PAGETABLE_PRIVATE_MEMSLOT	10
+#define TSS_PRIVATE_MEMSLOT			(KVM_MEMORY_SLOTS + 0)
+#define APIC_ACCESS_PAGE_PRIVATE_MEMSLOT	(KVM_MEMORY_SLOTS + 1)
+#define IDENTITY_PAGETABLE_PRIVATE_MEMSLOT	(KVM_MEMORY_SLOTS + 2)
 
 #define VMX_NR_VPIDS				(1 << 16)
 #define VMX_VPID_EXTENT_SINGLE_CONTEXT		1
-- 
cgit v1.2.3


From 291fd39bfc2089c2dae79cf2d7cfca81b14ca769 Mon Sep 17 00:00:00 2001
From: Guillaume Thouvenin <guillaume.thouvenin@ext.bull.net>
Date: Mon, 20 Oct 2008 13:11:58 +0200
Subject: KVM: x86 emulator: Add decode entries for 0x04 and 0x05 opcodes (add
 acc, imm)

Add decode entries for 0x04 and 0x05 (ADD) opcodes, execution is already
implemented.

Signed-off-by: Guillaume Thouvenin <guillaume.thouvenin@ext.bull.net>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/kvm/x86_emulate.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/x86_emulate.c b/arch/x86/kvm/x86_emulate.c
index a391e213fe6..57d7cc45be4 100644
--- a/arch/x86/kvm/x86_emulate.c
+++ b/arch/x86/kvm/x86_emulate.c
@@ -80,7 +80,7 @@ static u16 opcode_table[256] = {
 	/* 0x00 - 0x07 */
 	ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM,
 	ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM,
-	0, 0, 0, 0,
+	ByteOp | DstAcc | SrcImm, DstAcc | SrcImm, 0, 0,
 	/* 0x08 - 0x0F */
 	ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM,
 	ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM,
-- 
cgit v1.2.3


From 8fdb2351d51b040146f10a624387bbd102d851c0 Mon Sep 17 00:00:00 2001
From: Jan Kiszka <jan.kiszka@siemens.com>
Date: Mon, 20 Oct 2008 10:20:02 +0200
Subject: KVM: x86: Fix and refactor NMI watchdog emulation

This patch refactors the NMI watchdog delivery patch, consolidating
tests and providing a proper API for delivering watchdog events.

An included micro-optimization is to check only for apic_hw_enabled in
kvm_apic_local_deliver (the test for LVT mask is covering the
soft-disabled case already).

Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
Acked-by: Sheng Yang <sheng@linux.intel.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/kvm/i8254.c | 15 +++++++++------
 arch/x86/kvm/irq.h   |  2 +-
 arch/x86/kvm/lapic.c | 20 ++++++++++----------
 3 files changed, 20 insertions(+), 17 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c
index 580cc1d01c7..b6fcf5a9e50 100644
--- a/arch/x86/kvm/i8254.c
+++ b/arch/x86/kvm/i8254.c
@@ -612,15 +612,18 @@ static void __inject_pit_timer_intr(struct kvm *kvm)
 	mutex_unlock(&kvm->lock);
 
 	/*
-	 * Provides NMI watchdog support in IOAPIC mode.
-	 * The route is: PIT -> PIC -> LVT0 in NMI mode,
-	 * timer IRQs will continue to flow through the IOAPIC.
+	 * Provides NMI watchdog support via Virtual Wire mode.
+	 * The route is: PIT -> PIC -> LVT0 in NMI mode.
+	 *
+	 * Note: Our Virtual Wire implementation is simplified, only
+	 * propagating PIT interrupts to all VCPUs when they have set
+	 * LVT0 to NMI delivery. Other PIC interrupts are just sent to
+	 * VCPU0, and only if its LVT0 is in EXTINT mode.
 	 */
 	for (i = 0; i < KVM_MAX_VCPUS; ++i) {
 		vcpu = kvm->vcpus[i];
-		if (!vcpu)
-			continue;
-		kvm_apic_local_deliver(vcpu, APIC_LVT0);
+		if (vcpu)
+			kvm_apic_nmi_wd_deliver(vcpu);
 	}
 }
 
diff --git a/arch/x86/kvm/irq.h b/arch/x86/kvm/irq.h
index 71e37a530cf..b9e9051650e 100644
--- a/arch/x86/kvm/irq.h
+++ b/arch/x86/kvm/irq.h
@@ -87,7 +87,7 @@ void kvm_pic_reset(struct kvm_kpic_state *s);
 void kvm_timer_intr_post(struct kvm_vcpu *vcpu, int vec);
 void kvm_inject_pending_timer_irqs(struct kvm_vcpu *vcpu);
 void kvm_inject_apic_timer_irqs(struct kvm_vcpu *vcpu);
-int kvm_apic_local_deliver(struct kvm_vcpu *vcpu, int lvt_type);
+void kvm_apic_nmi_wd_deliver(struct kvm_vcpu *vcpu);
 void __kvm_migrate_apic_timer(struct kvm_vcpu *vcpu);
 void __kvm_migrate_pit_timer(struct kvm_vcpu *vcpu);
 void __kvm_migrate_timers(struct kvm_vcpu *vcpu);
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 304f9ddbdd5..0b0d413f0af 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -973,14 +973,12 @@ int apic_has_pending_timer(struct kvm_vcpu *vcpu)
 	return 0;
 }
 
-int kvm_apic_local_deliver(struct kvm_vcpu *vcpu, int lvt_type)
+static int kvm_apic_local_deliver(struct kvm_lapic *apic, int lvt_type)
 {
-	struct kvm_lapic *apic = vcpu->arch.apic;
+	u32 reg = apic_get_reg(apic, lvt_type);
 	int vector, mode, trig_mode;
-	u32 reg;
 
-	if (apic && apic_enabled(apic)) {
-		reg = apic_get_reg(apic, lvt_type);
+	if (apic_hw_enabled(apic) && !(reg & APIC_LVT_MASKED)) {
 		vector = reg & APIC_VECTOR_MASK;
 		mode = reg & APIC_MODE_MASK;
 		trig_mode = reg & APIC_LVT_LEVEL_TRIGGER;
@@ -989,9 +987,12 @@ int kvm_apic_local_deliver(struct kvm_vcpu *vcpu, int lvt_type)
 	return 0;
 }
 
-static inline int __inject_apic_timer_irq(struct kvm_lapic *apic)
+void kvm_apic_nmi_wd_deliver(struct kvm_vcpu *vcpu)
 {
-	return kvm_apic_local_deliver(apic->vcpu, APIC_LVTT);
+	struct kvm_lapic *apic = vcpu->arch.apic;
+
+	if (apic)
+		kvm_apic_local_deliver(apic, APIC_LVT0);
 }
 
 static enum hrtimer_restart apic_timer_fn(struct hrtimer *data)
@@ -1086,9 +1087,8 @@ void kvm_inject_apic_timer_irqs(struct kvm_vcpu *vcpu)
 {
 	struct kvm_lapic *apic = vcpu->arch.apic;
 
-	if (apic && apic_lvt_enabled(apic, APIC_LVTT) &&
-		atomic_read(&apic->timer.pending) > 0) {
-		if (__inject_apic_timer_irq(apic))
+	if (apic && atomic_read(&apic->timer.pending) > 0) {
+		if (kvm_apic_local_deliver(apic, APIC_LVTT))
 			atomic_dec(&apic->timer.pending);
 	}
 }
-- 
cgit v1.2.3


From cc6e462cd54e64858ea25816df87d033229efe56 Mon Sep 17 00:00:00 2001
From: Jan Kiszka <jan.kiszka@siemens.com>
Date: Mon, 20 Oct 2008 10:20:03 +0200
Subject: KVM: x86: Optimize NMI watchdog delivery

As suggested by Avi, this patch introduces a counter of VCPUs that have
LVT0 set to NMI mode. Only if the counter > 0, we push the PIT ticks via
all LAPIC LVT0 lines to enable NMI watchdog support.

Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
Acked-by: Sheng Yang <sheng@linux.intel.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/include/asm/kvm_host.h |  1 +
 arch/x86/kvm/i8254.c            | 11 ++++++-----
 arch/x86/kvm/lapic.c            | 23 ++++++++++++++++++++---
 3 files changed, 27 insertions(+), 8 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 59c3ae10de6..09e6c56572c 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -361,6 +361,7 @@ struct kvm_arch{
 	struct kvm_ioapic *vioapic;
 	struct kvm_pit *vpit;
 	struct hlist_head irq_ack_notifier_list;
+	int vapics_in_nmi_mode;
 
 	int round_robin_prev_vcpu;
 	unsigned int tss_addr;
diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c
index b6fcf5a9e50..e665d1c623c 100644
--- a/arch/x86/kvm/i8254.c
+++ b/arch/x86/kvm/i8254.c
@@ -620,11 +620,12 @@ static void __inject_pit_timer_intr(struct kvm *kvm)
 	 * LVT0 to NMI delivery. Other PIC interrupts are just sent to
 	 * VCPU0, and only if its LVT0 is in EXTINT mode.
 	 */
-	for (i = 0; i < KVM_MAX_VCPUS; ++i) {
-		vcpu = kvm->vcpus[i];
-		if (vcpu)
-			kvm_apic_nmi_wd_deliver(vcpu);
-	}
+	if (kvm->arch.vapics_in_nmi_mode > 0)
+		for (i = 0; i < KVM_MAX_VCPUS; ++i) {
+			vcpu = kvm->vcpus[i];
+			if (vcpu)
+				kvm_apic_nmi_wd_deliver(vcpu);
+		}
 }
 
 void kvm_inject_pit_timer_irqs(struct kvm_vcpu *vcpu)
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 0b0d413f0af..afac68c0815 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -130,6 +130,11 @@ static inline int apic_lvtt_period(struct kvm_lapic *apic)
 	return apic_get_reg(apic, APIC_LVTT) & APIC_LVT_TIMER_PERIODIC;
 }
 
+static inline int apic_lvt_nmi_mode(u32 lvt_val)
+{
+	return (lvt_val & (APIC_MODE_MASK | APIC_LVT_MASKED)) == APIC_DM_NMI;
+}
+
 static unsigned int apic_lvt_mask[APIC_LVT_NUM] = {
 	LVT_MASK | APIC_LVT_TIMER_PERIODIC,	/* LVTT */
 	LVT_MASK | APIC_MODE_MASK,	/* LVTTHMR */
@@ -672,6 +677,20 @@ static void start_apic_timer(struct kvm_lapic *apic)
 					apic->timer.period)));
 }
 
+static void apic_manage_nmi_watchdog(struct kvm_lapic *apic, u32 lvt0_val)
+{
+	int nmi_wd_enabled = apic_lvt_nmi_mode(apic_get_reg(apic, APIC_LVT0));
+
+	if (apic_lvt_nmi_mode(lvt0_val)) {
+		if (!nmi_wd_enabled) {
+			apic_debug("Receive NMI setting on APIC_LVT0 "
+				   "for cpu %d\n", apic->vcpu->vcpu_id);
+			apic->vcpu->kvm->arch.vapics_in_nmi_mode++;
+		}
+	} else if (nmi_wd_enabled)
+		apic->vcpu->kvm->arch.vapics_in_nmi_mode--;
+}
+
 static void apic_mmio_write(struct kvm_io_device *this,
 			    gpa_t address, int len, const void *data)
 {
@@ -753,9 +772,7 @@ static void apic_mmio_write(struct kvm_io_device *this,
 		break;
 
 	case APIC_LVT0:
-		if (val == APIC_DM_NMI)
-			apic_debug("Receive NMI setting on APIC_LVT0 "
-				"for cpu %d\n", apic->vcpu->vcpu_id);
+		apic_manage_nmi_watchdog(apic, val);
 	case APIC_LVTT:
 	case APIC_LVTTHMR:
 	case APIC_LVTPC:
-- 
cgit v1.2.3


From b8222ad2e52fd2c0c4e5e1c53e65d131f911b767 Mon Sep 17 00:00:00 2001
From: Amit Shah <amit.shah@redhat.com>
Date: Wed, 22 Oct 2008 16:39:47 +0530
Subject: KVM: x86: Fix typo in function name

get_segment_descritptor_dtable() contains an obvious type.

Signed-off-by: Amit Shah <amit.shah@redhat.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/kvm/x86.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index f175b796c2a..ceeac889714 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -3373,9 +3373,9 @@ static void seg_desct_to_kvm_desct(struct desc_struct *seg_desc, u16 selector,
 	kvm_desct->padding = 0;
 }
 
-static void get_segment_descritptor_dtable(struct kvm_vcpu *vcpu,
-					   u16 selector,
-					   struct descriptor_table *dtable)
+static void get_segment_descriptor_dtable(struct kvm_vcpu *vcpu,
+					  u16 selector,
+					  struct descriptor_table *dtable)
 {
 	if (selector & 1 << 2) {
 		struct kvm_segment kvm_seg;
@@ -3400,7 +3400,7 @@ static int load_guest_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector,
 	struct descriptor_table dtable;
 	u16 index = selector >> 3;
 
-	get_segment_descritptor_dtable(vcpu, selector, &dtable);
+	get_segment_descriptor_dtable(vcpu, selector, &dtable);
 
 	if (dtable.limit < index * 8 + 7) {
 		kvm_queue_exception_e(vcpu, GP_VECTOR, selector & 0xfffc);
@@ -3419,7 +3419,7 @@ static int save_guest_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector,
 	struct descriptor_table dtable;
 	u16 index = selector >> 3;
 
-	get_segment_descritptor_dtable(vcpu, selector, &dtable);
+	get_segment_descriptor_dtable(vcpu, selector, &dtable);
 
 	if (dtable.limit < index * 8 + 7)
 		return 1;
-- 
cgit v1.2.3


From 25022acc3dd5f0b54071c7ba7c371860f2971b52 Mon Sep 17 00:00:00 2001
From: Amit Shah <amit.shah@redhat.com>
Date: Mon, 27 Oct 2008 09:04:17 +0000
Subject: KVM: SVM: Set the 'g' bit of the cs selector for cross-vendor
 migration

The hardware does not set the 'g' bit of the cs selector and this breaks
migration from amd hosts to intel hosts. Set this bit if the segment
limit is beyond 1 MB.

Signed-off-by: Amit Shah <amit.shah@redhat.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/kvm/svm.c | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 05efc4ef75a..665008d9785 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -772,6 +772,15 @@ static void svm_get_segment(struct kvm_vcpu *vcpu,
 	var->l = (s->attrib >> SVM_SELECTOR_L_SHIFT) & 1;
 	var->db = (s->attrib >> SVM_SELECTOR_DB_SHIFT) & 1;
 	var->g = (s->attrib >> SVM_SELECTOR_G_SHIFT) & 1;
+
+	/*
+	 * SVM always stores 0 for the 'G' bit in the CS selector in
+	 * the VMCB on a VMEXIT. This hurts cross-vendor migration:
+	 * Intel's VMENTRY has a check on the 'G' bit.
+	 */
+	if (seg == VCPU_SREG_CS)
+		var->g = s->limit > 0xfffff;
+
 	var->unusable = !var->present;
 }
 
-- 
cgit v1.2.3


From c0d09828c870f90c6bc72070ada281568f89c63b Mon Sep 17 00:00:00 2001
From: Amit Shah <amit.shah@redhat.com>
Date: Mon, 27 Oct 2008 09:04:18 +0000
Subject: KVM: SVM: Set the 'busy' flag of the TR selector

The busy flag of the TR selector is not set by the hardware. This breaks
migration from amd hosts to intel hosts.

Signed-off-by: Amit Shah <amit.shah@redhat.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/kvm/svm.c | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 665008d9785..743aebd7bfc 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -781,6 +781,13 @@ static void svm_get_segment(struct kvm_vcpu *vcpu,
 	if (seg == VCPU_SREG_CS)
 		var->g = s->limit > 0xfffff;
 
+	/*
+	 * Work around a bug where the busy flag in the tr selector
+	 * isn't exposed
+	 */
+	if (seg == VCPU_SREG_TR)
+		var->type |= 0x2;
+
 	var->unusable = !var->present;
 }
 
-- 
cgit v1.2.3


From e93f36bcfaa9e899c595e1c446c784a69021854a Mon Sep 17 00:00:00 2001
From: Guillaume Thouvenin <guillaume.thouvenin@ext.bull.net>
Date: Tue, 28 Oct 2008 10:51:30 +0100
Subject: KVM: allow emulator to adjust rip for emulated pio instructions

If we call the emulator we shouldn't call skip_emulated_instruction()
in the first place, since the emulator already computes the next rip
for us. Thus we move ->skip_emulated_instruction() out of
kvm_emulate_pio() and into handle_io() (and the svm equivalent). We
also replaced "return 0" by "break" in the "do_io:" case because now
the shadow register state needs to be committed. Otherwise eip will never
be updated.

Signed-off-by: Guillaume Thouvenin <guillaume.thouvenin@ext.bull.net>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/kvm/svm.c         | 1 +
 arch/x86/kvm/vmx.c         | 1 +
 arch/x86/kvm/x86.c         | 2 --
 arch/x86/kvm/x86_emulate.c | 2 +-
 4 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 743aebd7bfc..f0ad4d4217e 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -1115,6 +1115,7 @@ static int io_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
 	rep = (io_info & SVM_IOIO_REP_MASK) != 0;
 	down = (svm->vmcb->save.rflags & X86_EFLAGS_DF) != 0;
 
+	skip_emulated_instruction(&svm->vcpu);
 	return kvm_emulate_pio(&svm->vcpu, kvm_run, in, size, port);
 }
 
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 7623eb7b68d..816d23185fb 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -2687,6 +2687,7 @@ static int handle_io(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 	rep = (exit_qualification & 32) != 0;
 	port = exit_qualification >> 16;
 
+	skip_emulated_instruction(vcpu);
 	return kvm_emulate_pio(vcpu, kvm_run, in, size, port);
 }
 
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index ceeac889714..38f79b6aaf1 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -2478,8 +2478,6 @@ int kvm_emulate_pio(struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
 	val = kvm_register_read(vcpu, VCPU_REGS_RAX);
 	memcpy(vcpu->arch.pio_data, &val, 4);
 
-	kvm_x86_ops->skip_emulated_instruction(vcpu);
-
 	pio_dev = vcpu_find_pio_dev(vcpu, port, size, !in);
 	if (pio_dev) {
 		kernel_pio(pio_dev, vcpu, vcpu->arch.pio_data);
diff --git a/arch/x86/kvm/x86_emulate.c b/arch/x86/kvm/x86_emulate.c
index 57d7cc45be4..8f60ace1387 100644
--- a/arch/x86/kvm/x86_emulate.c
+++ b/arch/x86/kvm/x86_emulate.c
@@ -1772,7 +1772,7 @@ special_insn:
 			c->eip = saved_eip;
 			goto cannot_emulate;
 		}
-		return 0;
+		break;
 	case 0xf4:              /* hlt */
 		ctxt->vcpu->arch.halt_request = 1;
 		break;
-- 
cgit v1.2.3


From 1d5a4d9b92028d9fe77da34037bd5a1ebfecc733 Mon Sep 17 00:00:00 2001
From: Guillaume Thouvenin <guillaume.thouvenin@ext.bull.net>
Date: Wed, 29 Oct 2008 09:39:42 +0100
Subject: KVM: VMX: Handle mmio emulation when guest state is invalid

If emulate_invalid_guest_state is enabled, the emulator is called
when guest state is invalid.  Until now, we reported an mmio failure
when emulate_instruction() returned EMULATE_DO_MMIO.  This patch adds
the case where emulate_instruction() failed and an MMIO emulation
is needed.

Signed-off-by: Guillaume Thouvenin <guillaume.thouvenin@ext.bull.net>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/kvm/vmx.c | 27 +++++++++++++++------------
 1 file changed, 15 insertions(+), 12 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 816d23185fb..427dbc14fae 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -3052,16 +3052,12 @@ static void handle_invalid_guest_state(struct kvm_vcpu *vcpu,
 	while (!guest_state_valid(vcpu)) {
 		err = emulate_instruction(vcpu, kvm_run, 0, 0, 0);
 
-		switch (err) {
-			case EMULATE_DONE:
-				break;
-			case EMULATE_DO_MMIO:
-				kvm_report_emulation_failure(vcpu, "mmio");
-				/* TODO: Handle MMIO */
-				return;
-			default:
-				kvm_report_emulation_failure(vcpu, "emulation failure");
-				return;
+		if (err == EMULATE_DO_MMIO)
+			break;
+
+		if (err != EMULATE_DONE) {
+			kvm_report_emulation_failure(vcpu, "emulation failure");
+			return;
 		}
 
 		if (signal_pending(current))
@@ -3073,8 +3069,10 @@ static void handle_invalid_guest_state(struct kvm_vcpu *vcpu,
 	local_irq_disable();
 	preempt_disable();
 
-	/* Guest state should be valid now, no more emulation should be needed */
-	vmx->emulation_required = 0;
+	/* Guest state should be valid now except if we need to
+	 * emulate an MMIO */
+	if (guest_state_valid(vcpu))
+		vmx->emulation_required = 0;
 }
 
 /*
@@ -3121,6 +3119,11 @@ static int kvm_handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
 	KVMTRACE_3D(VMEXIT, vcpu, exit_reason, (u32)kvm_rip_read(vcpu),
 		    (u32)((u64)kvm_rip_read(vcpu) >> 32), entryexit);
 
+	/* If we need to emulate an MMIO from handle_invalid_guest_state
+	 * we just return 0 */
+	if (vmx->emulation_required && emulate_invalid_guest_state)
+		return 0;
+
 	/* Access CR3 don't cause VMExit in paging mode, so we need
 	 * to sync with guest real CR3. */
 	if (vm_need_ept() && is_paging(vcpu)) {
-- 
cgit v1.2.3


From 6eb55818c043b097c83828da8430fcb9a02fdb89 Mon Sep 17 00:00:00 2001
From: Sheng Yang <sheng@linux.intel.com>
Date: Fri, 31 Oct 2008 12:37:41 +0800
Subject: KVM: Enable Function Level Reset for assigned device

Ideally, every assigned device should in a clear condition before and after
assignment, so that the former state of device won't affect later work.
Some devices provide a mechanism named Function Level Reset, which is
defined in PCI/PCI-e document. We should execute it before and after device
assignment.

(But sadly, the feature is new, and most device on the market now don't
support it. We are considering using D0/D3hot transmit to emulate it later,
but not that elegant and reliable as FLR itself.)

[Update: Reminded by Xiantao, execute FLR after we ensure that the device can
be assigned to the guest.]

Signed-off-by: Sheng Yang <sheng@linux.intel.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/kvm/x86.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 38f79b6aaf1..9a4a39cfe6e 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -4148,8 +4148,8 @@ static void kvm_free_vcpus(struct kvm *kvm)
 
 void kvm_arch_destroy_vm(struct kvm *kvm)
 {
-	kvm_iommu_unmap_guest(kvm);
 	kvm_free_all_assigned_devices(kvm);
+	kvm_iommu_unmap_guest(kvm);
 	kvm_free_pit(kvm);
 	kfree(kvm->arch.vpic);
 	kfree(kvm->arch.vioapic);
-- 
cgit v1.2.3


From 2843099fee32a6020e1caa95c6026f28b5d43bff Mon Sep 17 00:00:00 2001
From: Izik Eidus <ieidus@redhat.com>
Date: Fri, 3 Oct 2008 17:40:32 +0300
Subject: KVM: MMU: Fix aliased gfns treated as unaliased

Some areas of kvm x86 mmu are using gfn offset inside a slot without
unaliasing the gfn first.  This patch makes sure that the gfn will be
unaliased and add gfn_to_memslot_unaliased() to save the calculating
of the gfn unaliasing in case we have it unaliased already.

Signed-off-by: Izik Eidus <ieidus@redhat.com>
Acked-by: Marcelo Tosatti <mtosatti@redhat.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/include/asm/kvm_host.h |  2 ++
 arch/x86/kvm/mmu.c              | 14 ++++++++++----
 2 files changed, 12 insertions(+), 4 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 09e6c56572c..99e3cc149d2 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -617,6 +617,8 @@ void kvm_disable_tdp(void);
 int load_pdptrs(struct kvm_vcpu *vcpu, unsigned long cr3);
 int complete_pio(struct kvm_vcpu *vcpu);
 
+struct kvm_memory_slot *gfn_to_memslot_unaliased(struct kvm *kvm, gfn_t gfn);
+
 static inline struct kvm_mmu_page *page_header(hpa_t shadow_page)
 {
 	struct page *page = pfn_to_page(shadow_page >> PAGE_SHIFT);
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 8687758b529..8904e8ada97 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -386,7 +386,9 @@ static void account_shadowed(struct kvm *kvm, gfn_t gfn)
 {
 	int *write_count;
 
-	write_count = slot_largepage_idx(gfn, gfn_to_memslot(kvm, gfn));
+	gfn = unalias_gfn(kvm, gfn);
+	write_count = slot_largepage_idx(gfn,
+					 gfn_to_memslot_unaliased(kvm, gfn));
 	*write_count += 1;
 }
 
@@ -394,16 +396,20 @@ static void unaccount_shadowed(struct kvm *kvm, gfn_t gfn)
 {
 	int *write_count;
 
-	write_count = slot_largepage_idx(gfn, gfn_to_memslot(kvm, gfn));
+	gfn = unalias_gfn(kvm, gfn);
+	write_count = slot_largepage_idx(gfn,
+					 gfn_to_memslot_unaliased(kvm, gfn));
 	*write_count -= 1;
 	WARN_ON(*write_count < 0);
 }
 
 static int has_wrprotected_page(struct kvm *kvm, gfn_t gfn)
 {
-	struct kvm_memory_slot *slot = gfn_to_memslot(kvm, gfn);
+	struct kvm_memory_slot *slot;
 	int *largepage_idx;
 
+	gfn = unalias_gfn(kvm, gfn);
+	slot = gfn_to_memslot_unaliased(kvm, gfn);
 	if (slot) {
 		largepage_idx = slot_largepage_idx(gfn, slot);
 		return *largepage_idx;
@@ -2973,8 +2979,8 @@ static void audit_write_protection(struct kvm_vcpu *vcpu)
 		if (sp->role.metaphysical)
 			continue;
 
-		slot = gfn_to_memslot(vcpu->kvm, sp->gfn);
 		gfn = unalias_gfn(vcpu->kvm, sp->gfn);
+		slot = gfn_to_memslot_unaliased(vcpu->kvm, sp->gfn);
 		rmapp = &slot->rmap[gfn - slot->base_gfn];
 		if (*rmapp)
 			printk(KERN_ERR "%s: (%s) shadow page has writable"
-- 
cgit v1.2.3


From 0853d2c1d849ef69884d2447d90d04007590b72b Mon Sep 17 00:00:00 2001
From: Nitin A Kamble <nitin.a.kamble@intel.com>
Date: Wed, 5 Nov 2008 15:37:36 -0800
Subject: KVM: Fix cpuid leaf 0xb loop termination

For cpuid leaf 0xb the bits 8-15 in ECX register define the end of counting
leaf.      The previous code was using bits 0-7 for this purpose, which is
a bug.

Signed-off-by: Nitin A Kamble <nitin.a.kamble@intel.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/kvm/x86.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 9a4a39cfe6e..2889a0f359e 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1276,7 +1276,7 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
 		entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
 		/* read more entries until level_type is zero */
 		for (i = 1; *nent < maxnent; ++i) {
-			level_type = entry[i - 1].ecx & 0xff;
+			level_type = entry[i - 1].ecx & 0xff00;
 			if (!level_type)
 				break;
 			do_cpuid_1_ent(&entry[i], function, i);
-- 
cgit v1.2.3


From 0fdf8e59faa5c60e9d77c8e14abe3a0f8bfcf586 Mon Sep 17 00:00:00 2001
From: Nitin A Kamble <nitin.a.kamble@intel.com>
Date: Wed, 5 Nov 2008 15:56:21 -0800
Subject: KVM: Fix cpuid iteration on multiple leaves per eac

The code to traverse the cpuid data array list for counting type of leaves is
currently broken.

This patches fixes the 2 things in it.

 1. Set the 1st counting entry's flag KVM_CPUID_FLAG_STATE_READ_NEXT. Without
    it the code will never find a valid entry.

 2. Also the stop condition in the for loop while looking for the next unflaged
    entry is broken. It needs to stop when it find one matching entry;
    and in the case of count of 1, it will be the same entry found in this
    iteration.

Signed-Off-By: Nitin A Kamble <nitin.a.kamble@intel.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/kvm/x86.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 2889a0f359e..7a2aeba0bfb 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1246,6 +1246,7 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
 		int t, times = entry->eax & 0xff;
 
 		entry->flags |= KVM_CPUID_FLAG_STATEFUL_FUNC;
+		entry->flags |= KVM_CPUID_FLAG_STATE_READ_NEXT;
 		for (t = 1; t < times && *nent < maxnent; ++t) {
 			do_cpuid_1_ent(&entry[t], function, 0);
 			entry[t].flags |= KVM_CPUID_FLAG_STATEFUL_FUNC;
@@ -2801,7 +2802,7 @@ static int move_to_next_stateful_cpuid_entry(struct kvm_vcpu *vcpu, int i)
 
 	e->flags &= ~KVM_CPUID_FLAG_STATE_READ_NEXT;
 	/* when no next entry is found, the current entry[i] is reselected */
-	for (j = i + 1; j == i; j = (j + 1) % nent) {
+	for (j = i + 1; ; j = (j + 1) % nent) {
 		struct kvm_cpuid_entry2 *ej = &vcpu->arch.cpuid_entries[j];
 		if (ej->function == e->function) {
 			ej->flags |= KVM_CPUID_FLAG_STATE_READ_NEXT;
-- 
cgit v1.2.3


From 13673a90f1cf88296f726265cc7cf3ec76ecba30 Mon Sep 17 00:00:00 2001
From: Eduardo Habkost <ehabkost@redhat.com>
Date: Mon, 17 Nov 2008 19:03:13 -0200
Subject: KVM: VMX: move vmx.h to include/asm

vmx.h will be used by core code that is independent of KVM, so I am
moving it outside the arch/x86/kvm directory.

Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/include/asm/vmx.h | 367 +++++++++++++++++++++++++++++++++++++++++++++
 arch/x86/kvm/mmu.c         |   2 +-
 arch/x86/kvm/vmx.c         |   2 +-
 arch/x86/kvm/vmx.h         | 367 ---------------------------------------------
 4 files changed, 369 insertions(+), 369 deletions(-)
 create mode 100644 arch/x86/include/asm/vmx.h
 delete mode 100644 arch/x86/kvm/vmx.h

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
new file mode 100644
index 00000000000..3db236c26fa
--- /dev/null
+++ b/arch/x86/include/asm/vmx.h
@@ -0,0 +1,367 @@
+#ifndef VMX_H
+#define VMX_H
+
+/*
+ * vmx.h: VMX Architecture related definitions
+ * Copyright (c) 2004, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ * A few random additions are:
+ * Copyright (C) 2006 Qumranet
+ *    Avi Kivity <avi@qumranet.com>
+ *    Yaniv Kamay <yaniv@qumranet.com>
+ *
+ */
+
+/*
+ * Definitions of Primary Processor-Based VM-Execution Controls.
+ */
+#define CPU_BASED_VIRTUAL_INTR_PENDING          0x00000004
+#define CPU_BASED_USE_TSC_OFFSETING             0x00000008
+#define CPU_BASED_HLT_EXITING                   0x00000080
+#define CPU_BASED_INVLPG_EXITING                0x00000200
+#define CPU_BASED_MWAIT_EXITING                 0x00000400
+#define CPU_BASED_RDPMC_EXITING                 0x00000800
+#define CPU_BASED_RDTSC_EXITING                 0x00001000
+#define CPU_BASED_CR3_LOAD_EXITING		0x00008000
+#define CPU_BASED_CR3_STORE_EXITING		0x00010000
+#define CPU_BASED_CR8_LOAD_EXITING              0x00080000
+#define CPU_BASED_CR8_STORE_EXITING             0x00100000
+#define CPU_BASED_TPR_SHADOW                    0x00200000
+#define CPU_BASED_VIRTUAL_NMI_PENDING		0x00400000
+#define CPU_BASED_MOV_DR_EXITING                0x00800000
+#define CPU_BASED_UNCOND_IO_EXITING             0x01000000
+#define CPU_BASED_USE_IO_BITMAPS                0x02000000
+#define CPU_BASED_USE_MSR_BITMAPS               0x10000000
+#define CPU_BASED_MONITOR_EXITING               0x20000000
+#define CPU_BASED_PAUSE_EXITING                 0x40000000
+#define CPU_BASED_ACTIVATE_SECONDARY_CONTROLS   0x80000000
+/*
+ * Definitions of Secondary Processor-Based VM-Execution Controls.
+ */
+#define SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES 0x00000001
+#define SECONDARY_EXEC_ENABLE_EPT               0x00000002
+#define SECONDARY_EXEC_ENABLE_VPID              0x00000020
+#define SECONDARY_EXEC_WBINVD_EXITING		0x00000040
+
+
+#define PIN_BASED_EXT_INTR_MASK                 0x00000001
+#define PIN_BASED_NMI_EXITING                   0x00000008
+#define PIN_BASED_VIRTUAL_NMIS                  0x00000020
+
+#define VM_EXIT_HOST_ADDR_SPACE_SIZE            0x00000200
+#define VM_EXIT_ACK_INTR_ON_EXIT                0x00008000
+#define VM_EXIT_SAVE_IA32_PAT			0x00040000
+#define VM_EXIT_LOAD_IA32_PAT			0x00080000
+
+#define VM_ENTRY_IA32E_MODE                     0x00000200
+#define VM_ENTRY_SMM                            0x00000400
+#define VM_ENTRY_DEACT_DUAL_MONITOR             0x00000800
+#define VM_ENTRY_LOAD_IA32_PAT			0x00004000
+
+/* VMCS Encodings */
+enum vmcs_field {
+	VIRTUAL_PROCESSOR_ID            = 0x00000000,
+	GUEST_ES_SELECTOR               = 0x00000800,
+	GUEST_CS_SELECTOR               = 0x00000802,
+	GUEST_SS_SELECTOR               = 0x00000804,
+	GUEST_DS_SELECTOR               = 0x00000806,
+	GUEST_FS_SELECTOR               = 0x00000808,
+	GUEST_GS_SELECTOR               = 0x0000080a,
+	GUEST_LDTR_SELECTOR             = 0x0000080c,
+	GUEST_TR_SELECTOR               = 0x0000080e,
+	HOST_ES_SELECTOR                = 0x00000c00,
+	HOST_CS_SELECTOR                = 0x00000c02,
+	HOST_SS_SELECTOR                = 0x00000c04,
+	HOST_DS_SELECTOR                = 0x00000c06,
+	HOST_FS_SELECTOR                = 0x00000c08,
+	HOST_GS_SELECTOR                = 0x00000c0a,
+	HOST_TR_SELECTOR                = 0x00000c0c,
+	IO_BITMAP_A                     = 0x00002000,
+	IO_BITMAP_A_HIGH                = 0x00002001,
+	IO_BITMAP_B                     = 0x00002002,
+	IO_BITMAP_B_HIGH                = 0x00002003,
+	MSR_BITMAP                      = 0x00002004,
+	MSR_BITMAP_HIGH                 = 0x00002005,
+	VM_EXIT_MSR_STORE_ADDR          = 0x00002006,
+	VM_EXIT_MSR_STORE_ADDR_HIGH     = 0x00002007,
+	VM_EXIT_MSR_LOAD_ADDR           = 0x00002008,
+	VM_EXIT_MSR_LOAD_ADDR_HIGH      = 0x00002009,
+	VM_ENTRY_MSR_LOAD_ADDR          = 0x0000200a,
+	VM_ENTRY_MSR_LOAD_ADDR_HIGH     = 0x0000200b,
+	TSC_OFFSET                      = 0x00002010,
+	TSC_OFFSET_HIGH                 = 0x00002011,
+	VIRTUAL_APIC_PAGE_ADDR          = 0x00002012,
+	VIRTUAL_APIC_PAGE_ADDR_HIGH     = 0x00002013,
+	APIC_ACCESS_ADDR		= 0x00002014,
+	APIC_ACCESS_ADDR_HIGH		= 0x00002015,
+	EPT_POINTER                     = 0x0000201a,
+	EPT_POINTER_HIGH                = 0x0000201b,
+	GUEST_PHYSICAL_ADDRESS          = 0x00002400,
+	GUEST_PHYSICAL_ADDRESS_HIGH     = 0x00002401,
+	VMCS_LINK_POINTER               = 0x00002800,
+	VMCS_LINK_POINTER_HIGH          = 0x00002801,
+	GUEST_IA32_DEBUGCTL             = 0x00002802,
+	GUEST_IA32_DEBUGCTL_HIGH        = 0x00002803,
+	GUEST_IA32_PAT			= 0x00002804,
+	GUEST_IA32_PAT_HIGH		= 0x00002805,
+	GUEST_PDPTR0                    = 0x0000280a,
+	GUEST_PDPTR0_HIGH               = 0x0000280b,
+	GUEST_PDPTR1                    = 0x0000280c,
+	GUEST_PDPTR1_HIGH               = 0x0000280d,
+	GUEST_PDPTR2                    = 0x0000280e,
+	GUEST_PDPTR2_HIGH               = 0x0000280f,
+	GUEST_PDPTR3                    = 0x00002810,
+	GUEST_PDPTR3_HIGH               = 0x00002811,
+	HOST_IA32_PAT			= 0x00002c00,
+	HOST_IA32_PAT_HIGH		= 0x00002c01,
+	PIN_BASED_VM_EXEC_CONTROL       = 0x00004000,
+	CPU_BASED_VM_EXEC_CONTROL       = 0x00004002,
+	EXCEPTION_BITMAP                = 0x00004004,
+	PAGE_FAULT_ERROR_CODE_MASK      = 0x00004006,
+	PAGE_FAULT_ERROR_CODE_MATCH     = 0x00004008,
+	CR3_TARGET_COUNT                = 0x0000400a,
+	VM_EXIT_CONTROLS                = 0x0000400c,
+	VM_EXIT_MSR_STORE_COUNT         = 0x0000400e,
+	VM_EXIT_MSR_LOAD_COUNT          = 0x00004010,
+	VM_ENTRY_CONTROLS               = 0x00004012,
+	VM_ENTRY_MSR_LOAD_COUNT         = 0x00004014,
+	VM_ENTRY_INTR_INFO_FIELD        = 0x00004016,
+	VM_ENTRY_EXCEPTION_ERROR_CODE   = 0x00004018,
+	VM_ENTRY_INSTRUCTION_LEN        = 0x0000401a,
+	TPR_THRESHOLD                   = 0x0000401c,
+	SECONDARY_VM_EXEC_CONTROL       = 0x0000401e,
+	VM_INSTRUCTION_ERROR            = 0x00004400,
+	VM_EXIT_REASON                  = 0x00004402,
+	VM_EXIT_INTR_INFO               = 0x00004404,
+	VM_EXIT_INTR_ERROR_CODE         = 0x00004406,
+	IDT_VECTORING_INFO_FIELD        = 0x00004408,
+	IDT_VECTORING_ERROR_CODE        = 0x0000440a,
+	VM_EXIT_INSTRUCTION_LEN         = 0x0000440c,
+	VMX_INSTRUCTION_INFO            = 0x0000440e,
+	GUEST_ES_LIMIT                  = 0x00004800,
+	GUEST_CS_LIMIT                  = 0x00004802,
+	GUEST_SS_LIMIT                  = 0x00004804,
+	GUEST_DS_LIMIT                  = 0x00004806,
+	GUEST_FS_LIMIT                  = 0x00004808,
+	GUEST_GS_LIMIT                  = 0x0000480a,
+	GUEST_LDTR_LIMIT                = 0x0000480c,
+	GUEST_TR_LIMIT                  = 0x0000480e,
+	GUEST_GDTR_LIMIT                = 0x00004810,
+	GUEST_IDTR_LIMIT                = 0x00004812,
+	GUEST_ES_AR_BYTES               = 0x00004814,
+	GUEST_CS_AR_BYTES               = 0x00004816,
+	GUEST_SS_AR_BYTES               = 0x00004818,
+	GUEST_DS_AR_BYTES               = 0x0000481a,
+	GUEST_FS_AR_BYTES               = 0x0000481c,
+	GUEST_GS_AR_BYTES               = 0x0000481e,
+	GUEST_LDTR_AR_BYTES             = 0x00004820,
+	GUEST_TR_AR_BYTES               = 0x00004822,
+	GUEST_INTERRUPTIBILITY_INFO     = 0x00004824,
+	GUEST_ACTIVITY_STATE            = 0X00004826,
+	GUEST_SYSENTER_CS               = 0x0000482A,
+	HOST_IA32_SYSENTER_CS           = 0x00004c00,
+	CR0_GUEST_HOST_MASK             = 0x00006000,
+	CR4_GUEST_HOST_MASK             = 0x00006002,
+	CR0_READ_SHADOW                 = 0x00006004,
+	CR4_READ_SHADOW                 = 0x00006006,
+	CR3_TARGET_VALUE0               = 0x00006008,
+	CR3_TARGET_VALUE1               = 0x0000600a,
+	CR3_TARGET_VALUE2               = 0x0000600c,
+	CR3_TARGET_VALUE3               = 0x0000600e,
+	EXIT_QUALIFICATION              = 0x00006400,
+	GUEST_LINEAR_ADDRESS            = 0x0000640a,
+	GUEST_CR0                       = 0x00006800,
+	GUEST_CR3                       = 0x00006802,
+	GUEST_CR4                       = 0x00006804,
+	GUEST_ES_BASE                   = 0x00006806,
+	GUEST_CS_BASE                   = 0x00006808,
+	GUEST_SS_BASE                   = 0x0000680a,
+	GUEST_DS_BASE                   = 0x0000680c,
+	GUEST_FS_BASE                   = 0x0000680e,
+	GUEST_GS_BASE                   = 0x00006810,
+	GUEST_LDTR_BASE                 = 0x00006812,
+	GUEST_TR_BASE                   = 0x00006814,
+	GUEST_GDTR_BASE                 = 0x00006816,
+	GUEST_IDTR_BASE                 = 0x00006818,
+	GUEST_DR7                       = 0x0000681a,
+	GUEST_RSP                       = 0x0000681c,
+	GUEST_RIP                       = 0x0000681e,
+	GUEST_RFLAGS                    = 0x00006820,
+	GUEST_PENDING_DBG_EXCEPTIONS    = 0x00006822,
+	GUEST_SYSENTER_ESP              = 0x00006824,
+	GUEST_SYSENTER_EIP              = 0x00006826,
+	HOST_CR0                        = 0x00006c00,
+	HOST_CR3                        = 0x00006c02,
+	HOST_CR4                        = 0x00006c04,
+	HOST_FS_BASE                    = 0x00006c06,
+	HOST_GS_BASE                    = 0x00006c08,
+	HOST_TR_BASE                    = 0x00006c0a,
+	HOST_GDTR_BASE                  = 0x00006c0c,
+	HOST_IDTR_BASE                  = 0x00006c0e,
+	HOST_IA32_SYSENTER_ESP          = 0x00006c10,
+	HOST_IA32_SYSENTER_EIP          = 0x00006c12,
+	HOST_RSP                        = 0x00006c14,
+	HOST_RIP                        = 0x00006c16,
+};
+
+#define VMX_EXIT_REASONS_FAILED_VMENTRY         0x80000000
+
+#define EXIT_REASON_EXCEPTION_NMI       0
+#define EXIT_REASON_EXTERNAL_INTERRUPT  1
+#define EXIT_REASON_TRIPLE_FAULT        2
+
+#define EXIT_REASON_PENDING_INTERRUPT   7
+#define EXIT_REASON_NMI_WINDOW		8
+#define EXIT_REASON_TASK_SWITCH         9
+#define EXIT_REASON_CPUID               10
+#define EXIT_REASON_HLT                 12
+#define EXIT_REASON_INVLPG              14
+#define EXIT_REASON_RDPMC               15
+#define EXIT_REASON_RDTSC               16
+#define EXIT_REASON_VMCALL              18
+#define EXIT_REASON_VMCLEAR             19
+#define EXIT_REASON_VMLAUNCH            20
+#define EXIT_REASON_VMPTRLD             21
+#define EXIT_REASON_VMPTRST             22
+#define EXIT_REASON_VMREAD              23
+#define EXIT_REASON_VMRESUME            24
+#define EXIT_REASON_VMWRITE             25
+#define EXIT_REASON_VMOFF               26
+#define EXIT_REASON_VMON                27
+#define EXIT_REASON_CR_ACCESS           28
+#define EXIT_REASON_DR_ACCESS           29
+#define EXIT_REASON_IO_INSTRUCTION      30
+#define EXIT_REASON_MSR_READ            31
+#define EXIT_REASON_MSR_WRITE           32
+#define EXIT_REASON_MWAIT_INSTRUCTION   36
+#define EXIT_REASON_TPR_BELOW_THRESHOLD 43
+#define EXIT_REASON_APIC_ACCESS         44
+#define EXIT_REASON_EPT_VIOLATION       48
+#define EXIT_REASON_EPT_MISCONFIG       49
+#define EXIT_REASON_WBINVD		54
+
+/*
+ * Interruption-information format
+ */
+#define INTR_INFO_VECTOR_MASK           0xff            /* 7:0 */
+#define INTR_INFO_INTR_TYPE_MASK        0x700           /* 10:8 */
+#define INTR_INFO_DELIVER_CODE_MASK     0x800           /* 11 */
+#define INTR_INFO_UNBLOCK_NMI		0x1000		/* 12 */
+#define INTR_INFO_VALID_MASK            0x80000000      /* 31 */
+#define INTR_INFO_RESVD_BITS_MASK       0x7ffff000
+
+#define VECTORING_INFO_VECTOR_MASK           	INTR_INFO_VECTOR_MASK
+#define VECTORING_INFO_TYPE_MASK        	INTR_INFO_INTR_TYPE_MASK
+#define VECTORING_INFO_DELIVER_CODE_MASK    	INTR_INFO_DELIVER_CODE_MASK
+#define VECTORING_INFO_VALID_MASK       	INTR_INFO_VALID_MASK
+
+#define INTR_TYPE_EXT_INTR              (0 << 8) /* external interrupt */
+#define INTR_TYPE_NMI_INTR		(2 << 8) /* NMI */
+#define INTR_TYPE_EXCEPTION             (3 << 8) /* processor exception */
+#define INTR_TYPE_SOFT_INTR             (4 << 8) /* software interrupt */
+
+/* GUEST_INTERRUPTIBILITY_INFO flags. */
+#define GUEST_INTR_STATE_STI		0x00000001
+#define GUEST_INTR_STATE_MOV_SS		0x00000002
+#define GUEST_INTR_STATE_SMI		0x00000004
+#define GUEST_INTR_STATE_NMI		0x00000008
+
+/*
+ * Exit Qualifications for MOV for Control Register Access
+ */
+#define CONTROL_REG_ACCESS_NUM          0x7     /* 2:0, number of control reg.*/
+#define CONTROL_REG_ACCESS_TYPE         0x30    /* 5:4, access type */
+#define CONTROL_REG_ACCESS_REG          0xf00   /* 10:8, general purpose reg. */
+#define LMSW_SOURCE_DATA_SHIFT 16
+#define LMSW_SOURCE_DATA  (0xFFFF << LMSW_SOURCE_DATA_SHIFT) /* 16:31 lmsw source */
+#define REG_EAX                         (0 << 8)
+#define REG_ECX                         (1 << 8)
+#define REG_EDX                         (2 << 8)
+#define REG_EBX                         (3 << 8)
+#define REG_ESP                         (4 << 8)
+#define REG_EBP                         (5 << 8)
+#define REG_ESI                         (6 << 8)
+#define REG_EDI                         (7 << 8)
+#define REG_R8                         (8 << 8)
+#define REG_R9                         (9 << 8)
+#define REG_R10                        (10 << 8)
+#define REG_R11                        (11 << 8)
+#define REG_R12                        (12 << 8)
+#define REG_R13                        (13 << 8)
+#define REG_R14                        (14 << 8)
+#define REG_R15                        (15 << 8)
+
+/*
+ * Exit Qualifications for MOV for Debug Register Access
+ */
+#define DEBUG_REG_ACCESS_NUM            0x7     /* 2:0, number of debug reg. */
+#define DEBUG_REG_ACCESS_TYPE           0x10    /* 4, direction of access */
+#define TYPE_MOV_TO_DR                  (0 << 4)
+#define TYPE_MOV_FROM_DR                (1 << 4)
+#define DEBUG_REG_ACCESS_REG            0xf00   /* 11:8, general purpose reg. */
+
+
+/* segment AR */
+#define SEGMENT_AR_L_MASK (1 << 13)
+
+#define AR_TYPE_ACCESSES_MASK 1
+#define AR_TYPE_READABLE_MASK (1 << 1)
+#define AR_TYPE_WRITEABLE_MASK (1 << 2)
+#define AR_TYPE_CODE_MASK (1 << 3)
+#define AR_TYPE_MASK 0x0f
+#define AR_TYPE_BUSY_64_TSS 11
+#define AR_TYPE_BUSY_32_TSS 11
+#define AR_TYPE_BUSY_16_TSS 3
+#define AR_TYPE_LDT 2
+
+#define AR_UNUSABLE_MASK (1 << 16)
+#define AR_S_MASK (1 << 4)
+#define AR_P_MASK (1 << 7)
+#define AR_L_MASK (1 << 13)
+#define AR_DB_MASK (1 << 14)
+#define AR_G_MASK (1 << 15)
+#define AR_DPL_SHIFT 5
+#define AR_DPL(ar) (((ar) >> AR_DPL_SHIFT) & 3)
+
+#define AR_RESERVD_MASK 0xfffe0f00
+
+#define TSS_PRIVATE_MEMSLOT			(KVM_MEMORY_SLOTS + 0)
+#define APIC_ACCESS_PAGE_PRIVATE_MEMSLOT	(KVM_MEMORY_SLOTS + 1)
+#define IDENTITY_PAGETABLE_PRIVATE_MEMSLOT	(KVM_MEMORY_SLOTS + 2)
+
+#define VMX_NR_VPIDS				(1 << 16)
+#define VMX_VPID_EXTENT_SINGLE_CONTEXT		1
+#define VMX_VPID_EXTENT_ALL_CONTEXT		2
+
+#define VMX_EPT_EXTENT_INDIVIDUAL_ADDR		0
+#define VMX_EPT_EXTENT_CONTEXT			1
+#define VMX_EPT_EXTENT_GLOBAL			2
+#define VMX_EPT_EXTENT_INDIVIDUAL_BIT		(1ull << 24)
+#define VMX_EPT_EXTENT_CONTEXT_BIT		(1ull << 25)
+#define VMX_EPT_EXTENT_GLOBAL_BIT		(1ull << 26)
+#define VMX_EPT_DEFAULT_GAW			3
+#define VMX_EPT_MAX_GAW				0x4
+#define VMX_EPT_MT_EPTE_SHIFT			3
+#define VMX_EPT_GAW_EPTP_SHIFT			3
+#define VMX_EPT_DEFAULT_MT			0x6ull
+#define VMX_EPT_READABLE_MASK			0x1ull
+#define VMX_EPT_WRITABLE_MASK			0x2ull
+#define VMX_EPT_EXECUTABLE_MASK			0x4ull
+#define VMX_EPT_IGMT_BIT    			(1ull << 6)
+
+#define VMX_EPT_IDENTITY_PAGETABLE_ADDR		0xfffbc000ul
+
+#endif
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 8904e8ada97..fa3486d6407 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -17,7 +17,6 @@
  *
  */
 
-#include "vmx.h"
 #include "mmu.h"
 
 #include <linux/kvm_host.h>
@@ -33,6 +32,7 @@
 #include <asm/page.h>
 #include <asm/cmpxchg.h>
 #include <asm/io.h>
+#include <asm/vmx.h>
 
 /*
  * When setting this variable to true it enables Two-Dimensional-Paging
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 427dbc14fae..ec71f6464cf 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -16,7 +16,6 @@
  */
 
 #include "irq.h"
-#include "vmx.h"
 #include "mmu.h"
 
 #include <linux/kvm_host.h>
@@ -31,6 +30,7 @@
 
 #include <asm/io.h>
 #include <asm/desc.h>
+#include <asm/vmx.h>
 
 #define __ex(x) __kvm_handle_fault_on_reboot(x)
 
diff --git a/arch/x86/kvm/vmx.h b/arch/x86/kvm/vmx.h
deleted file mode 100644
index 3db236c26fa..00000000000
--- a/arch/x86/kvm/vmx.h
+++ /dev/null
@@ -1,367 +0,0 @@
-#ifndef VMX_H
-#define VMX_H
-
-/*
- * vmx.h: VMX Architecture related definitions
- * Copyright (c) 2004, Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
- * Place - Suite 330, Boston, MA 02111-1307 USA.
- *
- * A few random additions are:
- * Copyright (C) 2006 Qumranet
- *    Avi Kivity <avi@qumranet.com>
- *    Yaniv Kamay <yaniv@qumranet.com>
- *
- */
-
-/*
- * Definitions of Primary Processor-Based VM-Execution Controls.
- */
-#define CPU_BASED_VIRTUAL_INTR_PENDING          0x00000004
-#define CPU_BASED_USE_TSC_OFFSETING             0x00000008
-#define CPU_BASED_HLT_EXITING                   0x00000080
-#define CPU_BASED_INVLPG_EXITING                0x00000200
-#define CPU_BASED_MWAIT_EXITING                 0x00000400
-#define CPU_BASED_RDPMC_EXITING                 0x00000800
-#define CPU_BASED_RDTSC_EXITING                 0x00001000
-#define CPU_BASED_CR3_LOAD_EXITING		0x00008000
-#define CPU_BASED_CR3_STORE_EXITING		0x00010000
-#define CPU_BASED_CR8_LOAD_EXITING              0x00080000
-#define CPU_BASED_CR8_STORE_EXITING             0x00100000
-#define CPU_BASED_TPR_SHADOW                    0x00200000
-#define CPU_BASED_VIRTUAL_NMI_PENDING		0x00400000
-#define CPU_BASED_MOV_DR_EXITING                0x00800000
-#define CPU_BASED_UNCOND_IO_EXITING             0x01000000
-#define CPU_BASED_USE_IO_BITMAPS                0x02000000
-#define CPU_BASED_USE_MSR_BITMAPS               0x10000000
-#define CPU_BASED_MONITOR_EXITING               0x20000000
-#define CPU_BASED_PAUSE_EXITING                 0x40000000
-#define CPU_BASED_ACTIVATE_SECONDARY_CONTROLS   0x80000000
-/*
- * Definitions of Secondary Processor-Based VM-Execution Controls.
- */
-#define SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES 0x00000001
-#define SECONDARY_EXEC_ENABLE_EPT               0x00000002
-#define SECONDARY_EXEC_ENABLE_VPID              0x00000020
-#define SECONDARY_EXEC_WBINVD_EXITING		0x00000040
-
-
-#define PIN_BASED_EXT_INTR_MASK                 0x00000001
-#define PIN_BASED_NMI_EXITING                   0x00000008
-#define PIN_BASED_VIRTUAL_NMIS                  0x00000020
-
-#define VM_EXIT_HOST_ADDR_SPACE_SIZE            0x00000200
-#define VM_EXIT_ACK_INTR_ON_EXIT                0x00008000
-#define VM_EXIT_SAVE_IA32_PAT			0x00040000
-#define VM_EXIT_LOAD_IA32_PAT			0x00080000
-
-#define VM_ENTRY_IA32E_MODE                     0x00000200
-#define VM_ENTRY_SMM                            0x00000400
-#define VM_ENTRY_DEACT_DUAL_MONITOR             0x00000800
-#define VM_ENTRY_LOAD_IA32_PAT			0x00004000
-
-/* VMCS Encodings */
-enum vmcs_field {
-	VIRTUAL_PROCESSOR_ID            = 0x00000000,
-	GUEST_ES_SELECTOR               = 0x00000800,
-	GUEST_CS_SELECTOR               = 0x00000802,
-	GUEST_SS_SELECTOR               = 0x00000804,
-	GUEST_DS_SELECTOR               = 0x00000806,
-	GUEST_FS_SELECTOR               = 0x00000808,
-	GUEST_GS_SELECTOR               = 0x0000080a,
-	GUEST_LDTR_SELECTOR             = 0x0000080c,
-	GUEST_TR_SELECTOR               = 0x0000080e,
-	HOST_ES_SELECTOR                = 0x00000c00,
-	HOST_CS_SELECTOR                = 0x00000c02,
-	HOST_SS_SELECTOR                = 0x00000c04,
-	HOST_DS_SELECTOR                = 0x00000c06,
-	HOST_FS_SELECTOR                = 0x00000c08,
-	HOST_GS_SELECTOR                = 0x00000c0a,
-	HOST_TR_SELECTOR                = 0x00000c0c,
-	IO_BITMAP_A                     = 0x00002000,
-	IO_BITMAP_A_HIGH                = 0x00002001,
-	IO_BITMAP_B                     = 0x00002002,
-	IO_BITMAP_B_HIGH                = 0x00002003,
-	MSR_BITMAP                      = 0x00002004,
-	MSR_BITMAP_HIGH                 = 0x00002005,
-	VM_EXIT_MSR_STORE_ADDR          = 0x00002006,
-	VM_EXIT_MSR_STORE_ADDR_HIGH     = 0x00002007,
-	VM_EXIT_MSR_LOAD_ADDR           = 0x00002008,
-	VM_EXIT_MSR_LOAD_ADDR_HIGH      = 0x00002009,
-	VM_ENTRY_MSR_LOAD_ADDR          = 0x0000200a,
-	VM_ENTRY_MSR_LOAD_ADDR_HIGH     = 0x0000200b,
-	TSC_OFFSET                      = 0x00002010,
-	TSC_OFFSET_HIGH                 = 0x00002011,
-	VIRTUAL_APIC_PAGE_ADDR          = 0x00002012,
-	VIRTUAL_APIC_PAGE_ADDR_HIGH     = 0x00002013,
-	APIC_ACCESS_ADDR		= 0x00002014,
-	APIC_ACCESS_ADDR_HIGH		= 0x00002015,
-	EPT_POINTER                     = 0x0000201a,
-	EPT_POINTER_HIGH                = 0x0000201b,
-	GUEST_PHYSICAL_ADDRESS          = 0x00002400,
-	GUEST_PHYSICAL_ADDRESS_HIGH     = 0x00002401,
-	VMCS_LINK_POINTER               = 0x00002800,
-	VMCS_LINK_POINTER_HIGH          = 0x00002801,
-	GUEST_IA32_DEBUGCTL             = 0x00002802,
-	GUEST_IA32_DEBUGCTL_HIGH        = 0x00002803,
-	GUEST_IA32_PAT			= 0x00002804,
-	GUEST_IA32_PAT_HIGH		= 0x00002805,
-	GUEST_PDPTR0                    = 0x0000280a,
-	GUEST_PDPTR0_HIGH               = 0x0000280b,
-	GUEST_PDPTR1                    = 0x0000280c,
-	GUEST_PDPTR1_HIGH               = 0x0000280d,
-	GUEST_PDPTR2                    = 0x0000280e,
-	GUEST_PDPTR2_HIGH               = 0x0000280f,
-	GUEST_PDPTR3                    = 0x00002810,
-	GUEST_PDPTR3_HIGH               = 0x00002811,
-	HOST_IA32_PAT			= 0x00002c00,
-	HOST_IA32_PAT_HIGH		= 0x00002c01,
-	PIN_BASED_VM_EXEC_CONTROL       = 0x00004000,
-	CPU_BASED_VM_EXEC_CONTROL       = 0x00004002,
-	EXCEPTION_BITMAP                = 0x00004004,
-	PAGE_FAULT_ERROR_CODE_MASK      = 0x00004006,
-	PAGE_FAULT_ERROR_CODE_MATCH     = 0x00004008,
-	CR3_TARGET_COUNT                = 0x0000400a,
-	VM_EXIT_CONTROLS                = 0x0000400c,
-	VM_EXIT_MSR_STORE_COUNT         = 0x0000400e,
-	VM_EXIT_MSR_LOAD_COUNT          = 0x00004010,
-	VM_ENTRY_CONTROLS               = 0x00004012,
-	VM_ENTRY_MSR_LOAD_COUNT         = 0x00004014,
-	VM_ENTRY_INTR_INFO_FIELD        = 0x00004016,
-	VM_ENTRY_EXCEPTION_ERROR_CODE   = 0x00004018,
-	VM_ENTRY_INSTRUCTION_LEN        = 0x0000401a,
-	TPR_THRESHOLD                   = 0x0000401c,
-	SECONDARY_VM_EXEC_CONTROL       = 0x0000401e,
-	VM_INSTRUCTION_ERROR            = 0x00004400,
-	VM_EXIT_REASON                  = 0x00004402,
-	VM_EXIT_INTR_INFO               = 0x00004404,
-	VM_EXIT_INTR_ERROR_CODE         = 0x00004406,
-	IDT_VECTORING_INFO_FIELD        = 0x00004408,
-	IDT_VECTORING_ERROR_CODE        = 0x0000440a,
-	VM_EXIT_INSTRUCTION_LEN         = 0x0000440c,
-	VMX_INSTRUCTION_INFO            = 0x0000440e,
-	GUEST_ES_LIMIT                  = 0x00004800,
-	GUEST_CS_LIMIT                  = 0x00004802,
-	GUEST_SS_LIMIT                  = 0x00004804,
-	GUEST_DS_LIMIT                  = 0x00004806,
-	GUEST_FS_LIMIT                  = 0x00004808,
-	GUEST_GS_LIMIT                  = 0x0000480a,
-	GUEST_LDTR_LIMIT                = 0x0000480c,
-	GUEST_TR_LIMIT                  = 0x0000480e,
-	GUEST_GDTR_LIMIT                = 0x00004810,
-	GUEST_IDTR_LIMIT                = 0x00004812,
-	GUEST_ES_AR_BYTES               = 0x00004814,
-	GUEST_CS_AR_BYTES               = 0x00004816,
-	GUEST_SS_AR_BYTES               = 0x00004818,
-	GUEST_DS_AR_BYTES               = 0x0000481a,
-	GUEST_FS_AR_BYTES               = 0x0000481c,
-	GUEST_GS_AR_BYTES               = 0x0000481e,
-	GUEST_LDTR_AR_BYTES             = 0x00004820,
-	GUEST_TR_AR_BYTES               = 0x00004822,
-	GUEST_INTERRUPTIBILITY_INFO     = 0x00004824,
-	GUEST_ACTIVITY_STATE            = 0X00004826,
-	GUEST_SYSENTER_CS               = 0x0000482A,
-	HOST_IA32_SYSENTER_CS           = 0x00004c00,
-	CR0_GUEST_HOST_MASK             = 0x00006000,
-	CR4_GUEST_HOST_MASK             = 0x00006002,
-	CR0_READ_SHADOW                 = 0x00006004,
-	CR4_READ_SHADOW                 = 0x00006006,
-	CR3_TARGET_VALUE0               = 0x00006008,
-	CR3_TARGET_VALUE1               = 0x0000600a,
-	CR3_TARGET_VALUE2               = 0x0000600c,
-	CR3_TARGET_VALUE3               = 0x0000600e,
-	EXIT_QUALIFICATION              = 0x00006400,
-	GUEST_LINEAR_ADDRESS            = 0x0000640a,
-	GUEST_CR0                       = 0x00006800,
-	GUEST_CR3                       = 0x00006802,
-	GUEST_CR4                       = 0x00006804,
-	GUEST_ES_BASE                   = 0x00006806,
-	GUEST_CS_BASE                   = 0x00006808,
-	GUEST_SS_BASE                   = 0x0000680a,
-	GUEST_DS_BASE                   = 0x0000680c,
-	GUEST_FS_BASE                   = 0x0000680e,
-	GUEST_GS_BASE                   = 0x00006810,
-	GUEST_LDTR_BASE                 = 0x00006812,
-	GUEST_TR_BASE                   = 0x00006814,
-	GUEST_GDTR_BASE                 = 0x00006816,
-	GUEST_IDTR_BASE                 = 0x00006818,
-	GUEST_DR7                       = 0x0000681a,
-	GUEST_RSP                       = 0x0000681c,
-	GUEST_RIP                       = 0x0000681e,
-	GUEST_RFLAGS                    = 0x00006820,
-	GUEST_PENDING_DBG_EXCEPTIONS    = 0x00006822,
-	GUEST_SYSENTER_ESP              = 0x00006824,
-	GUEST_SYSENTER_EIP              = 0x00006826,
-	HOST_CR0                        = 0x00006c00,
-	HOST_CR3                        = 0x00006c02,
-	HOST_CR4                        = 0x00006c04,
-	HOST_FS_BASE                    = 0x00006c06,
-	HOST_GS_BASE                    = 0x00006c08,
-	HOST_TR_BASE                    = 0x00006c0a,
-	HOST_GDTR_BASE                  = 0x00006c0c,
-	HOST_IDTR_BASE                  = 0x00006c0e,
-	HOST_IA32_SYSENTER_ESP          = 0x00006c10,
-	HOST_IA32_SYSENTER_EIP          = 0x00006c12,
-	HOST_RSP                        = 0x00006c14,
-	HOST_RIP                        = 0x00006c16,
-};
-
-#define VMX_EXIT_REASONS_FAILED_VMENTRY         0x80000000
-
-#define EXIT_REASON_EXCEPTION_NMI       0
-#define EXIT_REASON_EXTERNAL_INTERRUPT  1
-#define EXIT_REASON_TRIPLE_FAULT        2
-
-#define EXIT_REASON_PENDING_INTERRUPT   7
-#define EXIT_REASON_NMI_WINDOW		8
-#define EXIT_REASON_TASK_SWITCH         9
-#define EXIT_REASON_CPUID               10
-#define EXIT_REASON_HLT                 12
-#define EXIT_REASON_INVLPG              14
-#define EXIT_REASON_RDPMC               15
-#define EXIT_REASON_RDTSC               16
-#define EXIT_REASON_VMCALL              18
-#define EXIT_REASON_VMCLEAR             19
-#define EXIT_REASON_VMLAUNCH            20
-#define EXIT_REASON_VMPTRLD             21
-#define EXIT_REASON_VMPTRST             22
-#define EXIT_REASON_VMREAD              23
-#define EXIT_REASON_VMRESUME            24
-#define EXIT_REASON_VMWRITE             25
-#define EXIT_REASON_VMOFF               26
-#define EXIT_REASON_VMON                27
-#define EXIT_REASON_CR_ACCESS           28
-#define EXIT_REASON_DR_ACCESS           29
-#define EXIT_REASON_IO_INSTRUCTION      30
-#define EXIT_REASON_MSR_READ            31
-#define EXIT_REASON_MSR_WRITE           32
-#define EXIT_REASON_MWAIT_INSTRUCTION   36
-#define EXIT_REASON_TPR_BELOW_THRESHOLD 43
-#define EXIT_REASON_APIC_ACCESS         44
-#define EXIT_REASON_EPT_VIOLATION       48
-#define EXIT_REASON_EPT_MISCONFIG       49
-#define EXIT_REASON_WBINVD		54
-
-/*
- * Interruption-information format
- */
-#define INTR_INFO_VECTOR_MASK           0xff            /* 7:0 */
-#define INTR_INFO_INTR_TYPE_MASK        0x700           /* 10:8 */
-#define INTR_INFO_DELIVER_CODE_MASK     0x800           /* 11 */
-#define INTR_INFO_UNBLOCK_NMI		0x1000		/* 12 */
-#define INTR_INFO_VALID_MASK            0x80000000      /* 31 */
-#define INTR_INFO_RESVD_BITS_MASK       0x7ffff000
-
-#define VECTORING_INFO_VECTOR_MASK           	INTR_INFO_VECTOR_MASK
-#define VECTORING_INFO_TYPE_MASK        	INTR_INFO_INTR_TYPE_MASK
-#define VECTORING_INFO_DELIVER_CODE_MASK    	INTR_INFO_DELIVER_CODE_MASK
-#define VECTORING_INFO_VALID_MASK       	INTR_INFO_VALID_MASK
-
-#define INTR_TYPE_EXT_INTR              (0 << 8) /* external interrupt */
-#define INTR_TYPE_NMI_INTR		(2 << 8) /* NMI */
-#define INTR_TYPE_EXCEPTION             (3 << 8) /* processor exception */
-#define INTR_TYPE_SOFT_INTR             (4 << 8) /* software interrupt */
-
-/* GUEST_INTERRUPTIBILITY_INFO flags. */
-#define GUEST_INTR_STATE_STI		0x00000001
-#define GUEST_INTR_STATE_MOV_SS		0x00000002
-#define GUEST_INTR_STATE_SMI		0x00000004
-#define GUEST_INTR_STATE_NMI		0x00000008
-
-/*
- * Exit Qualifications for MOV for Control Register Access
- */
-#define CONTROL_REG_ACCESS_NUM          0x7     /* 2:0, number of control reg.*/
-#define CONTROL_REG_ACCESS_TYPE         0x30    /* 5:4, access type */
-#define CONTROL_REG_ACCESS_REG          0xf00   /* 10:8, general purpose reg. */
-#define LMSW_SOURCE_DATA_SHIFT 16
-#define LMSW_SOURCE_DATA  (0xFFFF << LMSW_SOURCE_DATA_SHIFT) /* 16:31 lmsw source */
-#define REG_EAX                         (0 << 8)
-#define REG_ECX                         (1 << 8)
-#define REG_EDX                         (2 << 8)
-#define REG_EBX                         (3 << 8)
-#define REG_ESP                         (4 << 8)
-#define REG_EBP                         (5 << 8)
-#define REG_ESI                         (6 << 8)
-#define REG_EDI                         (7 << 8)
-#define REG_R8                         (8 << 8)
-#define REG_R9                         (9 << 8)
-#define REG_R10                        (10 << 8)
-#define REG_R11                        (11 << 8)
-#define REG_R12                        (12 << 8)
-#define REG_R13                        (13 << 8)
-#define REG_R14                        (14 << 8)
-#define REG_R15                        (15 << 8)
-
-/*
- * Exit Qualifications for MOV for Debug Register Access
- */
-#define DEBUG_REG_ACCESS_NUM            0x7     /* 2:0, number of debug reg. */
-#define DEBUG_REG_ACCESS_TYPE           0x10    /* 4, direction of access */
-#define TYPE_MOV_TO_DR                  (0 << 4)
-#define TYPE_MOV_FROM_DR                (1 << 4)
-#define DEBUG_REG_ACCESS_REG            0xf00   /* 11:8, general purpose reg. */
-
-
-/* segment AR */
-#define SEGMENT_AR_L_MASK (1 << 13)
-
-#define AR_TYPE_ACCESSES_MASK 1
-#define AR_TYPE_READABLE_MASK (1 << 1)
-#define AR_TYPE_WRITEABLE_MASK (1 << 2)
-#define AR_TYPE_CODE_MASK (1 << 3)
-#define AR_TYPE_MASK 0x0f
-#define AR_TYPE_BUSY_64_TSS 11
-#define AR_TYPE_BUSY_32_TSS 11
-#define AR_TYPE_BUSY_16_TSS 3
-#define AR_TYPE_LDT 2
-
-#define AR_UNUSABLE_MASK (1 << 16)
-#define AR_S_MASK (1 << 4)
-#define AR_P_MASK (1 << 7)
-#define AR_L_MASK (1 << 13)
-#define AR_DB_MASK (1 << 14)
-#define AR_G_MASK (1 << 15)
-#define AR_DPL_SHIFT 5
-#define AR_DPL(ar) (((ar) >> AR_DPL_SHIFT) & 3)
-
-#define AR_RESERVD_MASK 0xfffe0f00
-
-#define TSS_PRIVATE_MEMSLOT			(KVM_MEMORY_SLOTS + 0)
-#define APIC_ACCESS_PAGE_PRIVATE_MEMSLOT	(KVM_MEMORY_SLOTS + 1)
-#define IDENTITY_PAGETABLE_PRIVATE_MEMSLOT	(KVM_MEMORY_SLOTS + 2)
-
-#define VMX_NR_VPIDS				(1 << 16)
-#define VMX_VPID_EXTENT_SINGLE_CONTEXT		1
-#define VMX_VPID_EXTENT_ALL_CONTEXT		2
-
-#define VMX_EPT_EXTENT_INDIVIDUAL_ADDR		0
-#define VMX_EPT_EXTENT_CONTEXT			1
-#define VMX_EPT_EXTENT_GLOBAL			2
-#define VMX_EPT_EXTENT_INDIVIDUAL_BIT		(1ull << 24)
-#define VMX_EPT_EXTENT_CONTEXT_BIT		(1ull << 25)
-#define VMX_EPT_EXTENT_GLOBAL_BIT		(1ull << 26)
-#define VMX_EPT_DEFAULT_GAW			3
-#define VMX_EPT_MAX_GAW				0x4
-#define VMX_EPT_MT_EPTE_SHIFT			3
-#define VMX_EPT_GAW_EPTP_SHIFT			3
-#define VMX_EPT_DEFAULT_MT			0x6ull
-#define VMX_EPT_READABLE_MASK			0x1ull
-#define VMX_EPT_WRITABLE_MASK			0x2ull
-#define VMX_EPT_EXECUTABLE_MASK			0x4ull
-#define VMX_EPT_IGMT_BIT    			(1ull << 6)
-
-#define VMX_EPT_IDENTITY_PAGETABLE_ADDR		0xfffbc000ul
-
-#endif
-- 
cgit v1.2.3


From c2cedf7be2017e3264c93a4c0d75b1d96d0d7104 Mon Sep 17 00:00:00 2001
From: Eduardo Habkost <ehabkost@redhat.com>
Date: Mon, 17 Nov 2008 19:03:14 -0200
Subject: KVM: SVM: move svm.h to include/asm

svm.h will be used by core code that is independent of KVM, so I am
moving it outside the arch/x86/kvm directory.

Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/include/asm/svm.h | 328 +++++++++++++++++++++++++++++++++++++++++++++
 arch/x86/kvm/kvm_svm.h     |   2 +-
 arch/x86/kvm/svm.h         | 328 ---------------------------------------------
 3 files changed, 329 insertions(+), 329 deletions(-)
 create mode 100644 arch/x86/include/asm/svm.h
 delete mode 100644 arch/x86/kvm/svm.h

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h
new file mode 100644
index 00000000000..1b8afa78e86
--- /dev/null
+++ b/arch/x86/include/asm/svm.h
@@ -0,0 +1,328 @@
+#ifndef __SVM_H
+#define __SVM_H
+
+enum {
+	INTERCEPT_INTR,
+	INTERCEPT_NMI,
+	INTERCEPT_SMI,
+	INTERCEPT_INIT,
+	INTERCEPT_VINTR,
+	INTERCEPT_SELECTIVE_CR0,
+	INTERCEPT_STORE_IDTR,
+	INTERCEPT_STORE_GDTR,
+	INTERCEPT_STORE_LDTR,
+	INTERCEPT_STORE_TR,
+	INTERCEPT_LOAD_IDTR,
+	INTERCEPT_LOAD_GDTR,
+	INTERCEPT_LOAD_LDTR,
+	INTERCEPT_LOAD_TR,
+	INTERCEPT_RDTSC,
+	INTERCEPT_RDPMC,
+	INTERCEPT_PUSHF,
+	INTERCEPT_POPF,
+	INTERCEPT_CPUID,
+	INTERCEPT_RSM,
+	INTERCEPT_IRET,
+	INTERCEPT_INTn,
+	INTERCEPT_INVD,
+	INTERCEPT_PAUSE,
+	INTERCEPT_HLT,
+	INTERCEPT_INVLPG,
+	INTERCEPT_INVLPGA,
+	INTERCEPT_IOIO_PROT,
+	INTERCEPT_MSR_PROT,
+	INTERCEPT_TASK_SWITCH,
+	INTERCEPT_FERR_FREEZE,
+	INTERCEPT_SHUTDOWN,
+	INTERCEPT_VMRUN,
+	INTERCEPT_VMMCALL,
+	INTERCEPT_VMLOAD,
+	INTERCEPT_VMSAVE,
+	INTERCEPT_STGI,
+	INTERCEPT_CLGI,
+	INTERCEPT_SKINIT,
+	INTERCEPT_RDTSCP,
+	INTERCEPT_ICEBP,
+	INTERCEPT_WBINVD,
+	INTERCEPT_MONITOR,
+	INTERCEPT_MWAIT,
+	INTERCEPT_MWAIT_COND,
+};
+
+
+struct __attribute__ ((__packed__)) vmcb_control_area {
+	u16 intercept_cr_read;
+	u16 intercept_cr_write;
+	u16 intercept_dr_read;
+	u16 intercept_dr_write;
+	u32 intercept_exceptions;
+	u64 intercept;
+	u8 reserved_1[44];
+	u64 iopm_base_pa;
+	u64 msrpm_base_pa;
+	u64 tsc_offset;
+	u32 asid;
+	u8 tlb_ctl;
+	u8 reserved_2[3];
+	u32 int_ctl;
+	u32 int_vector;
+	u32 int_state;
+	u8 reserved_3[4];
+	u32 exit_code;
+	u32 exit_code_hi;
+	u64 exit_info_1;
+	u64 exit_info_2;
+	u32 exit_int_info;
+	u32 exit_int_info_err;
+	u64 nested_ctl;
+	u8 reserved_4[16];
+	u32 event_inj;
+	u32 event_inj_err;
+	u64 nested_cr3;
+	u64 lbr_ctl;
+	u8 reserved_5[832];
+};
+
+
+#define TLB_CONTROL_DO_NOTHING 0
+#define TLB_CONTROL_FLUSH_ALL_ASID 1
+
+#define V_TPR_MASK 0x0f
+
+#define V_IRQ_SHIFT 8
+#define V_IRQ_MASK (1 << V_IRQ_SHIFT)
+
+#define V_INTR_PRIO_SHIFT 16
+#define V_INTR_PRIO_MASK (0x0f << V_INTR_PRIO_SHIFT)
+
+#define V_IGN_TPR_SHIFT 20
+#define V_IGN_TPR_MASK (1 << V_IGN_TPR_SHIFT)
+
+#define V_INTR_MASKING_SHIFT 24
+#define V_INTR_MASKING_MASK (1 << V_INTR_MASKING_SHIFT)
+
+#define SVM_INTERRUPT_SHADOW_MASK 1
+
+#define SVM_IOIO_STR_SHIFT 2
+#define SVM_IOIO_REP_SHIFT 3
+#define SVM_IOIO_SIZE_SHIFT 4
+#define SVM_IOIO_ASIZE_SHIFT 7
+
+#define SVM_IOIO_TYPE_MASK 1
+#define SVM_IOIO_STR_MASK (1 << SVM_IOIO_STR_SHIFT)
+#define SVM_IOIO_REP_MASK (1 << SVM_IOIO_REP_SHIFT)
+#define SVM_IOIO_SIZE_MASK (7 << SVM_IOIO_SIZE_SHIFT)
+#define SVM_IOIO_ASIZE_MASK (7 << SVM_IOIO_ASIZE_SHIFT)
+
+struct __attribute__ ((__packed__)) vmcb_seg {
+	u16 selector;
+	u16 attrib;
+	u32 limit;
+	u64 base;
+};
+
+struct __attribute__ ((__packed__)) vmcb_save_area {
+	struct vmcb_seg es;
+	struct vmcb_seg cs;
+	struct vmcb_seg ss;
+	struct vmcb_seg ds;
+	struct vmcb_seg fs;
+	struct vmcb_seg gs;
+	struct vmcb_seg gdtr;
+	struct vmcb_seg ldtr;
+	struct vmcb_seg idtr;
+	struct vmcb_seg tr;
+	u8 reserved_1[43];
+	u8 cpl;
+	u8 reserved_2[4];
+	u64 efer;
+	u8 reserved_3[112];
+	u64 cr4;
+	u64 cr3;
+	u64 cr0;
+	u64 dr7;
+	u64 dr6;
+	u64 rflags;
+	u64 rip;
+	u8 reserved_4[88];
+	u64 rsp;
+	u8 reserved_5[24];
+	u64 rax;
+	u64 star;
+	u64 lstar;
+	u64 cstar;
+	u64 sfmask;
+	u64 kernel_gs_base;
+	u64 sysenter_cs;
+	u64 sysenter_esp;
+	u64 sysenter_eip;
+	u64 cr2;
+	u8 reserved_6[32];
+	u64 g_pat;
+	u64 dbgctl;
+	u64 br_from;
+	u64 br_to;
+	u64 last_excp_from;
+	u64 last_excp_to;
+};
+
+struct __attribute__ ((__packed__)) vmcb {
+	struct vmcb_control_area control;
+	struct vmcb_save_area save;
+};
+
+#define SVM_CPUID_FEATURE_SHIFT 2
+#define SVM_CPUID_FUNC 0x8000000a
+
+#define MSR_EFER_SVME_MASK (1ULL << 12)
+#define MSR_VM_CR       0xc0010114
+#define MSR_VM_HSAVE_PA 0xc0010117ULL
+
+#define SVM_VM_CR_SVM_DISABLE 4
+
+#define SVM_SELECTOR_S_SHIFT 4
+#define SVM_SELECTOR_DPL_SHIFT 5
+#define SVM_SELECTOR_P_SHIFT 7
+#define SVM_SELECTOR_AVL_SHIFT 8
+#define SVM_SELECTOR_L_SHIFT 9
+#define SVM_SELECTOR_DB_SHIFT 10
+#define SVM_SELECTOR_G_SHIFT 11
+
+#define SVM_SELECTOR_TYPE_MASK (0xf)
+#define SVM_SELECTOR_S_MASK (1 << SVM_SELECTOR_S_SHIFT)
+#define SVM_SELECTOR_DPL_MASK (3 << SVM_SELECTOR_DPL_SHIFT)
+#define SVM_SELECTOR_P_MASK (1 << SVM_SELECTOR_P_SHIFT)
+#define SVM_SELECTOR_AVL_MASK (1 << SVM_SELECTOR_AVL_SHIFT)
+#define SVM_SELECTOR_L_MASK (1 << SVM_SELECTOR_L_SHIFT)
+#define SVM_SELECTOR_DB_MASK (1 << SVM_SELECTOR_DB_SHIFT)
+#define SVM_SELECTOR_G_MASK (1 << SVM_SELECTOR_G_SHIFT)
+
+#define SVM_SELECTOR_WRITE_MASK (1 << 1)
+#define SVM_SELECTOR_READ_MASK SVM_SELECTOR_WRITE_MASK
+#define SVM_SELECTOR_CODE_MASK (1 << 3)
+
+#define INTERCEPT_CR0_MASK 1
+#define INTERCEPT_CR3_MASK (1 << 3)
+#define INTERCEPT_CR4_MASK (1 << 4)
+#define INTERCEPT_CR8_MASK (1 << 8)
+
+#define INTERCEPT_DR0_MASK 1
+#define INTERCEPT_DR1_MASK (1 << 1)
+#define INTERCEPT_DR2_MASK (1 << 2)
+#define INTERCEPT_DR3_MASK (1 << 3)
+#define INTERCEPT_DR4_MASK (1 << 4)
+#define INTERCEPT_DR5_MASK (1 << 5)
+#define INTERCEPT_DR6_MASK (1 << 6)
+#define INTERCEPT_DR7_MASK (1 << 7)
+
+#define SVM_EVTINJ_VEC_MASK 0xff
+
+#define SVM_EVTINJ_TYPE_SHIFT 8
+#define SVM_EVTINJ_TYPE_MASK (7 << SVM_EVTINJ_TYPE_SHIFT)
+
+#define SVM_EVTINJ_TYPE_INTR (0 << SVM_EVTINJ_TYPE_SHIFT)
+#define SVM_EVTINJ_TYPE_NMI (2 << SVM_EVTINJ_TYPE_SHIFT)
+#define SVM_EVTINJ_TYPE_EXEPT (3 << SVM_EVTINJ_TYPE_SHIFT)
+#define SVM_EVTINJ_TYPE_SOFT (4 << SVM_EVTINJ_TYPE_SHIFT)
+
+#define SVM_EVTINJ_VALID (1 << 31)
+#define SVM_EVTINJ_VALID_ERR (1 << 11)
+
+#define SVM_EXITINTINFO_VEC_MASK SVM_EVTINJ_VEC_MASK
+
+#define	SVM_EXITINTINFO_TYPE_INTR SVM_EVTINJ_TYPE_INTR
+#define	SVM_EXITINTINFO_TYPE_NMI SVM_EVTINJ_TYPE_NMI
+#define	SVM_EXITINTINFO_TYPE_EXEPT SVM_EVTINJ_TYPE_EXEPT
+#define	SVM_EXITINTINFO_TYPE_SOFT SVM_EVTINJ_TYPE_SOFT
+
+#define SVM_EXITINTINFO_VALID SVM_EVTINJ_VALID
+#define SVM_EXITINTINFO_VALID_ERR SVM_EVTINJ_VALID_ERR
+
+#define SVM_EXITINFOSHIFT_TS_REASON_IRET 36
+#define SVM_EXITINFOSHIFT_TS_REASON_JMP 38
+
+#define	SVM_EXIT_READ_CR0 	0x000
+#define	SVM_EXIT_READ_CR3 	0x003
+#define	SVM_EXIT_READ_CR4 	0x004
+#define	SVM_EXIT_READ_CR8 	0x008
+#define	SVM_EXIT_WRITE_CR0 	0x010
+#define	SVM_EXIT_WRITE_CR3 	0x013
+#define	SVM_EXIT_WRITE_CR4 	0x014
+#define	SVM_EXIT_WRITE_CR8 	0x018
+#define	SVM_EXIT_READ_DR0 	0x020
+#define	SVM_EXIT_READ_DR1 	0x021
+#define	SVM_EXIT_READ_DR2 	0x022
+#define	SVM_EXIT_READ_DR3 	0x023
+#define	SVM_EXIT_READ_DR4 	0x024
+#define	SVM_EXIT_READ_DR5 	0x025
+#define	SVM_EXIT_READ_DR6 	0x026
+#define	SVM_EXIT_READ_DR7 	0x027
+#define	SVM_EXIT_WRITE_DR0 	0x030
+#define	SVM_EXIT_WRITE_DR1 	0x031
+#define	SVM_EXIT_WRITE_DR2 	0x032
+#define	SVM_EXIT_WRITE_DR3 	0x033
+#define	SVM_EXIT_WRITE_DR4 	0x034
+#define	SVM_EXIT_WRITE_DR5 	0x035
+#define	SVM_EXIT_WRITE_DR6 	0x036
+#define	SVM_EXIT_WRITE_DR7 	0x037
+#define SVM_EXIT_EXCP_BASE      0x040
+#define SVM_EXIT_INTR		0x060
+#define SVM_EXIT_NMI		0x061
+#define SVM_EXIT_SMI		0x062
+#define SVM_EXIT_INIT		0x063
+#define SVM_EXIT_VINTR		0x064
+#define SVM_EXIT_CR0_SEL_WRITE	0x065
+#define SVM_EXIT_IDTR_READ	0x066
+#define SVM_EXIT_GDTR_READ	0x067
+#define SVM_EXIT_LDTR_READ	0x068
+#define SVM_EXIT_TR_READ	0x069
+#define SVM_EXIT_IDTR_WRITE	0x06a
+#define SVM_EXIT_GDTR_WRITE	0x06b
+#define SVM_EXIT_LDTR_WRITE	0x06c
+#define SVM_EXIT_TR_WRITE	0x06d
+#define SVM_EXIT_RDTSC		0x06e
+#define SVM_EXIT_RDPMC		0x06f
+#define SVM_EXIT_PUSHF		0x070
+#define SVM_EXIT_POPF		0x071
+#define SVM_EXIT_CPUID		0x072
+#define SVM_EXIT_RSM		0x073
+#define SVM_EXIT_IRET		0x074
+#define SVM_EXIT_SWINT		0x075
+#define SVM_EXIT_INVD		0x076
+#define SVM_EXIT_PAUSE		0x077
+#define SVM_EXIT_HLT		0x078
+#define SVM_EXIT_INVLPG		0x079
+#define SVM_EXIT_INVLPGA	0x07a
+#define SVM_EXIT_IOIO		0x07b
+#define SVM_EXIT_MSR		0x07c
+#define SVM_EXIT_TASK_SWITCH	0x07d
+#define SVM_EXIT_FERR_FREEZE	0x07e
+#define SVM_EXIT_SHUTDOWN	0x07f
+#define SVM_EXIT_VMRUN		0x080
+#define SVM_EXIT_VMMCALL	0x081
+#define SVM_EXIT_VMLOAD		0x082
+#define SVM_EXIT_VMSAVE		0x083
+#define SVM_EXIT_STGI		0x084
+#define SVM_EXIT_CLGI		0x085
+#define SVM_EXIT_SKINIT		0x086
+#define SVM_EXIT_RDTSCP		0x087
+#define SVM_EXIT_ICEBP		0x088
+#define SVM_EXIT_WBINVD		0x089
+#define SVM_EXIT_MONITOR	0x08a
+#define SVM_EXIT_MWAIT		0x08b
+#define SVM_EXIT_MWAIT_COND	0x08c
+#define SVM_EXIT_NPF  		0x400
+
+#define SVM_EXIT_ERR		-1
+
+#define SVM_CR0_SELECTIVE_MASK (1 << 3 | 1) /* TS and MP */
+
+#define SVM_VMLOAD ".byte 0x0f, 0x01, 0xda"
+#define SVM_VMRUN  ".byte 0x0f, 0x01, 0xd8"
+#define SVM_VMSAVE ".byte 0x0f, 0x01, 0xdb"
+#define SVM_CLGI   ".byte 0x0f, 0x01, 0xdd"
+#define SVM_STGI   ".byte 0x0f, 0x01, 0xdc"
+#define SVM_INVLPGA ".byte 0x0f, 0x01, 0xdf"
+
+#endif
+
diff --git a/arch/x86/kvm/kvm_svm.h b/arch/x86/kvm/kvm_svm.h
index 65ef0fc2c03..8e5ee99551f 100644
--- a/arch/x86/kvm/kvm_svm.h
+++ b/arch/x86/kvm/kvm_svm.h
@@ -7,7 +7,7 @@
 #include <linux/kvm_host.h>
 #include <asm/msr.h>
 
-#include "svm.h"
+#include <asm/svm.h>
 
 static const u32 host_save_user_msrs[] = {
 #ifdef CONFIG_X86_64
diff --git a/arch/x86/kvm/svm.h b/arch/x86/kvm/svm.h
deleted file mode 100644
index 1b8afa78e86..00000000000
--- a/arch/x86/kvm/svm.h
+++ /dev/null
@@ -1,328 +0,0 @@
-#ifndef __SVM_H
-#define __SVM_H
-
-enum {
-	INTERCEPT_INTR,
-	INTERCEPT_NMI,
-	INTERCEPT_SMI,
-	INTERCEPT_INIT,
-	INTERCEPT_VINTR,
-	INTERCEPT_SELECTIVE_CR0,
-	INTERCEPT_STORE_IDTR,
-	INTERCEPT_STORE_GDTR,
-	INTERCEPT_STORE_LDTR,
-	INTERCEPT_STORE_TR,
-	INTERCEPT_LOAD_IDTR,
-	INTERCEPT_LOAD_GDTR,
-	INTERCEPT_LOAD_LDTR,
-	INTERCEPT_LOAD_TR,
-	INTERCEPT_RDTSC,
-	INTERCEPT_RDPMC,
-	INTERCEPT_PUSHF,
-	INTERCEPT_POPF,
-	INTERCEPT_CPUID,
-	INTERCEPT_RSM,
-	INTERCEPT_IRET,
-	INTERCEPT_INTn,
-	INTERCEPT_INVD,
-	INTERCEPT_PAUSE,
-	INTERCEPT_HLT,
-	INTERCEPT_INVLPG,
-	INTERCEPT_INVLPGA,
-	INTERCEPT_IOIO_PROT,
-	INTERCEPT_MSR_PROT,
-	INTERCEPT_TASK_SWITCH,
-	INTERCEPT_FERR_FREEZE,
-	INTERCEPT_SHUTDOWN,
-	INTERCEPT_VMRUN,
-	INTERCEPT_VMMCALL,
-	INTERCEPT_VMLOAD,
-	INTERCEPT_VMSAVE,
-	INTERCEPT_STGI,
-	INTERCEPT_CLGI,
-	INTERCEPT_SKINIT,
-	INTERCEPT_RDTSCP,
-	INTERCEPT_ICEBP,
-	INTERCEPT_WBINVD,
-	INTERCEPT_MONITOR,
-	INTERCEPT_MWAIT,
-	INTERCEPT_MWAIT_COND,
-};
-
-
-struct __attribute__ ((__packed__)) vmcb_control_area {
-	u16 intercept_cr_read;
-	u16 intercept_cr_write;
-	u16 intercept_dr_read;
-	u16 intercept_dr_write;
-	u32 intercept_exceptions;
-	u64 intercept;
-	u8 reserved_1[44];
-	u64 iopm_base_pa;
-	u64 msrpm_base_pa;
-	u64 tsc_offset;
-	u32 asid;
-	u8 tlb_ctl;
-	u8 reserved_2[3];
-	u32 int_ctl;
-	u32 int_vector;
-	u32 int_state;
-	u8 reserved_3[4];
-	u32 exit_code;
-	u32 exit_code_hi;
-	u64 exit_info_1;
-	u64 exit_info_2;
-	u32 exit_int_info;
-	u32 exit_int_info_err;
-	u64 nested_ctl;
-	u8 reserved_4[16];
-	u32 event_inj;
-	u32 event_inj_err;
-	u64 nested_cr3;
-	u64 lbr_ctl;
-	u8 reserved_5[832];
-};
-
-
-#define TLB_CONTROL_DO_NOTHING 0
-#define TLB_CONTROL_FLUSH_ALL_ASID 1
-
-#define V_TPR_MASK 0x0f
-
-#define V_IRQ_SHIFT 8
-#define V_IRQ_MASK (1 << V_IRQ_SHIFT)
-
-#define V_INTR_PRIO_SHIFT 16
-#define V_INTR_PRIO_MASK (0x0f << V_INTR_PRIO_SHIFT)
-
-#define V_IGN_TPR_SHIFT 20
-#define V_IGN_TPR_MASK (1 << V_IGN_TPR_SHIFT)
-
-#define V_INTR_MASKING_SHIFT 24
-#define V_INTR_MASKING_MASK (1 << V_INTR_MASKING_SHIFT)
-
-#define SVM_INTERRUPT_SHADOW_MASK 1
-
-#define SVM_IOIO_STR_SHIFT 2
-#define SVM_IOIO_REP_SHIFT 3
-#define SVM_IOIO_SIZE_SHIFT 4
-#define SVM_IOIO_ASIZE_SHIFT 7
-
-#define SVM_IOIO_TYPE_MASK 1
-#define SVM_IOIO_STR_MASK (1 << SVM_IOIO_STR_SHIFT)
-#define SVM_IOIO_REP_MASK (1 << SVM_IOIO_REP_SHIFT)
-#define SVM_IOIO_SIZE_MASK (7 << SVM_IOIO_SIZE_SHIFT)
-#define SVM_IOIO_ASIZE_MASK (7 << SVM_IOIO_ASIZE_SHIFT)
-
-struct __attribute__ ((__packed__)) vmcb_seg {
-	u16 selector;
-	u16 attrib;
-	u32 limit;
-	u64 base;
-};
-
-struct __attribute__ ((__packed__)) vmcb_save_area {
-	struct vmcb_seg es;
-	struct vmcb_seg cs;
-	struct vmcb_seg ss;
-	struct vmcb_seg ds;
-	struct vmcb_seg fs;
-	struct vmcb_seg gs;
-	struct vmcb_seg gdtr;
-	struct vmcb_seg ldtr;
-	struct vmcb_seg idtr;
-	struct vmcb_seg tr;
-	u8 reserved_1[43];
-	u8 cpl;
-	u8 reserved_2[4];
-	u64 efer;
-	u8 reserved_3[112];
-	u64 cr4;
-	u64 cr3;
-	u64 cr0;
-	u64 dr7;
-	u64 dr6;
-	u64 rflags;
-	u64 rip;
-	u8 reserved_4[88];
-	u64 rsp;
-	u8 reserved_5[24];
-	u64 rax;
-	u64 star;
-	u64 lstar;
-	u64 cstar;
-	u64 sfmask;
-	u64 kernel_gs_base;
-	u64 sysenter_cs;
-	u64 sysenter_esp;
-	u64 sysenter_eip;
-	u64 cr2;
-	u8 reserved_6[32];
-	u64 g_pat;
-	u64 dbgctl;
-	u64 br_from;
-	u64 br_to;
-	u64 last_excp_from;
-	u64 last_excp_to;
-};
-
-struct __attribute__ ((__packed__)) vmcb {
-	struct vmcb_control_area control;
-	struct vmcb_save_area save;
-};
-
-#define SVM_CPUID_FEATURE_SHIFT 2
-#define SVM_CPUID_FUNC 0x8000000a
-
-#define MSR_EFER_SVME_MASK (1ULL << 12)
-#define MSR_VM_CR       0xc0010114
-#define MSR_VM_HSAVE_PA 0xc0010117ULL
-
-#define SVM_VM_CR_SVM_DISABLE 4
-
-#define SVM_SELECTOR_S_SHIFT 4
-#define SVM_SELECTOR_DPL_SHIFT 5
-#define SVM_SELECTOR_P_SHIFT 7
-#define SVM_SELECTOR_AVL_SHIFT 8
-#define SVM_SELECTOR_L_SHIFT 9
-#define SVM_SELECTOR_DB_SHIFT 10
-#define SVM_SELECTOR_G_SHIFT 11
-
-#define SVM_SELECTOR_TYPE_MASK (0xf)
-#define SVM_SELECTOR_S_MASK (1 << SVM_SELECTOR_S_SHIFT)
-#define SVM_SELECTOR_DPL_MASK (3 << SVM_SELECTOR_DPL_SHIFT)
-#define SVM_SELECTOR_P_MASK (1 << SVM_SELECTOR_P_SHIFT)
-#define SVM_SELECTOR_AVL_MASK (1 << SVM_SELECTOR_AVL_SHIFT)
-#define SVM_SELECTOR_L_MASK (1 << SVM_SELECTOR_L_SHIFT)
-#define SVM_SELECTOR_DB_MASK (1 << SVM_SELECTOR_DB_SHIFT)
-#define SVM_SELECTOR_G_MASK (1 << SVM_SELECTOR_G_SHIFT)
-
-#define SVM_SELECTOR_WRITE_MASK (1 << 1)
-#define SVM_SELECTOR_READ_MASK SVM_SELECTOR_WRITE_MASK
-#define SVM_SELECTOR_CODE_MASK (1 << 3)
-
-#define INTERCEPT_CR0_MASK 1
-#define INTERCEPT_CR3_MASK (1 << 3)
-#define INTERCEPT_CR4_MASK (1 << 4)
-#define INTERCEPT_CR8_MASK (1 << 8)
-
-#define INTERCEPT_DR0_MASK 1
-#define INTERCEPT_DR1_MASK (1 << 1)
-#define INTERCEPT_DR2_MASK (1 << 2)
-#define INTERCEPT_DR3_MASK (1 << 3)
-#define INTERCEPT_DR4_MASK (1 << 4)
-#define INTERCEPT_DR5_MASK (1 << 5)
-#define INTERCEPT_DR6_MASK (1 << 6)
-#define INTERCEPT_DR7_MASK (1 << 7)
-
-#define SVM_EVTINJ_VEC_MASK 0xff
-
-#define SVM_EVTINJ_TYPE_SHIFT 8
-#define SVM_EVTINJ_TYPE_MASK (7 << SVM_EVTINJ_TYPE_SHIFT)
-
-#define SVM_EVTINJ_TYPE_INTR (0 << SVM_EVTINJ_TYPE_SHIFT)
-#define SVM_EVTINJ_TYPE_NMI (2 << SVM_EVTINJ_TYPE_SHIFT)
-#define SVM_EVTINJ_TYPE_EXEPT (3 << SVM_EVTINJ_TYPE_SHIFT)
-#define SVM_EVTINJ_TYPE_SOFT (4 << SVM_EVTINJ_TYPE_SHIFT)
-
-#define SVM_EVTINJ_VALID (1 << 31)
-#define SVM_EVTINJ_VALID_ERR (1 << 11)
-
-#define SVM_EXITINTINFO_VEC_MASK SVM_EVTINJ_VEC_MASK
-
-#define	SVM_EXITINTINFO_TYPE_INTR SVM_EVTINJ_TYPE_INTR
-#define	SVM_EXITINTINFO_TYPE_NMI SVM_EVTINJ_TYPE_NMI
-#define	SVM_EXITINTINFO_TYPE_EXEPT SVM_EVTINJ_TYPE_EXEPT
-#define	SVM_EXITINTINFO_TYPE_SOFT SVM_EVTINJ_TYPE_SOFT
-
-#define SVM_EXITINTINFO_VALID SVM_EVTINJ_VALID
-#define SVM_EXITINTINFO_VALID_ERR SVM_EVTINJ_VALID_ERR
-
-#define SVM_EXITINFOSHIFT_TS_REASON_IRET 36
-#define SVM_EXITINFOSHIFT_TS_REASON_JMP 38
-
-#define	SVM_EXIT_READ_CR0 	0x000
-#define	SVM_EXIT_READ_CR3 	0x003
-#define	SVM_EXIT_READ_CR4 	0x004
-#define	SVM_EXIT_READ_CR8 	0x008
-#define	SVM_EXIT_WRITE_CR0 	0x010
-#define	SVM_EXIT_WRITE_CR3 	0x013
-#define	SVM_EXIT_WRITE_CR4 	0x014
-#define	SVM_EXIT_WRITE_CR8 	0x018
-#define	SVM_EXIT_READ_DR0 	0x020
-#define	SVM_EXIT_READ_DR1 	0x021
-#define	SVM_EXIT_READ_DR2 	0x022
-#define	SVM_EXIT_READ_DR3 	0x023
-#define	SVM_EXIT_READ_DR4 	0x024
-#define	SVM_EXIT_READ_DR5 	0x025
-#define	SVM_EXIT_READ_DR6 	0x026
-#define	SVM_EXIT_READ_DR7 	0x027
-#define	SVM_EXIT_WRITE_DR0 	0x030
-#define	SVM_EXIT_WRITE_DR1 	0x031
-#define	SVM_EXIT_WRITE_DR2 	0x032
-#define	SVM_EXIT_WRITE_DR3 	0x033
-#define	SVM_EXIT_WRITE_DR4 	0x034
-#define	SVM_EXIT_WRITE_DR5 	0x035
-#define	SVM_EXIT_WRITE_DR6 	0x036
-#define	SVM_EXIT_WRITE_DR7 	0x037
-#define SVM_EXIT_EXCP_BASE      0x040
-#define SVM_EXIT_INTR		0x060
-#define SVM_EXIT_NMI		0x061
-#define SVM_EXIT_SMI		0x062
-#define SVM_EXIT_INIT		0x063
-#define SVM_EXIT_VINTR		0x064
-#define SVM_EXIT_CR0_SEL_WRITE	0x065
-#define SVM_EXIT_IDTR_READ	0x066
-#define SVM_EXIT_GDTR_READ	0x067
-#define SVM_EXIT_LDTR_READ	0x068
-#define SVM_EXIT_TR_READ	0x069
-#define SVM_EXIT_IDTR_WRITE	0x06a
-#define SVM_EXIT_GDTR_WRITE	0x06b
-#define SVM_EXIT_LDTR_WRITE	0x06c
-#define SVM_EXIT_TR_WRITE	0x06d
-#define SVM_EXIT_RDTSC		0x06e
-#define SVM_EXIT_RDPMC		0x06f
-#define SVM_EXIT_PUSHF		0x070
-#define SVM_EXIT_POPF		0x071
-#define SVM_EXIT_CPUID		0x072
-#define SVM_EXIT_RSM		0x073
-#define SVM_EXIT_IRET		0x074
-#define SVM_EXIT_SWINT		0x075
-#define SVM_EXIT_INVD		0x076
-#define SVM_EXIT_PAUSE		0x077
-#define SVM_EXIT_HLT		0x078
-#define SVM_EXIT_INVLPG		0x079
-#define SVM_EXIT_INVLPGA	0x07a
-#define SVM_EXIT_IOIO		0x07b
-#define SVM_EXIT_MSR		0x07c
-#define SVM_EXIT_TASK_SWITCH	0x07d
-#define SVM_EXIT_FERR_FREEZE	0x07e
-#define SVM_EXIT_SHUTDOWN	0x07f
-#define SVM_EXIT_VMRUN		0x080
-#define SVM_EXIT_VMMCALL	0x081
-#define SVM_EXIT_VMLOAD		0x082
-#define SVM_EXIT_VMSAVE		0x083
-#define SVM_EXIT_STGI		0x084
-#define SVM_EXIT_CLGI		0x085
-#define SVM_EXIT_SKINIT		0x086
-#define SVM_EXIT_RDTSCP		0x087
-#define SVM_EXIT_ICEBP		0x088
-#define SVM_EXIT_WBINVD		0x089
-#define SVM_EXIT_MONITOR	0x08a
-#define SVM_EXIT_MWAIT		0x08b
-#define SVM_EXIT_MWAIT_COND	0x08c
-#define SVM_EXIT_NPF  		0x400
-
-#define SVM_EXIT_ERR		-1
-
-#define SVM_CR0_SELECTIVE_MASK (1 << 3 | 1) /* TS and MP */
-
-#define SVM_VMLOAD ".byte 0x0f, 0x01, 0xda"
-#define SVM_VMRUN  ".byte 0x0f, 0x01, 0xd8"
-#define SVM_VMSAVE ".byte 0x0f, 0x01, 0xdb"
-#define SVM_CLGI   ".byte 0x0f, 0x01, 0xdd"
-#define SVM_STGI   ".byte 0x0f, 0x01, 0xdc"
-#define SVM_INVLPGA ".byte 0x0f, 0x01, 0xdf"
-
-#endif
-
-- 
cgit v1.2.3


From eca70fc5671b226966dfb7ee9953d59199288566 Mon Sep 17 00:00:00 2001
From: Eduardo Habkost <ehabkost@redhat.com>
Date: Mon, 17 Nov 2008 19:03:15 -0200
Subject: KVM: VMX: move ASM_VMX_* definitions from asm/kvm_host.h to asm/vmx.h

Those definitions will be used by code outside KVM, so move it outside
of a KVM-specific source file.

Those definitions are used only on kvm/vmx.c, that already includes
asm/vmx.h, so they can be moved safely.

Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/include/asm/kvm_host.h | 12 ------------
 arch/x86/include/asm/vmx.h      | 15 +++++++++++++++
 2 files changed, 15 insertions(+), 12 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 99e3cc149d2..f58f7ebdea8 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -714,18 +714,6 @@ static inline void kvm_inject_gp(struct kvm_vcpu *vcpu, u32 error_code)
 	kvm_queue_exception_e(vcpu, GP_VECTOR, error_code);
 }
 
-#define ASM_VMX_VMCLEAR_RAX       ".byte 0x66, 0x0f, 0xc7, 0x30"
-#define ASM_VMX_VMLAUNCH          ".byte 0x0f, 0x01, 0xc2"
-#define ASM_VMX_VMRESUME          ".byte 0x0f, 0x01, 0xc3"
-#define ASM_VMX_VMPTRLD_RAX       ".byte 0x0f, 0xc7, 0x30"
-#define ASM_VMX_VMREAD_RDX_RAX    ".byte 0x0f, 0x78, 0xd0"
-#define ASM_VMX_VMWRITE_RAX_RDX   ".byte 0x0f, 0x79, 0xd0"
-#define ASM_VMX_VMWRITE_RSP_RDX   ".byte 0x0f, 0x79, 0xd4"
-#define ASM_VMX_VMXOFF            ".byte 0x0f, 0x01, 0xc4"
-#define ASM_VMX_VMXON_RAX         ".byte 0xf3, 0x0f, 0xc7, 0x30"
-#define ASM_VMX_INVEPT		  ".byte 0x66, 0x0f, 0x38, 0x80, 0x08"
-#define ASM_VMX_INVVPID		  ".byte 0x66, 0x0f, 0x38, 0x81, 0x08"
-
 #define MSR_IA32_TIME_STAMP_COUNTER		0x010
 
 #define TSS_IOPB_BASE_OFFSET 0x66
diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
index 3db236c26fa..d0238e6151d 100644
--- a/arch/x86/include/asm/vmx.h
+++ b/arch/x86/include/asm/vmx.h
@@ -364,4 +364,19 @@ enum vmcs_field {
 
 #define VMX_EPT_IDENTITY_PAGETABLE_ADDR		0xfffbc000ul
 
+
+#define ASM_VMX_VMCLEAR_RAX       ".byte 0x66, 0x0f, 0xc7, 0x30"
+#define ASM_VMX_VMLAUNCH          ".byte 0x0f, 0x01, 0xc2"
+#define ASM_VMX_VMRESUME          ".byte 0x0f, 0x01, 0xc3"
+#define ASM_VMX_VMPTRLD_RAX       ".byte 0x0f, 0xc7, 0x30"
+#define ASM_VMX_VMREAD_RDX_RAX    ".byte 0x0f, 0x78, 0xd0"
+#define ASM_VMX_VMWRITE_RAX_RDX   ".byte 0x0f, 0x79, 0xd0"
+#define ASM_VMX_VMWRITE_RSP_RDX   ".byte 0x0f, 0x79, 0xd4"
+#define ASM_VMX_VMXOFF            ".byte 0x0f, 0x01, 0xc4"
+#define ASM_VMX_VMXON_RAX         ".byte 0xf3, 0x0f, 0xc7, 0x30"
+#define ASM_VMX_INVEPT		  ".byte 0x66, 0x0f, 0x38, 0x80, 0x08"
+#define ASM_VMX_INVVPID		  ".byte 0x66, 0x0f, 0x38, 0x81, 0x08"
+
+
+
 #endif
-- 
cgit v1.2.3


From 6210e37b122583643da335c0389f74098713e5ca Mon Sep 17 00:00:00 2001
From: Eduardo Habkost <ehabkost@redhat.com>
Date: Mon, 17 Nov 2008 19:03:16 -0200
Subject: KVM: VMX: move cpu_has_kvm_support() to an inline on asm/virtext.h

It will be used by core code on kdump and reboot, to disable
vmx if needed.

Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/include/asm/virtext.h | 31 +++++++++++++++++++++++++++++++
 arch/x86/kvm/vmx.c             |  4 ++--
 2 files changed, 33 insertions(+), 2 deletions(-)
 create mode 100644 arch/x86/include/asm/virtext.h

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/virtext.h b/arch/x86/include/asm/virtext.h
new file mode 100644
index 00000000000..298b6a06110
--- /dev/null
+++ b/arch/x86/include/asm/virtext.h
@@ -0,0 +1,31 @@
+/* CPU virtualization extensions handling
+ *
+ * This should carry the code for handling CPU virtualization extensions
+ * that needs to live in the kernel core.
+ *
+ * Author: Eduardo Habkost <ehabkost@redhat.com>
+ *
+ * Copyright (C) 2008, Red Hat Inc.
+ *
+ * Contains code from KVM, Copyright (C) 2006 Qumranet, Inc.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.  See
+ * the COPYING file in the top-level directory.
+ */
+#ifndef _ASM_X86_VIRTEX_H
+#define _ASM_X86_VIRTEX_H
+
+#include <asm/processor.h>
+#include <asm/system.h>
+
+/*
+ * VMX functions:
+ */
+
+static inline int cpu_has_vmx(void)
+{
+	unsigned long ecx = cpuid_ecx(1);
+	return test_bit(5, &ecx); /* CPUID.1:ECX.VMX[bit 5] -> VT */
+}
+
+#endif /* _ASM_X86_VIRTEX_H */
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index ec71f6464cf..defaeeb3f75 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -31,6 +31,7 @@
 #include <asm/io.h>
 #include <asm/desc.h>
 #include <asm/vmx.h>
+#include <asm/virtext.h>
 
 #define __ex(x) __kvm_handle_fault_on_reboot(x)
 
@@ -1044,8 +1045,7 @@ static int vmx_get_irq(struct kvm_vcpu *vcpu)
 
 static __init int cpu_has_kvm_support(void)
 {
-	unsigned long ecx = cpuid_ecx(1);
-	return test_bit(5, &ecx); /* CPUID.1:ECX.VMX[bit 5] -> VT */
+	return cpu_has_vmx();
 }
 
 static __init int vmx_disabled_by_bios(void)
-- 
cgit v1.2.3


From 1e9931146c748420343aeefadb3bb17bd1c14a37 Mon Sep 17 00:00:00 2001
From: Eduardo Habkost <ehabkost@redhat.com>
Date: Mon, 17 Nov 2008 19:03:17 -0200
Subject: x86: asm/virtext.h: add cpu_vmxoff() inline function

Unfortunately we can't use exactly the same code from vmx
hardware_disable(), because the KVM function uses the
__kvm_handle_fault_on_reboot() tricks.

Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/include/asm/virtext.h | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/virtext.h b/arch/x86/include/asm/virtext.h
index 298b6a06110..7dee5b59930 100644
--- a/arch/x86/include/asm/virtext.h
+++ b/arch/x86/include/asm/virtext.h
@@ -18,6 +18,8 @@
 #include <asm/processor.h>
 #include <asm/system.h>
 
+#include <asm/vmx.h>
+
 /*
  * VMX functions:
  */
@@ -28,4 +30,17 @@ static inline int cpu_has_vmx(void)
 	return test_bit(5, &ecx); /* CPUID.1:ECX.VMX[bit 5] -> VT */
 }
 
+
+/** Disable VMX on the current CPU
+ *
+ * vmxoff causes a undefined-opcode exception if vmxon was not run
+ * on the CPU previously. Only call this function if you know VMX
+ * is enabled.
+ */
+static inline void cpu_vmxoff(void)
+{
+	asm volatile (ASM_VMX_VMXOFF : : : "cc");
+	write_cr4(read_cr4() & ~X86_CR4_VMXE);
+}
+
 #endif /* _ASM_X86_VIRTEX_H */
-- 
cgit v1.2.3


From 710ff4a855d0f3bf74b5b4a20328e2858a8a2968 Mon Sep 17 00:00:00 2001
From: Eduardo Habkost <ehabkost@redhat.com>
Date: Mon, 17 Nov 2008 19:03:18 -0200
Subject: KVM: VMX: extract kvm_cpu_vmxoff() from hardware_disable()

Along with some comments on why it is different from the core cpu_vmxoff()
function.

Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/kvm/vmx.c | 13 +++++++++++--
 1 file changed, 11 insertions(+), 2 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index defaeeb3f75..f5958a7823f 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -1091,13 +1091,22 @@ static void vmclear_local_vcpus(void)
 		__vcpu_clear(vmx);
 }
 
-static void hardware_disable(void *garbage)
+
+/* Just like cpu_vmxoff(), but with the __kvm_handle_fault_on_reboot()
+ * tricks.
+ */
+static void kvm_cpu_vmxoff(void)
 {
-	vmclear_local_vcpus();
 	asm volatile (__ex(ASM_VMX_VMXOFF) : : : "cc");
 	write_cr4(read_cr4() & ~X86_CR4_VMXE);
 }
 
+static void hardware_disable(void *garbage)
+{
+	vmclear_local_vcpus();
+	kvm_cpu_vmxoff();
+}
+
 static __init int adjust_vmx_controls(u32 ctl_min, u32 ctl_opt,
 				      u32 msr, u32 *result)
 {
-- 
cgit v1.2.3


From 6aa07a0d77f6aafbe69e4e8609ffaf2b7ee1b591 Mon Sep 17 00:00:00 2001
From: Eduardo Habkost <ehabkost@redhat.com>
Date: Mon, 17 Nov 2008 19:03:19 -0200
Subject: x86: cpu_emergency_vmxoff() function

Add cpu_emergency_vmxoff() and its friends: cpu_vmx_enabled() and
__cpu_emergency_vmxoff().

Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/include/asm/virtext.h | 23 +++++++++++++++++++++++
 1 file changed, 23 insertions(+)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/virtext.h b/arch/x86/include/asm/virtext.h
index 7dee5b59930..6bcf0acb4ef 100644
--- a/arch/x86/include/asm/virtext.h
+++ b/arch/x86/include/asm/virtext.h
@@ -43,4 +43,27 @@ static inline void cpu_vmxoff(void)
 	write_cr4(read_cr4() & ~X86_CR4_VMXE);
 }
 
+static inline int cpu_vmx_enabled(void)
+{
+	return read_cr4() & X86_CR4_VMXE;
+}
+
+/** Disable VMX if it is enabled on the current CPU
+ *
+ * You shouldn't call this if cpu_has_vmx() returns 0.
+ */
+static inline void __cpu_emergency_vmxoff(void)
+{
+	if (cpu_vmx_enabled())
+		cpu_vmxoff();
+}
+
+/** Disable VMX if it is supported and enabled on the current CPU
+ */
+static inline void cpu_emergency_vmxoff(void)
+{
+	if (cpu_has_vmx())
+		__cpu_emergency_vmxoff();
+}
+
 #endif /* _ASM_X86_VIRTEX_H */
-- 
cgit v1.2.3


From 63d1142f8f69e39468bc6079ab2239e902828134 Mon Sep 17 00:00:00 2001
From: Eduardo Habkost <ehabkost@redhat.com>
Date: Mon, 17 Nov 2008 19:03:20 -0200
Subject: KVM: SVM: move has_svm() code to asm/virtext.h

Use a trick to keep the printk()s on has_svm() working as before. gcc
will take care of not generating code for the 'msg' stuff when the
function is called with a NULL msg argument.

Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/include/asm/virtext.h | 41 +++++++++++++++++++++++++++++++++++++++++
 arch/x86/kvm/svm.c             | 19 +++++--------------
 2 files changed, 46 insertions(+), 14 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/virtext.h b/arch/x86/include/asm/virtext.h
index 6bcf0acb4ef..6f0d409c368 100644
--- a/arch/x86/include/asm/virtext.h
+++ b/arch/x86/include/asm/virtext.h
@@ -19,6 +19,7 @@
 #include <asm/system.h>
 
 #include <asm/vmx.h>
+#include <asm/svm.h>
 
 /*
  * VMX functions:
@@ -66,4 +67,44 @@ static inline void cpu_emergency_vmxoff(void)
 		__cpu_emergency_vmxoff();
 }
 
+
+
+
+/*
+ * SVM functions:
+ */
+
+/** Check if the CPU has SVM support
+ *
+ * You can use the 'msg' arg to get a message describing the problem,
+ * if the function returns zero. Simply pass NULL if you are not interested
+ * on the messages; gcc should take care of not generating code for
+ * the messages on this case.
+ */
+static inline int cpu_has_svm(const char **msg)
+{
+	uint32_t eax, ebx, ecx, edx;
+
+	if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD) {
+		if (msg)
+			*msg = "not amd";
+		return 0;
+	}
+
+	cpuid(0x80000000, &eax, &ebx, &ecx, &edx);
+	if (eax < SVM_CPUID_FUNC) {
+		if (msg)
+			*msg = "can't execute cpuid_8000000a";
+		return 0;
+	}
+
+	cpuid(0x80000001, &eax, &ebx, &ecx, &edx);
+	if (!(ecx & (1 << SVM_CPUID_FEATURE_SHIFT))) {
+		if (msg)
+			*msg = "svm not available";
+		return 0;
+	}
+	return 1;
+}
+
 #endif /* _ASM_X86_VIRTEX_H */
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index f0ad4d4217e..0667c6d13d3 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -28,6 +28,8 @@
 
 #include <asm/desc.h>
 
+#include <asm/virtext.h>
+
 #define __ex(x) __kvm_handle_fault_on_reboot(x)
 
 MODULE_AUTHOR("Qumranet");
@@ -245,24 +247,13 @@ static void skip_emulated_instruction(struct kvm_vcpu *vcpu)
 
 static int has_svm(void)
 {
-	uint32_t eax, ebx, ecx, edx;
-
-	if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD) {
-		printk(KERN_INFO "has_svm: not amd\n");
-		return 0;
-	}
+	const char *msg;
 
-	cpuid(0x80000000, &eax, &ebx, &ecx, &edx);
-	if (eax < SVM_CPUID_FUNC) {
-		printk(KERN_INFO "has_svm: can't execute cpuid_8000000a\n");
+	if (!cpu_has_svm(&msg)) {
+		printk(KERN_INFO "has_svn: %s\n", msg);
 		return 0;
 	}
 
-	cpuid(0x80000001, &eax, &ebx, &ecx, &edx);
-	if (!(ecx & (1 << SVM_CPUID_FEATURE_SHIFT))) {
-		printk(KERN_DEBUG "has_svm: svm not available\n");
-		return 0;
-	}
 	return 1;
 }
 
-- 
cgit v1.2.3


From 2c8dceebb238680d5577500f8283397d41ca5590 Mon Sep 17 00:00:00 2001
From: Eduardo Habkost <ehabkost@redhat.com>
Date: Mon, 17 Nov 2008 19:03:21 -0200
Subject: KVM: SVM: move svm_hardware_disable() code to asm/virtext.h

Create cpu_svm_disable() function.

Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/include/asm/virtext.h | 14 ++++++++++++++
 arch/x86/kvm/svm.c             |  6 +-----
 2 files changed, 15 insertions(+), 5 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/virtext.h b/arch/x86/include/asm/virtext.h
index 6f0d409c368..2cfe363729c 100644
--- a/arch/x86/include/asm/virtext.h
+++ b/arch/x86/include/asm/virtext.h
@@ -107,4 +107,18 @@ static inline int cpu_has_svm(const char **msg)
 	return 1;
 }
 
+
+/** Disable SVM on the current CPU
+ *
+ * You should call this only if cpu_has_svm() returned true.
+ */
+static inline void cpu_svm_disable(void)
+{
+	uint64_t efer;
+
+	wrmsrl(MSR_VM_HSAVE_PA, 0);
+	rdmsrl(MSR_EFER, efer);
+	wrmsrl(MSR_EFER, efer & ~MSR_EFER_SVME_MASK);
+}
+
 #endif /* _ASM_X86_VIRTEX_H */
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 0667c6d13d3..1452851ae25 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -259,11 +259,7 @@ static int has_svm(void)
 
 static void svm_hardware_disable(void *garbage)
 {
-	uint64_t efer;
-
-	wrmsrl(MSR_VM_HSAVE_PA, 0);
-	rdmsrl(MSR_EFER, efer);
-	wrmsrl(MSR_EFER, efer & ~MSR_EFER_SVME_MASK);
+	cpu_svm_disable();
 }
 
 static void svm_hardware_enable(void *garbage)
-- 
cgit v1.2.3


From 0f3e9eeba0ea212bbea88790729d054b700ab91e Mon Sep 17 00:00:00 2001
From: Eduardo Habkost <ehabkost@redhat.com>
Date: Mon, 17 Nov 2008 19:03:22 -0200
Subject: x86: cpu_emergency_svm_disable() function

This function can be used by the reboot or kdump code to forcibly
disable SVM on the CPU.

Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/include/asm/virtext.h | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/virtext.h b/arch/x86/include/asm/virtext.h
index 2cfe363729c..59363627523 100644
--- a/arch/x86/include/asm/virtext.h
+++ b/arch/x86/include/asm/virtext.h
@@ -121,4 +121,12 @@ static inline void cpu_svm_disable(void)
 	wrmsrl(MSR_EFER, efer & ~MSR_EFER_SVME_MASK);
 }
 
+/** Makes sure SVM is disabled, if it is supported on the CPU
+ */
+static inline void cpu_emergency_svm_disable(void)
+{
+	if (cpu_has_svm(NULL))
+		cpu_svm_disable();
+}
+
 #endif /* _ASM_X86_VIRTEX_H */
-- 
cgit v1.2.3


From 2340b62f77c782c305e6ae7748675a638436d1ef Mon Sep 17 00:00:00 2001
From: Eduardo Habkost <ehabkost@redhat.com>
Date: Mon, 17 Nov 2008 19:03:23 -0200
Subject: kdump: forcibly disable VMX and SVM on machine_crash_shutdown()

We need to disable virtualization extensions on all CPUs before booting
the kdump kernel, otherwise the kdump kernel booting will fail, and
rebooting after the kdump kernel did its task may also fail.

We do it using cpu_emergency_vmxoff() and cpu_emergency_svm_disable(),
that should always work, because those functions check if the CPUs
support SVM or VMX before doing their tasks.

Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/kernel/crash.c | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c
index d84a852e4cd..c689d19e35a 100644
--- a/arch/x86/kernel/crash.c
+++ b/arch/x86/kernel/crash.c
@@ -26,6 +26,7 @@
 #include <linux/kdebug.h>
 #include <asm/smp.h>
 #include <asm/reboot.h>
+#include <asm/virtext.h>
 
 #include <mach_ipi.h>
 
@@ -49,6 +50,15 @@ static void kdump_nmi_callback(int cpu, struct die_args *args)
 #endif
 	crash_save_cpu(regs, cpu);
 
+	/* Disable VMX or SVM if needed.
+	 *
+	 * We need to disable virtualization on all CPUs.
+	 * Having VMX or SVM enabled on any CPU may break rebooting
+	 * after the kdump kernel has finished its task.
+	 */
+	cpu_emergency_vmxoff();
+	cpu_emergency_svm_disable();
+
 	disable_local_APIC();
 }
 
@@ -80,6 +90,14 @@ void native_machine_crash_shutdown(struct pt_regs *regs)
 	local_irq_disable();
 
 	kdump_nmi_shootdown_cpus();
+
+	/* Booting kdump kernel with VMX or SVM enabled won't work,
+	 * because (among other limitations) we can't disable paging
+	 * with the virt flags.
+	 */
+	cpu_emergency_vmxoff();
+	cpu_emergency_svm_disable();
+
 	lapic_shutdown();
 #if defined(CONFIG_X86_IO_APIC)
 	disable_IO_APIC();
-- 
cgit v1.2.3


From d176720d34c72f7a8474a12204add93e54fe3ef1 Mon Sep 17 00:00:00 2001
From: Eduardo Habkost <ehabkost@redhat.com>
Date: Mon, 17 Nov 2008 19:03:24 -0200
Subject: x86: disable VMX on all CPUs on reboot

On emergency_restart, we may need to use an NMI to disable virtualization
on all CPUs. We do that using nmi_shootdown_cpus() if VMX is enabled.

Note: With this patch, we will run the NMI stuff only when the CPU where
emergency_restart() was called has VMX enabled. This should work on most
cases because KVM enables VMX on all CPUs, but we may miss the small
window where KVM is doing that. Also, I don't know if all code using
VMX out there always enable VMX on all CPUs like KVM does. We have two
other alternatives for that:

a) Have an API that all code that enables VMX on any CPU should use
   to tell the kernel core that it is going to enable VMX on the CPUs.
b) Always call nmi_shootdown_cpus() if the CPU supports VMX. This is
   a bit intrusive and more risky, as it would run nmi_shootdown_cpus()
   on emergency_reboot() even on systems where virtualization is never
   enabled.

Finding a proper point to hook the nmi_shootdown_cpus() call isn't
trivial, as the non-emergency machine_restart() (that doesn't need the
NMI tricks) uses machine_emergency_restart() directly.

The solution to make this work without adding a new function or argument
to machine_ops was setting a 'reboot_emergency' flag that tells if
native_machine_emergency_restart() needs to do the virt cleanup or not.

Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/kernel/reboot.c | 62 ++++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 60 insertions(+), 2 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index 61f718df6ee..72e0e4e712d 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -12,6 +12,7 @@
 #include <asm/proto.h>
 #include <asm/reboot_fixups.h>
 #include <asm/reboot.h>
+#include <asm/virtext.h>
 
 #ifdef CONFIG_X86_32
 # include <linux/dmi.h>
@@ -39,6 +40,12 @@ int reboot_force;
 static int reboot_cpu = -1;
 #endif
 
+/* This is set if we need to go through the 'emergency' path.
+ * When machine_emergency_restart() is called, we may be on
+ * an inconsistent state and won't be able to do a clean cleanup
+ */
+static int reboot_emergency;
+
 /* This is set by the PCI code if either type 1 or type 2 PCI is detected */
 bool port_cf9_safe = false;
 
@@ -368,6 +375,48 @@ static inline void kb_wait(void)
 	}
 }
 
+static void vmxoff_nmi(int cpu, struct die_args *args)
+{
+	cpu_emergency_vmxoff();
+}
+
+/* Use NMIs as IPIs to tell all CPUs to disable virtualization
+ */
+static void emergency_vmx_disable_all(void)
+{
+	/* Just make sure we won't change CPUs while doing this */
+	local_irq_disable();
+
+	/* We need to disable VMX on all CPUs before rebooting, otherwise
+	 * we risk hanging up the machine, because the CPU ignore INIT
+	 * signals when VMX is enabled.
+	 *
+	 * We can't take any locks and we may be on an inconsistent
+	 * state, so we use NMIs as IPIs to tell the other CPUs to disable
+	 * VMX and halt.
+	 *
+	 * For safety, we will avoid running the nmi_shootdown_cpus()
+	 * stuff unnecessarily, but we don't have a way to check
+	 * if other CPUs have VMX enabled. So we will call it only if the
+	 * CPU we are running on has VMX enabled.
+	 *
+	 * We will miss cases where VMX is not enabled on all CPUs. This
+	 * shouldn't do much harm because KVM always enable VMX on all
+	 * CPUs anyway. But we can miss it on the small window where KVM
+	 * is still enabling VMX.
+	 */
+	if (cpu_has_vmx() && cpu_vmx_enabled()) {
+		/* Disable VMX on this CPU.
+		 */
+		cpu_vmxoff();
+
+		/* Halt and disable VMX on the other CPUs */
+		nmi_shootdown_cpus(vmxoff_nmi);
+
+	}
+}
+
+
 void __attribute__((weak)) mach_reboot_fixups(void)
 {
 }
@@ -376,6 +425,9 @@ static void native_machine_emergency_restart(void)
 {
 	int i;
 
+	if (reboot_emergency)
+		emergency_vmx_disable_all();
+
 	/* Tell the BIOS if we want cold or warm reboot */
 	*((unsigned short *)__va(0x472)) = reboot_mode;
 
@@ -482,13 +534,19 @@ void native_machine_shutdown(void)
 #endif
 }
 
+static void __machine_emergency_restart(int emergency)
+{
+	reboot_emergency = emergency;
+	machine_ops.emergency_restart();
+}
+
 static void native_machine_restart(char *__unused)
 {
 	printk("machine restart\n");
 
 	if (!reboot_force)
 		machine_shutdown();
-	machine_emergency_restart();
+	__machine_emergency_restart(0);
 }
 
 static void native_machine_halt(void)
@@ -532,7 +590,7 @@ void machine_shutdown(void)
 
 void machine_emergency_restart(void)
 {
-	machine_ops.emergency_restart();
+	__machine_emergency_restart(1);
 }
 
 void machine_restart(char *cmd)
-- 
cgit v1.2.3


From df203ec9a77a7236cb90456664d714423b98a977 Mon Sep 17 00:00:00 2001
From: Avi Kivity <avi@redhat.com>
Date: Sun, 23 Nov 2008 18:08:57 +0200
Subject: KVM: VMX: Conditionally request interrupt window after injecting irq

If we're injecting an interrupt, and another one is pending, request
an interrupt window notification so we don't have excess latency on the
second interrupt.

This shouldn't happen in practice since an EOI will be issued, giving a second
chance to request an interrupt window, but...

Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/kvm/vmx.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index f5958a7823f..7ea485543cf 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -3304,6 +3304,8 @@ static void vmx_intr_assist(struct kvm_vcpu *vcpu)
 	if (vcpu->arch.interrupt.pending) {
 		vmx_inject_irq(vcpu, vcpu->arch.interrupt.nr);
 		kvm_timer_intr_post(vcpu, vcpu->arch.interrupt.nr);
+		if (kvm_cpu_has_interrupt(vcpu))
+			enable_irq_window(vcpu);
 	}
 }
 
-- 
cgit v1.2.3


From 423cd25a5ade17b8a5cc85e6f0a0f37028d2c4a2 Mon Sep 17 00:00:00 2001
From: Glauber Costa <glommer@redhat.com>
Date: Mon, 24 Nov 2008 15:45:23 -0200
Subject: x86: KVM guest: sign kvmclock as paravirt

Currently, we only set the KVM paravirt signature in case
of CONFIG_KVM_GUEST. However, it is possible to have it turned
off, while CONFIG_KVM_CLOCK is turned on. This is also a paravirt
case, and should be shown accordingly.

Signed-off-by: Glauber Costa <glommer@redhat.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/kernel/kvmclock.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c
index e169ae9b6a6..b38e801014e 100644
--- a/arch/x86/kernel/kvmclock.c
+++ b/arch/x86/kernel/kvmclock.c
@@ -194,5 +194,7 @@ void __init kvmclock_init(void)
 #endif
 		kvm_get_preset_lpj();
 		clocksource_register(&kvm_clock);
+		pv_info.paravirt_enabled = 1;
+		pv_info.name = "KVM";
 	}
 }
-- 
cgit v1.2.3


From ecc5589f19a52e7e6501fe449047b19087ae11bb Mon Sep 17 00:00:00 2001
From: Marcelo Tosatti <mtosatti@redhat.com>
Date: Tue, 25 Nov 2008 15:58:07 +0100
Subject: KVM: MMU: optimize set_spte for page sync

The write protect verification in set_spte is unnecessary for page sync.

Its guaranteed that, if the unsync spte was writable, the target page
does not have a write protected shadow (if it had, the spte would have
been write protected under mmu_lock by rmap_write_protect before).

Same reasoning applies to mark_page_dirty: the gfn has been marked as
dirty via the pagefault path.

The cost of hash table and memslot lookups are quite significant if the
workload is pagetable write intensive resulting in increased mmu_lock
contention.

Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/kvm/mmu.c | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index fa3486d6407..dd20b199a7c 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -1593,6 +1593,15 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte,
 
 		spte |= PT_WRITABLE_MASK;
 
+		/*
+		 * Optimization: for pte sync, if spte was writable the hash
+		 * lookup is unnecessary (and expensive). Write protection
+		 * is responsibility of mmu_get_page / kvm_sync_page.
+		 * Same reasoning can be applied to dirty page accounting.
+		 */
+		if (!can_unsync && is_writeble_pte(*shadow_pte))
+			goto set_pte;
+
 		if (mmu_need_write_protect(vcpu, gfn, can_unsync)) {
 			pgprintk("%s: found shadow page for %lx, marking ro\n",
 				 __func__, gfn);
-- 
cgit v1.2.3


From dda96d8f1b3de692cce09969ce28fe22e58e5acf Mon Sep 17 00:00:00 2001
From: Avi Kivity <avi@redhat.com>
Date: Wed, 26 Nov 2008 15:14:10 +0200
Subject: KVM: x86 emulator: reduce duplication in one operand emulation thunks

Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/kvm/x86_emulate.c | 66 ++++++++++++++++------------------------------
 1 file changed, 23 insertions(+), 43 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/x86_emulate.c b/arch/x86/kvm/x86_emulate.c
index 8f60ace1387..5f87d3e5919 100644
--- a/arch/x86/kvm/x86_emulate.c
+++ b/arch/x86/kvm/x86_emulate.c
@@ -359,6 +359,12 @@ static u16 group2_table[] = {
 	"andl %"_msk",%"_LO32 _tmp"; "		\
 	"orl  %"_LO32 _tmp",%"_sav"; "
 
+#ifdef CONFIG_X86_64
+#define ON64(x) x
+#else
+#define ON64(x)
+#endif
+
 /* Raw emulation: instruction has two explicit operands. */
 #define __emulate_2op_nobyte(_op,_src,_dst,_eflags,_wx,_wy,_lx,_ly,_qx,_qy) \
 	do { 								    \
@@ -425,42 +431,27 @@ static u16 group2_table[] = {
 	__emulate_2op_nobyte(_op, _src, _dst, _eflags,			\
 			     "w", "r", _LO32, "r", "", "r")
 
-/* Instruction has only one explicit operand (no source operand). */
-#define emulate_1op(_op, _dst, _eflags)                                    \
+#define __emulate_1op(_op, _dst, _eflags, _suffix)			\
 	do {								\
 		unsigned long _tmp;					\
 									\
+		__asm__ __volatile__ (					\
+			_PRE_EFLAGS("0", "3", "2")			\
+			_op _suffix " %1; "				\
+			_POST_EFLAGS("0", "3", "2")			\
+			: "=m" (_eflags), "+m" ((_dst).val),		\
+			  "=&r" (_tmp)					\
+			: "i" (EFLAGS_MASK));				\
+	} while (0)
+
+/* Instruction has only one explicit operand (no source operand). */
+#define emulate_1op(_op, _dst, _eflags)                                    \
+	do {								\
 		switch ((_dst).bytes) {				        \
-		case 1:							\
-			__asm__ __volatile__ (				\
-				_PRE_EFLAGS("0", "3", "2")		\
-				_op"b %1; "				\
-				_POST_EFLAGS("0", "3", "2")		\
-				: "=m" (_eflags), "=m" ((_dst).val),	\
-				  "=&r" (_tmp)				\
-				: "i" (EFLAGS_MASK));			\
-			break;						\
-		case 2:							\
-			__asm__ __volatile__ (				\
-				_PRE_EFLAGS("0", "3", "2")		\
-				_op"w %1; "				\
-				_POST_EFLAGS("0", "3", "2")		\
-				: "=m" (_eflags), "=m" ((_dst).val),	\
-				  "=&r" (_tmp)				\
-				: "i" (EFLAGS_MASK));			\
-			break;						\
-		case 4:							\
-			__asm__ __volatile__ (				\
-				_PRE_EFLAGS("0", "3", "2")		\
-				_op"l %1; "				\
-				_POST_EFLAGS("0", "3", "2")		\
-				: "=m" (_eflags), "=m" ((_dst).val),	\
-				  "=&r" (_tmp)				\
-				: "i" (EFLAGS_MASK));			\
-			break;						\
-		case 8:							\
-			__emulate_1op_8byte(_op, _dst, _eflags);	\
-			break;						\
+		case 1:	__emulate_1op(_op, _dst, _eflags, "b"); break;	\
+		case 2:	__emulate_1op(_op, _dst, _eflags, "w"); break;	\
+		case 4:	__emulate_1op(_op, _dst, _eflags, "l"); break;	\
+		case 8:	ON64(__emulate_1op(_op, _dst, _eflags, "q")); break; \
 		}							\
 	} while (0)
 
@@ -476,19 +467,8 @@ static u16 group2_table[] = {
 			: _qy ((_src).val), "i" (EFLAGS_MASK));		\
 	} while (0)
 
-#define __emulate_1op_8byte(_op, _dst, _eflags)                           \
-	do {								  \
-		__asm__ __volatile__ (					  \
-			_PRE_EFLAGS("0", "3", "2")			  \
-			_op"q %1; "					  \
-			_POST_EFLAGS("0", "3", "2")			  \
-			: "=m" (_eflags), "=m" ((_dst).val), "=&r" (_tmp) \
-			: "i" (EFLAGS_MASK));				  \
-	} while (0)
-
 #elif defined(__i386__)
 #define __emulate_2op_8byte(_op, _src, _dst, _eflags, _qx, _qy)
-#define __emulate_1op_8byte(_op, _dst, _eflags)
 #endif				/* __i386__ */
 
 /* Fetch next part of the instruction being emulated. */
-- 
cgit v1.2.3


From 6b7ad61ffb9ca110add6f7fb36cc8a4dd89696a4 Mon Sep 17 00:00:00 2001
From: Avi Kivity <avi@redhat.com>
Date: Wed, 26 Nov 2008 15:30:45 +0200
Subject: KVM: x86 emulator: consolidate emulation of two operand instructions

No need to repeat the same assembly block over and over.

Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/kvm/x86_emulate.c | 79 ++++++++++++++++------------------------------
 1 file changed, 28 insertions(+), 51 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/x86_emulate.c b/arch/x86/kvm/x86_emulate.c
index 5f87d3e5919..a11af6f74d6 100644
--- a/arch/x86/kvm/x86_emulate.c
+++ b/arch/x86/kvm/x86_emulate.c
@@ -365,49 +365,42 @@ static u16 group2_table[] = {
 #define ON64(x)
 #endif
 
+#define ____emulate_2op(_op, _src, _dst, _eflags, _x, _y, _suffix)	\
+	do {								\
+		__asm__ __volatile__ (					\
+			_PRE_EFLAGS("0", "4", "2")			\
+			_op _suffix " %"_x"3,%1; "			\
+			_POST_EFLAGS("0", "4", "2")			\
+			: "=m" (_eflags), "=m" ((_dst).val),		\
+			  "=&r" (_tmp)					\
+			: _y ((_src).val), "i" (EFLAGS_MASK));		\
+	} while (0);
+
+
 /* Raw emulation: instruction has two explicit operands. */
 #define __emulate_2op_nobyte(_op,_src,_dst,_eflags,_wx,_wy,_lx,_ly,_qx,_qy) \
-	do { 								    \
-		unsigned long _tmp;					    \
-									    \
-		switch ((_dst).bytes) {					    \
-		case 2:							    \
-			__asm__ __volatile__ (				    \
-				_PRE_EFLAGS("0", "4", "2")		    \
-				_op"w %"_wx"3,%1; "			    \
-				_POST_EFLAGS("0", "4", "2")		    \
-				: "=m" (_eflags), "=m" ((_dst).val),        \
-				  "=&r" (_tmp)				    \
-				: _wy ((_src).val), "i" (EFLAGS_MASK));     \
-			break;						    \
-		case 4:							    \
-			__asm__ __volatile__ (				    \
-				_PRE_EFLAGS("0", "4", "2")		    \
-				_op"l %"_lx"3,%1; "			    \
-				_POST_EFLAGS("0", "4", "2")		    \
-				: "=m" (_eflags), "=m" ((_dst).val),	    \
-				  "=&r" (_tmp)				    \
-				: _ly ((_src).val), "i" (EFLAGS_MASK));     \
-			break;						    \
-		case 8:							    \
-			__emulate_2op_8byte(_op, _src, _dst,		    \
-					    _eflags, _qx, _qy);		    \
-			break;						    \
-		}							    \
+	do {								\
+		unsigned long _tmp;					\
+									\
+		switch ((_dst).bytes) {					\
+		case 2:							\
+			____emulate_2op(_op,_src,_dst,_eflags,_wx,_wy,"w"); \
+			break;						\
+		case 4:							\
+			____emulate_2op(_op,_src,_dst,_eflags,_lx,_ly,"l"); \
+			break;						\
+		case 8:							\
+			ON64(____emulate_2op(_op,_src,_dst,_eflags,_qx,_qy,"q")); \
+			break;						\
+		}							\
 	} while (0)
 
 #define __emulate_2op(_op,_src,_dst,_eflags,_bx,_by,_wx,_wy,_lx,_ly,_qx,_qy) \
 	do {								     \
-		unsigned long __tmp;					     \
+		unsigned long _tmp;					     \
 		switch ((_dst).bytes) {				             \
 		case 1:							     \
-			__asm__ __volatile__ (				     \
-				_PRE_EFLAGS("0", "4", "2")		     \
-				_op"b %"_bx"3,%1; "			     \
-				_POST_EFLAGS("0", "4", "2")		     \
-				: "=m" (_eflags), "=m" ((_dst).val),	     \
-				  "=&r" (__tmp)				     \
-				: _by ((_src).val), "i" (EFLAGS_MASK));      \
+			____emulate_2op(_op,_src,_dst,_eflags,_bx,_by,"b");  \
 			break;						     \
 		default:						     \
 			__emulate_2op_nobyte(_op, _src, _dst, _eflags,	     \
@@ -455,22 +448,6 @@ static u16 group2_table[] = {
 		}							\
 	} while (0)
 
-/* Emulate an instruction with quadword operands (x86/64 only). */
-#if defined(CONFIG_X86_64)
-#define __emulate_2op_8byte(_op, _src, _dst, _eflags, _qx, _qy)           \
-	do {								  \
-		__asm__ __volatile__ (					  \
-			_PRE_EFLAGS("0", "4", "2")			  \
-			_op"q %"_qx"3,%1; "				  \
-			_POST_EFLAGS("0", "4", "2")			  \
-			: "=m" (_eflags), "=m" ((_dst).val), "=&r" (_tmp) \
-			: _qy ((_src).val), "i" (EFLAGS_MASK));		\
-	} while (0)
-
-#elif defined(__i386__)
-#define __emulate_2op_8byte(_op, _src, _dst, _eflags, _qx, _qy)
-#endif				/* __i386__ */
-
 /* Fetch next part of the instruction being emulated. */
 #define insn_fetch(_type, _size, _eip)                                  \
 ({	unsigned long _x;						\
-- 
cgit v1.2.3


From faa5a3ae39483aefc46a78299c811194f953af27 Mon Sep 17 00:00:00 2001
From: Avi Kivity <avi@redhat.com>
Date: Thu, 27 Nov 2008 17:36:41 +0200
Subject: KVM: x86 emulator: Extract 'pop' sequence into a function

Switch 'pop r/m' instruction to use the new function.

Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/kvm/x86_emulate.c | 21 +++++++++++++++++----
 1 file changed, 17 insertions(+), 4 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/x86_emulate.c b/arch/x86/kvm/x86_emulate.c
index a11af6f74d6..2555762f4b4 100644
--- a/arch/x86/kvm/x86_emulate.c
+++ b/arch/x86/kvm/x86_emulate.c
@@ -1057,20 +1057,33 @@ static inline void emulate_push(struct x86_emulate_ctxt *ctxt)
 					       c->regs[VCPU_REGS_RSP]);
 }
 
-static inline int emulate_grp1a(struct x86_emulate_ctxt *ctxt,
-				struct x86_emulate_ops *ops)
+static int emulate_pop(struct x86_emulate_ctxt *ctxt,
+		       struct x86_emulate_ops *ops)
 {
 	struct decode_cache *c = &ctxt->decode;
 	int rc;
 
 	rc = ops->read_std(register_address(c, ss_base(ctxt),
 					    c->regs[VCPU_REGS_RSP]),
-			   &c->dst.val, c->dst.bytes, ctxt->vcpu);
+			   &c->src.val, c->src.bytes, ctxt->vcpu);
 	if (rc != 0)
 		return rc;
 
-	register_address_increment(c, &c->regs[VCPU_REGS_RSP], c->dst.bytes);
+	register_address_increment(c, &c->regs[VCPU_REGS_RSP], c->src.bytes);
+	return rc;
+}
 
+static inline int emulate_grp1a(struct x86_emulate_ctxt *ctxt,
+				struct x86_emulate_ops *ops)
+{
+	struct decode_cache *c = &ctxt->decode;
+	int rc;
+
+	c->src.bytes = c->dst.bytes;
+	rc = emulate_pop(ctxt, ops);
+	if (rc != 0)
+		return rc;
+	c->dst.val = c->src.val;
 	return 0;
 }
 
-- 
cgit v1.2.3


From 781d0edc5fc5cfe7491a0c5081734e62f6dc66ee Mon Sep 17 00:00:00 2001
From: Avi Kivity <avi@redhat.com>
Date: Thu, 27 Nov 2008 18:00:28 +0200
Subject: KVM: x86 emulator: allow pop from mmio

Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/kvm/x86_emulate.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/x86_emulate.c b/arch/x86/kvm/x86_emulate.c
index 2555762f4b4..70242f5f096 100644
--- a/arch/x86/kvm/x86_emulate.c
+++ b/arch/x86/kvm/x86_emulate.c
@@ -1063,9 +1063,9 @@ static int emulate_pop(struct x86_emulate_ctxt *ctxt,
 	struct decode_cache *c = &ctxt->decode;
 	int rc;
 
-	rc = ops->read_std(register_address(c, ss_base(ctxt),
-					    c->regs[VCPU_REGS_RSP]),
-			   &c->src.val, c->src.bytes, ctxt->vcpu);
+	rc = ops->read_emulated(register_address(c, ss_base(ctxt),
+						 c->regs[VCPU_REGS_RSP]),
+				&c->src.val, c->src.bytes, ctxt->vcpu);
 	if (rc != 0)
 		return rc;
 
-- 
cgit v1.2.3


From 8a09b6877f3100207b3572e7e12ea796493fe914 Mon Sep 17 00:00:00 2001
From: Avi Kivity <avi@redhat.com>
Date: Thu, 27 Nov 2008 18:06:33 +0200
Subject: KVM: x86 emulator: switch 'pop reg' instruction to emulate_pop()

Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/kvm/x86_emulate.c | 11 ++++-------
 1 file changed, 4 insertions(+), 7 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/x86_emulate.c b/arch/x86/kvm/x86_emulate.c
index 70242f5f096..702de9869c1 100644
--- a/arch/x86/kvm/x86_emulate.c
+++ b/arch/x86/kvm/x86_emulate.c
@@ -1389,14 +1389,11 @@ special_insn:
 		break;
 	case 0x58 ... 0x5f: /* pop reg */
 	pop_instruction:
-		if ((rc = ops->read_std(register_address(c, ss_base(ctxt),
-			c->regs[VCPU_REGS_RSP]), c->dst.ptr,
-			c->op_bytes, ctxt->vcpu)) != 0)
+		c->src.bytes = c->op_bytes;
+		rc = emulate_pop(ctxt, ops);
+		if (rc != 0)
 			goto done;
-
-		register_address_increment(c, &c->regs[VCPU_REGS_RSP],
-					   c->op_bytes);
-		c->dst.type = OP_NONE;	/* Disable writeback. */
+		c->dst.val = c->src.val;
 		break;
 	case 0x63:		/* movsxd */
 		if (ctxt->mode != X86EMUL_MODE_PROT64)
-- 
cgit v1.2.3


From cf5de4f886116871c2ae2eee53524edd741a68ae Mon Sep 17 00:00:00 2001
From: Avi Kivity <avi@redhat.com>
Date: Fri, 28 Nov 2008 00:14:07 +0200
Subject: KVM: x86 emulator: fix ret emulation

'ret' did not set the operand type or size for the destination, so
writeback ignored it.

Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/kvm/x86_emulate.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/x86_emulate.c b/arch/x86/kvm/x86_emulate.c
index 702de9869c1..72ae86b1b13 100644
--- a/arch/x86/kvm/x86_emulate.c
+++ b/arch/x86/kvm/x86_emulate.c
@@ -1650,7 +1650,9 @@ special_insn:
 		emulate_grp2(ctxt);
 		break;
 	case 0xc3: /* ret */
+		c->dst.type = OP_REG;
 		c->dst.ptr = &c->eip;
+		c->dst.bytes = c->op_bytes;
 		goto pop_instruction;
 	case 0xc6 ... 0xc7:	/* mov (sole member of Grp11) */
 	mov:
-- 
cgit v1.2.3


From 2b48cc75b21431037d6f902b9d583b1aff198490 Mon Sep 17 00:00:00 2001
From: Avi Kivity <avi@redhat.com>
Date: Sat, 29 Nov 2008 20:36:13 +0200
Subject: KVM: x86 emulator: fix popf emulation

Set operand type and size to get correct writeback behavior.

Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/kvm/x86_emulate.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/x86_emulate.c b/arch/x86/kvm/x86_emulate.c
index 72ae86b1b13..e8c87ccfe31 100644
--- a/arch/x86/kvm/x86_emulate.c
+++ b/arch/x86/kvm/x86_emulate.c
@@ -1552,7 +1552,9 @@ special_insn:
 		emulate_push(ctxt);
 		break;
 	case 0x9d: /* popf */
+		c->dst.type = OP_REG;
 		c->dst.ptr = (unsigned long *) &ctxt->eflags;
+		c->dst.bytes = c->op_bytes;
 		goto pop_instruction;
 	case 0xa0 ... 0xa1:	/* mov */
 		c->dst.ptr = (unsigned long *)&c->regs[VCPU_REGS_RAX];
-- 
cgit v1.2.3


From f3fd92fbdb7663bd889c136842afc3851351ea8f Mon Sep 17 00:00:00 2001
From: Avi Kivity <avi@redhat.com>
Date: Sat, 29 Nov 2008 20:38:12 +0200
Subject: KVM: Remove extraneous semicolon after do/while

Notices by Guillaume Thouvenin.

Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/kvm/x86_emulate.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/x86_emulate.c b/arch/x86/kvm/x86_emulate.c
index e8c87ccfe31..69b330ba0ad 100644
--- a/arch/x86/kvm/x86_emulate.c
+++ b/arch/x86/kvm/x86_emulate.c
@@ -374,7 +374,7 @@ static u16 group2_table[] = {
 			: "=m" (_eflags), "=m" ((_dst).val),		\
 			  "=&r" (_tmp)					\
 			: _y ((_src).val), "i" (EFLAGS_MASK));		\
-	} while (0);
+	} while (0)
 
 
 /* Raw emulation: instruction has two explicit operands. */
-- 
cgit v1.2.3


From efff9e538f6bfa8ee2ca03f7e9a55d98df115186 Mon Sep 17 00:00:00 2001
From: Hannes Eder <hannes@hanneseder.net>
Date: Fri, 28 Nov 2008 17:02:06 +0100
Subject: KVM: VMX: fix sparse warning

Impact: make global function static

  arch/x86/kvm/vmx.c:134:3: warning: symbol 'vmx_capability' was not declared. Should it be static?

Signed-off-by: Hannes Eder <hannes@hanneseder.net>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/kvm/vmx.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 7ea485543cf..e446f232588 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -128,7 +128,7 @@ static struct vmcs_config {
 	u32 vmentry_ctrl;
 } vmcs_config;
 
-struct vmx_capability {
+static struct vmx_capability {
 	u32 ept;
 	u32 vpid;
 } vmx_capability;
-- 
cgit v1.2.3


From 45ed60b371aeae6ed80f7e9d594a5e6412edc176 Mon Sep 17 00:00:00 2001
From: Guillaume Thouvenin <guillaume.thouvenin@ext.bull.net>
Date: Thu, 4 Dec 2008 14:25:38 +0100
Subject: KVM: x86 emulator: Extend the opcode descriptor

Extend the opcode descriptor to 32 bits. This is needed by the
introduction of a new Src2 operand type.

Signed-off-by: Guillaume Thouvenin <guillaume.thouvenin@ext.bull.net>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/kvm/x86_emulate.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/x86_emulate.c b/arch/x86/kvm/x86_emulate.c
index 69b330ba0ad..7a07ca46c8a 100644
--- a/arch/x86/kvm/x86_emulate.c
+++ b/arch/x86/kvm/x86_emulate.c
@@ -76,7 +76,7 @@ enum {
 	Group1A, Group3_Byte, Group3, Group4, Group5, Group7,
 };
 
-static u16 opcode_table[256] = {
+static u32 opcode_table[256] = {
 	/* 0x00 - 0x07 */
 	ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM,
 	ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM,
@@ -195,7 +195,7 @@ static u16 opcode_table[256] = {
 	ImplicitOps, ImplicitOps, Group | Group4, Group | Group5,
 };
 
-static u16 twobyte_table[256] = {
+static u32 twobyte_table[256] = {
 	/* 0x00 - 0x0F */
 	0, Group | GroupDual | Group7, 0, 0, 0, 0, ImplicitOps, 0,
 	ImplicitOps, ImplicitOps, 0, 0, 0, ImplicitOps | ModRM, 0, 0,
@@ -253,7 +253,7 @@ static u16 twobyte_table[256] = {
 	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
 };
 
-static u16 group_table[] = {
+static u32 group_table[] = {
 	[Group1_80*8] =
 	ByteOp | DstMem | SrcImm | ModRM, ByteOp | DstMem | SrcImm | ModRM,
 	ByteOp | DstMem | SrcImm | ModRM, ByteOp | DstMem | SrcImm | ModRM,
@@ -297,7 +297,7 @@ static u16 group_table[] = {
 	SrcMem16 | ModRM | Mov, SrcMem | ModRM | ByteOp,
 };
 
-static u16 group2_table[] = {
+static u32 group2_table[] = {
 	[Group7*8] =
 	SrcNone | ModRM, 0, 0, 0,
 	SrcNone | ModRM | DstMem | Mov, 0,
-- 
cgit v1.2.3


From 0dc8d10f7d848b63c8d32cf6fd31ba7def792ac9 Mon Sep 17 00:00:00 2001
From: Guillaume Thouvenin <guillaume.thouvenin@ext.bull.net>
Date: Thu, 4 Dec 2008 14:26:42 +0100
Subject: KVM: x86 emulator: add Src2 decode set

Instruction like shld has three operands, so we need to add a Src2
decode set. We start with Src2None, Src2CL, and Src2ImmByte, Src2One to
support shld/shrd and we will expand it later.

Signed-off-by: Guillaume Thouvenin <guillaume.thouvenin@ext.bull.net>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/include/asm/kvm_x86_emulate.h |  1 +
 arch/x86/kvm/x86_emulate.c             | 29 +++++++++++++++++++++++++++++
 2 files changed, 30 insertions(+)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/kvm_x86_emulate.h b/arch/x86/include/asm/kvm_x86_emulate.h
index 16a002655f3..6a159732881 100644
--- a/arch/x86/include/asm/kvm_x86_emulate.h
+++ b/arch/x86/include/asm/kvm_x86_emulate.h
@@ -123,6 +123,7 @@ struct decode_cache {
 	u8 ad_bytes;
 	u8 rex_prefix;
 	struct operand src;
+	struct operand src2;
 	struct operand dst;
 	bool has_seg_override;
 	u8 seg_override;
diff --git a/arch/x86/kvm/x86_emulate.c b/arch/x86/kvm/x86_emulate.c
index 7a07ca46c8a..7f5cd62362c 100644
--- a/arch/x86/kvm/x86_emulate.c
+++ b/arch/x86/kvm/x86_emulate.c
@@ -70,6 +70,12 @@
 #define Group       (1<<14)     /* Bits 3:5 of modrm byte extend opcode */
 #define GroupDual   (1<<15)     /* Alternate decoding of mod == 3 */
 #define GroupMask   0xff        /* Group number stored in bits 0:7 */
+/* Source 2 operand type */
+#define Src2None    (0<<29)
+#define Src2CL      (1<<29)
+#define Src2ImmByte (2<<29)
+#define Src2One     (3<<29)
+#define Src2Mask    (7<<29)
 
 enum {
 	Group1_80, Group1_81, Group1_82, Group1_83,
@@ -1000,6 +1006,29 @@ done_prefixes:
 		break;
 	}
 
+	/*
+	 * Decode and fetch the second source operand: register, memory
+	 * or immediate.
+	 */
+	switch (c->d & Src2Mask) {
+	case Src2None:
+		break;
+	case Src2CL:
+		c->src2.bytes = 1;
+		c->src2.val = c->regs[VCPU_REGS_RCX] & 0x8;
+		break;
+	case Src2ImmByte:
+		c->src2.type = OP_IMM;
+		c->src2.ptr = (unsigned long *)c->eip;
+		c->src2.bytes = 1;
+		c->src2.val = insn_fetch(u8, 1, c->eip);
+		break;
+	case Src2One:
+		c->src2.bytes = 1;
+		c->src2.val = 1;
+		break;
+	}
+
 	/* Decode and fetch the destination operand: register or memory. */
 	switch (c->d & DstMask) {
 	case ImplicitOps:
-- 
cgit v1.2.3


From bfcadf83ec5aafe600e73dd427d997db7bcc1d12 Mon Sep 17 00:00:00 2001
From: Guillaume Thouvenin <guillaume.thouvenin@ext.bull.net>
Date: Thu, 4 Dec 2008 14:27:38 +0100
Subject: KVM: x86 emulator: add a new "implied 1" Src decode type

Add SrcOne operand type when we need to decode an implied '1' like with
regular shift instruction

Signed-off-by: Guillaume Thouvenin <guillaume.thouvenin@ext.bull.net>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/kvm/x86_emulate.c | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/x86_emulate.c b/arch/x86/kvm/x86_emulate.c
index 7f5cd62362c..0c75306e7a0 100644
--- a/arch/x86/kvm/x86_emulate.c
+++ b/arch/x86/kvm/x86_emulate.c
@@ -58,6 +58,7 @@
 #define SrcMem32    (4<<4)	/* Memory operand (32-bit). */
 #define SrcImm      (5<<4)	/* Immediate operand. */
 #define SrcImmByte  (6<<4)	/* 8-bit sign-extended immediate operand. */
+#define SrcOne      (7<<4)	/* Implied '1' */
 #define SrcMask     (7<<4)
 /* Generic ModRM decode. */
 #define ModRM       (1<<7)
@@ -1004,6 +1005,10 @@ done_prefixes:
 		c->src.bytes = 1;
 		c->src.val = insn_fetch(s8, 1, c->eip);
 		break;
+	case SrcOne:
+		c->src.bytes = 1;
+		c->src.val = 1;
+		break;
 	}
 
 	/*
-- 
cgit v1.2.3


From d175226a5f54817ba427368c6b739aefa7780fb2 Mon Sep 17 00:00:00 2001
From: Guillaume Thouvenin <guillaume.thouvenin@ext.bull.net>
Date: Thu, 4 Dec 2008 14:29:00 +0100
Subject: KVM: x86 emulator: add the assembler code for three operands

Add the assembler code for instruction with three operands and one
operand is stored in ECX register

Signed-off-by: Guillaume Thouvenin <guillaume.thouvenin@ext.bull.net>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/kvm/x86_emulate.c | 39 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 39 insertions(+)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/x86_emulate.c b/arch/x86/kvm/x86_emulate.c
index 0c75306e7a0..9ae6d5b3e96 100644
--- a/arch/x86/kvm/x86_emulate.c
+++ b/arch/x86/kvm/x86_emulate.c
@@ -431,6 +431,45 @@ static u32 group2_table[] = {
 	__emulate_2op_nobyte(_op, _src, _dst, _eflags,			\
 			     "w", "r", _LO32, "r", "", "r")
 
+/* Instruction has three operands and one operand is stored in ECX register */
+#define __emulate_2op_cl(_op, _cl, _src, _dst, _eflags, _suffix, _type) 	\
+	do {									\
+		unsigned long _tmp;						\
+		_type _clv  = (_cl).val;  					\
+		_type _srcv = (_src).val;    					\
+		_type _dstv = (_dst).val;					\
+										\
+		__asm__ __volatile__ (						\
+			_PRE_EFLAGS("0", "5", "2")				\
+			_op _suffix " %4,%1 \n"					\
+			_POST_EFLAGS("0", "5", "2")				\
+			: "=m" (_eflags), "+r" (_dstv), "=&r" (_tmp)		\
+			: "c" (_clv) , "r" (_srcv), "i" (EFLAGS_MASK)		\
+			); 							\
+										\
+		(_cl).val  = (unsigned long) _clv;				\
+		(_src).val = (unsigned long) _srcv;				\
+		(_dst).val = (unsigned long) _dstv;				\
+	} while (0)
+
+#define emulate_2op_cl(_op, _cl, _src, _dst, _eflags)				\
+	do {									\
+		switch ((_dst).bytes) {						\
+		case 2:								\
+			__emulate_2op_cl(_op, _cl, _src, _dst, _eflags,  	\
+						"w", unsigned short);         	\
+			break;							\
+		case 4: 							\
+			__emulate_2op_cl(_op, _cl, _src, _dst, _eflags,  	\
+						"l", unsigned int);           	\
+			break;							\
+		case 8:								\
+			ON64(__emulate_2op_cl(_op, _cl, _src, _dst, _eflags,	\
+						"q", unsigned long));  		\
+			break;							\
+		}								\
+	} while (0)
+
 #define __emulate_1op(_op, _dst, _eflags, _suffix)			\
 	do {								\
 		unsigned long _tmp;					\
-- 
cgit v1.2.3


From 9bf8ea42fe22d7d1c48044148fa658cb9083d49c Mon Sep 17 00:00:00 2001
From: Guillaume Thouvenin <guillaume.thouvenin@ext.bull.net>
Date: Thu, 4 Dec 2008 14:30:13 +0100
Subject: KVM: x86 emulator: add the emulation of shld and shrd instructions

Add emulation of shld and shrd instructions

Signed-off-by: Guillaume Thouvenin <guillaume.thouvenin@ext.bull.net>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/kvm/x86_emulate.c | 17 +++++++++++++++--
 1 file changed, 15 insertions(+), 2 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/x86_emulate.c b/arch/x86/kvm/x86_emulate.c
index 9ae6d5b3e96..219dc3110bf 100644
--- a/arch/x86/kvm/x86_emulate.c
+++ b/arch/x86/kvm/x86_emulate.c
@@ -237,9 +237,14 @@ static u32 twobyte_table[256] = {
 	/* 0x90 - 0x9F */
 	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 	/* 0xA0 - 0xA7 */
-	0, 0, 0, DstMem | SrcReg | ModRM | BitOp, 0, 0, 0, 0,
+	0, 0, 0, DstMem | SrcReg | ModRM | BitOp,
+	DstMem | SrcReg | Src2ImmByte | ModRM,
+	DstMem | SrcReg | Src2CL | ModRM, 0, 0,
 	/* 0xA8 - 0xAF */
-	0, 0, 0, DstMem | SrcReg | ModRM | BitOp, 0, 0, ModRM, 0,
+	0, 0, 0, DstMem | SrcReg | ModRM | BitOp,
+	DstMem | SrcReg | Src2ImmByte | ModRM,
+	DstMem | SrcReg | Src2CL | ModRM,
+	ModRM, 0,
 	/* 0xB0 - 0xB7 */
 	ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM, 0,
 	    DstMem | SrcReg | ModRM | BitOp,
@@ -2037,12 +2042,20 @@ twobyte_insn:
 		c->src.val &= (c->dst.bytes << 3) - 1;
 		emulate_2op_SrcV_nobyte("bt", c->src, c->dst, ctxt->eflags);
 		break;
+	case 0xa4: /* shld imm8, r, r/m */
+	case 0xa5: /* shld cl, r, r/m */
+		emulate_2op_cl("shld", c->src2, c->src, c->dst, ctxt->eflags);
+		break;
 	case 0xab:
 	      bts:		/* bts */
 		/* only subword offset */
 		c->src.val &= (c->dst.bytes << 3) - 1;
 		emulate_2op_SrcV_nobyte("bts", c->src, c->dst, ctxt->eflags);
 		break;
+	case 0xac: /* shrd imm8, r, r/m */
+	case 0xad: /* shrd cl, r, r/m */
+		emulate_2op_cl("shrd", c->src2, c->src, c->dst, ctxt->eflags);
+		break;
 	case 0xae:              /* clflush */
 		break;
 	case 0xb0 ... 0xb1:	/* cmpxchg */
-- 
cgit v1.2.3


From fbce554e940a983d005e29849636d0ef54b3eb18 Mon Sep 17 00:00:00 2001
From: Amit Shah <amit.shah@redhat.com>
Date: Thu, 4 Dec 2008 11:11:40 +0000
Subject: KVM: x86 emulator: Fix handling of VMMCALL instruction

The VMMCALL instruction doesn't get recognised and isn't processed
by the emulator.

This is seen on an Intel host that tries to execute the VMMCALL
instruction after a guest live migrates from an AMD host.

Signed-off-by: Amit Shah <amit.shah@redhat.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/kvm/x86_emulate.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/x86_emulate.c b/arch/x86/kvm/x86_emulate.c
index 219dc3110bf..d174db7a337 100644
--- a/arch/x86/kvm/x86_emulate.c
+++ b/arch/x86/kvm/x86_emulate.c
@@ -311,7 +311,7 @@ static u32 group_table[] = {
 
 static u32 group2_table[] = {
 	[Group7*8] =
-	SrcNone | ModRM, 0, 0, 0,
+	SrcNone | ModRM, 0, 0, SrcNone | ModRM,
 	SrcNone | ModRM | DstMem | Mov, 0,
 	SrcMem16 | ModRM | Mov, 0,
 };
-- 
cgit v1.2.3


From 60c8aec6e2c9923492dabbd6b67e34692bd26c20 Mon Sep 17 00:00:00 2001
From: Marcelo Tosatti <mtosatti@redhat.com>
Date: Mon, 1 Dec 2008 22:32:02 -0200
Subject: KVM: MMU: use page array in unsync walk

Instead of invoking the handler directly collect pages into
an array so the caller can work with it.

Simplifies TLB flush collapsing.

Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/include/asm/kvm_host.h |   2 +-
 arch/x86/kvm/mmu.c              | 195 ++++++++++++++++++++++++++++------------
 2 files changed, 141 insertions(+), 56 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index f58f7ebdea8..93d0aed3588 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -200,7 +200,7 @@ struct kvm_mmu_page {
 	int multimapped;         /* More than one parent_pte? */
 	int root_count;          /* Currently serving as active root */
 	bool unsync;
-	bool unsync_children;
+	unsigned int unsync_children;
 	union {
 		u64 *parent_pte;               /* !multimapped */
 		struct hlist_head parent_ptes; /* multimapped, kvm_pte_chain */
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index dd20b199a7c..7ce92f78f33 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -908,8 +908,9 @@ static void kvm_mmu_update_unsync_bitmap(u64 *spte)
 	struct kvm_mmu_page *sp = page_header(__pa(spte));
 
 	index = spte - sp->spt;
-	__set_bit(index, sp->unsync_child_bitmap);
-	sp->unsync_children = 1;
+	if (!__test_and_set_bit(index, sp->unsync_child_bitmap))
+		sp->unsync_children++;
+	WARN_ON(!sp->unsync_children);
 }
 
 static void kvm_mmu_update_parents_unsync(struct kvm_mmu_page *sp)
@@ -936,7 +937,6 @@ static void kvm_mmu_update_parents_unsync(struct kvm_mmu_page *sp)
 
 static int unsync_walk_fn(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
 {
-	sp->unsync_children = 1;
 	kvm_mmu_update_parents_unsync(sp);
 	return 1;
 }
@@ -967,18 +967,41 @@ static void nonpaging_invlpg(struct kvm_vcpu *vcpu, gva_t gva)
 {
 }
 
+#define KVM_PAGE_ARRAY_NR 16
+
+struct kvm_mmu_pages {
+	struct mmu_page_and_offset {
+		struct kvm_mmu_page *sp;
+		unsigned int idx;
+	} page[KVM_PAGE_ARRAY_NR];
+	unsigned int nr;
+};
+
 #define for_each_unsync_children(bitmap, idx)		\
 	for (idx = find_first_bit(bitmap, 512);		\
 	     idx < 512;					\
 	     idx = find_next_bit(bitmap, 512, idx+1))
 
-static int mmu_unsync_walk(struct kvm_mmu_page *sp,
-			   struct kvm_unsync_walk *walker)
+int mmu_pages_add(struct kvm_mmu_pages *pvec, struct kvm_mmu_page *sp,
+		   int idx)
 {
-	int i, ret;
+	int i;
 
-	if (!sp->unsync_children)
-		return 0;
+	if (sp->unsync)
+		for (i=0; i < pvec->nr; i++)
+			if (pvec->page[i].sp == sp)
+				return 0;
+
+	pvec->page[pvec->nr].sp = sp;
+	pvec->page[pvec->nr].idx = idx;
+	pvec->nr++;
+	return (pvec->nr == KVM_PAGE_ARRAY_NR);
+}
+
+static int __mmu_unsync_walk(struct kvm_mmu_page *sp,
+			   struct kvm_mmu_pages *pvec)
+{
+	int i, ret, nr_unsync_leaf = 0;
 
 	for_each_unsync_children(sp->unsync_child_bitmap, i) {
 		u64 ent = sp->spt[i];
@@ -988,17 +1011,22 @@ static int mmu_unsync_walk(struct kvm_mmu_page *sp,
 			child = page_header(ent & PT64_BASE_ADDR_MASK);
 
 			if (child->unsync_children) {
-				ret = mmu_unsync_walk(child, walker);
-				if (ret)
+				if (mmu_pages_add(pvec, child, i))
+					return -ENOSPC;
+
+				ret = __mmu_unsync_walk(child, pvec);
+				if (!ret)
+					__clear_bit(i, sp->unsync_child_bitmap);
+				else if (ret > 0)
+					nr_unsync_leaf += ret;
+				else
 					return ret;
-				__clear_bit(i, sp->unsync_child_bitmap);
 			}
 
 			if (child->unsync) {
-				ret = walker->entry(child, walker);
-				__clear_bit(i, sp->unsync_child_bitmap);
-				if (ret)
-					return ret;
+				nr_unsync_leaf++;
+				if (mmu_pages_add(pvec, child, i))
+					return -ENOSPC;
 			}
 		}
 	}
@@ -1006,7 +1034,17 @@ static int mmu_unsync_walk(struct kvm_mmu_page *sp,
 	if (find_first_bit(sp->unsync_child_bitmap, 512) == 512)
 		sp->unsync_children = 0;
 
-	return 0;
+	return nr_unsync_leaf;
+}
+
+static int mmu_unsync_walk(struct kvm_mmu_page *sp,
+			   struct kvm_mmu_pages *pvec)
+{
+	if (!sp->unsync_children)
+		return 0;
+
+	mmu_pages_add(pvec, sp, 0);
+	return __mmu_unsync_walk(sp, pvec);
 }
 
 static struct kvm_mmu_page *kvm_mmu_lookup_page(struct kvm *kvm, gfn_t gfn)
@@ -1056,30 +1094,81 @@ static int kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
 	return 0;
 }
 
-struct sync_walker {
-	struct kvm_vcpu *vcpu;
-	struct kvm_unsync_walk walker;
+struct mmu_page_path {
+	struct kvm_mmu_page *parent[PT64_ROOT_LEVEL-1];
+	unsigned int idx[PT64_ROOT_LEVEL-1];
 };
 
-static int mmu_sync_fn(struct kvm_mmu_page *sp, struct kvm_unsync_walk *walk)
+#define for_each_sp(pvec, sp, parents, i)			\
+		for (i = mmu_pages_next(&pvec, &parents, -1),	\
+			sp = pvec.page[i].sp;			\
+			i < pvec.nr && ({ sp = pvec.page[i].sp; 1;});	\
+			i = mmu_pages_next(&pvec, &parents, i))
+
+int mmu_pages_next(struct kvm_mmu_pages *pvec, struct mmu_page_path *parents,
+		   int i)
 {
-	struct sync_walker *sync_walk = container_of(walk, struct sync_walker,
-						     walker);
-	struct kvm_vcpu *vcpu = sync_walk->vcpu;
+	int n;
+
+	for (n = i+1; n < pvec->nr; n++) {
+		struct kvm_mmu_page *sp = pvec->page[n].sp;
+
+		if (sp->role.level == PT_PAGE_TABLE_LEVEL) {
+			parents->idx[0] = pvec->page[n].idx;
+			return n;
+		}
 
-	kvm_sync_page(vcpu, sp);
-	return (need_resched() || spin_needbreak(&vcpu->kvm->mmu_lock));
+		parents->parent[sp->role.level-2] = sp;
+		parents->idx[sp->role.level-1] = pvec->page[n].idx;
+	}
+
+	return n;
 }
 
-static void mmu_sync_children(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
+void mmu_pages_clear_parents(struct mmu_page_path *parents)
 {
-	struct sync_walker walker = {
-		.walker = { .entry = mmu_sync_fn, },
-		.vcpu = vcpu,
-	};
+	struct kvm_mmu_page *sp;
+	unsigned int level = 0;
+
+	do {
+		unsigned int idx = parents->idx[level];
+
+		sp = parents->parent[level];
+		if (!sp)
+			return;
+
+		--sp->unsync_children;
+		WARN_ON((int)sp->unsync_children < 0);
+		__clear_bit(idx, sp->unsync_child_bitmap);
+		level++;
+	} while (level < PT64_ROOT_LEVEL-1 && !sp->unsync_children);
+}
+
+static void kvm_mmu_pages_init(struct kvm_mmu_page *parent,
+			       struct mmu_page_path *parents,
+			       struct kvm_mmu_pages *pvec)
+{
+	parents->parent[parent->role.level-1] = NULL;
+	pvec->nr = 0;
+}
 
-	while (mmu_unsync_walk(sp, &walker.walker))
+static void mmu_sync_children(struct kvm_vcpu *vcpu,
+			      struct kvm_mmu_page *parent)
+{
+	int i;
+	struct kvm_mmu_page *sp;
+	struct mmu_page_path parents;
+	struct kvm_mmu_pages pages;
+
+	kvm_mmu_pages_init(parent, &parents, &pages);
+	while (mmu_unsync_walk(parent, &pages)) {
+		for_each_sp(pages, sp, parents, i) {
+			kvm_sync_page(vcpu, sp);
+			mmu_pages_clear_parents(&parents);
+		}
 		cond_resched_lock(&vcpu->kvm->mmu_lock);
+		kvm_mmu_pages_init(parent, &parents, &pages);
+	}
 }
 
 static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
@@ -1245,33 +1334,29 @@ static void kvm_mmu_unlink_parents(struct kvm *kvm, struct kvm_mmu_page *sp)
 	}
 }
 
-struct zap_walker {
-	struct kvm_unsync_walk walker;
-	struct kvm *kvm;
-	int zapped;
-};
-
-static int mmu_zap_fn(struct kvm_mmu_page *sp, struct kvm_unsync_walk *walk)
-{
-	struct zap_walker *zap_walk = container_of(walk, struct zap_walker,
-						     walker);
-	kvm_mmu_zap_page(zap_walk->kvm, sp);
-	zap_walk->zapped = 1;
-	return 0;
-}
-
-static int mmu_zap_unsync_children(struct kvm *kvm, struct kvm_mmu_page *sp)
+static int mmu_zap_unsync_children(struct kvm *kvm,
+				   struct kvm_mmu_page *parent)
 {
-	struct zap_walker walker = {
-		.walker = { .entry = mmu_zap_fn, },
-		.kvm = kvm,
-		.zapped = 0,
-	};
+	int i, zapped = 0;
+	struct mmu_page_path parents;
+	struct kvm_mmu_pages pages;
 
-	if (sp->role.level == PT_PAGE_TABLE_LEVEL)
+	if (parent->role.level == PT_PAGE_TABLE_LEVEL)
 		return 0;
-	mmu_unsync_walk(sp, &walker.walker);
-	return walker.zapped;
+
+	kvm_mmu_pages_init(parent, &parents, &pages);
+	while (mmu_unsync_walk(parent, &pages)) {
+		struct kvm_mmu_page *sp;
+
+		for_each_sp(pages, sp, parents, i) {
+			kvm_mmu_zap_page(kvm, sp);
+			mmu_pages_clear_parents(&parents);
+		}
+		zapped += pages.nr;
+		kvm_mmu_pages_init(parent, &parents, &pages);
+	}
+
+	return zapped;
 }
 
 static int kvm_mmu_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp)
-- 
cgit v1.2.3


From b1a368218ad5b6e62380c8f206f16e6f18bf154c Mon Sep 17 00:00:00 2001
From: Marcelo Tosatti <mtosatti@redhat.com>
Date: Mon, 1 Dec 2008 22:32:03 -0200
Subject: KVM: MMU: collapse remote TLB flushes on root sync

Collapse remote TLB flushes on root sync.

kernbench is 2.7% faster on 4-way guest. Improvements have been seen
with other loads such as AIM7.

Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/kvm/mmu.c | 19 ++++++++++++++-----
 1 file changed, 14 insertions(+), 5 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 7ce92f78f33..58c35dead32 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -621,7 +621,7 @@ static u64 *rmap_next(struct kvm *kvm, unsigned long *rmapp, u64 *spte)
 	return NULL;
 }
 
-static void rmap_write_protect(struct kvm *kvm, u64 gfn)
+static int rmap_write_protect(struct kvm *kvm, u64 gfn)
 {
 	unsigned long *rmapp;
 	u64 *spte;
@@ -667,8 +667,7 @@ static void rmap_write_protect(struct kvm *kvm, u64 gfn)
 		spte = rmap_next(kvm, rmapp, spte);
 	}
 
-	if (write_protected)
-		kvm_flush_remote_tlbs(kvm);
+	return write_protected;
 }
 
 static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp)
@@ -1083,7 +1082,8 @@ static int kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
 		return 1;
 	}
 
-	rmap_write_protect(vcpu->kvm, sp->gfn);
+	if (rmap_write_protect(vcpu->kvm, sp->gfn))
+		kvm_flush_remote_tlbs(vcpu->kvm);
 	kvm_unlink_unsync_page(vcpu->kvm, sp);
 	if (vcpu->arch.mmu.sync_page(vcpu, sp)) {
 		kvm_mmu_zap_page(vcpu->kvm, sp);
@@ -1162,6 +1162,14 @@ static void mmu_sync_children(struct kvm_vcpu *vcpu,
 
 	kvm_mmu_pages_init(parent, &parents, &pages);
 	while (mmu_unsync_walk(parent, &pages)) {
+		int protected = 0;
+
+		for_each_sp(pages, sp, parents, i)
+			protected |= rmap_write_protect(vcpu->kvm, sp->gfn);
+
+		if (protected)
+			kvm_flush_remote_tlbs(vcpu->kvm);
+
 		for_each_sp(pages, sp, parents, i) {
 			kvm_sync_page(vcpu, sp);
 			mmu_pages_clear_parents(&parents);
@@ -1226,7 +1234,8 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
 	sp->role = role;
 	hlist_add_head(&sp->hash_link, bucket);
 	if (!metaphysical) {
-		rmap_write_protect(vcpu->kvm, gfn);
+		if (rmap_write_protect(vcpu->kvm, gfn))
+			kvm_flush_remote_tlbs(vcpu->kvm);
 		account_shadowed(vcpu->kvm, gfn);
 	}
 	if (shadow_trap_nonpresent_pte != shadow_notrap_nonpresent_pte)
-- 
cgit v1.2.3


From 6cffe8ca4a2adf1ac5003d9cad08fe4434d6eee0 Mon Sep 17 00:00:00 2001
From: Marcelo Tosatti <mtosatti@redhat.com>
Date: Mon, 1 Dec 2008 22:32:04 -0200
Subject: KVM: MMU: skip global pgtables on sync due to cr3 switch

Skip syncing global pages on cr3 switch (but not on cr4/cr0). This is
important for Linux 32-bit guests with PAE, where the kmap page is
marked as global.

Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/include/asm/kvm_host.h |  6 +++++
 arch/x86/kvm/mmu.c              | 53 ++++++++++++++++++++++++++++++++++++-----
 arch/x86/kvm/paging_tmpl.h      | 10 ++++----
 arch/x86/kvm/x86.c              |  4 ++++
 4 files changed, 63 insertions(+), 10 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 93d0aed3588..65b1ed29569 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -182,6 +182,8 @@ struct kvm_mmu_page {
 	struct list_head link;
 	struct hlist_node hash_link;
 
+	struct list_head oos_link;
+
 	/*
 	 * The following two entries are used to key the shadow page in the
 	 * hash table.
@@ -200,6 +202,7 @@ struct kvm_mmu_page {
 	int multimapped;         /* More than one parent_pte? */
 	int root_count;          /* Currently serving as active root */
 	bool unsync;
+	bool global;
 	unsigned int unsync_children;
 	union {
 		u64 *parent_pte;               /* !multimapped */
@@ -356,6 +359,7 @@ struct kvm_arch{
 	 */
 	struct list_head active_mmu_pages;
 	struct list_head assigned_dev_head;
+	struct list_head oos_global_pages;
 	struct dmar_domain *intel_iommu_domain;
 	struct kvm_pic *vpic;
 	struct kvm_ioapic *vioapic;
@@ -385,6 +389,7 @@ struct kvm_vm_stat {
 	u32 mmu_recycled;
 	u32 mmu_cache_miss;
 	u32 mmu_unsync;
+	u32 mmu_unsync_global;
 	u32 remote_tlb_flush;
 	u32 lpages;
 };
@@ -603,6 +608,7 @@ void __kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu);
 int kvm_mmu_load(struct kvm_vcpu *vcpu);
 void kvm_mmu_unload(struct kvm_vcpu *vcpu);
 void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu);
+void kvm_mmu_sync_global(struct kvm_vcpu *vcpu);
 
 int kvm_emulate_hypercall(struct kvm_vcpu *vcpu);
 
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 58c35dead32..cbac9e4b156 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -793,9 +793,11 @@ static struct kvm_mmu_page *kvm_mmu_alloc_page(struct kvm_vcpu *vcpu,
 	sp->gfns = mmu_memory_cache_alloc(&vcpu->arch.mmu_page_cache, PAGE_SIZE);
 	set_page_private(virt_to_page(sp->spt), (unsigned long)sp);
 	list_add(&sp->link, &vcpu->kvm->arch.active_mmu_pages);
+	INIT_LIST_HEAD(&sp->oos_link);
 	ASSERT(is_empty_shadow_page(sp->spt));
 	bitmap_zero(sp->slot_bitmap, KVM_MEMORY_SLOTS + KVM_PRIVATE_MEM_SLOTS);
 	sp->multimapped = 0;
+	sp->global = 1;
 	sp->parent_pte = parent_pte;
 	--vcpu->kvm->arch.n_free_mmu_pages;
 	return sp;
@@ -1066,10 +1068,18 @@ static struct kvm_mmu_page *kvm_mmu_lookup_page(struct kvm *kvm, gfn_t gfn)
 	return NULL;
 }
 
+static void kvm_unlink_unsync_global(struct kvm *kvm, struct kvm_mmu_page *sp)
+{
+	list_del(&sp->oos_link);
+	--kvm->stat.mmu_unsync_global;
+}
+
 static void kvm_unlink_unsync_page(struct kvm *kvm, struct kvm_mmu_page *sp)
 {
 	WARN_ON(!sp->unsync);
 	sp->unsync = 0;
+	if (sp->global)
+		kvm_unlink_unsync_global(kvm, sp);
 	--kvm->stat.mmu_unsync;
 }
 
@@ -1615,9 +1625,15 @@ static int kvm_unsync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
 		if (s->role.word != sp->role.word)
 			return 1;
 	}
-	kvm_mmu_mark_parents_unsync(vcpu, sp);
 	++vcpu->kvm->stat.mmu_unsync;
 	sp->unsync = 1;
+
+	if (sp->global) {
+		list_add(&sp->oos_link, &vcpu->kvm->arch.oos_global_pages);
+		++vcpu->kvm->stat.mmu_unsync_global;
+	} else
+		kvm_mmu_mark_parents_unsync(vcpu, sp);
+
 	mmu_convert_notrap(sp);
 	return 0;
 }
@@ -1643,12 +1659,21 @@ static int mmu_need_write_protect(struct kvm_vcpu *vcpu, gfn_t gfn,
 static int set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte,
 		    unsigned pte_access, int user_fault,
 		    int write_fault, int dirty, int largepage,
-		    gfn_t gfn, pfn_t pfn, bool speculative,
+		    int global, gfn_t gfn, pfn_t pfn, bool speculative,
 		    bool can_unsync)
 {
 	u64 spte;
 	int ret = 0;
 	u64 mt_mask = shadow_mt_mask;
+	struct kvm_mmu_page *sp = page_header(__pa(shadow_pte));
+
+	if (!global && sp->global) {
+		sp->global = 0;
+		if (sp->unsync) {
+			kvm_unlink_unsync_global(vcpu->kvm, sp);
+			kvm_mmu_mark_parents_unsync(vcpu, sp);
+		}
+	}
 
 	/*
 	 * We don't set the accessed bit, since we sometimes want to see
@@ -1717,8 +1742,8 @@ set_pte:
 static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte,
 			 unsigned pt_access, unsigned pte_access,
 			 int user_fault, int write_fault, int dirty,
-			 int *ptwrite, int largepage, gfn_t gfn,
-			 pfn_t pfn, bool speculative)
+			 int *ptwrite, int largepage, int global,
+			 gfn_t gfn, pfn_t pfn, bool speculative)
 {
 	int was_rmapped = 0;
 	int was_writeble = is_writeble_pte(*shadow_pte);
@@ -1751,7 +1776,7 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte,
 		}
 	}
 	if (set_spte(vcpu, shadow_pte, pte_access, user_fault, write_fault,
-		      dirty, largepage, gfn, pfn, speculative, true)) {
+		      dirty, largepage, global, gfn, pfn, speculative, true)) {
 		if (write_fault)
 			*ptwrite = 1;
 		kvm_x86_ops->tlb_flush(vcpu);
@@ -1808,7 +1833,7 @@ static int direct_map_entry(struct kvm_shadow_walk *_walk,
 	    || (walk->largepage && level == PT_DIRECTORY_LEVEL)) {
 		mmu_set_spte(vcpu, sptep, ACC_ALL, ACC_ALL,
 			     0, walk->write, 1, &walk->pt_write,
-			     walk->largepage, gfn, walk->pfn, false);
+			     walk->largepage, 0, gfn, walk->pfn, false);
 		++vcpu->stat.pf_fixed;
 		return 1;
 	}
@@ -1995,6 +2020,15 @@ static void mmu_sync_roots(struct kvm_vcpu *vcpu)
 	}
 }
 
+static void mmu_sync_global(struct kvm_vcpu *vcpu)
+{
+	struct kvm *kvm = vcpu->kvm;
+	struct kvm_mmu_page *sp, *n;
+
+	list_for_each_entry_safe(sp, n, &kvm->arch.oos_global_pages, oos_link)
+		kvm_sync_page(vcpu, sp);
+}
+
 void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu)
 {
 	spin_lock(&vcpu->kvm->mmu_lock);
@@ -2002,6 +2036,13 @@ void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu)
 	spin_unlock(&vcpu->kvm->mmu_lock);
 }
 
+void kvm_mmu_sync_global(struct kvm_vcpu *vcpu)
+{
+	spin_lock(&vcpu->kvm->mmu_lock);
+	mmu_sync_global(vcpu);
+	spin_unlock(&vcpu->kvm->mmu_lock);
+}
+
 static gpa_t nonpaging_gva_to_gpa(struct kvm_vcpu *vcpu, gva_t vaddr)
 {
 	return vaddr;
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index 84eee43bbe7..e644d81979b 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -274,7 +274,8 @@ static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *page,
 		return;
 	kvm_get_pfn(pfn);
 	mmu_set_spte(vcpu, spte, page->role.access, pte_access, 0, 0,
-		     gpte & PT_DIRTY_MASK, NULL, largepage, gpte_to_gfn(gpte),
+		     gpte & PT_DIRTY_MASK, NULL, largepage,
+		     gpte & PT_GLOBAL_MASK, gpte_to_gfn(gpte),
 		     pfn, true);
 }
 
@@ -301,8 +302,9 @@ static int FNAME(shadow_walk_entry)(struct kvm_shadow_walk *_sw,
 		mmu_set_spte(vcpu, sptep, access, gw->pte_access & access,
 			     sw->user_fault, sw->write_fault,
 			     gw->ptes[gw->level-1] & PT_DIRTY_MASK,
-			     sw->ptwrite, sw->largepage, gw->gfn, sw->pfn,
-			     false);
+			     sw->ptwrite, sw->largepage,
+			     gw->ptes[gw->level-1] & PT_GLOBAL_MASK,
+			     gw->gfn, sw->pfn, false);
 		sw->sptep = sptep;
 		return 1;
 	}
@@ -580,7 +582,7 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
 		nr_present++;
 		pte_access = sp->role.access & FNAME(gpte_access)(vcpu, gpte);
 		set_spte(vcpu, &sp->spt[i], pte_access, 0, 0,
-			 is_dirty_pte(gpte), 0, gfn,
+			 is_dirty_pte(gpte), 0, gpte & PT_GLOBAL_MASK, gfn,
 			 spte_to_pfn(sp->spt[i]), true, false);
 	}
 
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 7a2aeba0bfb..774db00d2db 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -104,6 +104,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
 	{ "mmu_recycled", VM_STAT(mmu_recycled) },
 	{ "mmu_cache_miss", VM_STAT(mmu_cache_miss) },
 	{ "mmu_unsync", VM_STAT(mmu_unsync) },
+	{ "mmu_unsync_global", VM_STAT(mmu_unsync_global) },
 	{ "remote_tlb_flush", VM_STAT(remote_tlb_flush) },
 	{ "largepages", VM_STAT(lpages) },
 	{ NULL }
@@ -315,6 +316,7 @@ void kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
 	kvm_x86_ops->set_cr0(vcpu, cr0);
 	vcpu->arch.cr0 = cr0;
 
+	kvm_mmu_sync_global(vcpu);
 	kvm_mmu_reset_context(vcpu);
 	return;
 }
@@ -358,6 +360,7 @@ void kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
 	}
 	kvm_x86_ops->set_cr4(vcpu, cr4);
 	vcpu->arch.cr4 = cr4;
+	kvm_mmu_sync_global(vcpu);
 	kvm_mmu_reset_context(vcpu);
 }
 EXPORT_SYMBOL_GPL(kvm_set_cr4);
@@ -4113,6 +4116,7 @@ struct  kvm *kvm_arch_create_vm(void)
 		return ERR_PTR(-ENOMEM);
 
 	INIT_LIST_HEAD(&kvm->arch.active_mmu_pages);
+	INIT_LIST_HEAD(&kvm->arch.oos_global_pages);
 	INIT_LIST_HEAD(&kvm->arch.assigned_dev_head);
 
 	/* Reserve bit 0 of irq_sources_bitmap for userspace irq source */
-- 
cgit v1.2.3


From ad218f85e388e8ca816ff09d91c246cd014c53a8 Mon Sep 17 00:00:00 2001
From: Marcelo Tosatti <mtosatti@redhat.com>
Date: Mon, 1 Dec 2008 22:32:05 -0200
Subject: KVM: MMU: prepopulate the shadow on invlpg

If the guest executes invlpg, peek into the pagetable and attempt to
prepopulate the shadow entry.

Also stop dirty fault updates from interfering with the fork detector.

2% improvement on RHEL3/AIM7.

Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/include/asm/kvm_host.h |  3 ++-
 arch/x86/kvm/mmu.c              | 25 +++++++++++++------------
 arch/x86/kvm/paging_tmpl.h      | 25 ++++++++++++++++++++++++-
 arch/x86/kvm/x86.c              |  2 +-
 4 files changed, 40 insertions(+), 15 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 65b1ed29569..97215a458e5 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -602,7 +602,8 @@ unsigned long segment_base(u16 selector);
 
 void kvm_mmu_flush_tlb(struct kvm_vcpu *vcpu);
 void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
-		       const u8 *new, int bytes);
+		       const u8 *new, int bytes,
+		       bool guest_initiated);
 int kvm_mmu_unprotect_page_virt(struct kvm_vcpu *vcpu, gva_t gva);
 void __kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu);
 int kvm_mmu_load(struct kvm_vcpu *vcpu);
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index cbac9e4b156..863baf70506 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -2441,7 +2441,8 @@ static void kvm_mmu_access_page(struct kvm_vcpu *vcpu, gfn_t gfn)
 }
 
 void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
-		       const u8 *new, int bytes)
+		       const u8 *new, int bytes,
+		       bool guest_initiated)
 {
 	gfn_t gfn = gpa >> PAGE_SHIFT;
 	struct kvm_mmu_page *sp;
@@ -2467,15 +2468,17 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
 	kvm_mmu_free_some_pages(vcpu);
 	++vcpu->kvm->stat.mmu_pte_write;
 	kvm_mmu_audit(vcpu, "pre pte write");
-	if (gfn == vcpu->arch.last_pt_write_gfn
-	    && !last_updated_pte_accessed(vcpu)) {
-		++vcpu->arch.last_pt_write_count;
-		if (vcpu->arch.last_pt_write_count >= 3)
-			flooded = 1;
-	} else {
-		vcpu->arch.last_pt_write_gfn = gfn;
-		vcpu->arch.last_pt_write_count = 1;
-		vcpu->arch.last_pte_updated = NULL;
+	if (guest_initiated) {
+		if (gfn == vcpu->arch.last_pt_write_gfn
+		    && !last_updated_pte_accessed(vcpu)) {
+			++vcpu->arch.last_pt_write_count;
+			if (vcpu->arch.last_pt_write_count >= 3)
+				flooded = 1;
+		} else {
+			vcpu->arch.last_pt_write_gfn = gfn;
+			vcpu->arch.last_pt_write_count = 1;
+			vcpu->arch.last_pte_updated = NULL;
+		}
 	}
 	index = kvm_page_table_hashfn(gfn);
 	bucket = &vcpu->kvm->arch.mmu_page_hash[index];
@@ -2615,9 +2618,7 @@ EXPORT_SYMBOL_GPL(kvm_mmu_page_fault);
 
 void kvm_mmu_invlpg(struct kvm_vcpu *vcpu, gva_t gva)
 {
-	spin_lock(&vcpu->kvm->mmu_lock);
 	vcpu->arch.mmu.invlpg(vcpu, gva);
-	spin_unlock(&vcpu->kvm->mmu_lock);
 	kvm_mmu_flush_tlb(vcpu);
 	++vcpu->stat.invlpg;
 }
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index e644d81979b..d2064015421 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -82,6 +82,7 @@ struct shadow_walker {
 	int *ptwrite;
 	pfn_t pfn;
 	u64 *sptep;
+	gpa_t pte_gpa;
 };
 
 static gfn_t gpte_to_gfn(pt_element_t gpte)
@@ -222,7 +223,7 @@ walk:
 		if (ret)
 			goto walk;
 		pte |= PT_DIRTY_MASK;
-		kvm_mmu_pte_write(vcpu, pte_gpa, (u8 *)&pte, sizeof(pte));
+		kvm_mmu_pte_write(vcpu, pte_gpa, (u8 *)&pte, sizeof(pte), 0);
 		walker->ptes[walker->level - 1] = pte;
 	}
 
@@ -468,8 +469,15 @@ static int FNAME(shadow_invlpg_entry)(struct kvm_shadow_walk *_sw,
 				      struct kvm_vcpu *vcpu, u64 addr,
 				      u64 *sptep, int level)
 {
+	struct shadow_walker *sw =
+		container_of(_sw, struct shadow_walker, walker);
 
 	if (level == PT_PAGE_TABLE_LEVEL) {
+		struct kvm_mmu_page *sp = page_header(__pa(sptep));
+
+		sw->pte_gpa = (sp->gfn << PAGE_SHIFT);
+		sw->pte_gpa += (sptep - sp->spt) * sizeof(pt_element_t);
+
 		if (is_shadow_present_pte(*sptep))
 			rmap_remove(vcpu->kvm, sptep);
 		set_shadow_pte(sptep, shadow_trap_nonpresent_pte);
@@ -482,11 +490,26 @@ static int FNAME(shadow_invlpg_entry)(struct kvm_shadow_walk *_sw,
 
 static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva)
 {
+	pt_element_t gpte;
 	struct shadow_walker walker = {
 		.walker = { .entry = FNAME(shadow_invlpg_entry), },
+		.pte_gpa = -1,
 	};
 
+	spin_lock(&vcpu->kvm->mmu_lock);
 	walk_shadow(&walker.walker, vcpu, gva);
+	spin_unlock(&vcpu->kvm->mmu_lock);
+	if (walker.pte_gpa == -1)
+		return;
+	if (kvm_read_guest_atomic(vcpu->kvm, walker.pte_gpa, &gpte,
+				  sizeof(pt_element_t)))
+		return;
+	if (is_present_pte(gpte) && (gpte & PT_ACCESSED_MASK)) {
+		if (mmu_topup_memory_caches(vcpu))
+			return;
+		kvm_mmu_pte_write(vcpu, walker.pte_gpa, (const u8 *)&gpte,
+				  sizeof(pt_element_t), 0);
+	}
 }
 
 static gpa_t FNAME(gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t vaddr)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 774db00d2db..ba102879de3 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -2046,7 +2046,7 @@ int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa,
 	ret = kvm_write_guest(vcpu->kvm, gpa, val, bytes);
 	if (ret < 0)
 		return 0;
-	kvm_mmu_pte_write(vcpu, gpa, val, bytes);
+	kvm_mmu_pte_write(vcpu, gpa, val, bytes, 1);
 	return 1;
 }
 
-- 
cgit v1.2.3


From e93353c93a3ba4215633ce930784f40a4e94e3f9 Mon Sep 17 00:00:00 2001
From: Eduardo Habkost <ehabkost@redhat.com>
Date: Fri, 5 Dec 2008 18:36:45 -0200
Subject: x86: KVM guest: kvm_get_tsc_khz: return khz, not lpj

kvm_get_tsc_khz() currently returns the previously-calculated preset_lpj
value, but it is in loops-per-jiffy, not kHz. The current code works
correctly only when HZ=1000.

Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/kernel/kvmclock.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c
index b38e801014e..652fce6d2cc 100644
--- a/arch/x86/kernel/kvmclock.c
+++ b/arch/x86/kernel/kvmclock.c
@@ -89,17 +89,17 @@ static cycle_t kvm_clock_read(void)
  */
 static unsigned long kvm_get_tsc_khz(void)
 {
-	return preset_lpj;
+	struct pvclock_vcpu_time_info *src;
+	src = &per_cpu(hv_clock, 0);
+	return pvclock_tsc_khz(src);
 }
 
 static void kvm_get_preset_lpj(void)
 {
-	struct pvclock_vcpu_time_info *src;
 	unsigned long khz;
 	u64 lpj;
 
-	src = &per_cpu(hv_clock, 0);
-	khz = pvclock_tsc_khz(src);
+	khz = kvm_get_tsc_khz();
 
 	lpj = ((u64)khz * 1000);
 	do_div(lpj, HZ);
-- 
cgit v1.2.3


From ca9edaee1aea34ebd9adb48910aba0b3d64b1b22 Mon Sep 17 00:00:00 2001
From: Avi Kivity <avi@redhat.com>
Date: Mon, 8 Dec 2008 18:29:29 +0200
Subject: KVM: Consolidate userspace memory capability reporting into common
 code

Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/kvm/x86.c | 1 -
 1 file changed, 1 deletion(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index ba102879de3..10302d3bd41 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -964,7 +964,6 @@ int kvm_dev_ioctl_check_extension(long ext)
 	case KVM_CAP_IRQCHIP:
 	case KVM_CAP_HLT:
 	case KVM_CAP_MMU_SHADOW_CACHE_CONTROL:
-	case KVM_CAP_USER_MEMORY:
 	case KVM_CAP_SET_TSS_ADDR:
 	case KVM_CAP_EXT_CPUID:
 	case KVM_CAP_CLOCKSOURCE:
-- 
cgit v1.2.3


From eb64f1e8cd5c3cae912db30a77d062367f7a11a6 Mon Sep 17 00:00:00 2001
From: Marcelo Tosatti <mtosatti@redhat.com>
Date: Tue, 9 Dec 2008 16:07:22 +0100
Subject: KVM: MMU: check for present pdptr shadow page in walk_shadow

walk_shadow assumes the caller verified validity of the pdptr pointer in
question, which is not the case for the invlpg handler.

Fixes oops during Solaris 10 install.

Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/kvm/mmu.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 863baf70506..641c07844e6 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -1269,6 +1269,8 @@ static int walk_shadow(struct kvm_shadow_walk *walker,
 	if (level == PT32E_ROOT_LEVEL) {
 		shadow_addr = vcpu->arch.mmu.pae_root[(addr >> 30) & 3];
 		shadow_addr &= PT64_BASE_ADDR_MASK;
+		if (!shadow_addr)
+			return 1;
 		--level;
 	}
 
-- 
cgit v1.2.3


From 264ff01d55b456932cef03082448b41d2edeb6a1 Mon Sep 17 00:00:00 2001
From: Jan Kiszka <jan.kiszka@siemens.com>
Date: Mon, 24 Nov 2008 12:26:19 +0100
Subject: KVM: VMX: Fix pending NMI-vs.-IRQ race for user space irqchip

As with the kernel irqchip, don't allow an NMI to stomp over an already
injected IRQ; instead wait for the IRQ injection to be completed.

Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/kvm/vmx.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index e446f232588..487e1dcdce3 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -2486,7 +2486,9 @@ static void do_interrupt_requests(struct kvm_vcpu *vcpu,
 	vmx_update_window_states(vcpu);
 
 	if (vcpu->arch.nmi_pending && !vcpu->arch.nmi_injected) {
-		if (vcpu->arch.nmi_window_open) {
+		if (vcpu->arch.interrupt.pending) {
+			enable_nmi_window(vcpu);
+		} else if (vcpu->arch.nmi_window_open) {
 			vcpu->arch.nmi_pending = false;
 			vcpu->arch.nmi_injected = true;
 		} else {
-- 
cgit v1.2.3


From 4531220b71f0399e71cda0c4cf749e7281a7416a Mon Sep 17 00:00:00 2001
From: Jan Kiszka <jan.kiszka@siemens.com>
Date: Thu, 11 Dec 2008 16:54:54 +0100
Subject: KVM: x86: Rework user space NMI injection as KVM_CAP_USER_NMI

There is no point in doing the ready_for_nmi_injection/
request_nmi_window dance with user space. First, we don't do this for
in-kernel irqchip anyway, while the code path is the same as for user
space irqchip mode. And second, there is nothing to loose if a pending
NMI is overwritten by another one (in contrast to IRQs where we have to
save the number). Actually, there is even the risk of raising spurious
NMIs this way because the reason for the held-back NMI might already be
handled while processing the first one.

Therefore this patch creates a simplified user space NMI injection
interface, exporting it under KVM_CAP_USER_NMI and dropping the old
KVM_CAP_NMI capability. And this time we also take care to provide the
interface only on archs supporting NMIs via KVM (right now only x86).

Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/kvm/vmx.c | 24 ++----------------------
 arch/x86/kvm/x86.c | 28 ++--------------------------
 2 files changed, 4 insertions(+), 48 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 487e1dcdce3..6259d746764 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -2498,15 +2498,13 @@ static void do_interrupt_requests(struct kvm_vcpu *vcpu,
 	}
 	if (vcpu->arch.nmi_injected) {
 		vmx_inject_nmi(vcpu);
-		if (vcpu->arch.nmi_pending || kvm_run->request_nmi_window)
+		if (vcpu->arch.nmi_pending)
 			enable_nmi_window(vcpu);
 		else if (vcpu->arch.irq_summary
 			 || kvm_run->request_interrupt_window)
 			enable_irq_window(vcpu);
 		return;
 	}
-	if (!vcpu->arch.nmi_window_open || kvm_run->request_nmi_window)
-		enable_nmi_window(vcpu);
 
 	if (vcpu->arch.interrupt_window_open) {
 		if (vcpu->arch.irq_summary && !vcpu->arch.interrupt.pending)
@@ -3040,14 +3038,6 @@ static int handle_nmi_window(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 	vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control);
 	++vcpu->stat.nmi_window_exits;
 
-	/*
-	 * If the user space waits to inject a NMI, exit as soon as possible
-	 */
-	if (kvm_run->request_nmi_window && !vcpu->arch.nmi_pending) {
-		kvm_run->exit_reason = KVM_EXIT_NMI_WINDOW_OPEN;
-		return 0;
-	}
-
 	return 1;
 }
 
@@ -3162,7 +3152,7 @@ static int kvm_handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
 			vmx->soft_vnmi_blocked = 0;
 			vcpu->arch.nmi_window_open = 1;
 		} else if (vmx->vnmi_blocked_time > 1000000000LL &&
-		    (kvm_run->request_nmi_window || vcpu->arch.nmi_pending)) {
+			   vcpu->arch.nmi_pending) {
 			/*
 			 * This CPU don't support us in finding the end of an
 			 * NMI-blocked window if the guest runs with IRQs
@@ -3175,16 +3165,6 @@ static int kvm_handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
 			vmx->soft_vnmi_blocked = 0;
 			vmx->vcpu.arch.nmi_window_open = 1;
 		}
-
-		/*
-		 * If the user space waits to inject an NNI, exit ASAP
-		 */
-		if (vcpu->arch.nmi_window_open && kvm_run->request_nmi_window
-		    && !vcpu->arch.nmi_pending) {
-			kvm_run->exit_reason = KVM_EXIT_NMI_WINDOW_OPEN;
-			++vcpu->stat.nmi_window_exits;
-			return 0;
-		}
 	}
 
 	if (exit_reason < kvm_vmx_max_exit_handlers
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 10302d3bd41..0e6aa8141dc 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -2887,37 +2887,18 @@ static int dm_request_for_irq_injection(struct kvm_vcpu *vcpu,
 		(kvm_x86_ops->get_rflags(vcpu) & X86_EFLAGS_IF));
 }
 
-/*
- * Check if userspace requested a NMI window, and that the NMI window
- * is open.
- *
- * No need to exit to userspace if we already have a NMI queued.
- */
-static int dm_request_for_nmi_injection(struct kvm_vcpu *vcpu,
-					struct kvm_run *kvm_run)
-{
-	return (!vcpu->arch.nmi_pending &&
-		kvm_run->request_nmi_window &&
-		vcpu->arch.nmi_window_open);
-}
-
 static void post_kvm_run_save(struct kvm_vcpu *vcpu,
 			      struct kvm_run *kvm_run)
 {
 	kvm_run->if_flag = (kvm_x86_ops->get_rflags(vcpu) & X86_EFLAGS_IF) != 0;
 	kvm_run->cr8 = kvm_get_cr8(vcpu);
 	kvm_run->apic_base = kvm_get_apic_base(vcpu);
-	if (irqchip_in_kernel(vcpu->kvm)) {
+	if (irqchip_in_kernel(vcpu->kvm))
 		kvm_run->ready_for_interrupt_injection = 1;
-		kvm_run->ready_for_nmi_injection = 1;
-	} else {
+	else
 		kvm_run->ready_for_interrupt_injection =
 					(vcpu->arch.interrupt_window_open &&
 					 vcpu->arch.irq_summary == 0);
-		kvm_run->ready_for_nmi_injection =
-					(vcpu->arch.nmi_window_open &&
-					 vcpu->arch.nmi_pending == 0);
-	}
 }
 
 static void vapic_enter(struct kvm_vcpu *vcpu)
@@ -3093,11 +3074,6 @@ static int __vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 		}
 
 		if (r > 0) {
-			if (dm_request_for_nmi_injection(vcpu, kvm_run)) {
-				r = -EINTR;
-				kvm_run->exit_reason = KVM_EXIT_NMI;
-				++vcpu->stat.request_nmi_exits;
-			}
 			if (dm_request_for_irq_injection(vcpu, kvm_run)) {
 				r = -EINTR;
 				kvm_run->exit_reason = KVM_EXIT_INTR;
-- 
cgit v1.2.3


From 25e2343246fe135fce672f41abe61e9d2c38caac Mon Sep 17 00:00:00 2001
From: Avi Kivity <avi@redhat.com>
Date: Sun, 21 Dec 2008 18:31:10 +0200
Subject: KVM: MMU: Don't treat a global pte as such if cr4.pge is cleared

The pte.g bit is meaningless if global pages are disabled; deferring
mmu page synchronization on these ptes will lead to the guest using stale
shadow ptes.

Fixes Vista x86 smp bootloader failure.

Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/kvm/mmu.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 641c07844e6..d50ebac6a07 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -1669,6 +1669,8 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte,
 	u64 mt_mask = shadow_mt_mask;
 	struct kvm_mmu_page *sp = page_header(__pa(shadow_pte));
 
+	if (!(vcpu->arch.cr4 & X86_CR4_PGE))
+		global = 0;
 	if (!global && sp->global) {
 		sp->global = 0;
 		if (sp->unsync) {
-- 
cgit v1.2.3


From 3f353858c98dbe0240dac558a89870f4600f81bb Mon Sep 17 00:00:00 2001
From: Avi Kivity <avi@qumranet.com>
Date: Sun, 21 Dec 2008 22:48:32 +0200
Subject: KVM: Add locking to virtual i8259 interrupt controller

While most accesses to the i8259 are with the kvm mutex taken, the call
to kvm_pic_read_irq() is not.  We can't easily take the kvm mutex there
since the function is called with interrupts disabled.

Fix by adding a spinlock to the virtual interrupt controller.  Since we
can't send an IPI under the spinlock (we also take the same spinlock in
an irq disabled context), we defer the IPI until the spinlock is released.
Similarly, we defer irq ack notifications until after spinlock release to
avoid lock recursion.

Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/kvm/i8259.c | 52 ++++++++++++++++++++++++++++++++++++++++++++++++----
 arch/x86/kvm/irq.h   |  5 +++++
 2 files changed, 53 insertions(+), 4 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/i8259.c b/arch/x86/kvm/i8259.c
index 17e41e165f1..179dcb0103f 100644
--- a/arch/x86/kvm/i8259.c
+++ b/arch/x86/kvm/i8259.c
@@ -26,10 +26,40 @@
  *   Port from Qemu.
  */
 #include <linux/mm.h>
+#include <linux/bitops.h>
 #include "irq.h"
 
 #include <linux/kvm_host.h>
 
+static void pic_lock(struct kvm_pic *s)
+{
+	spin_lock(&s->lock);
+}
+
+static void pic_unlock(struct kvm_pic *s)
+{
+	struct kvm *kvm = s->kvm;
+	unsigned acks = s->pending_acks;
+	bool wakeup = s->wakeup_needed;
+	struct kvm_vcpu *vcpu;
+
+	s->pending_acks = 0;
+	s->wakeup_needed = false;
+
+	spin_unlock(&s->lock);
+
+	while (acks) {
+		kvm_notify_acked_irq(kvm, __ffs(acks));
+		acks &= acks - 1;
+	}
+
+	if (wakeup) {
+		vcpu = s->kvm->vcpus[0];
+		if (vcpu)
+			kvm_vcpu_kick(vcpu);
+	}
+}
+
 static void pic_clear_isr(struct kvm_kpic_state *s, int irq)
 {
 	s->isr &= ~(1 << irq);
@@ -136,17 +166,21 @@ static void pic_update_irq(struct kvm_pic *s)
 
 void kvm_pic_update_irq(struct kvm_pic *s)
 {
+	pic_lock(s);
 	pic_update_irq(s);
+	pic_unlock(s);
 }
 
 void kvm_pic_set_irq(void *opaque, int irq, int level)
 {
 	struct kvm_pic *s = opaque;
 
+	pic_lock(s);
 	if (irq >= 0 && irq < PIC_NUM_PINS) {
 		pic_set_irq1(&s->pics[irq >> 3], irq & 7, level);
 		pic_update_irq(s);
 	}
+	pic_unlock(s);
 }
 
 /*
@@ -172,6 +206,7 @@ int kvm_pic_read_irq(struct kvm *kvm)
 	int irq, irq2, intno;
 	struct kvm_pic *s = pic_irqchip(kvm);
 
+	pic_lock(s);
 	irq = pic_get_irq(&s->pics[0]);
 	if (irq >= 0) {
 		pic_intack(&s->pics[0], irq);
@@ -196,6 +231,7 @@ int kvm_pic_read_irq(struct kvm *kvm)
 		intno = s->pics[0].irq_base + irq;
 	}
 	pic_update_irq(s);
+	pic_unlock(s);
 	kvm_notify_acked_irq(kvm, irq);
 
 	return intno;
@@ -203,7 +239,7 @@ int kvm_pic_read_irq(struct kvm *kvm)
 
 void kvm_pic_reset(struct kvm_kpic_state *s)
 {
-	int irq, irqbase;
+	int irq, irqbase, n;
 	struct kvm *kvm = s->pics_state->irq_request_opaque;
 	struct kvm_vcpu *vcpu0 = kvm->vcpus[0];
 
@@ -214,8 +250,10 @@ void kvm_pic_reset(struct kvm_kpic_state *s)
 
 	for (irq = 0; irq < PIC_NUM_PINS/2; irq++) {
 		if (vcpu0 && kvm_apic_accept_pic_intr(vcpu0))
-			if (s->irr & (1 << irq) || s->isr & (1 << irq))
-				kvm_notify_acked_irq(kvm, irq+irqbase);
+			if (s->irr & (1 << irq) || s->isr & (1 << irq)) {
+				n = irq + irqbase;
+				s->pics_state->pending_acks |= 1 << n;
+			}
 	}
 	s->last_irr = 0;
 	s->irr = 0;
@@ -406,6 +444,7 @@ static void picdev_write(struct kvm_io_device *this,
 			printk(KERN_ERR "PIC: non byte write\n");
 		return;
 	}
+	pic_lock(s);
 	switch (addr) {
 	case 0x20:
 	case 0x21:
@@ -418,6 +457,7 @@ static void picdev_write(struct kvm_io_device *this,
 		elcr_ioport_write(&s->pics[addr & 1], addr, data);
 		break;
 	}
+	pic_unlock(s);
 }
 
 static void picdev_read(struct kvm_io_device *this,
@@ -431,6 +471,7 @@ static void picdev_read(struct kvm_io_device *this,
 			printk(KERN_ERR "PIC: non byte read\n");
 		return;
 	}
+	pic_lock(s);
 	switch (addr) {
 	case 0x20:
 	case 0x21:
@@ -444,6 +485,7 @@ static void picdev_read(struct kvm_io_device *this,
 		break;
 	}
 	*(unsigned char *)val = data;
+	pic_unlock(s);
 }
 
 /*
@@ -459,7 +501,7 @@ static void pic_irq_request(void *opaque, int level)
 	s->output = level;
 	if (vcpu && level && (s->pics[0].isr_ack & (1 << irq))) {
 		s->pics[0].isr_ack &= ~(1 << irq);
-		kvm_vcpu_kick(vcpu);
+		s->wakeup_needed = true;
 	}
 }
 
@@ -469,6 +511,8 @@ struct kvm_pic *kvm_create_pic(struct kvm *kvm)
 	s = kzalloc(sizeof(struct kvm_pic), GFP_KERNEL);
 	if (!s)
 		return NULL;
+	spin_lock_init(&s->lock);
+	s->kvm = kvm;
 	s->pics[0].elcr_mask = 0xf8;
 	s->pics[1].elcr_mask = 0xde;
 	s->irq_request = pic_irq_request;
diff --git a/arch/x86/kvm/irq.h b/arch/x86/kvm/irq.h
index b9e9051650e..2bf32a03cee 100644
--- a/arch/x86/kvm/irq.h
+++ b/arch/x86/kvm/irq.h
@@ -25,6 +25,7 @@
 #include <linux/mm_types.h>
 #include <linux/hrtimer.h>
 #include <linux/kvm_host.h>
+#include <linux/spinlock.h>
 
 #include "iodev.h"
 #include "ioapic.h"
@@ -59,6 +60,10 @@ struct kvm_kpic_state {
 };
 
 struct kvm_pic {
+	spinlock_t lock;
+	bool wakeup_needed;
+	unsigned pending_acks;
+	struct kvm *kvm;
 	struct kvm_kpic_state pics[2]; /* 0 is master pic, 1 is slave pic */
 	irq_request_func *irq_request;
 	void *irq_request_opaque;
-- 
cgit v1.2.3


From 87917239204d67a316cb89751750f86c9ed3640b Mon Sep 17 00:00:00 2001
From: Marcelo Tosatti <mtosatti@redhat.com>
Date: Mon, 22 Dec 2008 18:49:30 -0200
Subject: KVM: MMU: handle large host sptes on invlpg/resync

The invlpg and sync walkers lack knowledge of large host sptes,
descending to non-existant pagetable level.

Stop at directory level in such case.

Fixes SMP Windows XP with hugepages.

Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/kvm/mmu.c         | 2 +-
 arch/x86/kvm/paging_tmpl.h | 9 +++++++--
 2 files changed, 8 insertions(+), 3 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index d50ebac6a07..83f11c7474a 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -1007,7 +1007,7 @@ static int __mmu_unsync_walk(struct kvm_mmu_page *sp,
 	for_each_unsync_children(sp->unsync_child_bitmap, i) {
 		u64 ent = sp->spt[i];
 
-		if (is_shadow_present_pte(ent)) {
+		if (is_shadow_present_pte(ent) && !is_large_pte(ent)) {
 			struct kvm_mmu_page *child;
 			child = page_header(ent & PT64_BASE_ADDR_MASK);
 
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index d2064015421..9fd78b6e17a 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -472,14 +472,19 @@ static int FNAME(shadow_invlpg_entry)(struct kvm_shadow_walk *_sw,
 	struct shadow_walker *sw =
 		container_of(_sw, struct shadow_walker, walker);
 
-	if (level == PT_PAGE_TABLE_LEVEL) {
+	/* FIXME: properly handle invlpg on large guest pages */
+	if (level == PT_PAGE_TABLE_LEVEL ||
+	    ((level == PT_DIRECTORY_LEVEL) && is_large_pte(*sptep))) {
 		struct kvm_mmu_page *sp = page_header(__pa(sptep));
 
 		sw->pte_gpa = (sp->gfn << PAGE_SHIFT);
 		sw->pte_gpa += (sptep - sp->spt) * sizeof(pt_element_t);
 
-		if (is_shadow_present_pte(*sptep))
+		if (is_shadow_present_pte(*sptep)) {
 			rmap_remove(vcpu->kvm, sptep);
+			if (is_large_pte(*sptep))
+				--vcpu->kvm->stat.lpages;
+		}
 		set_shadow_pte(sptep, shadow_trap_nonpresent_pte);
 		return 1;
 	}
-- 
cgit v1.2.3


From 18d8fda7c3c9439be04d7ea2e82da2513b121acb Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Fri, 26 Dec 2008 00:35:37 -0500
Subject: take init_fs to saner place

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 arch/x86/kernel/init_task.c | 1 -
 1 file changed, 1 deletion(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/init_task.c b/arch/x86/kernel/init_task.c
index d39918076bb..df3bf269bea 100644
--- a/arch/x86/kernel/init_task.c
+++ b/arch/x86/kernel/init_task.c
@@ -10,7 +10,6 @@
 #include <asm/pgtable.h>
 #include <asm/desc.h>
 
-static struct fs_struct init_fs = INIT_FS;
 static struct signal_struct init_signals = INIT_SIGNALS(init_signals);
 static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
 struct mm_struct init_mm = INIT_MM(init_mm);
-- 
cgit v1.2.3


From c64d8996bd758cedc2ddc04b86ca66fa1d8599cf Mon Sep 17 00:00:00 2001
From: Cyrill Gorcunov <gorcunov@gmail.com>
Date: Fri, 2 Jan 2009 11:27:18 +0300
Subject: x86: early_printk - use sizeof instead of hardcoded number

Impact: cleanup

Signed-off-by: Cyrill Gorcunov <gorcunov@openvz.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/early_printk.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/early_printk.c b/arch/x86/kernel/early_printk.c
index 23b138e31e9..504ad198e4a 100644
--- a/arch/x86/kernel/early_printk.c
+++ b/arch/x86/kernel/early_printk.c
@@ -886,7 +886,7 @@ asmlinkage void early_printk(const char *fmt, ...)
 	va_list ap;
 
 	va_start(ap, fmt);
-	n = vscnprintf(buf, 512, fmt, ap);
+	n = vscnprintf(buf, sizeof(buf), fmt, ap);
 	early_console->write(early_console, buf, n);
 	va_end(ap);
 }
-- 
cgit v1.2.3


From a9067d537615d534dcef06c0d819472e43a0d152 Mon Sep 17 00:00:00 2001
From: Ingo Brueckl <ib@wupperonline.de>
Date: Fri, 2 Jan 2009 14:42:00 +0100
Subject: x86: convert permanent_kmaps_init() from macro to inline

Impact: cleanup

This compiler warning:

  arch/x86/mm/init_32.c:515: warning: unused variable 'pgd_base'

triggers because permanent_kmaps_init() is a CPP macro in the
!CONFIG_HIGHMEM case, that does not tell the compiler that the
'pgd_base' parameter is used.

Convert permanent_kmaps_init() (and set_highmem_pages_init()) to
C inline functions - which gives the parameter a proper type and
which gets rid of the compiler warning as well.

Signed-off-by: Ingo Brueckl <ib@wupperonline.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/mm/init_32.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index 800e1d94c1b..ad98b18f2b4 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -434,8 +434,12 @@ static void __init set_highmem_pages_init(void)
 #endif /* !CONFIG_NUMA */
 
 #else
-# define permanent_kmaps_init(pgd_base)		do { } while (0)
-# define set_highmem_pages_init()	do { } while (0)
+static inline void permanent_kmaps_init(pgd_t *pgd_base)
+{
+}
+static inline void set_highmem_pages_init(void)
+{
+}
 #endif /* CONFIG_HIGHMEM */
 
 void __init native_pagetable_setup_start(pgd_t *base)
-- 
cgit v1.2.3


From 26799a63110dcbe81291ea53178f6b4810d07424 Mon Sep 17 00:00:00 2001
From: Ravikiran G Thirumalai <kiran@scalex86.org>
Date: Wed, 31 Dec 2008 13:44:46 -0800
Subject: x86: fix incorrect __read_mostly on _boot_cpu_pda

The pda rework (commit 3461b0af025251bbc6b3d56c821c6ac2de6f7209)
to remove static boot cpu pdas introduced a performance bug.

_boot_cpu_pda is the actual pda used by the boot cpu and is definitely
not "__read_mostly" and ended up polluting the read mostly section with
writes.  This bug caused regression of about 8-10% on certain syscall
intensive workloads.

Signed-off-by: Ravikiran Thirumalai <kiran@scalex86.org>
Acked-by: Mike Travis <travis@sgi.com>
Cc: <stable@kernel.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/head64.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index 388e05a5fc1..b9a4d8c4b93 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -27,7 +27,7 @@
 #include <asm/trampoline.h>
 
 /* boot cpu pda */
-static struct x8664_pda _boot_cpu_pda __read_mostly;
+static struct x8664_pda _boot_cpu_pda;
 
 #ifdef CONFIG_SMP
 /*
-- 
cgit v1.2.3


From 46814dded1b972a07b1609d81632eef3009fbb10 Mon Sep 17 00:00:00 2001
From: Cliff Wickman <cpw@sgi.com>
Date: Wed, 31 Dec 2008 13:20:50 -0600
Subject: x86, UV: remove erroneous BAU initialization

Impact: fix crash on x86/UV

UV is the SGI "UltraViolet" machine, which is x86_64 based.
BAU is the "Broadcast Assist Unit", used for TLB shootdown in UV.

This patch removes the allocation and initialization of an unused table.

This table is left over from a development test mode.  It is unused in
the present code.

And it was incorrectly initialized: 8 entries allocated but 17 initialized,
causing slab corruption.

This patch should go into 2.6.27 and 2.6.28 as well as the current tree.

Diffed against 2.6.28 (linux-next, 12/30/08)

Signed-off-by: Cliff Wickman <cpw@sgi.com>
Cc: <stable@kernel.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/tlb_uv.c | 9 ---------
 1 file changed, 9 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/tlb_uv.c b/arch/x86/kernel/tlb_uv.c
index 6a00e5faaa7..f885023167e 100644
--- a/arch/x86/kernel/tlb_uv.c
+++ b/arch/x86/kernel/tlb_uv.c
@@ -582,7 +582,6 @@ static int __init uv_ptc_init(void)
 static struct bau_control * __init uv_table_bases_init(int blade, int node)
 {
 	int i;
-	int *ip;
 	struct bau_msg_status *msp;
 	struct bau_control *bau_tabp;
 
@@ -599,13 +598,6 @@ static struct bau_control * __init uv_table_bases_init(int blade, int node)
 		bau_cpubits_clear(&msp->seen_by, (int)
 				  uv_blade_nr_possible_cpus(blade));
 
-	bau_tabp->watching =
-	    kmalloc_node(sizeof(int) * DEST_NUM_RESOURCES, GFP_KERNEL, node);
-	BUG_ON(!bau_tabp->watching);
-
-	for (i = 0, ip = bau_tabp->watching; i < DEST_Q_SIZE; i++, ip++)
-		*ip = 0;
-
 	uv_bau_table_bases[blade] = bau_tabp;
 
 	return bau_tabp;
@@ -628,7 +620,6 @@ uv_table_bases_finish(int blade, int node, int cur_cpu,
 		bcp->bau_msg_head	= bau_tablesp->va_queue_first;
 		bcp->va_queue_first	= bau_tablesp->va_queue_first;
 		bcp->va_queue_last	= bau_tablesp->va_queue_last;
-		bcp->watching		= bau_tablesp->watching;
 		bcp->msg_statuses	= bau_tablesp->msg_statuses;
 		bcp->descriptor_base	= adp;
 	}
-- 
cgit v1.2.3


From f634fa941188a91dbf1dab961fe7a4509852fd6e Mon Sep 17 00:00:00 2001
From: Jaswinder Singh Rajput <jaswinder@infradead.org>
Date: Wed, 31 Dec 2008 16:29:48 +0530
Subject: x86: cpuid.c fix style problems

Impact: cleanup

Fixes style problems:

 WARNING: Use #include <linux/uaccess.h> instead of <asm/uaccess.h>
 ERROR: "foo * bar" should be "foo *bar"
 ERROR: trailing whitespace
 WARNING: usage of NR_CPUS is often wrong - consider using cpu_possible(), num_possible_cpus(), for_each_possible_cpu(), etc

total: 2 errors, 2 warnings

Signed-off-by: Jaswinder Singh Rajput <jaswinderrajput@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/cpuid.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/cpuid.c b/arch/x86/kernel/cpuid.c
index 72cefd1e649..85d28d53f5d 100644
--- a/arch/x86/kernel/cpuid.c
+++ b/arch/x86/kernel/cpuid.c
@@ -39,10 +39,10 @@
 #include <linux/device.h>
 #include <linux/cpu.h>
 #include <linux/notifier.h>
+#include <linux/uaccess.h>
 
 #include <asm/processor.h>
 #include <asm/msr.h>
-#include <asm/uaccess.h>
 #include <asm/system.h>
 
 static struct class *cpuid_class;
@@ -82,7 +82,7 @@ static loff_t cpuid_seek(struct file *file, loff_t offset, int orig)
 }
 
 static ssize_t cpuid_read(struct file *file, char __user *buf,
-			  size_t count, loff_t * ppos)
+			  size_t count, loff_t *ppos)
 {
 	char __user *tmp = buf;
 	struct cpuid_regs cmd;
@@ -117,7 +117,7 @@ static int cpuid_open(struct inode *inode, struct file *file)
 	unsigned int cpu;
 	struct cpuinfo_x86 *c;
 	int ret = 0;
-	
+
 	lock_kernel();
 
 	cpu = iminor(file->f_path.dentry->d_inode);
-- 
cgit v1.2.3


From 423a54058f746579aff1430877dbc82f17442b34 Mon Sep 17 00:00:00 2001
From: Jaswinder Singh Rajput <jaswinder@infradead.org>
Date: Wed, 31 Dec 2008 16:42:20 +0530
Subject: x86: ldt.c fix style problems

Impact: cleanup

Fixes style problems:

 WARNING: Use #include <linux/uaccess.h> instead of <asm/uaccess.h>
 ERROR: space required before the open parenthesis '('

total: 1 errors, 1 warnings

Signed-off-by: Jaswinder Singh Rajput <jaswinderrajput@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/ldt.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c
index eee32b43fee..71f1d99a635 100644
--- a/arch/x86/kernel/ldt.c
+++ b/arch/x86/kernel/ldt.c
@@ -12,8 +12,8 @@
 #include <linux/mm.h>
 #include <linux/smp.h>
 #include <linux/vmalloc.h>
+#include <linux/uaccess.h>
 
-#include <asm/uaccess.h>
 #include <asm/system.h>
 #include <asm/ldt.h>
 #include <asm/desc.h>
@@ -93,7 +93,7 @@ static inline int copy_ldt(mm_context_t *new, mm_context_t *old)
 	if (err < 0)
 		return err;
 
-	for(i = 0; i < old->size; i++)
+	for (i = 0; i < old->size; i++)
 		write_ldt_entry(new->ldt, i, old->ldt + i * LDT_ENTRY_SIZE);
 	return 0;
 }
-- 
cgit v1.2.3


From dceb4521c8ed24b9fe4230e0d385cf4770260383 Mon Sep 17 00:00:00 2001
From: Jaswinder Singh Rajput <jaswinder@infradead.org>
Date: Wed, 31 Dec 2008 17:35:02 +0530
Subject: x86: nmi.c fix style problems

Impact: cleanup, fix style problems

Fixes style problems:

 WARNING: Use #include <linux/smp.h> instead of <asm/smp.h>
 WARNING: Use #include <linux/nmi.h> instead of <asm/nmi.h>

total: 0 errors, 2 warnings

Signed-off-by: Jaswinder Singh Rajput <jaswinderrajput@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/nmi.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c
index 8bd1bf9622a..45a09ccdc21 100644
--- a/arch/x86/kernel/nmi.c
+++ b/arch/x86/kernel/nmi.c
@@ -26,11 +26,10 @@
 #include <linux/kernel_stat.h>
 #include <linux/kdebug.h>
 #include <linux/smp.h>
+#include <linux/nmi.h>
 
 #include <asm/i8259.h>
 #include <asm/io_apic.h>
-#include <asm/smp.h>
-#include <asm/nmi.h>
 #include <asm/proto.h>
 #include <asm/timer.h>
 
-- 
cgit v1.2.3


From 103ceffb9501531f6931df6aebc11a05189201f0 Mon Sep 17 00:00:00 2001
From: Jaswinder Singh Rajput <jaswinder@infradead.org>
Date: Fri, 2 Jan 2009 23:43:25 +0530
Subject: x86: mpparse.c fix style problems

Impact: cleanup, fix style problems, more readable

Fixes style problems:

 WARNING: Use #include <linux/smp.h> instead of <asm/smp.h>
 WARNING: Use #include <linux/acpi.h> instead of <asm/acpi.h>
 WARNING: suspect code indent for conditional statements (8, 17)
 WARNING: space prohibited between function name and open parenthesis '('

total: 0 errors, 5 warnings

Signed-off-by: Jaswinder Singh Rajput <jaswinderrajput@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/mpparse.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c
index 45e3b69808b..c5c5b8df1db 100644
--- a/arch/x86/kernel/mpparse.c
+++ b/arch/x86/kernel/mpparse.c
@@ -16,14 +16,14 @@
 #include <linux/bitops.h>
 #include <linux/acpi.h>
 #include <linux/module.h>
+#include <linux/smp.h>
+#include <linux/acpi.h>
 
-#include <asm/smp.h>
 #include <asm/mtrr.h>
 #include <asm/mpspec.h>
 #include <asm/pgalloc.h>
 #include <asm/io_apic.h>
 #include <asm/proto.h>
-#include <asm/acpi.h>
 #include <asm/bios_ebda.h>
 #include <asm/e820.h>
 #include <asm/trampoline.h>
@@ -95,8 +95,8 @@ static void __init MP_bus_info(struct mpc_config_bus *m)
 #endif
 
 	if (strncmp(str, BUSTYPE_ISA, sizeof(BUSTYPE_ISA) - 1) == 0) {
-		 set_bit(m->mpc_busid, mp_bus_not_pci);
-#if defined(CONFIG_EISA) || defined (CONFIG_MCA)
+		set_bit(m->mpc_busid, mp_bus_not_pci);
+#if defined(CONFIG_EISA) || defined(CONFIG_MCA)
 		mp_bus_id_to_type[m->mpc_busid] = MP_BUS_ISA;
 #endif
 	} else if (strncmp(str, BUSTYPE_PCI, sizeof(BUSTYPE_PCI) - 1) == 0) {
@@ -104,7 +104,7 @@ static void __init MP_bus_info(struct mpc_config_bus *m)
 			x86_quirks->mpc_oem_pci_bus(m);
 
 		clear_bit(m->mpc_busid, mp_bus_not_pci);
-#if defined(CONFIG_EISA) || defined (CONFIG_MCA)
+#if defined(CONFIG_EISA) || defined(CONFIG_MCA)
 		mp_bus_id_to_type[m->mpc_busid] = MP_BUS_PCI;
 	} else if (strncmp(str, BUSTYPE_EISA, sizeof(BUSTYPE_EISA) - 1) == 0) {
 		mp_bus_id_to_type[m->mpc_busid] = MP_BUS_EISA;
-- 
cgit v1.2.3


From e8e32326279cba3d049b4325111f76618953195c Mon Sep 17 00:00:00 2001
From: Ingo Brueckl <ib@wupperonline.de>
Date: Fri, 2 Jan 2009 14:42:00 +0100
Subject: Fix compiler warning in arch/x86/mm/init_32.c

Signed-off-by: Ingo Brueckl <ib@wupperonline.de>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/x86/mm/init_32.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index 8655b5bb096..f99a6c6c432 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -435,8 +435,12 @@ static void __init set_highmem_pages_init(void)
 #endif /* !CONFIG_NUMA */
 
 #else
-# define permanent_kmaps_init(pgd_base)		do { } while (0)
-# define set_highmem_pages_init()	do { } while (0)
+static inline void permanent_kmaps_init(pgd_t *pgd_base)
+{
+}
+static inline void set_highmem_pages_init(void)
+{
+}
 #endif /* CONFIG_HIGHMEM */
 
 void __init native_pagetable_setup_start(pgd_t *base)
-- 
cgit v1.2.3


From 79ff56ebd3edfb16f8badc558cb439b203a3298f Mon Sep 17 00:00:00 2001
From: Roland Dreier <rdreier@cisco.com>
Date: Tue, 30 Dec 2008 20:18:00 -0800
Subject: swiotlb: add missing __init annotations

Impact: cleanup, reduce kernel size a bit

The current kernel build warns:

    WARNING: vmlinux.o(.text+0x11458): Section mismatch in reference from the function swiotlb_alloc_boot() to the function .init.text:__alloc_bootmem_low()
    The function swiotlb_alloc_boot() references
    the function __init __alloc_bootmem_low().
    This is often because swiotlb_alloc_boot lacks a __init
    annotation or the annotation of __alloc_bootmem_low is wrong.

    WARNING: vmlinux.o(.text+0x1011f2): Section mismatch in reference from the function swiotlb_late_init_with_default_size() to the function .init.text:__alloc_bootmem_low()
    The function swiotlb_late_init_with_default_size() references
    the function __init __alloc_bootmem_low().
    This is often because swiotlb_late_init_with_default_size lacks a __init
    annotation or the annotation of __alloc_bootmem_low is wrong.

and indeed the functions calling __alloc_bootmem_low() can be marked
__init as well.

Signed-off-by: Roland Dreier <rolandd@cisco.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/pci-swiotlb_64.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/pci-swiotlb_64.c b/arch/x86/kernel/pci-swiotlb_64.c
index 242c3440687..8cba3749a51 100644
--- a/arch/x86/kernel/pci-swiotlb_64.c
+++ b/arch/x86/kernel/pci-swiotlb_64.c
@@ -13,7 +13,7 @@
 
 int swiotlb __read_mostly;
 
-void *swiotlb_alloc_boot(size_t size, unsigned long nslabs)
+void * __init swiotlb_alloc_boot(size_t size, unsigned long nslabs)
 {
 	return alloc_bootmem_low_pages(size);
 }
-- 
cgit v1.2.3


From 37dd3cb4153084ff693e738b7ee087990d351e5e Mon Sep 17 00:00:00 2001
From: Markus Trippelsdorf <markus@trippelsdorf.de>
Date: Wed, 31 Dec 2008 13:00:43 +0100
Subject: x86: remove debug printks (io_apic.c)

Impact: reduce printk noise

The message "alloc irq_2_pin on cpu 0 node 0" is printed way too often.

% dmesg|grep irq_2_pin|wc -l
20

Get rid of the debug printks.

Signed-off-by: Markus Trippelsdorf <markus@trippelsdorf.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/io_apic.c | 2 --
 1 file changed, 2 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c
index 69911722b9d..45073520407 100644
--- a/arch/x86/kernel/io_apic.c
+++ b/arch/x86/kernel/io_apic.c
@@ -129,7 +129,6 @@ static struct irq_pin_list *get_one_free_irq_2_pin(int cpu)
 	node = cpu_to_node(cpu);
 
 	pin = kzalloc_node(sizeof(*pin), GFP_ATOMIC, node);
-	printk(KERN_DEBUG "  alloc irq_2_pin on cpu %d node %d\n", cpu, node);
 
 	return pin;
 }
@@ -227,7 +226,6 @@ static struct irq_cfg *get_one_free_irq_cfg(int cpu)
 			cpumask_clear(cfg->old_domain);
 		}
 	}
-	printk(KERN_DEBUG "  alloc irq_cfg on cpu %d node %d\n", cpu, node);
 
 	return cfg;
 }
-- 
cgit v1.2.3


From c4fa3864281c7d88b7262cbc6cbd5c90bb59860e Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Wed, 26 Nov 2008 15:51:19 +0100
Subject: KVM: rename vtd.c to iommu.c

Impact: file renamed

The code in the vtd.c file can be reused for other IOMMUs as well. So
rename it to make it clear that it handle more than VT-d.

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
 arch/x86/kvm/Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile
index c02343594b4..00f46c2d14b 100644
--- a/arch/x86/kvm/Makefile
+++ b/arch/x86/kvm/Makefile
@@ -8,7 +8,7 @@ ifeq ($(CONFIG_KVM_TRACE),y)
 common-objs += $(addprefix ../../../virt/kvm/, kvm_trace.o)
 endif
 ifeq ($(CONFIG_DMAR),y)
-common-objs += $(addprefix ../../../virt/kvm/, vtd.o)
+common-objs += $(addprefix ../../../virt/kvm/, iommu.o)
 endif
 
 EXTRA_CFLAGS += -Ivirt/kvm -Iarch/x86/kvm
-- 
cgit v1.2.3


From 1aaf118352b85bb359ce28070bcc478f659a7031 Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Wed, 26 Nov 2008 17:25:13 +0100
Subject: select IOMMU_API when DMAR and/or AMD_IOMMU is selected

These two IOMMUs can implement the current version of this API. So
select the API if one or both of these IOMMU drivers is selected.

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
 arch/x86/Kconfig | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'arch/x86')

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 249d1e0824b..4737435b00d 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -599,6 +599,9 @@ config SWIOTLB
 config IOMMU_HELPER
 	def_bool (CALGARY_IOMMU || GART_IOMMU || SWIOTLB || AMD_IOMMU)
 
+config IOMMU_API
+	def_bool (AMD_IOMMU || DMAR)
+
 config MAXSMP
 	bool "Configure Maximum number of SMP Processors and NUMA Nodes"
 	depends on X86_64 && SMP && DEBUG_KERNEL && EXPERIMENTAL
-- 
cgit v1.2.3


From 19de40a8472fa64693eab844911eec277d489f6c Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Wed, 3 Dec 2008 14:43:34 +0100
Subject: KVM: change KVM to use IOMMU API

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
 arch/x86/include/asm/kvm_host.h | 2 +-
 arch/x86/kvm/Makefile           | 2 +-
 arch/x86/kvm/x86.c              | 3 ++-
 3 files changed, 4 insertions(+), 3 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 97215a458e5..730843d1d2f 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -360,7 +360,7 @@ struct kvm_arch{
 	struct list_head active_mmu_pages;
 	struct list_head assigned_dev_head;
 	struct list_head oos_global_pages;
-	struct dmar_domain *intel_iommu_domain;
+	struct iommu_domain *iommu_domain;
 	struct kvm_pic *vpic;
 	struct kvm_ioapic *vioapic;
 	struct kvm_pit *vpit;
diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile
index 00f46c2d14b..d3ec292f00f 100644
--- a/arch/x86/kvm/Makefile
+++ b/arch/x86/kvm/Makefile
@@ -7,7 +7,7 @@ common-objs = $(addprefix ../../../virt/kvm/, kvm_main.o ioapic.o \
 ifeq ($(CONFIG_KVM_TRACE),y)
 common-objs += $(addprefix ../../../virt/kvm/, kvm_trace.o)
 endif
-ifeq ($(CONFIG_DMAR),y)
+ifeq ($(CONFIG_IOMMU_API),y)
 common-objs += $(addprefix ../../../virt/kvm/, iommu.o)
 endif
 
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 0e6aa8141dc..cc17546a240 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -34,6 +34,7 @@
 #include <linux/module.h>
 #include <linux/mman.h>
 #include <linux/highmem.h>
+#include <linux/iommu.h>
 #include <linux/intel-iommu.h>
 
 #include <asm/uaccess.h>
@@ -989,7 +990,7 @@ int kvm_dev_ioctl_check_extension(long ext)
 		r = !tdp_enabled;
 		break;
 	case KVM_CAP_IOMMU:
-		r = intel_iommu_found();
+		r = iommu_found();
 		break;
 	default:
 		r = 0;
-- 
cgit v1.2.3


From 38e817febe2f12bd2fbf92a1df36f41946d0c223 Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Tue, 2 Dec 2008 17:27:52 +0100
Subject: AMD IOMMU: rename iommu_map to iommu_map_page

Impact: function rename

The iommu_map function maps only one page. Make this clear in the
function name.

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
 arch/x86/kernel/amd_iommu.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
index 2e2da717b35..b11c855af7b 100644
--- a/arch/x86/kernel/amd_iommu.c
+++ b/arch/x86/kernel/amd_iommu.c
@@ -338,10 +338,10 @@ static void iommu_flush_tlb(struct amd_iommu *iommu, u16 domid)
  * supporting all features of AMD IOMMU page tables like level skipping
  * and full 64 bit address spaces.
  */
-static int iommu_map(struct protection_domain *dom,
-		     unsigned long bus_addr,
-		     unsigned long phys_addr,
-		     int prot)
+static int iommu_map_page(struct protection_domain *dom,
+			  unsigned long bus_addr,
+			  unsigned long phys_addr,
+			  int prot)
 {
 	u64 __pte, *pte, *page;
 
@@ -440,7 +440,7 @@ static int dma_ops_unity_map(struct dma_ops_domain *dma_dom,
 
 	for (addr = e->address_start; addr < e->address_end;
 	     addr += PAGE_SIZE) {
-		ret = iommu_map(&dma_dom->domain, addr, addr, e->prot);
+		ret = iommu_map_page(&dma_dom->domain, addr, addr, e->prot);
 		if (ret)
 			return ret;
 		/*
-- 
cgit v1.2.3


From 86db2e5d47bfa61a151d6ac83263f4bde4d52290 Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Tue, 2 Dec 2008 18:20:21 +0100
Subject: AMD IOMMU: make dma_ops_free_pagetable generic

Impact: change code to free pagetables from protection domains

The dma_ops_free_pagetable function can only free pagetables from
dma_ops domains. Change that to free pagetables of pure protection
domains.

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
 arch/x86/kernel/amd_iommu.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
index b11c855af7b..8a0fd3d0997 100644
--- a/arch/x86/kernel/amd_iommu.c
+++ b/arch/x86/kernel/amd_iommu.c
@@ -587,12 +587,12 @@ static void dma_ops_reserve_addresses(struct dma_ops_domain *dom,
 	iommu_area_reserve(dom->bitmap, start_page, pages);
 }
 
-static void dma_ops_free_pagetable(struct dma_ops_domain *dma_dom)
+static void free_pagetable(struct protection_domain *domain)
 {
 	int i, j;
 	u64 *p1, *p2, *p3;
 
-	p1 = dma_dom->domain.pt_root;
+	p1 = domain->pt_root;
 
 	if (!p1)
 		return;
@@ -613,6 +613,8 @@ static void dma_ops_free_pagetable(struct dma_ops_domain *dma_dom)
 	}
 
 	free_page((unsigned long)p1);
+
+	domain->pt_root = NULL;
 }
 
 /*
@@ -624,7 +626,7 @@ static void dma_ops_domain_free(struct dma_ops_domain *dom)
 	if (!dom)
 		return;
 
-	dma_ops_free_pagetable(dom);
+	free_pagetable(&dom->domain);
 
 	kfree(dom->pte_pages);
 
-- 
cgit v1.2.3


From a2acfb75792511a35586db80a38b8e4701a97730 Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Tue, 2 Dec 2008 18:28:53 +0100
Subject: AMD IOMMU: add domain id free function

Impact: add code to release a domain id

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
 arch/x86/kernel/amd_iommu.c | 12 ++++++++++++
 1 file changed, 12 insertions(+)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
index 8a0fd3d0997..0922d5fe633 100644
--- a/arch/x86/kernel/amd_iommu.c
+++ b/arch/x86/kernel/amd_iommu.c
@@ -571,6 +571,18 @@ static u16 domain_id_alloc(void)
 	return id;
 }
 
+#ifdef CONFIG_IOMMU_API
+static void domain_id_free(int id)
+{
+	unsigned long flags;
+
+	write_lock_irqsave(&amd_iommu_devtable_lock, flags);
+	if (id > 0 && id < MAX_DOMAIN_ID)
+		__clear_bit(id, amd_iommu_pd_alloc_bitmap);
+	write_unlock_irqrestore(&amd_iommu_devtable_lock, flags);
+}
+#endif
+
 /*
  * Used to reserve address ranges in the aperture (e.g. for exclusion
  * ranges.
-- 
cgit v1.2.3


From 8d201968e15f56ae2837b0d0b64d3fff098857b0 Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Tue, 2 Dec 2008 20:34:41 +0100
Subject: AMD IOMMU: refactor completion wait handling into separate functions

Impact: split one function into three

The separate functions are required synchronize commands across all
hardware IOMMUs in the system.

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
 arch/x86/kernel/amd_iommu.c | 68 ++++++++++++++++++++++++++++++---------------
 1 file changed, 45 insertions(+), 23 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
index 0922d5fe633..2280ef86651 100644
--- a/arch/x86/kernel/amd_iommu.c
+++ b/arch/x86/kernel/amd_iommu.c
@@ -195,6 +195,46 @@ static int iommu_queue_command(struct amd_iommu *iommu, struct iommu_cmd *cmd)
 	return ret;
 }
 
+/*
+ * This function waits until an IOMMU has completed a completion
+ * wait command
+ */
+static void __iommu_wait_for_completion(struct amd_iommu *iommu)
+{
+	int ready = 0;
+	unsigned status = 0;
+	unsigned long i = 0;
+
+	while (!ready && (i < EXIT_LOOP_COUNT)) {
+		++i;
+		/* wait for the bit to become one */
+		status = readl(iommu->mmio_base + MMIO_STATUS_OFFSET);
+		ready = status & MMIO_STATUS_COM_WAIT_INT_MASK;
+	}
+
+	/* set bit back to zero */
+	status &= ~MMIO_STATUS_COM_WAIT_INT_MASK;
+	writel(status, iommu->mmio_base + MMIO_STATUS_OFFSET);
+
+	if (unlikely(i == EXIT_LOOP_COUNT))
+		panic("AMD IOMMU: Completion wait loop failed\n");
+}
+
+/*
+ * This function queues a completion wait command into the command
+ * buffer of an IOMMU
+ */
+static int __iommu_completion_wait(struct amd_iommu *iommu)
+{
+	struct iommu_cmd cmd;
+
+	 memset(&cmd, 0, sizeof(cmd));
+	 cmd.data[0] = CMD_COMPL_WAIT_INT_MASK;
+	 CMD_SET_TYPE(&cmd, CMD_COMPL_WAIT);
+
+	 return __iommu_queue_command(iommu, &cmd);
+}
+
 /*
  * This function is called whenever we need to ensure that the IOMMU has
  * completed execution of all commands we sent. It sends a
@@ -204,40 +244,22 @@ static int iommu_queue_command(struct amd_iommu *iommu, struct iommu_cmd *cmd)
  */
 static int iommu_completion_wait(struct amd_iommu *iommu)
 {
-	int ret = 0, ready = 0;
-	unsigned status = 0;
-	struct iommu_cmd cmd;
-	unsigned long flags, i = 0;
-
-	memset(&cmd, 0, sizeof(cmd));
-	cmd.data[0] = CMD_COMPL_WAIT_INT_MASK;
-	CMD_SET_TYPE(&cmd, CMD_COMPL_WAIT);
+	int ret = 0;
+	unsigned long flags;
 
 	spin_lock_irqsave(&iommu->lock, flags);
 
 	if (!iommu->need_sync)
 		goto out;
 
-	iommu->need_sync = 0;
+	ret = __iommu_completion_wait(iommu);
 
-	ret = __iommu_queue_command(iommu, &cmd);
+	iommu->need_sync = 0;
 
 	if (ret)
 		goto out;
 
-	while (!ready && (i < EXIT_LOOP_COUNT)) {
-		++i;
-		/* wait for the bit to become one */
-		status = readl(iommu->mmio_base + MMIO_STATUS_OFFSET);
-		ready = status & MMIO_STATUS_COM_WAIT_INT_MASK;
-	}
-
-	/* set bit back to zero */
-	status &= ~MMIO_STATUS_COM_WAIT_INT_MASK;
-	writel(status, iommu->mmio_base + MMIO_STATUS_OFFSET);
-
-	if (unlikely(i == EXIT_LOOP_COUNT))
-		panic("AMD IOMMU: Completion wait loop failed\n");
+	__iommu_wait_for_completion(iommu);
 
 out:
 	spin_unlock_irqrestore(&iommu->lock, flags);
-- 
cgit v1.2.3


From 237b6f33291394c337ae84e2d3782d5605803af2 Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Tue, 2 Dec 2008 20:54:37 +0100
Subject: AMD IOMMU: move invalidation command building to a separate function

Impact: refactoring of iommu_queue_inv_iommu_pages

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
 arch/x86/kernel/amd_iommu.c | 26 ++++++++++++++++----------
 1 file changed, 16 insertions(+), 10 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
index 2280ef86651..fee16fbf2f3 100644
--- a/arch/x86/kernel/amd_iommu.c
+++ b/arch/x86/kernel/amd_iommu.c
@@ -286,6 +286,21 @@ static int iommu_queue_inv_dev_entry(struct amd_iommu *iommu, u16 devid)
 	return ret;
 }
 
+static void __iommu_build_inv_iommu_pages(struct iommu_cmd *cmd, u64 address,
+					  u16 domid, int pde, int s)
+{
+	memset(cmd, 0, sizeof(*cmd));
+	address &= PAGE_MASK;
+	CMD_SET_TYPE(cmd, CMD_INV_IOMMU_PAGES);
+	cmd->data[1] |= domid;
+	cmd->data[2] = lower_32_bits(address);
+	cmd->data[3] = upper_32_bits(address);
+	if (s) /* size bit - we flush more than one 4kb page */
+		cmd->data[2] |= CMD_INV_IOMMU_PAGES_SIZE_MASK;
+	if (pde) /* PDE bit - we wan't flush everything not only the PTEs */
+		cmd->data[2] |= CMD_INV_IOMMU_PAGES_PDE_MASK;
+}
+
 /*
  * Generic command send function for invalidaing TLB entries
  */
@@ -295,16 +310,7 @@ static int iommu_queue_inv_iommu_pages(struct amd_iommu *iommu,
 	struct iommu_cmd cmd;
 	int ret;
 
-	memset(&cmd, 0, sizeof(cmd));
-	address &= PAGE_MASK;
-	CMD_SET_TYPE(&cmd, CMD_INV_IOMMU_PAGES);
-	cmd.data[1] |= domid;
-	cmd.data[2] = lower_32_bits(address);
-	cmd.data[3] = upper_32_bits(address);
-	if (s) /* size bit - we flush more than one 4kb page */
-		cmd.data[2] |= CMD_INV_IOMMU_PAGES_SIZE_MASK;
-	if (pde) /* PDE bit - we wan't flush everything not only the PTEs */
-		cmd.data[2] |= CMD_INV_IOMMU_PAGES_PDE_MASK;
+	__iommu_build_inv_iommu_pages(&cmd, address, domid, pde, s);
 
 	ret = iommu_queue_command(iommu, &cmd);
 
-- 
cgit v1.2.3


From 9e919012e33c481991e46aa4cb13d807cd47b798 Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Wed, 10 Dec 2008 20:05:52 +0100
Subject: AMD IOMMU: don't remove protection domain from iommu_pd_list

Impact: save unneeded logic to add and remove domains to the list

The removal of a protection domain from the iommu_pd_list is not
necessary. Another benefit is that we save complexity because we don't
have to readd it later when the device no longer uses the domain.

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
 arch/x86/kernel/amd_iommu.c | 1 -
 1 file changed, 1 deletion(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
index fee16fbf2f3..b7b3067630c 100644
--- a/arch/x86/kernel/amd_iommu.c
+++ b/arch/x86/kernel/amd_iommu.c
@@ -844,7 +844,6 @@ static struct dma_ops_domain *find_protection_domain(u16 devid)
 	list_for_each_entry(entry, &iommu_pd_list, list) {
 		if (entry->target_dev == devid) {
 			ret = entry;
-			list_del(&ret->list);
 			break;
 		}
 	}
-- 
cgit v1.2.3


From 43f4960983a309568a6c4375f081e63fb2ff24a3 Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Tue, 2 Dec 2008 21:01:12 +0100
Subject: AMD IOMMU: add iommu_flush_domain function

Impact: add a function to flush a domain id on every IOMMU

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
 arch/x86/kernel/amd_iommu.c | 24 ++++++++++++++++++++++++
 1 file changed, 24 insertions(+)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
index b7b3067630c..2b6b8e050bd 100644
--- a/arch/x86/kernel/amd_iommu.c
+++ b/arch/x86/kernel/amd_iommu.c
@@ -352,6 +352,30 @@ static void iommu_flush_tlb(struct amd_iommu *iommu, u16 domid)
 	iommu_queue_inv_iommu_pages(iommu, address, domid, 0, 1);
 }
 
+#ifdef CONFIG_IOMMU_API
+/*
+ * This function is used to flush the IO/TLB for a given protection domain
+ * on every IOMMU in the system
+ */
+static void iommu_flush_domain(u16 domid)
+{
+	unsigned long flags;
+	struct amd_iommu *iommu;
+	struct iommu_cmd cmd;
+
+	__iommu_build_inv_iommu_pages(&cmd, CMD_INV_IOMMU_ALL_PAGES_ADDRESS,
+				      domid, 1, 1);
+
+	list_for_each_entry(iommu, &amd_iommu_list, list) {
+		spin_lock_irqsave(&iommu->lock, flags);
+		__iommu_queue_command(iommu, &cmd);
+		__iommu_completion_wait(iommu);
+		__iommu_wait_for_completion(iommu);
+		spin_unlock_irqrestore(&iommu->lock, flags);
+	}
+}
+#endif
+
 /****************************************************************************
  *
  * The functions below are used the create the page table mappings for
-- 
cgit v1.2.3


From 9fdb19d64c0247f23343b51fc85f438f8e7a2f3c Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Tue, 2 Dec 2008 17:46:25 +0100
Subject: AMD IOMMU: add protection domain flags

Imapct: add a new struct member to 'struct protection_domain'

When using protection domains for dma_ops and KVM its better to know for
which subsystem it was allocated. Add a flags member to struct
protection domain for that purpose.

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
 arch/x86/include/asm/amd_iommu_types.h | 14 +++++++++-----
 arch/x86/kernel/amd_iommu.c            |  1 +
 2 files changed, 10 insertions(+), 5 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/amd_iommu_types.h b/arch/x86/include/asm/amd_iommu_types.h
index ac302a2fa33..4862a5be899 100644
--- a/arch/x86/include/asm/amd_iommu_types.h
+++ b/arch/x86/include/asm/amd_iommu_types.h
@@ -190,16 +190,20 @@
 /* FIXME: move this macro to <linux/pci.h> */
 #define PCI_BUS(x) (((x) >> 8) & 0xff)
 
+/* Protection domain flags */
+#define PD_DMA_OPS_MASK		(1UL << 0) /* domain used for dma_ops */
+
 /*
  * This structure contains generic data for  IOMMU protection domains
  * independent of their use.
  */
 struct protection_domain {
-	spinlock_t lock; /* mostly used to lock the page table*/
-	u16 id;		 /* the domain id written to the device table */
-	int mode;	 /* paging mode (0-6 levels) */
-	u64 *pt_root;	 /* page table root pointer */
-	void *priv;	 /* private data */
+	spinlock_t lock;	/* mostly used to lock the page table*/
+	u16 id;			/* the domain id written to the device table */
+	int mode;		/* paging mode (0-6 levels) */
+	u64 *pt_root;		/* page table root pointer */
+	unsigned long flags;	/* flags to find out type of domain */
+	void *priv;		/* private data */
 };
 
 /*
diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
index 2b6b8e050bd..bb28e2cda71 100644
--- a/arch/x86/kernel/amd_iommu.c
+++ b/arch/x86/kernel/amd_iommu.c
@@ -729,6 +729,7 @@ static struct dma_ops_domain *dma_ops_domain_alloc(struct amd_iommu *iommu,
 		goto free_dma_dom;
 	dma_dom->domain.mode = PAGE_MODE_3_LEVEL;
 	dma_dom->domain.pt_root = (void *)get_zeroed_page(GFP_KERNEL);
+	dma_dom->domain.flags = PD_DMA_OPS_MASK;
 	dma_dom->domain.priv = dma_dom;
 	if (!dma_dom->domain.pt_root)
 		goto free_dma_dom;
-- 
cgit v1.2.3


From 5b28df6f43ac9878f310ad0cb7f11ddb262a7ac6 Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Tue, 2 Dec 2008 17:49:42 +0100
Subject: AMD IOMMU: add checks for dma_ops domain to dma_ops functions

Impact: detect when a driver uses a device assigned otherwise

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
 arch/x86/kernel/amd_iommu.c | 41 +++++++++++++++++++++++++++++++++++------
 1 file changed, 35 insertions(+), 6 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
index bb28e2cda71..5c465c91150 100644
--- a/arch/x86/kernel/amd_iommu.c
+++ b/arch/x86/kernel/amd_iommu.c
@@ -791,6 +791,15 @@ free_dma_dom:
 	return NULL;
 }
 
+/*
+ * little helper function to check whether a given protection domain is a
+ * dma_ops domain
+ */
+static bool dma_ops_domain(struct protection_domain *domain)
+{
+	return domain->flags & PD_DMA_OPS_MASK;
+}
+
 /*
  * Find out the protection domain structure for a given PCI device. This
  * will give us the pointer to the page table root for example.
@@ -1096,6 +1105,9 @@ static dma_addr_t map_single(struct device *dev, phys_addr_t paddr,
 		/* device not handled by any AMD IOMMU */
 		return (dma_addr_t)paddr;
 
+	if (!dma_ops_domain(domain))
+		return bad_dma_address;
+
 	spin_lock_irqsave(&domain->lock, flags);
 	addr = __map_single(dev, iommu, domain->priv, paddr, size, dir, false,
 			    dma_mask);
@@ -1126,6 +1138,9 @@ static void unmap_single(struct device *dev, dma_addr_t dma_addr,
 		/* device not handled by any AMD IOMMU */
 		return;
 
+	if (!dma_ops_domain(domain))
+		return;
+
 	spin_lock_irqsave(&domain->lock, flags);
 
 	__unmap_single(iommu, domain->priv, dma_addr, size, dir);
@@ -1180,6 +1195,9 @@ static int map_sg(struct device *dev, struct scatterlist *sglist,
 	if (!iommu || !domain)
 		return map_sg_no_iommu(dev, sglist, nelems, dir);
 
+	if (!dma_ops_domain(domain))
+		return 0;
+
 	spin_lock_irqsave(&domain->lock, flags);
 
 	for_each_sg(sglist, s, nelems, i) {
@@ -1233,6 +1251,9 @@ static void unmap_sg(struct device *dev, struct scatterlist *sglist,
 	    !get_device_resources(dev, &iommu, &domain, &devid))
 		return;
 
+	if (!dma_ops_domain(domain))
+		return;
+
 	spin_lock_irqsave(&domain->lock, flags);
 
 	for_each_sg(sglist, s, nelems, i) {
@@ -1278,6 +1299,9 @@ static void *alloc_coherent(struct device *dev, size_t size,
 		return virt_addr;
 	}
 
+	if (!dma_ops_domain(domain))
+		goto out_free;
+
 	if (!dma_mask)
 		dma_mask = *dev->dma_mask;
 
@@ -1286,18 +1310,20 @@ static void *alloc_coherent(struct device *dev, size_t size,
 	*dma_addr = __map_single(dev, iommu, domain->priv, paddr,
 				 size, DMA_BIDIRECTIONAL, true, dma_mask);
 
-	if (*dma_addr == bad_dma_address) {
-		free_pages((unsigned long)virt_addr, get_order(size));
-		virt_addr = NULL;
-		goto out;
-	}
+	if (*dma_addr == bad_dma_address)
+		goto out_free;
 
 	iommu_completion_wait(iommu);
 
-out:
 	spin_unlock_irqrestore(&domain->lock, flags);
 
 	return virt_addr;
+
+out_free:
+
+	free_pages((unsigned long)virt_addr, get_order(size));
+
+	return NULL;
 }
 
 /*
@@ -1319,6 +1345,9 @@ static void free_coherent(struct device *dev, size_t size,
 	if (!iommu || !domain)
 		goto free_mem;
 
+	if (!dma_ops_domain(domain))
+		goto free_mem;
+
 	spin_lock_irqsave(&domain->lock, flags);
 
 	__unmap_single(iommu, domain->priv, dma_addr, size, DMA_BIDIRECTIONAL);
-- 
cgit v1.2.3


From 863c74ebd0152b21bc4b11c1447b5d1429287d37 Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Tue, 2 Dec 2008 17:56:36 +0100
Subject: AMD IOMMU: add device reference counting for protection domains

Impact: know how many devices are assigned to a domain

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
 arch/x86/include/asm/amd_iommu_types.h | 1 +
 arch/x86/kernel/amd_iommu.c            | 3 ++-
 2 files changed, 3 insertions(+), 1 deletion(-)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/amd_iommu_types.h b/arch/x86/include/asm/amd_iommu_types.h
index 4862a5be899..1c769f4e6cd 100644
--- a/arch/x86/include/asm/amd_iommu_types.h
+++ b/arch/x86/include/asm/amd_iommu_types.h
@@ -203,6 +203,7 @@ struct protection_domain {
 	int mode;		/* paging mode (0-6 levels) */
 	u64 *pt_root;		/* page table root pointer */
 	unsigned long flags;	/* flags to find out type of domain */
+	unsigned dev_cnt;	/* devices assigned to this domain */
 	void *priv;		/* private data */
 };
 
diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
index 5c465c91150..8b45bc49d1c 100644
--- a/arch/x86/kernel/amd_iommu.c
+++ b/arch/x86/kernel/amd_iommu.c
@@ -825,9 +825,10 @@ static void set_device_domain(struct amd_iommu *iommu,
 			      u16 devid)
 {
 	unsigned long flags;
-
 	u64 pte_root = virt_to_phys(domain->pt_root);
 
+	domain->dev_cnt += 1;
+
 	pte_root |= (domain->mode & DEV_ENTRY_MODE_MASK)
 		    << DEV_ENTRY_MODE_SHIFT;
 	pte_root |= IOMMU_PTE_IR | IOMMU_PTE_IW | IOMMU_PTE_P | IOMMU_PTE_TV;
-- 
cgit v1.2.3


From f1179dc005ee2b0e55c3f74f3552c3e9ef852265 Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Wed, 10 Dec 2008 14:39:51 +0100
Subject: AMD IOMMU: rename set_device_domain function

Impact: rename set_device_domain() to attach_device()

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
 arch/x86/kernel/amd_iommu.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
index 8b45bc49d1c..12e8b67e588 100644
--- a/arch/x86/kernel/amd_iommu.c
+++ b/arch/x86/kernel/amd_iommu.c
@@ -820,9 +820,9 @@ static struct protection_domain *domain_for_device(u16 devid)
  * If a device is not yet associated with a domain, this function does
  * assigns it visible for the hardware
  */
-static void set_device_domain(struct amd_iommu *iommu,
-			      struct protection_domain *domain,
-			      u16 devid)
+static void attach_device(struct amd_iommu *iommu,
+			  struct protection_domain *domain,
+			  u16 devid)
 {
 	unsigned long flags;
 	u64 pte_root = virt_to_phys(domain->pt_root);
@@ -929,14 +929,14 @@ static int get_device_resources(struct device *dev,
 		if (!dma_dom)
 			dma_dom = (*iommu)->default_dom;
 		*domain = &dma_dom->domain;
-		set_device_domain(*iommu, *domain, *bdf);
+		attach_device(*iommu, *domain, *bdf);
 		printk(KERN_INFO "AMD IOMMU: Using protection domain %d for "
 				"device ", (*domain)->id);
 		print_devid(_bdf, 1);
 	}
 
 	if (domain_for_device(_bdf) == NULL)
-		set_device_domain(*iommu, *domain, _bdf);
+		attach_device(*iommu, *domain, _bdf);
 
 	return 1;
 }
-- 
cgit v1.2.3


From 355bf553edb7fe21ada51f62c849180bec6da877 Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Mon, 8 Dec 2008 12:02:41 +0100
Subject: AMD IOMMU: add device detach helper functions

Impact: add helper functions to detach a device from a domain

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
 arch/x86/kernel/amd_iommu.c | 39 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 39 insertions(+)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
index 12e8b67e588..15456a3a18c 100644
--- a/arch/x86/kernel/amd_iommu.c
+++ b/arch/x86/kernel/amd_iommu.c
@@ -844,6 +844,45 @@ static void attach_device(struct amd_iommu *iommu,
 	iommu_queue_inv_dev_entry(iommu, devid);
 }
 
+#ifdef CONFIG_IOMMU_API
+/*
+ * Removes a device from a protection domain (unlocked)
+ */
+static void __detach_device(struct protection_domain *domain, u16 devid)
+{
+
+	/* lock domain */
+	spin_lock(&domain->lock);
+
+	/* remove domain from the lookup table */
+	amd_iommu_pd_table[devid] = NULL;
+
+	/* remove entry from the device table seen by the hardware */
+	amd_iommu_dev_table[devid].data[0] = IOMMU_PTE_P | IOMMU_PTE_TV;
+	amd_iommu_dev_table[devid].data[1] = 0;
+	amd_iommu_dev_table[devid].data[2] = 0;
+
+	/* decrease reference counter */
+	domain->dev_cnt -= 1;
+
+	/* ready */
+	spin_unlock(&domain->lock);
+}
+
+/*
+ * Removes a device from a protection domain (with devtable_lock held)
+ */
+static void detach_device(struct protection_domain *domain, u16 devid)
+{
+	unsigned long flags;
+
+	/* lock device table */
+	write_lock_irqsave(&amd_iommu_devtable_lock, flags);
+	__detach_device(domain, devid);
+	write_unlock_irqrestore(&amd_iommu_devtable_lock, flags);
+}
+#endif
+
 /*****************************************************************************
  *
  * The next functions belong to the dma_ops mapping/unmapping code.
-- 
cgit v1.2.3


From e275a2a0fc9e2168b15f6c7814e30b7ad58b1c7c Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Wed, 10 Dec 2008 18:27:25 +0100
Subject: AMD IOMMU: add device notifier callback

Impact: inform IOMMU about state change of a device in the driver core

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
 arch/x86/kernel/amd_iommu.c | 62 +++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 60 insertions(+), 2 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
index 15456a3a18c..140875b3f6e 100644
--- a/arch/x86/kernel/amd_iommu.c
+++ b/arch/x86/kernel/amd_iommu.c
@@ -47,6 +47,8 @@ struct iommu_cmd {
 
 static int dma_ops_unity_map(struct dma_ops_domain *dma_dom,
 			     struct unity_map_entry *e);
+static struct dma_ops_domain *find_protection_domain(u16 devid);
+
 
 /* returns !0 if the IOMMU is caching non-present entries in its TLB */
 static int iommu_has_npcache(struct amd_iommu *iommu)
@@ -844,7 +846,6 @@ static void attach_device(struct amd_iommu *iommu,
 	iommu_queue_inv_dev_entry(iommu, devid);
 }
 
-#ifdef CONFIG_IOMMU_API
 /*
  * Removes a device from a protection domain (unlocked)
  */
@@ -881,7 +882,62 @@ static void detach_device(struct protection_domain *domain, u16 devid)
 	__detach_device(domain, devid);
 	write_unlock_irqrestore(&amd_iommu_devtable_lock, flags);
 }
-#endif
+
+static int device_change_notifier(struct notifier_block *nb,
+				  unsigned long action, void *data)
+{
+	struct device *dev = data;
+	struct pci_dev *pdev = to_pci_dev(dev);
+	u16 devid = calc_devid(pdev->bus->number, pdev->devfn);
+	struct protection_domain *domain;
+	struct dma_ops_domain *dma_domain;
+	struct amd_iommu *iommu;
+
+	if (devid > amd_iommu_last_bdf)
+		goto out;
+
+	devid = amd_iommu_alias_table[devid];
+
+	iommu = amd_iommu_rlookup_table[devid];
+	if (iommu == NULL)
+		goto out;
+
+	domain = domain_for_device(devid);
+
+	if (domain && !dma_ops_domain(domain))
+		WARN_ONCE(1, "AMD IOMMU WARNING: device %s already bound "
+			  "to a non-dma-ops domain\n", dev_name(dev));
+
+	switch (action) {
+	case BUS_NOTIFY_BOUND_DRIVER:
+		if (domain)
+			goto out;
+		dma_domain = find_protection_domain(devid);
+		if (!dma_domain)
+			dma_domain = iommu->default_dom;
+		attach_device(iommu, &dma_domain->domain, devid);
+		printk(KERN_INFO "AMD IOMMU: Using protection domain %d for "
+		       "device %s\n", dma_domain->domain.id, dev_name(dev));
+		break;
+	case BUS_NOTIFY_UNBIND_DRIVER:
+		if (!domain)
+			goto out;
+		detach_device(domain, devid);
+		break;
+	default:
+		goto out;
+	}
+
+	iommu_queue_inv_dev_entry(iommu, devid);
+	iommu_completion_wait(iommu);
+
+out:
+	return 0;
+}
+
+struct notifier_block device_nb = {
+	.notifier_call = device_change_notifier,
+};
 
 /*****************************************************************************
  *
@@ -1510,6 +1566,8 @@ int __init amd_iommu_init_dma_ops(void)
 	/* Make the driver finally visible to the drivers */
 	dma_ops = &amd_iommu_dma_ops;
 
+	bus_register_notifier(&pci_bus_type, &device_nb);
+
 	return 0;
 
 free_domains:
-- 
cgit v1.2.3


From 6d98cd8043c13438e4ca8a9464893f0224198a30 Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Mon, 8 Dec 2008 12:05:55 +0100
Subject: AMD IOMMU: add domain cleanup helper function

Impact: add a function to remove all devices from a domain

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
 arch/x86/kernel/amd_iommu.c | 28 ++++++++++++++++++++++++++++
 1 file changed, 28 insertions(+)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
index 140875b3f6e..5d3f085289e 100644
--- a/arch/x86/kernel/amd_iommu.c
+++ b/arch/x86/kernel/amd_iommu.c
@@ -1579,3 +1579,31 @@ free_domains:
 
 	return ret;
 }
+
+/*****************************************************************************
+ *
+ * The following functions belong to the exported interface of AMD IOMMU
+ *
+ * This interface allows access to lower level functions of the IOMMU
+ * like protection domain handling and assignement of devices to domains
+ * which is not possible with the dma_ops interface.
+ *
+ *****************************************************************************/
+
+#ifdef CONFIG_IOMMU_API
+
+static void cleanup_domain(struct protection_domain *domain)
+{
+	unsigned long flags;
+	u16 devid;
+
+	write_lock_irqsave(&amd_iommu_devtable_lock, flags);
+
+	for (devid = 0; devid <= amd_iommu_last_bdf; ++devid)
+		if (amd_iommu_pd_table[devid] == domain)
+			__detach_device(domain, devid);
+
+	write_unlock_irqrestore(&amd_iommu_devtable_lock, flags);
+}
+
+#endif
-- 
cgit v1.2.3


From c156e347d6d3c36b6843c3b168eda61b9a02c827 Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Tue, 2 Dec 2008 18:13:27 +0100
Subject: AMD IOMMU: add domain init function for IOMMU API

Impact: add a generic function for allocation protection domains

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
 arch/x86/kernel/amd_iommu.c | 30 ++++++++++++++++++++++++++++++
 1 file changed, 30 insertions(+)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
index 5d3f085289e..6c0bd49cee5 100644
--- a/arch/x86/kernel/amd_iommu.c
+++ b/arch/x86/kernel/amd_iommu.c
@@ -22,6 +22,9 @@
 #include <linux/bitops.h>
 #include <linux/scatterlist.h>
 #include <linux/iommu-helper.h>
+#ifdef CONFIG_IOMMU_API
+#include <linux/iommu.h>
+#endif
 #include <asm/proto.h>
 #include <asm/iommu.h>
 #include <asm/gart.h>
@@ -1606,4 +1609,31 @@ static void cleanup_domain(struct protection_domain *domain)
 	write_unlock_irqrestore(&amd_iommu_devtable_lock, flags);
 }
 
+static int amd_iommu_domain_init(struct iommu_domain *dom)
+{
+	struct protection_domain *domain;
+
+	domain = kzalloc(sizeof(*domain), GFP_KERNEL);
+	if (!domain)
+		return -ENOMEM;
+
+	spin_lock_init(&domain->lock);
+	domain->mode = PAGE_MODE_3_LEVEL;
+	domain->id = domain_id_alloc();
+	if (!domain->id)
+		goto out_free;
+	domain->pt_root = (void *)get_zeroed_page(GFP_KERNEL);
+	if (!domain->pt_root)
+		goto out_free;
+
+	dom->priv = domain;
+
+	return 0;
+
+out_free:
+	kfree(domain);
+
+	return -ENOMEM;
+}
+
 #endif
-- 
cgit v1.2.3


From 98383fc301c6546af0f3a8a1d3cb8bf218f7e940 Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Tue, 2 Dec 2008 18:34:12 +0100
Subject: AMD IOMMU: add domain destroy function for IOMMU API

Impact: add a generic function for releasing protection domains

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
 arch/x86/kernel/amd_iommu.c | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
index 6c0bd49cee5..891d713d9c9 100644
--- a/arch/x86/kernel/amd_iommu.c
+++ b/arch/x86/kernel/amd_iommu.c
@@ -1636,4 +1636,25 @@ out_free:
 	return -ENOMEM;
 }
 
+static void amd_iommu_domain_destroy(struct iommu_domain *dom)
+{
+	struct protection_domain *domain = dom->priv;
+
+	if (!domain)
+		return;
+
+	if (domain->dev_cnt > 0)
+		cleanup_domain(domain);
+
+	BUG_ON(domain->dev_cnt != 0);
+
+	free_pagetable(domain);
+
+	domain_id_free(domain->id);
+
+	kfree(domain);
+
+	dom->priv = NULL;
+}
+
 #endif
-- 
cgit v1.2.3


From 684f2888847b896faafed87ce4733501d2cc283c Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Mon, 8 Dec 2008 12:07:44 +0100
Subject: AMD IOMMU: add device detach function for IOMMU API

Impact: add a generic function to detach devices from protection domains

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
 arch/x86/kernel/amd_iommu.c | 26 ++++++++++++++++++++++++++
 1 file changed, 26 insertions(+)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
index 891d713d9c9..ef9b309e8e0 100644
--- a/arch/x86/kernel/amd_iommu.c
+++ b/arch/x86/kernel/amd_iommu.c
@@ -1657,4 +1657,30 @@ static void amd_iommu_domain_destroy(struct iommu_domain *dom)
 	dom->priv = NULL;
 }
 
+static void amd_iommu_detach_device(struct iommu_domain *dom,
+				    struct device *dev)
+{
+	struct protection_domain *domain = dom->priv;
+	struct amd_iommu *iommu;
+	struct pci_dev *pdev;
+	u16 devid;
+
+	if (dev->bus != &pci_bus_type)
+		return;
+
+	pdev = to_pci_dev(dev);
+
+	devid = calc_devid(pdev->bus->number, pdev->devfn);
+
+	if (devid > 0)
+		detach_device(domain, devid);
+
+	iommu = amd_iommu_rlookup_table[devid];
+	if (!iommu)
+		return;
+
+	iommu_queue_inv_dev_entry(iommu, devid);
+	iommu_completion_wait(iommu);
+}
+
 #endif
-- 
cgit v1.2.3


From 01106066a6900b518debe990ddaadf376d433bd6 Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Tue, 2 Dec 2008 19:34:11 +0100
Subject: AMD IOMMU: add device attach function for IOMMU API

Impact: add a generic function to attach devices to protection domains

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
 arch/x86/kernel/amd_iommu.c | 35 +++++++++++++++++++++++++++++++++++
 1 file changed, 35 insertions(+)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
index ef9b309e8e0..2f7c0b3a448 100644
--- a/arch/x86/kernel/amd_iommu.c
+++ b/arch/x86/kernel/amd_iommu.c
@@ -1683,4 +1683,39 @@ static void amd_iommu_detach_device(struct iommu_domain *dom,
 	iommu_completion_wait(iommu);
 }
 
+static int amd_iommu_attach_device(struct iommu_domain *dom,
+				   struct device *dev)
+{
+	struct protection_domain *domain = dom->priv;
+	struct protection_domain *old_domain;
+	struct amd_iommu *iommu;
+	struct pci_dev *pdev;
+	u16 devid;
+
+	if (dev->bus != &pci_bus_type)
+		return -EINVAL;
+
+	pdev = to_pci_dev(dev);
+
+	devid = calc_devid(pdev->bus->number, pdev->devfn);
+
+	if (devid >= amd_iommu_last_bdf ||
+			devid != amd_iommu_alias_table[devid])
+		return -EINVAL;
+
+	iommu = amd_iommu_rlookup_table[devid];
+	if (!iommu)
+		return -EINVAL;
+
+	old_domain = domain_for_device(devid);
+	if (old_domain)
+		return -EBUSY;
+
+	attach_device(iommu, domain, devid);
+
+	iommu_completion_wait(iommu);
+
+	return 0;
+}
+
 #endif
-- 
cgit v1.2.3


From c6229ca649aa9b312d1f1de20af8d2603b14eead Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Tue, 2 Dec 2008 19:48:43 +0100
Subject: AMD IOMMU: add domain map function for IOMMU API

Impact: add a generic function to map pages into protection domains

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
 arch/x86/kernel/amd_iommu.c | 29 +++++++++++++++++++++++++++++
 1 file changed, 29 insertions(+)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
index 2f7c0b3a448..1fcedbe39a9 100644
--- a/arch/x86/kernel/amd_iommu.c
+++ b/arch/x86/kernel/amd_iommu.c
@@ -1718,4 +1718,33 @@ static int amd_iommu_attach_device(struct iommu_domain *dom,
 	return 0;
 }
 
+static int amd_iommu_map_range(struct iommu_domain *dom,
+			       unsigned long iova, phys_addr_t paddr,
+			       size_t size, int iommu_prot)
+{
+	struct protection_domain *domain = dom->priv;
+	unsigned long i,  npages = iommu_num_pages(paddr, size, PAGE_SIZE);
+	int prot = 0;
+	int ret;
+
+	if (iommu_prot & IOMMU_READ)
+		prot |= IOMMU_PROT_IR;
+	if (iommu_prot & IOMMU_WRITE)
+		prot |= IOMMU_PROT_IW;
+
+	iova  &= PAGE_MASK;
+	paddr &= PAGE_MASK;
+
+	for (i = 0; i < npages; ++i) {
+		ret = iommu_map_page(domain, iova, paddr, prot);
+		if (ret)
+			return ret;
+
+		iova  += PAGE_SIZE;
+		paddr += PAGE_SIZE;
+	}
+
+	return 0;
+}
+
 #endif
-- 
cgit v1.2.3


From eb74ff6cc0080c7f6270fdfffba65c4eff23d3ad Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Tue, 2 Dec 2008 19:59:10 +0100
Subject: AMD IOMMU: add domain unmap function for IOMMU API

Impact: add a generic function to unmap pages into protection domains

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
 arch/x86/kernel/amd_iommu.c | 41 +++++++++++++++++++++++++++++++++++++++++
 1 file changed, 41 insertions(+)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
index 1fcedbe39a9..d8a0abf423b 100644
--- a/arch/x86/kernel/amd_iommu.c
+++ b/arch/x86/kernel/amd_iommu.c
@@ -445,6 +445,30 @@ static int iommu_map_page(struct protection_domain *dom,
 	return 0;
 }
 
+#ifdef CONFIG_IOMMU_API
+static void iommu_unmap_page(struct protection_domain *dom,
+			     unsigned long bus_addr)
+{
+	u64 *pte;
+
+	pte = &dom->pt_root[IOMMU_PTE_L2_INDEX(bus_addr)];
+
+	if (!IOMMU_PTE_PRESENT(*pte))
+		return;
+
+	pte = IOMMU_PTE_PAGE(*pte);
+	pte = &pte[IOMMU_PTE_L1_INDEX(bus_addr)];
+
+	if (!IOMMU_PTE_PRESENT(*pte))
+		return;
+
+	pte = IOMMU_PTE_PAGE(*pte);
+	pte = &pte[IOMMU_PTE_L1_INDEX(bus_addr)];
+
+	*pte = 0;
+}
+#endif
+
 /*
  * This function checks if a specific unity mapping entry is needed for
  * this specific IOMMU.
@@ -1747,4 +1771,21 @@ static int amd_iommu_map_range(struct iommu_domain *dom,
 	return 0;
 }
 
+static void amd_iommu_unmap_range(struct iommu_domain *dom,
+				  unsigned long iova, size_t size)
+{
+
+	struct protection_domain *domain = dom->priv;
+	unsigned long i,  npages = iommu_num_pages(iova, size, PAGE_SIZE);
+
+	iova  &= PAGE_MASK;
+
+	for (i = 0; i < npages; ++i) {
+		iommu_unmap_page(domain, iova);
+		iova  += PAGE_SIZE;
+	}
+
+	iommu_flush_domain(domain->id);
+}
+
 #endif
-- 
cgit v1.2.3


From 645c4c8d7289a718c9c828ec217f2b94e3b3e6ff Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Tue, 2 Dec 2008 20:05:50 +0100
Subject: AMD IOMMU: add domain address lookup function for IOMMU API

Impact: add a generic function to lockup addresses in protection domains

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
 arch/x86/kernel/amd_iommu.c | 31 +++++++++++++++++++++++++++++++
 1 file changed, 31 insertions(+)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
index d8a0abf423b..b599e80051f 100644
--- a/arch/x86/kernel/amd_iommu.c
+++ b/arch/x86/kernel/amd_iommu.c
@@ -1788,4 +1788,35 @@ static void amd_iommu_unmap_range(struct iommu_domain *dom,
 	iommu_flush_domain(domain->id);
 }
 
+static phys_addr_t amd_iommu_iova_to_phys(struct iommu_domain *dom,
+					  unsigned long iova)
+{
+	struct protection_domain *domain = dom->priv;
+	unsigned long offset = iova & ~PAGE_MASK;
+	phys_addr_t paddr;
+	u64 *pte;
+
+	pte = &domain->pt_root[IOMMU_PTE_L2_INDEX(iova)];
+
+	if (!IOMMU_PTE_PRESENT(*pte))
+		return 0;
+
+	pte = IOMMU_PTE_PAGE(*pte);
+	pte = &pte[IOMMU_PTE_L1_INDEX(iova)];
+
+	if (!IOMMU_PTE_PRESENT(*pte))
+		return 0;
+
+	pte = IOMMU_PTE_PAGE(*pte);
+	pte = &pte[IOMMU_PTE_L0_INDEX(iova)];
+
+	if (!IOMMU_PTE_PRESENT(*pte))
+		return 0;
+
+	paddr  = *pte & IOMMU_PAGE_MASK;
+	paddr |= offset;
+
+	return paddr;
+}
+
 #endif
-- 
cgit v1.2.3


From 26961efe0dab9ca73f8fc3b6137b814252e04972 Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Wed, 3 Dec 2008 17:00:17 +0100
Subject: AMD IOMMU: register functions for the IOMMU API

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
 arch/x86/kernel/amd_iommu.c | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
index b599e80051f..d9b651c0118 100644
--- a/arch/x86/kernel/amd_iommu.c
+++ b/arch/x86/kernel/amd_iommu.c
@@ -41,6 +41,10 @@ static DEFINE_RWLOCK(amd_iommu_devtable_lock);
 static LIST_HEAD(iommu_pd_list);
 static DEFINE_SPINLOCK(iommu_pd_list_lock);
 
+#ifdef CONFIG_IOMMU_API
+static struct iommu_ops amd_iommu_ops;
+#endif
+
 /*
  * general struct to manage commands send to an IOMMU
  */
@@ -1593,6 +1597,10 @@ int __init amd_iommu_init_dma_ops(void)
 	/* Make the driver finally visible to the drivers */
 	dma_ops = &amd_iommu_dma_ops;
 
+#ifdef CONFIG_IOMMU_API
+	register_iommu(&amd_iommu_ops);
+#endif
+
 	bus_register_notifier(&pci_bus_type, &device_nb);
 
 	return 0;
@@ -1819,4 +1827,14 @@ static phys_addr_t amd_iommu_iova_to_phys(struct iommu_domain *dom,
 	return paddr;
 }
 
+static struct iommu_ops amd_iommu_ops = {
+	.domain_init = amd_iommu_domain_init,
+	.domain_destroy = amd_iommu_domain_destroy,
+	.attach_dev = amd_iommu_attach_device,
+	.detach_dev = amd_iommu_detach_device,
+	.map = amd_iommu_map_range,
+	.unmap = amd_iommu_unmap_range,
+	.iova_to_phys = amd_iommu_iova_to_phys,
+};
+
 #endif
-- 
cgit v1.2.3


From e2dc14a2a6c9a83baaafc51f06b7e73cec2167be Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Wed, 10 Dec 2008 18:48:59 +0100
Subject: AMD IOMMU: add a domain flag for default domains

Impact: adds a new protection domain flag

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
 arch/x86/include/asm/amd_iommu_types.h | 2 ++
 arch/x86/kernel/amd_iommu.c            | 1 +
 2 files changed, 3 insertions(+)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/amd_iommu_types.h b/arch/x86/include/asm/amd_iommu_types.h
index 1c769f4e6cd..6adc7020ef9 100644
--- a/arch/x86/include/asm/amd_iommu_types.h
+++ b/arch/x86/include/asm/amd_iommu_types.h
@@ -192,6 +192,8 @@
 
 /* Protection domain flags */
 #define PD_DMA_OPS_MASK		(1UL << 0) /* domain used for dma_ops */
+#define PD_DEFAULT_MASK		(1UL << 1) /* domain is a default dma_ops
+					      domain for an IOMMU */
 
 /*
  * This structure contains generic data for  IOMMU protection domains
diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
index d9b651c0118..f2956546423 100644
--- a/arch/x86/kernel/amd_iommu.c
+++ b/arch/x86/kernel/amd_iommu.c
@@ -1574,6 +1574,7 @@ int __init amd_iommu_init_dma_ops(void)
 		iommu->default_dom = dma_ops_domain_alloc(iommu, order);
 		if (iommu->default_dom == NULL)
 			return -ENOMEM;
+		iommu->default_dom->domain.flags |= PD_DEFAULT_MASK;
 		ret = iommu_init_unity_mappings(iommu);
 		if (ret)
 			goto free_domains;
-- 
cgit v1.2.3


From 1ac4cbbc5eb56de96d264d10f464ba5222815b1b Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Wed, 10 Dec 2008 19:33:26 +0100
Subject: AMD IOMMU: allocate a new protection for hotplugged devices

Impact: also hotplug devices benefit from device isolation

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
 arch/x86/kernel/amd_iommu.c | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
index f2956546423..f2260609ead 100644
--- a/arch/x86/kernel/amd_iommu.c
+++ b/arch/x86/kernel/amd_iommu.c
@@ -923,6 +923,8 @@ static int device_change_notifier(struct notifier_block *nb,
 	struct protection_domain *domain;
 	struct dma_ops_domain *dma_domain;
 	struct amd_iommu *iommu;
+	int order = amd_iommu_aperture_order;
+	unsigned long flags;
 
 	if (devid > amd_iommu_last_bdf)
 		goto out;
@@ -954,6 +956,21 @@ static int device_change_notifier(struct notifier_block *nb,
 		if (!domain)
 			goto out;
 		detach_device(domain, devid);
+		break;
+	case BUS_NOTIFY_ADD_DEVICE:
+		/* allocate a protection domain if a device is added */
+		dma_domain = find_protection_domain(devid);
+		if (dma_domain)
+			goto out;
+		dma_domain = dma_ops_domain_alloc(iommu, order);
+		if (!dma_domain)
+			goto out;
+		dma_domain->target_dev = devid;
+
+		spin_lock_irqsave(&iommu_pd_list_lock, flags);
+		list_add_tail(&dma_domain->list, &iommu_pd_list);
+		spin_unlock_irqrestore(&iommu_pd_list_lock, flags);
+
 		break;
 	default:
 		goto out;
-- 
cgit v1.2.3


From ab896722867602eb0e836717e8b857ad513800d8 Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Wed, 10 Dec 2008 19:43:07 +0100
Subject: AMD IOMMU: use dev_name instead of self-build print_devid

Impact: use generic dev_name instead of own function

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
 arch/x86/include/asm/amd_iommu_types.h | 12 ------------
 arch/x86/kernel/amd_iommu.c            |  3 +--
 2 files changed, 1 insertion(+), 14 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/amd_iommu_types.h b/arch/x86/include/asm/amd_iommu_types.h
index 6adc7020ef9..ee8cfa00017 100644
--- a/arch/x86/include/asm/amd_iommu_types.h
+++ b/arch/x86/include/asm/amd_iommu_types.h
@@ -389,18 +389,6 @@ extern int amd_iommu_isolate;
  */
 extern bool amd_iommu_unmap_flush;
 
-/* takes a PCI device id and prints it out in a readable form */
-static inline void print_devid(u16 devid, int nl)
-{
-	int bus = devid >> 8;
-	int dev = devid >> 3 & 0x1f;
-	int fn  = devid & 0x07;
-
-	printk("%02x:%02x.%x", bus, dev, fn);
-	if (nl)
-		printk("\n");
-}
-
 /* takes bus and device/function and returns the device id
  * FIXME: should that be in generic PCI code? */
 static inline u16 calc_devid(u8 bus, u8 devfn)
diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
index f2260609ead..a53cf48d3be 100644
--- a/arch/x86/kernel/amd_iommu.c
+++ b/arch/x86/kernel/amd_iommu.c
@@ -1074,8 +1074,7 @@ static int get_device_resources(struct device *dev,
 		*domain = &dma_dom->domain;
 		attach_device(*iommu, *domain, *bdf);
 		printk(KERN_INFO "AMD IOMMU: Using protection domain %d for "
-				"device ", (*domain)->id);
-		print_devid(_bdf, 1);
+				"device %s\n", (*domain)->id, dev_name(dev));
 	}
 
 	if (domain_for_device(_bdf) == NULL)
-- 
cgit v1.2.3


From 0cfd7aa90be83a4d278810d231f9ef03f189b4f0 Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Wed, 10 Dec 2008 19:58:00 +0100
Subject: AMD IOMMU: convert iommu->need_sync to bool

Impact: use bool instead of int for iommu->need_sync

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
 arch/x86/include/asm/amd_iommu_types.h | 2 +-
 arch/x86/kernel/amd_iommu.c            | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/amd_iommu_types.h b/arch/x86/include/asm/amd_iommu_types.h
index ee8cfa00017..c4b144ece1f 100644
--- a/arch/x86/include/asm/amd_iommu_types.h
+++ b/arch/x86/include/asm/amd_iommu_types.h
@@ -302,7 +302,7 @@ struct amd_iommu {
 	bool int_enabled;
 
 	/* if one, we need to send a completion wait command */
-	int need_sync;
+	bool need_sync;
 
 	/* default dma_ops domain for that IOMMU */
 	struct dma_ops_domain *default_dom;
diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
index a53cf48d3be..e410e97e5b0 100644
--- a/arch/x86/kernel/amd_iommu.c
+++ b/arch/x86/kernel/amd_iommu.c
@@ -198,7 +198,7 @@ static int iommu_queue_command(struct amd_iommu *iommu, struct iommu_cmd *cmd)
 	spin_lock_irqsave(&iommu->lock, flags);
 	ret = __iommu_queue_command(iommu, cmd);
 	if (!ret)
-		iommu->need_sync = 1;
+		iommu->need_sync = true;
 	spin_unlock_irqrestore(&iommu->lock, flags);
 
 	return ret;
@@ -263,7 +263,7 @@ static int iommu_completion_wait(struct amd_iommu *iommu)
 
 	ret = __iommu_completion_wait(iommu);
 
-	iommu->need_sync = 0;
+	iommu->need_sync = false;
 
 	if (ret)
 		goto out;
-- 
cgit v1.2.3


From c226f853091577e665ebc02c064af4834d8d4f28 Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Fri, 12 Dec 2008 13:53:54 +0100
Subject: AMD IOMMU: convert amd_iommu_isolate to bool

Impact: cleanup

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
 arch/x86/include/asm/amd_iommu_types.h | 2 +-
 arch/x86/kernel/amd_iommu_init.c       | 7 ++++---
 2 files changed, 5 insertions(+), 4 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/amd_iommu_types.h b/arch/x86/include/asm/amd_iommu_types.h
index c4b144ece1f..7abf9cf0c1f 100644
--- a/arch/x86/include/asm/amd_iommu_types.h
+++ b/arch/x86/include/asm/amd_iommu_types.h
@@ -381,7 +381,7 @@ extern struct protection_domain **amd_iommu_pd_table;
 extern unsigned long *amd_iommu_pd_alloc_bitmap;
 
 /* will be 1 if device isolation is enabled */
-extern int amd_iommu_isolate;
+extern bool amd_iommu_isolate;
 
 /*
  * If true, the addresses will be flushed on unmap time, not when
diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c
index c625800c55c..47e163b4431 100644
--- a/arch/x86/kernel/amd_iommu_init.c
+++ b/arch/x86/kernel/amd_iommu_init.c
@@ -122,7 +122,8 @@ u16 amd_iommu_last_bdf;			/* largest PCI device id we have
 LIST_HEAD(amd_iommu_unity_map);		/* a list of required unity mappings
 					   we find in ACPI */
 unsigned amd_iommu_aperture_order = 26; /* size of aperture in power of 2 */
-int amd_iommu_isolate = 1;		/* if 1, device isolation is enabled */
+bool amd_iommu_isolate = true;		/* if true, device isolation is
+					   enabled */
 bool amd_iommu_unmap_flush;		/* if true, flush on every unmap */
 
 LIST_HEAD(amd_iommu_list);		/* list of all AMD IOMMUs in the
@@ -1218,9 +1219,9 @@ static int __init parse_amd_iommu_options(char *str)
 {
 	for (; *str; ++str) {
 		if (strncmp(str, "isolate", 7) == 0)
-			amd_iommu_isolate = 1;
+			amd_iommu_isolate = true;
 		if (strncmp(str, "share", 5) == 0)
-			amd_iommu_isolate = 0;
+			amd_iommu_isolate = false;
 		if (strncmp(str, "fullflush", 9) == 0)
 			amd_iommu_unmap_flush = true;
 	}
-- 
cgit v1.2.3


From edcb34da259c503a2ffd37e51a658672ba3bc7a2 Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Wed, 10 Dec 2008 20:01:45 +0100
Subject: AMD IOMMU: use calc_devid in prealloc_protection_domains

Impact: cleanup

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
 arch/x86/kernel/amd_iommu.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
index e410e97e5b0..3011ea7a3f8 100644
--- a/arch/x86/kernel/amd_iommu.c
+++ b/arch/x86/kernel/amd_iommu.c
@@ -1543,7 +1543,7 @@ void prealloc_protection_domains(void)
 	u16 devid;
 
 	while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) {
-		devid = (dev->bus->number << 8) | dev->devfn;
+		devid = calc_devid(dev->bus->number, dev->devfn);
 		if (devid > amd_iommu_last_bdf)
 			continue;
 		devid = amd_iommu_alias_table[devid];
-- 
cgit v1.2.3


From a4e267c88b4acfc87ff2ab0cc8e9509878e9aaba Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Wed, 10 Dec 2008 20:04:18 +0100
Subject: AMD IOMMU: use dev_name in iommu_enable function

Impact: cleanup

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
 arch/x86/kernel/amd_iommu_init.c | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c
index 47e163b4431..be81f6125c5 100644
--- a/arch/x86/kernel/amd_iommu_init.c
+++ b/arch/x86/kernel/amd_iommu_init.c
@@ -246,12 +246,8 @@ static void __init iommu_feature_disable(struct amd_iommu *iommu, u8 bit)
 /* Function to enable the hardware */
 void __init iommu_enable(struct amd_iommu *iommu)
 {
-	printk(KERN_INFO "AMD IOMMU: Enabling IOMMU "
-	       "at %02x:%02x.%x cap 0x%hx\n",
-	       iommu->dev->bus->number,
-	       PCI_SLOT(iommu->dev->devfn),
-	       PCI_FUNC(iommu->dev->devfn),
-	       iommu->cap_ptr);
+	printk(KERN_INFO "AMD IOMMU: Enabling IOMMU at %s cap 0x%hx\n",
+	       dev_name(&iommu->dev->dev), iommu->cap_ptr);
 
 	iommu_feature_enable(iommu, CONTROL_IOMMU_EN);
 }
-- 
cgit v1.2.3


From 2e117604a4e8f3f9cee4aec3373b0382159e152a Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Thu, 11 Dec 2008 19:00:12 +0100
Subject: AMD IOMMU: add Kconfig entry for statistic collection code

Impact: adds new Kconfig entry

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
 arch/x86/Kconfig | 10 ++++++++++
 1 file changed, 10 insertions(+)

(limited to 'arch/x86')

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 249d1e0824b..f9998d276eb 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -586,6 +586,16 @@ config AMD_IOMMU
 	  your BIOS for an option to enable it or if you have an IVRS ACPI
 	  table.
 
+config AMD_IOMMU_STATS
+	bool "Export AMD IOMMU statistics to debugfs"
+	depends on AMD_IOMMU
+	select DEBUG_FS
+	help
+	  This option enables code in the AMD IOMMU driver to collect various
+	  statistics about whats happening in the driver and exports that
+	  information to userspace via debugfs.
+	  If unsure, say N.
+
 # need this always selected by IOMMU for the VIA workaround
 config SWIOTLB
 	def_bool y if X86_64
-- 
cgit v1.2.3


From a9dddbe0497ab0df7ee729e8d4cb0ee2dec3e4ba Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Fri, 12 Dec 2008 12:33:06 +0100
Subject: AMD IOMMU: add necessary header defines for stats counting

Impact: add defines to make iommu stats collection configurable

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
 arch/x86/include/asm/amd_iommu_types.h | 26 ++++++++++++++++++++++++++
 1 file changed, 26 insertions(+)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/amd_iommu_types.h b/arch/x86/include/asm/amd_iommu_types.h
index 7abf9cf0c1f..1379c5fe86d 100644
--- a/arch/x86/include/asm/amd_iommu_types.h
+++ b/arch/x86/include/asm/amd_iommu_types.h
@@ -396,4 +396,30 @@ static inline u16 calc_devid(u8 bus, u8 devfn)
 	return (((u16)bus) << 8) | devfn;
 }
 
+#ifdef CONFIG_AMD_IOMMU_STATS
+
+struct __iommu_counter {
+	char *name;
+	struct dentry *dent;
+	u64 value;
+};
+
+#define DECLARE_STATS_COUNTER(nm) \
+	static struct __iommu_counter nm = {	\
+		.name = #nm,			\
+	}
+
+#define INC_STATS_COUNTER(name)		name.value += 1
+#define ADD_STATS_COUNTER(name, x)	name.value += (x)
+#define SUB_STATS_COUNTER(name, x)	name.value -= (x)
+
+#else /* CONFIG_AMD_IOMMU_STATS */
+
+#define DECLARE_STATS_COUNTER(name)
+#define INC_STATS_COUNTER(name)
+#define ADD_STATS_COUNTER(name, x)
+#define SUB_STATS_COUNTER(name, x)
+
+#endif /* CONFIG_AMD_IOMMU_STATS */
+
 #endif /* _ASM_X86_AMD_IOMMU_TYPES_H */
-- 
cgit v1.2.3


From 7f26508bbb76ce86aad1130ef6b7f1a4bb7de0c2 Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Fri, 12 Dec 2008 13:50:21 +0100
Subject: AMD IOMMU: add init code for statistic collection

Impact: create a new debugfs directory

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
 arch/x86/include/asm/amd_iommu_types.h |  2 ++
 arch/x86/kernel/amd_iommu.c            | 37 ++++++++++++++++++++++++++++++++++
 2 files changed, 39 insertions(+)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/amd_iommu_types.h b/arch/x86/include/asm/amd_iommu_types.h
index 1379c5fe86d..95c8cd9d22b 100644
--- a/arch/x86/include/asm/amd_iommu_types.h
+++ b/arch/x86/include/asm/amd_iommu_types.h
@@ -420,6 +420,8 @@ struct __iommu_counter {
 #define ADD_STATS_COUNTER(name, x)
 #define SUB_STATS_COUNTER(name, x)
 
+static inline void amd_iommu_stats_init(void) { }
+
 #endif /* CONFIG_AMD_IOMMU_STATS */
 
 #endif /* _ASM_X86_AMD_IOMMU_TYPES_H */
diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
index 3011ea7a3f8..f98f70626bc 100644
--- a/arch/x86/kernel/amd_iommu.c
+++ b/arch/x86/kernel/amd_iommu.c
@@ -20,6 +20,7 @@
 #include <linux/pci.h>
 #include <linux/gfp.h>
 #include <linux/bitops.h>
+#include <linux/debugfs.h>
 #include <linux/scatterlist.h>
 #include <linux/iommu-helper.h>
 #ifdef CONFIG_IOMMU_API
@@ -57,6 +58,40 @@ static int dma_ops_unity_map(struct dma_ops_domain *dma_dom,
 static struct dma_ops_domain *find_protection_domain(u16 devid);
 
 
+#ifdef CONFIG_AMD_IOMMU_STATS
+
+/*
+ * Initialization code for statistics collection
+ */
+
+static struct dentry *stats_dir;
+static struct dentry *de_isolate;
+static struct dentry *de_fflush;
+
+static void amd_iommu_stats_add(struct __iommu_counter *cnt)
+{
+	if (stats_dir == NULL)
+		return;
+
+	cnt->dent = debugfs_create_u64(cnt->name, 0444, stats_dir,
+				       &cnt->value);
+}
+
+static void amd_iommu_stats_init(void)
+{
+	stats_dir = debugfs_create_dir("amd-iommu", NULL);
+	if (stats_dir == NULL)
+		return;
+
+	de_isolate = debugfs_create_bool("isolation", 0444, stats_dir,
+					 (u32 *)&amd_iommu_isolate);
+
+	de_fflush  = debugfs_create_bool("fullflush", 0444, stats_dir,
+					 (u32 *)&amd_iommu_unmap_flush);
+}
+
+#endif
+
 /* returns !0 if the IOMMU is caching non-present entries in its TLB */
 static int iommu_has_npcache(struct amd_iommu *iommu)
 {
@@ -1620,6 +1655,8 @@ int __init amd_iommu_init_dma_ops(void)
 
 	bus_register_notifier(&pci_bus_type, &device_nb);
 
+	amd_iommu_stats_init();
+
 	return 0;
 
 free_domains:
-- 
cgit v1.2.3


From da49f6df726ecaaee87757e8b65a560679d32f99 Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Fri, 12 Dec 2008 14:59:58 +0100
Subject: AMD IOMMU: add stats counter for completion wait events

Impact: see number of completion wait events in debugfs

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
 arch/x86/kernel/amd_iommu.c | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
index f98f70626bc..b21435748e0 100644
--- a/arch/x86/kernel/amd_iommu.c
+++ b/arch/x86/kernel/amd_iommu.c
@@ -64,6 +64,8 @@ static struct dma_ops_domain *find_protection_domain(u16 devid);
  * Initialization code for statistics collection
  */
 
+DECLARE_STATS_COUNTER(compl_wait);
+
 static struct dentry *stats_dir;
 static struct dentry *de_isolate;
 static struct dentry *de_fflush;
@@ -88,6 +90,8 @@ static void amd_iommu_stats_init(void)
 
 	de_fflush  = debugfs_create_bool("fullflush", 0444, stats_dir,
 					 (u32 *)&amd_iommu_unmap_flush);
+
+	amd_iommu_stats_add(&compl_wait);
 }
 
 #endif
@@ -249,6 +253,8 @@ static void __iommu_wait_for_completion(struct amd_iommu *iommu)
 	unsigned status = 0;
 	unsigned long i = 0;
 
+	INC_STATS_COUNTER(compl_wait);
+
 	while (!ready && (i < EXIT_LOOP_COUNT)) {
 		++i;
 		/* wait for the bit to become one */
-- 
cgit v1.2.3


From 0f2a86f200bc97ae6cefc5d3ac879094b3fcde48 Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Fri, 12 Dec 2008 15:05:16 +0100
Subject: AMD IOMMU: add stats counter for map_single requests

Impact: see number of map_single requests in debugfs

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
 arch/x86/kernel/amd_iommu.c | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
index b21435748e0..ef377865eb7 100644
--- a/arch/x86/kernel/amd_iommu.c
+++ b/arch/x86/kernel/amd_iommu.c
@@ -65,6 +65,7 @@ static struct dma_ops_domain *find_protection_domain(u16 devid);
  */
 
 DECLARE_STATS_COUNTER(compl_wait);
+DECLARE_STATS_COUNTER(cnt_map_single);
 
 static struct dentry *stats_dir;
 static struct dentry *de_isolate;
@@ -92,6 +93,7 @@ static void amd_iommu_stats_init(void)
 					 (u32 *)&amd_iommu_unmap_flush);
 
 	amd_iommu_stats_add(&compl_wait);
+	amd_iommu_stats_add(&cnt_map_single);
 }
 
 #endif
@@ -1278,6 +1280,8 @@ static dma_addr_t map_single(struct device *dev, phys_addr_t paddr,
 	dma_addr_t addr;
 	u64 dma_mask;
 
+	INC_STATS_COUNTER(cnt_map_single);
+
 	if (!check_device(dev))
 		return bad_dma_address;
 
-- 
cgit v1.2.3


From 146a6917fc30616401a090f55cff2b855ee5b2ab Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Fri, 12 Dec 2008 15:07:12 +0100
Subject: AMD IOMMU: add stats counter for unmap_single requests

Impact: see number of unmap_single requests in debugfs

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
 arch/x86/kernel/amd_iommu.c | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
index ef377865eb7..71646c81421 100644
--- a/arch/x86/kernel/amd_iommu.c
+++ b/arch/x86/kernel/amd_iommu.c
@@ -66,6 +66,7 @@ static struct dma_ops_domain *find_protection_domain(u16 devid);
 
 DECLARE_STATS_COUNTER(compl_wait);
 DECLARE_STATS_COUNTER(cnt_map_single);
+DECLARE_STATS_COUNTER(cnt_unmap_single);
 
 static struct dentry *stats_dir;
 static struct dentry *de_isolate;
@@ -94,6 +95,7 @@ static void amd_iommu_stats_init(void)
 
 	amd_iommu_stats_add(&compl_wait);
 	amd_iommu_stats_add(&cnt_map_single);
+	amd_iommu_stats_add(&cnt_unmap_single);
 }
 
 #endif
@@ -1321,6 +1323,8 @@ static void unmap_single(struct device *dev, dma_addr_t dma_addr,
 	struct protection_domain *domain;
 	u16 devid;
 
+	INC_STATS_COUNTER(cnt_unmap_single);
+
 	if (!check_device(dev) ||
 	    !get_device_resources(dev, &iommu, &domain, &devid))
 		/* device not handled by any AMD IOMMU */
-- 
cgit v1.2.3


From d03f067a9d0a1cc09529427af9a15e15d32ba1de Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Fri, 12 Dec 2008 15:09:48 +0100
Subject: AMD IOMMU: add stats counter for map_sg requests

Impact: see number of map_sg requests in debugfs

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
 arch/x86/kernel/amd_iommu.c | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
index 71646c81421..859ca741745 100644
--- a/arch/x86/kernel/amd_iommu.c
+++ b/arch/x86/kernel/amd_iommu.c
@@ -67,6 +67,7 @@ static struct dma_ops_domain *find_protection_domain(u16 devid);
 DECLARE_STATS_COUNTER(compl_wait);
 DECLARE_STATS_COUNTER(cnt_map_single);
 DECLARE_STATS_COUNTER(cnt_unmap_single);
+DECLARE_STATS_COUNTER(cnt_map_sg);
 
 static struct dentry *stats_dir;
 static struct dentry *de_isolate;
@@ -96,6 +97,7 @@ static void amd_iommu_stats_init(void)
 	amd_iommu_stats_add(&compl_wait);
 	amd_iommu_stats_add(&cnt_map_single);
 	amd_iommu_stats_add(&cnt_unmap_single);
+	amd_iommu_stats_add(&cnt_map_sg);
 }
 
 #endif
@@ -1377,6 +1379,8 @@ static int map_sg(struct device *dev, struct scatterlist *sglist,
 	int mapped_elems = 0;
 	u64 dma_mask;
 
+	INC_STATS_COUNTER(cnt_map_sg);
+
 	if (!check_device(dev))
 		return 0;
 
-- 
cgit v1.2.3


From 55877a6bcdf0843414eecc658550c6551f5b5e1d Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Fri, 12 Dec 2008 15:12:14 +0100
Subject: AMD IOMMU: add stats counter for unmap_sg requests

Impact: see number of unmap_sg requests in debugfs

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
 arch/x86/kernel/amd_iommu.c | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
index 859ca741745..49f2c8533e1 100644
--- a/arch/x86/kernel/amd_iommu.c
+++ b/arch/x86/kernel/amd_iommu.c
@@ -68,6 +68,7 @@ DECLARE_STATS_COUNTER(compl_wait);
 DECLARE_STATS_COUNTER(cnt_map_single);
 DECLARE_STATS_COUNTER(cnt_unmap_single);
 DECLARE_STATS_COUNTER(cnt_map_sg);
+DECLARE_STATS_COUNTER(cnt_unmap_sg);
 
 static struct dentry *stats_dir;
 static struct dentry *de_isolate;
@@ -98,6 +99,7 @@ static void amd_iommu_stats_init(void)
 	amd_iommu_stats_add(&cnt_map_single);
 	amd_iommu_stats_add(&cnt_unmap_single);
 	amd_iommu_stats_add(&cnt_map_sg);
+	amd_iommu_stats_add(&cnt_unmap_sg);
 }
 
 #endif
@@ -1443,6 +1445,8 @@ static void unmap_sg(struct device *dev, struct scatterlist *sglist,
 	u16 devid;
 	int i;
 
+	INC_STATS_COUNTER(cnt_unmap_sg);
+
 	if (!check_device(dev) ||
 	    !get_device_resources(dev, &iommu, &domain, &devid))
 		return;
-- 
cgit v1.2.3


From c8f0fb36bffa9e21d214a2910b825567d52bfc2c Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Fri, 12 Dec 2008 15:14:21 +0100
Subject: AMD IOMMU: add stats counter for alloc_coherent requests

Impact: see number of alloc_coherent requests in debugfs

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
 arch/x86/kernel/amd_iommu.c | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
index 49f2c8533e1..ecc89f8857b 100644
--- a/arch/x86/kernel/amd_iommu.c
+++ b/arch/x86/kernel/amd_iommu.c
@@ -69,6 +69,7 @@ DECLARE_STATS_COUNTER(cnt_map_single);
 DECLARE_STATS_COUNTER(cnt_unmap_single);
 DECLARE_STATS_COUNTER(cnt_map_sg);
 DECLARE_STATS_COUNTER(cnt_unmap_sg);
+DECLARE_STATS_COUNTER(cnt_alloc_coherent);
 
 static struct dentry *stats_dir;
 static struct dentry *de_isolate;
@@ -100,6 +101,7 @@ static void amd_iommu_stats_init(void)
 	amd_iommu_stats_add(&cnt_unmap_single);
 	amd_iommu_stats_add(&cnt_map_sg);
 	amd_iommu_stats_add(&cnt_unmap_sg);
+	amd_iommu_stats_add(&cnt_alloc_coherent);
 }
 
 #endif
@@ -1481,6 +1483,8 @@ static void *alloc_coherent(struct device *dev, size_t size,
 	phys_addr_t paddr;
 	u64 dma_mask = dev->coherent_dma_mask;
 
+	INC_STATS_COUNTER(cnt_alloc_coherent);
+
 	if (!check_device(dev))
 		return NULL;
 
-- 
cgit v1.2.3


From 5d31ee7e08b7713596b999a42e67491bdf3665b3 Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Fri, 12 Dec 2008 15:16:38 +0100
Subject: AMD IOMMU: add stats counter for free_coherent requests

Impact: see number of free_coherent requests in debugfs

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
 arch/x86/kernel/amd_iommu.c | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
index ecc89f8857b..112412d733a 100644
--- a/arch/x86/kernel/amd_iommu.c
+++ b/arch/x86/kernel/amd_iommu.c
@@ -70,6 +70,7 @@ DECLARE_STATS_COUNTER(cnt_unmap_single);
 DECLARE_STATS_COUNTER(cnt_map_sg);
 DECLARE_STATS_COUNTER(cnt_unmap_sg);
 DECLARE_STATS_COUNTER(cnt_alloc_coherent);
+DECLARE_STATS_COUNTER(cnt_free_coherent);
 
 static struct dentry *stats_dir;
 static struct dentry *de_isolate;
@@ -102,6 +103,7 @@ static void amd_iommu_stats_init(void)
 	amd_iommu_stats_add(&cnt_map_sg);
 	amd_iommu_stats_add(&cnt_unmap_sg);
 	amd_iommu_stats_add(&cnt_alloc_coherent);
+	amd_iommu_stats_add(&cnt_free_coherent);
 }
 
 #endif
@@ -1541,6 +1543,8 @@ static void free_coherent(struct device *dev, size_t size,
 	struct protection_domain *domain;
 	u16 devid;
 
+	INC_STATS_COUNTER(cnt_free_coherent);
+
 	if (!check_device(dev))
 		return;
 
-- 
cgit v1.2.3


From c1858976f5ef05122bb671f678beaf7e1fe1dd74 Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Fri, 12 Dec 2008 15:42:39 +0100
Subject: AMD IOMMU: add stats counter for cross-page request

Impact: see number of requests for more than one page in debugfs

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
 arch/x86/kernel/amd_iommu.c | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
index 112412d733a..f5455039b60 100644
--- a/arch/x86/kernel/amd_iommu.c
+++ b/arch/x86/kernel/amd_iommu.c
@@ -71,6 +71,7 @@ DECLARE_STATS_COUNTER(cnt_map_sg);
 DECLARE_STATS_COUNTER(cnt_unmap_sg);
 DECLARE_STATS_COUNTER(cnt_alloc_coherent);
 DECLARE_STATS_COUNTER(cnt_free_coherent);
+DECLARE_STATS_COUNTER(cross_page);
 
 static struct dentry *stats_dir;
 static struct dentry *de_isolate;
@@ -104,6 +105,7 @@ static void amd_iommu_stats_init(void)
 	amd_iommu_stats_add(&cnt_unmap_sg);
 	amd_iommu_stats_add(&cnt_alloc_coherent);
 	amd_iommu_stats_add(&cnt_free_coherent);
+	amd_iommu_stats_add(&cross_page);
 }
 
 #endif
@@ -1217,6 +1219,9 @@ static dma_addr_t __map_single(struct device *dev,
 	pages = iommu_num_pages(paddr, size, PAGE_SIZE);
 	paddr &= PAGE_MASK;
 
+	if (pages > 1)
+		INC_STATS_COUNTER(cross_page);
+
 	if (align)
 		align_mask = (1UL << get_order(size)) - 1;
 
-- 
cgit v1.2.3


From f57d98ae6979f7bcbf758023b4716f485385f903 Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Fri, 12 Dec 2008 15:46:29 +0100
Subject: AMD IOMMU: add stats counter for single iommu domain tlb flushes

Impact: see number of single iommu domain tlb flushes in debugfs

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
 arch/x86/kernel/amd_iommu.c | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
index f5455039b60..e99022d3a39 100644
--- a/arch/x86/kernel/amd_iommu.c
+++ b/arch/x86/kernel/amd_iommu.c
@@ -72,6 +72,7 @@ DECLARE_STATS_COUNTER(cnt_unmap_sg);
 DECLARE_STATS_COUNTER(cnt_alloc_coherent);
 DECLARE_STATS_COUNTER(cnt_free_coherent);
 DECLARE_STATS_COUNTER(cross_page);
+DECLARE_STATS_COUNTER(domain_flush_single);
 
 static struct dentry *stats_dir;
 static struct dentry *de_isolate;
@@ -106,6 +107,7 @@ static void amd_iommu_stats_init(void)
 	amd_iommu_stats_add(&cnt_alloc_coherent);
 	amd_iommu_stats_add(&cnt_free_coherent);
 	amd_iommu_stats_add(&cross_page);
+	amd_iommu_stats_add(&domain_flush_single);
 }
 
 #endif
@@ -413,6 +415,8 @@ static void iommu_flush_tlb(struct amd_iommu *iommu, u16 domid)
 {
 	u64 address = CMD_INV_IOMMU_ALL_PAGES_ADDRESS;
 
+	INC_STATS_COUNTER(domain_flush_single);
+
 	iommu_queue_inv_iommu_pages(iommu, address, domid, 0, 1);
 }
 
-- 
cgit v1.2.3


From 18811f55d48e5f3ee70c4744c592f940022fa592 Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Fri, 12 Dec 2008 15:48:28 +0100
Subject: AMD IOMMU: add stats counter for domain tlb flushes

Impact: see number of domain tlb flushes in debugfs

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
 arch/x86/kernel/amd_iommu.c | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
index e99022d3a39..a897c7246dc 100644
--- a/arch/x86/kernel/amd_iommu.c
+++ b/arch/x86/kernel/amd_iommu.c
@@ -73,6 +73,7 @@ DECLARE_STATS_COUNTER(cnt_alloc_coherent);
 DECLARE_STATS_COUNTER(cnt_free_coherent);
 DECLARE_STATS_COUNTER(cross_page);
 DECLARE_STATS_COUNTER(domain_flush_single);
+DECLARE_STATS_COUNTER(domain_flush_all);
 
 static struct dentry *stats_dir;
 static struct dentry *de_isolate;
@@ -108,6 +109,7 @@ static void amd_iommu_stats_init(void)
 	amd_iommu_stats_add(&cnt_free_coherent);
 	amd_iommu_stats_add(&cross_page);
 	amd_iommu_stats_add(&domain_flush_single);
+	amd_iommu_stats_add(&domain_flush_all);
 }
 
 #endif
@@ -431,6 +433,8 @@ static void iommu_flush_domain(u16 domid)
 	struct amd_iommu *iommu;
 	struct iommu_cmd cmd;
 
+	INC_STATS_COUNTER(domain_flush_all);
+
 	__iommu_build_inv_iommu_pages(&cmd, CMD_INV_IOMMU_ALL_PAGES_ADDRESS,
 				      domid, 1, 1);
 
-- 
cgit v1.2.3


From 5774f7c5fef2526bfa58eab628fbe91dce5e07b1 Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Fri, 12 Dec 2008 15:57:30 +0100
Subject: AMD IOMMU: add statistics about allocated io memory

Impact: see amount of allocated io memory in debugfs

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
 arch/x86/kernel/amd_iommu.c | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
index a897c7246dc..69f367b033a 100644
--- a/arch/x86/kernel/amd_iommu.c
+++ b/arch/x86/kernel/amd_iommu.c
@@ -74,6 +74,7 @@ DECLARE_STATS_COUNTER(cnt_free_coherent);
 DECLARE_STATS_COUNTER(cross_page);
 DECLARE_STATS_COUNTER(domain_flush_single);
 DECLARE_STATS_COUNTER(domain_flush_all);
+DECLARE_STATS_COUNTER(alloced_io_mem);
 
 static struct dentry *stats_dir;
 static struct dentry *de_isolate;
@@ -110,6 +111,7 @@ static void amd_iommu_stats_init(void)
 	amd_iommu_stats_add(&cross_page);
 	amd_iommu_stats_add(&domain_flush_single);
 	amd_iommu_stats_add(&domain_flush_all);
+	amd_iommu_stats_add(&alloced_io_mem);
 }
 
 #endif
@@ -1246,6 +1248,8 @@ static dma_addr_t __map_single(struct device *dev,
 	}
 	address += offset;
 
+	ADD_STATS_COUNTER(alloced_io_mem, size);
+
 	if (unlikely(dma_dom->need_flush && !amd_iommu_unmap_flush)) {
 		iommu_flush_tlb(iommu, dma_dom->domain.id);
 		dma_dom->need_flush = false;
@@ -1282,6 +1286,8 @@ static void __unmap_single(struct amd_iommu *iommu,
 		start += PAGE_SIZE;
 	}
 
+	SUB_STATS_COUNTER(alloced_io_mem, size);
+
 	dma_ops_free_addresses(dma_dom, dma_addr, pages);
 
 	if (amd_iommu_unmap_flush || dma_dom->need_flush) {
-- 
cgit v1.2.3


From 8ecaf8f19f0f0627d6ac6d69ed9472e7d307f35b Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Fri, 12 Dec 2008 16:13:04 +0100
Subject: AMD IOMMU: add statistics about total number of map requests

Impact: see total number of map requests in debugfs

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
 arch/x86/kernel/amd_iommu.c | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
index 69f367b033a..0c504b207bf 100644
--- a/arch/x86/kernel/amd_iommu.c
+++ b/arch/x86/kernel/amd_iommu.c
@@ -75,6 +75,7 @@ DECLARE_STATS_COUNTER(cross_page);
 DECLARE_STATS_COUNTER(domain_flush_single);
 DECLARE_STATS_COUNTER(domain_flush_all);
 DECLARE_STATS_COUNTER(alloced_io_mem);
+DECLARE_STATS_COUNTER(total_map_requests);
 
 static struct dentry *stats_dir;
 static struct dentry *de_isolate;
@@ -112,6 +113,7 @@ static void amd_iommu_stats_init(void)
 	amd_iommu_stats_add(&domain_flush_single);
 	amd_iommu_stats_add(&domain_flush_all);
 	amd_iommu_stats_add(&alloced_io_mem);
+	amd_iommu_stats_add(&total_map_requests);
 }
 
 #endif
@@ -1229,6 +1231,8 @@ static dma_addr_t __map_single(struct device *dev,
 	pages = iommu_num_pages(paddr, size, PAGE_SIZE);
 	paddr &= PAGE_MASK;
 
+	INC_STATS_COUNTER(total_map_requests);
+
 	if (pages > 1)
 		INC_STATS_COUNTER(cross_page);
 
-- 
cgit v1.2.3


From 0e93dd883537e628b809a2120854cd591c8935f1 Mon Sep 17 00:00:00 2001
From: Jaswinder Singh Rajput <jaswinder@infradead.org>
Date: Mon, 29 Dec 2008 21:45:22 +0530
Subject: AMD IOMMU: prealloc_protection_domains should be static

Impact: cleanup, reduce kernel size a bit, avoid sparse warning

Fixes sparse warning:
arch/x86/kernel/amd_iommu.c:1299:6: warning: symbol 'prealloc_protection_domains' was not declared. Should it be static?

Signed-off-by: Jaswinder Singh Rajput <jaswinderrajput@gmail.com>
Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
 arch/x86/kernel/amd_iommu.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
index 0c504b207bf..881c68ffdf2 100644
--- a/arch/x86/kernel/amd_iommu.c
+++ b/arch/x86/kernel/amd_iommu.c
@@ -1622,7 +1622,7 @@ static int amd_iommu_dma_supported(struct device *dev, u64 mask)
  * we don't need to preallocate the protection domains anymore.
  * For now we have to.
  */
-void prealloc_protection_domains(void)
+static void prealloc_protection_domains(void)
 {
 	struct pci_dev *dev = NULL;
 	struct dma_ops_domain *dma_dom;
-- 
cgit v1.2.3


From 065a6d68c71af2a3bdd080fa5aa353b76eede8f5 Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Sat, 3 Jan 2009 14:16:35 +0100
Subject: AMD IOMMU: remove now unnecessary #ifdefs

The #ifdef's are no longer necessary when the iommu-api and the amd
iommu updates are merged together.

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
 arch/x86/kernel/amd_iommu.c | 11 -----------
 1 file changed, 11 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
index 881c68ffdf2..5113c080f0c 100644
--- a/arch/x86/kernel/amd_iommu.c
+++ b/arch/x86/kernel/amd_iommu.c
@@ -426,7 +426,6 @@ static void iommu_flush_tlb(struct amd_iommu *iommu, u16 domid)
 	iommu_queue_inv_iommu_pages(iommu, address, domid, 0, 1);
 }
 
-#ifdef CONFIG_IOMMU_API
 /*
  * This function is used to flush the IO/TLB for a given protection domain
  * on every IOMMU in the system
@@ -450,7 +449,6 @@ static void iommu_flush_domain(u16 domid)
 		spin_unlock_irqrestore(&iommu->lock, flags);
 	}
 }
-#endif
 
 /****************************************************************************
  *
@@ -516,7 +514,6 @@ static int iommu_map_page(struct protection_domain *dom,
 	return 0;
 }
 
-#ifdef CONFIG_IOMMU_API
 static void iommu_unmap_page(struct protection_domain *dom,
 			     unsigned long bus_addr)
 {
@@ -538,7 +535,6 @@ static void iommu_unmap_page(struct protection_domain *dom,
 
 	*pte = 0;
 }
-#endif
 
 /*
  * This function checks if a specific unity mapping entry is needed for
@@ -723,7 +719,6 @@ static u16 domain_id_alloc(void)
 	return id;
 }
 
-#ifdef CONFIG_IOMMU_API
 static void domain_id_free(int id)
 {
 	unsigned long flags;
@@ -733,7 +728,6 @@ static void domain_id_free(int id)
 		__clear_bit(id, amd_iommu_pd_alloc_bitmap);
 	write_unlock_irqrestore(&amd_iommu_devtable_lock, flags);
 }
-#endif
 
 /*
  * Used to reserve address ranges in the aperture (e.g. for exclusion
@@ -1702,9 +1696,7 @@ int __init amd_iommu_init_dma_ops(void)
 	/* Make the driver finally visible to the drivers */
 	dma_ops = &amd_iommu_dma_ops;
 
-#ifdef CONFIG_IOMMU_API
 	register_iommu(&amd_iommu_ops);
-#endif
 
 	bus_register_notifier(&pci_bus_type, &device_nb);
 
@@ -1732,8 +1724,6 @@ free_domains:
  *
  *****************************************************************************/
 
-#ifdef CONFIG_IOMMU_API
-
 static void cleanup_domain(struct protection_domain *domain)
 {
 	unsigned long flags;
@@ -1944,4 +1934,3 @@ static struct iommu_ops amd_iommu_ops = {
 	.iova_to_phys = amd_iommu_iova_to_phys,
 };
 
-#endif
-- 
cgit v1.2.3


From 730cf27246225d56ca1603b2f3c4fdbf882d4e51 Mon Sep 17 00:00:00 2001
From: Mike Travis <travis@sgi.com>
Date: Wed, 31 Dec 2008 18:08:45 -0800
Subject: x86: enable cpus display of kernel_max and offlined cpus

Impact: enables /sys/devices/system/cpu/{kernel_max,offline} user interface

By setting total_cpus, the drivers/base/cpu.c will display the
values of kernel_max (NR_CPUS-1) and the offlined cpu map.

Signed-off-by: Mike Travis <travis@sgi.com>
Acked-by: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/smpboot.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 9e177a4077e..f49c26bd7e2 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -1298,6 +1298,8 @@ __init void prefill_possible_map(void)
 	else
 		possible = setup_possible_cpus;
 
+	total_cpus = max_t(int, possible, num_processors + disabled_cpus);
+
 	if (possible > CONFIG_NR_CPUS) {
 		printk(KERN_WARNING
 			"%d Processors exceeds NR_CPUS limit of %d\n",
-- 
cgit v1.2.3


From 9628937d5b37169151c5f6bbd40919c6ac958a46 Mon Sep 17 00:00:00 2001
From: Mike Travis <travis@sgi.com>
Date: Wed, 31 Dec 2008 18:08:46 -0800
Subject: x86: cleanup some remaining usages of NR_CPUS where s/b nr_cpu_ids

Impact: Reduce future system panics due to cpumask operations using NR_CPUS

Insure that code does not look at bits >= nr_cpu_ids as when cpumasks are
allocated based on nr_cpu_ids, these extra bits will not be defined.

Also some other minor updates:

   * change in to use cpu accessor function set_cpu_present() instead of
     directly accessing cpu_present_map w/cpu_clear() [arch/x86/kernel/reboot.c]

   * use cpumask_of() instead of &cpumask_of_cpu() [arch/x86/kernel/reboot.c]

   * optimize some cpu_mask_to_apicid_and functions.

Signed-off-by: Mike Travis <travis@sgi.com>
Acked-by: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/include/asm/es7000/apic.h  | 32 +++-------------------------
 arch/x86/include/asm/lguest.h       |  2 +-
 arch/x86/include/asm/numaq/apic.h   |  4 ++--
 arch/x86/include/asm/summit/apic.h  | 42 +++++++------------------------------
 arch/x86/kernel/acpi/boot.c         |  2 +-
 arch/x86/kernel/apic.c              |  4 ++--
 arch/x86/kernel/cpu/common.c        |  2 +-
 arch/x86/kernel/cpuid.c             |  2 +-
 arch/x86/kernel/msr.c               |  2 +-
 arch/x86/kernel/reboot.c            |  4 ++--
 arch/x86/kernel/smpboot.c           |  2 +-
 arch/x86/mach-voyager/voyager_smp.c |  7 +++----
 12 files changed, 26 insertions(+), 79 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/es7000/apic.h b/arch/x86/include/asm/es7000/apic.h
index 51ac1230294..bc53d5ef138 100644
--- a/arch/x86/include/asm/es7000/apic.h
+++ b/arch/x86/include/asm/es7000/apic.h
@@ -157,7 +157,7 @@ cpu_mask_to_apicid_cluster(const struct cpumask *cpumask)
 
 	num_bits_set = cpumask_weight(cpumask);
 	/* Return id to all */
-	if (num_bits_set == NR_CPUS)
+	if (num_bits_set == nr_cpu_ids)
 		return 0xFF;
 	/*
 	 * The cpus in the mask must all be on the apic cluster.  If are not
@@ -190,7 +190,7 @@ static inline unsigned int cpu_mask_to_apicid(const cpumask_t *cpumask)
 
 	num_bits_set = cpus_weight(*cpumask);
 	/* Return id to all */
-	if (num_bits_set == NR_CPUS)
+	if (num_bits_set == nr_cpu_ids)
 		return cpu_to_logical_apicid(0);
 	/*
 	 * The cpus in the mask must all be on the apic cluster.  If are not
@@ -218,9 +218,6 @@ static inline unsigned int cpu_mask_to_apicid(const cpumask_t *cpumask)
 static inline unsigned int cpu_mask_to_apicid_and(const struct cpumask *inmask,
 						  const struct cpumask *andmask)
 {
-	int num_bits_set;
-	int cpus_found = 0;
-	int cpu;
 	int apicid = cpu_to_logical_apicid(0);
 	cpumask_var_t cpumask;
 
@@ -229,31 +226,8 @@ static inline unsigned int cpu_mask_to_apicid_and(const struct cpumask *inmask,
 
 	cpumask_and(cpumask, inmask, andmask);
 	cpumask_and(cpumask, cpumask, cpu_online_mask);
+	apicid = cpu_mask_to_apicid(cpumask);
 
-	num_bits_set = cpumask_weight(cpumask);
-	/* Return id to all */
-	if (num_bits_set == NR_CPUS)
-		goto exit;
-	/*
-	 * The cpus in the mask must all be on the apic cluster.  If are not
-	 * on the same apicid cluster return default value of TARGET_CPUS.
-	 */
-	cpu = cpumask_first(cpumask);
-	apicid = cpu_to_logical_apicid(cpu);
-	while (cpus_found < num_bits_set) {
-		if (cpumask_test_cpu(cpu, cpumask)) {
-			int new_apicid = cpu_to_logical_apicid(cpu);
-			if (apicid_cluster(apicid) !=
-					apicid_cluster(new_apicid)){
-				printk ("%s: Not a valid mask!\n", __func__);
-				return cpu_to_logical_apicid(0);
-			}
-			apicid = new_apicid;
-			cpus_found++;
-		}
-		cpu++;
-	}
-exit:
 	free_cpumask_var(cpumask);
 	return apicid;
 }
diff --git a/arch/x86/include/asm/lguest.h b/arch/x86/include/asm/lguest.h
index d28a507cef3..1caf57628b9 100644
--- a/arch/x86/include/asm/lguest.h
+++ b/arch/x86/include/asm/lguest.h
@@ -15,7 +15,7 @@
 #define SHARED_SWITCHER_PAGES \
 	DIV_ROUND_UP(end_switcher_text - start_switcher_text, PAGE_SIZE)
 /* Pages for switcher itself, then two pages per cpu */
-#define TOTAL_SWITCHER_PAGES (SHARED_SWITCHER_PAGES + 2 * NR_CPUS)
+#define TOTAL_SWITCHER_PAGES (SHARED_SWITCHER_PAGES + 2 * nr_cpu_ids)
 
 /* We map at -4M for ease of mapping into the guest (one PTE page). */
 #define SWITCHER_ADDR 0xFFC00000
diff --git a/arch/x86/include/asm/numaq/apic.h b/arch/x86/include/asm/numaq/apic.h
index c80f00d2996..bf37bc49bd8 100644
--- a/arch/x86/include/asm/numaq/apic.h
+++ b/arch/x86/include/asm/numaq/apic.h
@@ -63,8 +63,8 @@ static inline physid_mask_t ioapic_phys_id_map(physid_mask_t phys_map)
 extern u8 cpu_2_logical_apicid[];
 static inline int cpu_to_logical_apicid(int cpu)
 {
-       if (cpu >= NR_CPUS)
-	       return BAD_APICID;
+	if (cpu >= nr_cpu_ids)
+		return BAD_APICID;
 	return (int)cpu_2_logical_apicid[cpu];
 }
 
diff --git a/arch/x86/include/asm/summit/apic.h b/arch/x86/include/asm/summit/apic.h
index 99327d1be49..4bb5fb34f03 100644
--- a/arch/x86/include/asm/summit/apic.h
+++ b/arch/x86/include/asm/summit/apic.h
@@ -52,7 +52,7 @@ static inline void init_apic_ldr(void)
 	int i;
 
 	/* Create logical APIC IDs by counting CPUs already in cluster. */
-	for (count = 0, i = NR_CPUS; --i >= 0; ) {
+	for (count = 0, i = nr_cpu_ids; --i >= 0; ) {
 		lid = cpu_2_logical_apicid[i];
 		if (lid != BAD_APICID && apicid_cluster(lid) == my_cluster)
 			++count;
@@ -97,8 +97,8 @@ static inline int apicid_to_node(int logical_apicid)
 static inline int cpu_to_logical_apicid(int cpu)
 {
 #ifdef CONFIG_SMP
-       if (cpu >= NR_CPUS)
-	       return BAD_APICID;
+	if (cpu >= nr_cpu_ids)
+		return BAD_APICID;
 	return (int)cpu_2_logical_apicid[cpu];
 #else
 	return logical_smp_processor_id();
@@ -107,7 +107,7 @@ static inline int cpu_to_logical_apicid(int cpu)
 
 static inline int cpu_present_to_apicid(int mps_cpu)
 {
-	if (mps_cpu < NR_CPUS)
+	if (mps_cpu < nr_cpu_ids)
 		return (int)per_cpu(x86_bios_cpu_apicid, mps_cpu);
 	else
 		return BAD_APICID;
@@ -146,7 +146,7 @@ static inline unsigned int cpu_mask_to_apicid(const cpumask_t *cpumask)
 
 	num_bits_set = cpus_weight(*cpumask);
 	/* Return id to all */
-	if (num_bits_set == NR_CPUS)
+	if (num_bits_set >= nr_cpu_ids)
 		return (int) 0xFF;
 	/*
 	 * The cpus in the mask must all be on the apic cluster.  If are not
@@ -173,42 +173,16 @@ static inline unsigned int cpu_mask_to_apicid(const cpumask_t *cpumask)
 static inline unsigned int cpu_mask_to_apicid_and(const struct cpumask *inmask,
 						  const struct cpumask *andmask)
 {
-	int num_bits_set;
-	int cpus_found = 0;
-	int cpu;
-	int apicid = 0xFF;
+	int apicid = cpu_to_logical_apicid(0);
 	cpumask_var_t cpumask;
 
 	if (!alloc_cpumask_var(&cpumask, GFP_ATOMIC))
-		return (int) 0xFF;
+		return apicid;
 
 	cpumask_and(cpumask, inmask, andmask);
 	cpumask_and(cpumask, cpumask, cpu_online_mask);
+	apicid = cpu_mask_to_apicid(cpumask);
 
-	num_bits_set = cpumask_weight(cpumask);
-	/* Return id to all */
-	if (num_bits_set == nr_cpu_ids)
-		goto exit;
-	/*
-	 * The cpus in the mask must all be on the apic cluster.  If are not
-	 * on the same apicid cluster return default value of TARGET_CPUS.
-	 */
-	cpu = cpumask_first(cpumask);
-	apicid = cpu_to_logical_apicid(cpu);
-	while (cpus_found < num_bits_set) {
-		if (cpumask_test_cpu(cpu, cpumask)) {
-			int new_apicid = cpu_to_logical_apicid(cpu);
-			if (apicid_cluster(apicid) !=
-					apicid_cluster(new_apicid)){
-				printk ("%s: Not a valid mask!\n", __func__);
-				return 0xFF;
-			}
-			apicid = apicid | new_apicid;
-			cpus_found++;
-		}
-		cpu++;
-	}
-exit:
 	free_cpumask_var(cpumask);
 	return apicid;
 }
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index 65d0b72777e..fd24c55e4ae 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -598,7 +598,7 @@ EXPORT_SYMBOL(acpi_map_lsapic);
 int acpi_unmap_lsapic(int cpu)
 {
 	per_cpu(x86_cpu_to_apicid, cpu) = -1;
-	cpu_clear(cpu, cpu_present_map);
+	set_cpu_present(cpu, false);
 	num_processors--;
 
 	return (0);
diff --git a/arch/x86/kernel/apic.c b/arch/x86/kernel/apic.c
index 6b7f824db16..99589245fd8 100644
--- a/arch/x86/kernel/apic.c
+++ b/arch/x86/kernel/apic.c
@@ -140,7 +140,7 @@ static int lapic_next_event(unsigned long delta,
 			    struct clock_event_device *evt);
 static void lapic_timer_setup(enum clock_event_mode mode,
 			      struct clock_event_device *evt);
-static void lapic_timer_broadcast(const cpumask_t *mask);
+static void lapic_timer_broadcast(const struct cpumask *mask);
 static void apic_pm_activate(void);
 
 /*
@@ -453,7 +453,7 @@ static void lapic_timer_setup(enum clock_event_mode mode,
 /*
  * Local APIC timer broadcast function
  */
-static void lapic_timer_broadcast(const cpumask_t *mask)
+static void lapic_timer_broadcast(const struct cpumask *mask)
 {
 #ifdef CONFIG_SMP
 	send_IPI_mask(mask, LOCAL_TIMER_VECTOR);
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 42e0853030c..3f95a40f718 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -355,7 +355,7 @@ void __cpuinit detect_ht(struct cpuinfo_x86 *c)
 		printk(KERN_INFO  "CPU: Hyper-Threading is disabled\n");
 	} else if (smp_num_siblings > 1) {
 
-		if (smp_num_siblings > NR_CPUS) {
+		if (smp_num_siblings > nr_cpu_ids) {
 			printk(KERN_WARNING "CPU: Unsupported number of siblings %d",
 					smp_num_siblings);
 			smp_num_siblings = 1;
diff --git a/arch/x86/kernel/cpuid.c b/arch/x86/kernel/cpuid.c
index 72cefd1e649..62a3c23bd70 100644
--- a/arch/x86/kernel/cpuid.c
+++ b/arch/x86/kernel/cpuid.c
@@ -121,7 +121,7 @@ static int cpuid_open(struct inode *inode, struct file *file)
 	lock_kernel();
 
 	cpu = iminor(file->f_path.dentry->d_inode);
-	if (cpu >= NR_CPUS || !cpu_online(cpu)) {
+	if (cpu >= nr_cpu_ids || !cpu_online(cpu)) {
 		ret = -ENXIO;	/* No such CPU */
 		goto out;
 	}
diff --git a/arch/x86/kernel/msr.c b/arch/x86/kernel/msr.c
index 82a7c7ed6d4..726266695b2 100644
--- a/arch/x86/kernel/msr.c
+++ b/arch/x86/kernel/msr.c
@@ -136,7 +136,7 @@ static int msr_open(struct inode *inode, struct file *file)
 	lock_kernel();
 	cpu = iminor(file->f_path.dentry->d_inode);
 
-	if (cpu >= NR_CPUS || !cpu_online(cpu)) {
+	if (cpu >= nr_cpu_ids || !cpu_online(cpu)) {
 		ret = -ENXIO;	/* No such CPU */
 		goto out;
 	}
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index ba7b9a0e606..de4a9d643be 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -449,7 +449,7 @@ void native_machine_shutdown(void)
 
 #ifdef CONFIG_X86_32
 	/* See if there has been given a command line override */
-	if ((reboot_cpu != -1) && (reboot_cpu < NR_CPUS) &&
+	if ((reboot_cpu != -1) && (reboot_cpu < nr_cpu_ids) &&
 		cpu_online(reboot_cpu))
 		reboot_cpu_id = reboot_cpu;
 #endif
@@ -459,7 +459,7 @@ void native_machine_shutdown(void)
 		reboot_cpu_id = smp_processor_id();
 
 	/* Make certain I only run on the appropriate processor */
-	set_cpus_allowed_ptr(current, &cpumask_of_cpu(reboot_cpu_id));
+	set_cpus_allowed_ptr(current, cpumask_of(reboot_cpu_id));
 
 	/* O.K Now that I'm on the appropriate processor,
 	 * stop all of the others.
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index f49c26bd7e2..6bd4d9b7387 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -1154,7 +1154,7 @@ static void __init smp_cpu_index_default(void)
 	for_each_possible_cpu(i) {
 		c = &cpu_data(i);
 		/* mark all to hotplug */
-		c->cpu_index = NR_CPUS;
+		c->cpu_index = nr_cpu_ids;
 	}
 }
 
diff --git a/arch/x86/mach-voyager/voyager_smp.c b/arch/x86/mach-voyager/voyager_smp.c
index a5bc05492b1..9840b7ec749 100644
--- a/arch/x86/mach-voyager/voyager_smp.c
+++ b/arch/x86/mach-voyager/voyager_smp.c
@@ -357,9 +357,8 @@ void __init find_smp_config(void)
 	printk("VOYAGER SMP: Boot cpu is %d\n", boot_cpu_id);
 
 	/* initialize the CPU structures (moved from smp_boot_cpus) */
-	for (i = 0; i < NR_CPUS; i++) {
+	for (i = 0; i < nr_cpu_ids; i++)
 		cpu_irq_affinity[i] = ~0;
-	}
 	cpu_online_map = cpumask_of_cpu(boot_cpu_id);
 
 	/* The boot CPU must be extended */
@@ -1227,7 +1226,7 @@ int setup_profiling_timer(unsigned int multiplier)
 	 * new values until the next timer interrupt in which they do process
 	 * accounting.
 	 */
-	for (i = 0; i < NR_CPUS; ++i)
+	for (i = 0; i < nr_cpu_ids; ++i)
 		per_cpu(prof_multiplier, i) = multiplier;
 
 	return 0;
@@ -1257,7 +1256,7 @@ void __init voyager_smp_intr_init(void)
 	int i;
 
 	/* initialize the per cpu irq mask to all disabled */
-	for (i = 0; i < NR_CPUS; i++)
+	for (i = 0; i < nr_cpu_ids; i++)
 		vic_irq_mask[i] = 0xFFFF;
 
 	VIC_SET_GATE(VIC_CPI_LEVEL0, vic_cpi_interrupt);
-- 
cgit v1.2.3


From ee943a82b697456f9d2ac46f1e6d230beedb4b6c Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Wed, 31 Dec 2008 18:08:47 -0800
Subject: x86: use cpumask_var_t in acpi/boot.c

Impact: reduce stack size, use new API.

Replace cpumask_t with cpumask_var_t.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Mike Travis <travis@sgi.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/acpi/boot.c | 29 ++++++++++++++++++++++-------
 1 file changed, 22 insertions(+), 7 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index fd24c55e4ae..29dc0c89d4a 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -538,9 +538,10 @@ static int __cpuinit _acpi_map_lsapic(acpi_handle handle, int *pcpu)
 	struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL };
 	union acpi_object *obj;
 	struct acpi_madt_local_apic *lapic;
-	cpumask_t tmp_map, new_map;
+	cpumask_var_t tmp_map, new_map;
 	u8 physid;
 	int cpu;
+	int retval = -ENOMEM;
 
 	if (ACPI_FAILURE(acpi_evaluate_object(handle, "_MAT", NULL, &buffer)))
 		return -EINVAL;
@@ -569,23 +570,37 @@ static int __cpuinit _acpi_map_lsapic(acpi_handle handle, int *pcpu)
 	buffer.length = ACPI_ALLOCATE_BUFFER;
 	buffer.pointer = NULL;
 
-	tmp_map = cpu_present_map;
+	if (!alloc_cpumask_var(&tmp_map, GFP_KERNEL))
+		goto out;
+
+	if (!alloc_cpumask_var(&new_map, GFP_KERNEL))
+		goto free_tmp_map;
+
+	cpumask_copy(tmp_map, cpu_present_mask);
 	acpi_register_lapic(physid, lapic->lapic_flags & ACPI_MADT_ENABLED);
 
 	/*
 	 * If mp_register_lapic successfully generates a new logical cpu
 	 * number, then the following will get us exactly what was mapped
 	 */
-	cpus_andnot(new_map, cpu_present_map, tmp_map);
-	if (cpus_empty(new_map)) {
+	cpumask_andnot(new_map, cpu_present_mask, tmp_map);
+	if (cpumask_empty(new_map)) {
 		printk ("Unable to map lapic to logical cpu number\n");
-		return -EINVAL;
+		retval = -EINVAL;
+		goto free_new_map;
 	}
 
-	cpu = first_cpu(new_map);
+	cpu = cpumask_first(new_map);
 
 	*pcpu = cpu;
-	return 0;
+	retval = 0;
+
+free_new_map:
+	free_cpumask_var(new_map);
+free_tmp_map:
+	free_cpumask_var(tmp_map);
+out:
+	return retval;
 }
 
 /* wrapper to silence section mismatch warning */
-- 
cgit v1.2.3


From 2fdf66b491ac706657946442789ec644cc317e1a Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Wed, 31 Dec 2008 18:08:47 -0800
Subject: cpumask: convert shared_cpu_map in acpi_processor* structs to
 cpumask_var_t

Impact: Reduce memory usage, use new API.

This is part of an effort to reduce structure sizes for machines
configured with large NR_CPUS.  cpumask_t gets replaced by
cpumask_var_t, which is either struct cpumask[1] (small NR_CPUS) or
struct cpumask * (large NR_CPUS).

(Changes to powernow-k* by <travis>.)

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Mike Travis <travis@sgi.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c | 27 ++++++++++++++++++++++++---
 arch/x86/kernel/cpu/cpufreq/powernow-k7.c  |  9 +++++++++
 arch/x86/kernel/cpu/cpufreq/powernow-k8.c  | 24 +++++++++++++++---------
 3 files changed, 48 insertions(+), 12 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
index 88ea02dcb62..d0a001093b2 100644
--- a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
+++ b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
@@ -517,6 +517,17 @@ acpi_cpufreq_guess_freq(struct acpi_cpufreq_data *data, unsigned int cpu)
 	}
 }
 
+static void free_acpi_perf_data(void)
+{
+	unsigned int i;
+
+	/* Freeing a NULL pointer is OK, and alloc_percpu zeroes. */
+	for_each_possible_cpu(i)
+		free_cpumask_var(per_cpu_ptr(acpi_perf_data, i)
+				 ->shared_cpu_map);
+	free_percpu(acpi_perf_data);
+}
+
 /*
  * acpi_cpufreq_early_init - initialize ACPI P-States library
  *
@@ -527,6 +538,7 @@ acpi_cpufreq_guess_freq(struct acpi_cpufreq_data *data, unsigned int cpu)
  */
 static int __init acpi_cpufreq_early_init(void)
 {
+	unsigned int i;
 	dprintk("acpi_cpufreq_early_init\n");
 
 	acpi_perf_data = alloc_percpu(struct acpi_processor_performance);
@@ -534,6 +546,15 @@ static int __init acpi_cpufreq_early_init(void)
 		dprintk("Memory allocation error for acpi_perf_data.\n");
 		return -ENOMEM;
 	}
+	for_each_possible_cpu(i) {
+		if (!alloc_cpumask_var(&per_cpu_ptr(acpi_perf_data, i)
+				       ->shared_cpu_map, GFP_KERNEL)) {
+
+			/* Freeing a NULL pointer is OK: alloc_percpu zeroes. */
+			free_acpi_perf_data();
+			return -ENOMEM;
+		}
+	}
 
 	/* Do initialization in ACPI core */
 	acpi_processor_preregister_performance(acpi_perf_data);
@@ -604,9 +625,9 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy)
 	 */
 	if (policy->shared_type == CPUFREQ_SHARED_TYPE_ALL ||
 	    policy->shared_type == CPUFREQ_SHARED_TYPE_ANY) {
-		policy->cpus = perf->shared_cpu_map;
+		cpumask_copy(&policy->cpus, perf->shared_cpu_map);
 	}
-	policy->related_cpus = perf->shared_cpu_map;
+	cpumask_copy(&policy->related_cpus, perf->shared_cpu_map);
 
 #ifdef CONFIG_SMP
 	dmi_check_system(sw_any_bug_dmi_table);
@@ -795,7 +816,7 @@ static int __init acpi_cpufreq_init(void)
 
 	ret = cpufreq_register_driver(&acpi_cpufreq_driver);
 	if (ret)
-		free_percpu(acpi_perf_data);
+		free_acpi_perf_data();
 
 	return ret;
 }
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k7.c b/arch/x86/kernel/cpu/cpufreq/powernow-k7.c
index 7c7d56b4313..1b446d79a8f 100644
--- a/arch/x86/kernel/cpu/cpufreq/powernow-k7.c
+++ b/arch/x86/kernel/cpu/cpufreq/powernow-k7.c
@@ -310,6 +310,12 @@ static int powernow_acpi_init(void)
 		goto err0;
 	}
 
+	if (!alloc_cpumask_var(&acpi_processor_perf->shared_cpu_map,
+								GFP_KERNEL)) {
+		retval = -ENOMEM;
+		goto err05;
+	}
+
 	if (acpi_processor_register_performance(acpi_processor_perf, 0)) {
 		retval = -EIO;
 		goto err1;
@@ -412,6 +418,8 @@ static int powernow_acpi_init(void)
 err2:
 	acpi_processor_unregister_performance(acpi_processor_perf, 0);
 err1:
+	free_cpumask_var(acpi_processor_perf->shared_cpu_map);
+err05:
 	kfree(acpi_processor_perf);
 err0:
 	printk(KERN_WARNING PFX "ACPI perflib can not be used in this platform\n");
@@ -652,6 +660,7 @@ static int powernow_cpu_exit (struct cpufreq_policy *policy) {
 #ifdef CONFIG_X86_POWERNOW_K7_ACPI
 	if (acpi_processor_perf) {
 		acpi_processor_unregister_performance(acpi_processor_perf, 0);
+		free_cpumask_var(acpi_processor_perf->shared_cpu_map);
 		kfree(acpi_processor_perf);
 	}
 #endif
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
index 7f05f44b97e..c3c9adbaa26 100644
--- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
+++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
@@ -766,7 +766,7 @@ static void powernow_k8_acpi_pst_values(struct powernow_k8_data *data, unsigned
 static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data)
 {
 	struct cpufreq_frequency_table *powernow_table;
-	int ret_val;
+	int ret_val = -ENODEV;
 
 	if (acpi_processor_register_performance(&data->acpi_data, data->cpu)) {
 		dprintk("register performance failed: bad ACPI data\n");
@@ -815,6 +815,13 @@ static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data)
 	/* notify BIOS that we exist */
 	acpi_processor_notify_smm(THIS_MODULE);
 
+	if (!alloc_cpumask_var(&data->acpi_data.shared_cpu_map, GFP_KERNEL)) {
+		printk(KERN_ERR PFX
+				"unable to alloc powernow_k8_data cpumask\n");
+		ret_val = -ENOMEM;
+		goto err_out_mem;
+	}
+
 	return 0;
 
 err_out_mem:
@@ -826,7 +833,7 @@ err_out:
 	/* data->acpi_data.state_count informs us at ->exit() whether ACPI was used */
 	data->acpi_data.state_count = 0;
 
-	return -ENODEV;
+	return ret_val;
 }
 
 static int fill_powernow_table_pstate(struct powernow_k8_data *data, struct cpufreq_frequency_table *powernow_table)
@@ -929,6 +936,7 @@ static void powernow_k8_cpu_exit_acpi(struct powernow_k8_data *data)
 {
 	if (data->acpi_data.state_count)
 		acpi_processor_unregister_performance(&data->acpi_data, data->cpu);
+	free_cpumask_var(data->acpi_data.shared_cpu_map);
 }
 
 #else
@@ -1134,7 +1142,8 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol)
 	data->cpu = pol->cpu;
 	data->currpstate = HW_PSTATE_INVALID;
 
-	if (powernow_k8_cpu_init_acpi(data)) {
+	rc = powernow_k8_cpu_init_acpi(data);
+	if (rc) {
 		/*
 		 * Use the PSB BIOS structure. This is only availabe on
 		 * an UP version, and is deprecated by AMD.
@@ -1152,20 +1161,17 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol)
 			       "ACPI maintainers and complain to your BIOS "
 			       "vendor.\n");
 #endif
-			kfree(data);
-			return -ENODEV;
+			goto err_out;
 		}
 		if (pol->cpu != 0) {
 			printk(KERN_ERR FW_BUG PFX "No ACPI _PSS objects for "
 			       "CPU other than CPU0. Complain to your BIOS "
 			       "vendor.\n");
-			kfree(data);
-			return -ENODEV;
+			goto err_out;
 		}
 		rc = find_psb_table(data);
 		if (rc) {
-			kfree(data);
-			return -ENODEV;
+			goto err_out;
 		}
 	}
 
-- 
cgit v1.2.3


From 80855f7361eb68205e6bc1981928629d9b02d5c9 Mon Sep 17 00:00:00 2001
From: Mike Travis <travis@sgi.com>
Date: Wed, 31 Dec 2008 18:08:47 -0800
Subject: cpumask: use alloc_cpumask_var_node where appropriate

Impact: Reduce inter-node memory traffic.

Reduces inter-node memory traffic (offloading the global system bus)
by allocating referenced struct cpumasks on the same node as the
referring struct.

Signed-off-by: Mike Travis <travis@sgi.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c | 5 +++--
 arch/x86/kernel/io_apic.c                  | 6 +++---
 2 files changed, 6 insertions(+), 5 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
index d0a001093b2..28102ad1a36 100644
--- a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
+++ b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
@@ -547,8 +547,9 @@ static int __init acpi_cpufreq_early_init(void)
 		return -ENOMEM;
 	}
 	for_each_possible_cpu(i) {
-		if (!alloc_cpumask_var(&per_cpu_ptr(acpi_perf_data, i)
-				       ->shared_cpu_map, GFP_KERNEL)) {
+		if (!alloc_cpumask_var_node(
+			&per_cpu_ptr(acpi_perf_data, i)->shared_cpu_map,
+			GFP_KERNEL, cpu_to_node(i))) {
 
 			/* Freeing a NULL pointer is OK: alloc_percpu zeroes. */
 			free_acpi_perf_data();
diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c
index 3e070bb961d..a25c3f76b8a 100644
--- a/arch/x86/kernel/io_apic.c
+++ b/arch/x86/kernel/io_apic.c
@@ -212,11 +212,11 @@ static struct irq_cfg *get_one_free_irq_cfg(int cpu)
 
 	cfg = kzalloc_node(sizeof(*cfg), GFP_ATOMIC, node);
 	if (cfg) {
-		/* FIXME: needs alloc_cpumask_var_node() */
-		if (!alloc_cpumask_var(&cfg->domain, GFP_ATOMIC)) {
+		if (!alloc_cpumask_var_node(&cfg->domain, GFP_ATOMIC, node)) {
 			kfree(cfg);
 			cfg = NULL;
-		} else if (!alloc_cpumask_var(&cfg->old_domain, GFP_ATOMIC)) {
+		} else if (!alloc_cpumask_var_node(&cfg->old_domain,
+							  GFP_ATOMIC, node)) {
 			free_cpumask_var(cfg->domain);
 			kfree(cfg);
 			cfg = NULL;
-- 
cgit v1.2.3


From ab14398abd195af91a744c320a52a1bce814dd1e Mon Sep 17 00:00:00 2001
From: Cyrill Gorcunov <gorcunov@gmail.com>
Date: Fri, 2 Jan 2009 21:51:32 +0300
Subject: x86: setup_per_cpu_areas() cleanup

Impact: cleanup

__alloc_bootmem and __alloc_bootmem_node do panic
for us in case of fail so no need for additional
checks here.

Also lets use pr_*() macros for printing.

Signed-off-by: Cyrill Gorcunov <gorcunov@openvz.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/setup_percpu.c | 25 ++++++++-----------------
 1 file changed, 8 insertions(+), 17 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c
index 49f3f709ee1..a4b619c3310 100644
--- a/arch/x86/kernel/setup_percpu.c
+++ b/arch/x86/kernel/setup_percpu.c
@@ -153,12 +153,10 @@ void __init setup_per_cpu_areas(void)
 	align = max_t(unsigned long, PAGE_SIZE, align);
 	size = roundup(old_size, align);
 
-	printk(KERN_INFO
-		"NR_CPUS:%d nr_cpumask_bits:%d nr_cpu_ids:%d nr_node_ids:%d\n",
+	pr_info("NR_CPUS:%d nr_cpumask_bits:%d nr_cpu_ids:%d nr_node_ids:%d\n",
 		NR_CPUS, nr_cpumask_bits, nr_cpu_ids, nr_node_ids);
 
-	printk(KERN_INFO "PERCPU: Allocating %zd bytes of per cpu data\n",
-			  size);
+	pr_info("PERCPU: Allocating %zd bytes of per cpu data\n", size);
 
 	for_each_possible_cpu(cpu) {
 #ifndef CONFIG_NEED_MULTIPLE_NODES
@@ -169,22 +167,15 @@ void __init setup_per_cpu_areas(void)
 		if (!node_online(node) || !NODE_DATA(node)) {
 			ptr = __alloc_bootmem(size, align,
 					 __pa(MAX_DMA_ADDRESS));
-			printk(KERN_INFO
-			       "cpu %d has no node %d or node-local memory\n",
+			pr_info("cpu %d has no node %d or node-local memory\n",
 				cpu, node);
-			if (ptr)
-				printk(KERN_DEBUG
-					"per cpu data for cpu%d at %016lx\n",
-					 cpu, __pa(ptr));
-		}
-		else {
+			pr_debug("per cpu data for cpu%d at %016lx\n",
+				 cpu, __pa(ptr));
+		} else {
 			ptr = __alloc_bootmem_node(NODE_DATA(node), size, align,
 							__pa(MAX_DMA_ADDRESS));
-			if (ptr)
-				printk(KERN_DEBUG
-					"per cpu data for cpu%d on node%d "
-					"at %016lx\n",
-					cpu, node, __pa(ptr));
+			pr_debug("per cpu data for cpu%d on node%d at %016lx\n",
+				cpu, node, __pa(ptr));
 		}
 #endif
 		per_cpu_offset(cpu) = ptr - __per_cpu_start;
-- 
cgit v1.2.3


From 7aed55d1085f71241284a30af0300feea48c36db Mon Sep 17 00:00:00 2001
From: Jiri Slaby <jirislaby@gmail.com>
Date: Sun, 4 Jan 2009 00:27:09 +0100
Subject: x86: fix RIP printout in early_idt_handler

Impact: fix debug/crash printout

Since errorcode is popped out, RIP is on the top of the stack.
Use real RIP value instead of wrong CS.

Signed-off-by: Jiri Slaby <jirislaby@gmail.com>
Cc: <stable@kernel.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/head_64.S | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index 26cfdc1d7c7..0e275d49556 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -305,7 +305,7 @@ ENTRY(early_idt_handler)
 	call dump_stack
 #ifdef CONFIG_KALLSYMS	
 	leaq early_idt_ripmsg(%rip),%rdi
-	movq 8(%rsp),%rsi	# get rip again
+	movq 0(%rsp),%rsi	# get rip again
 	call __print_symbol
 #endif
 #endif /* EARLY_PRINTK */
-- 
cgit v1.2.3


From f29521e4ee394ca241df2ba546e9d671394927a2 Mon Sep 17 00:00:00 2001
From: Jaswinder Singh Rajput <jaswinder@infradead.org>
Date: Sat, 3 Jan 2009 15:46:57 +0530
Subject: x86: rename mp_config_table to mpc_table

Impact: cleanup, solve 80 columns wrap problems

mp_config_table should be renamed to mpc_table.
The reason: the 'c' in MPC already means 'config' -
no need to repeat that in the type name.

Signed-off-by: Jaswinder Singh Rajput <jaswinderrajput@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/include/asm/es7000/mpparse.h            |  3 +--
 arch/x86/include/asm/genapic_32.h                |  4 ++--
 arch/x86/include/asm/mach-default/mach_mpparse.h |  4 ++--
 arch/x86/include/asm/mach-generic/mach_mpparse.h |  5 ++---
 arch/x86/include/asm/mach-generic/mach_mpspec.h  |  4 ++--
 arch/x86/include/asm/mpspec_def.h                |  2 +-
 arch/x86/include/asm/numaq/mpparse.h             |  3 +--
 arch/x86/include/asm/summit/mpparse.h            |  2 +-
 arch/x86/kernel/mpparse.c                        | 10 ++++------
 arch/x86/kernel/numaq_32.c                       |  3 +--
 arch/x86/mach-generic/es7000.c                   |  4 ++--
 arch/x86/mach-generic/numaq.c                    |  3 +--
 arch/x86/mach-generic/probe.c                    |  3 +--
 13 files changed, 21 insertions(+), 29 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/es7000/mpparse.h b/arch/x86/include/asm/es7000/mpparse.h
index ed5a3caae14..c1629b090ec 100644
--- a/arch/x86/include/asm/es7000/mpparse.h
+++ b/arch/x86/include/asm/es7000/mpparse.h
@@ -10,8 +10,7 @@ extern void setup_unisys(void);
 
 #ifndef CONFIG_X86_GENERICARCH
 extern int acpi_madt_oem_check(char *oem_id, char *oem_table_id);
-extern int mps_oem_check(struct mp_config_table *mpc, char *oem,
-				char *productid);
+extern int mps_oem_check(struct mpc_table *mpc, char *oem, char *productid);
 #endif
 
 #ifdef CONFIG_ACPI
diff --git a/arch/x86/include/asm/genapic_32.h b/arch/x86/include/asm/genapic_32.h
index 746f37a7963..db02be008f7 100644
--- a/arch/x86/include/asm/genapic_32.h
+++ b/arch/x86/include/asm/genapic_32.h
@@ -16,7 +16,7 @@
  */
 
 struct mpc_config_bus;
-struct mp_config_table;
+struct mpc_table;
 struct mpc_config_processor;
 
 struct genapic {
@@ -51,7 +51,7 @@ struct genapic {
 	/* When one of the next two hooks returns 1 the genapic
 	   is switched to this. Essentially they are additional probe
 	   functions. */
-	int (*mps_oem_check)(struct mp_config_table *mpc, char *oem,
+	int (*mps_oem_check)(struct mpc_table *mpc, char *oem,
 			     char *productid);
 	int (*acpi_madt_oem_check)(char *oem_id, char *oem_table_id);
 
diff --git a/arch/x86/include/asm/mach-default/mach_mpparse.h b/arch/x86/include/asm/mach-default/mach_mpparse.h
index 8c1ea21238a..c70a263d68c 100644
--- a/arch/x86/include/asm/mach-default/mach_mpparse.h
+++ b/arch/x86/include/asm/mach-default/mach_mpparse.h
@@ -1,8 +1,8 @@
 #ifndef _ASM_X86_MACH_DEFAULT_MACH_MPPARSE_H
 #define _ASM_X86_MACH_DEFAULT_MACH_MPPARSE_H
 
-static inline int mps_oem_check(struct mp_config_table *mpc, char *oem, 
-		char *productid)
+static inline int
+mps_oem_check(struct mpc_table *mpc, char *oem, char *productid)
 {
 	return 0;
 }
diff --git a/arch/x86/include/asm/mach-generic/mach_mpparse.h b/arch/x86/include/asm/mach-generic/mach_mpparse.h
index 048f1d46853..9444ab8dca9 100644
--- a/arch/x86/include/asm/mach-generic/mach_mpparse.h
+++ b/arch/x86/include/asm/mach-generic/mach_mpparse.h
@@ -2,9 +2,8 @@
 #define _ASM_X86_MACH_GENERIC_MACH_MPPARSE_H
 
 
-extern int mps_oem_check(struct mp_config_table *mpc, char *oem,
-			 char *productid);
+extern int mps_oem_check(struct mpc_table *, char *, char *);
 
-extern int acpi_madt_oem_check(char *oem_id, char *oem_table_id);
+extern int acpi_madt_oem_check(char *, char *);
 
 #endif /* _ASM_X86_MACH_GENERIC_MACH_MPPARSE_H */
diff --git a/arch/x86/include/asm/mach-generic/mach_mpspec.h b/arch/x86/include/asm/mach-generic/mach_mpspec.h
index bbab5ccfd4f..3bc40722657 100644
--- a/arch/x86/include/asm/mach-generic/mach_mpspec.h
+++ b/arch/x86/include/asm/mach-generic/mach_mpspec.h
@@ -7,6 +7,6 @@
 /* Maximum 256 PCI busses, plus 1 ISA bus in each of 4 cabinets. */
 #define MAX_MP_BUSSES 260
 
-extern void numaq_mps_oem_check(struct mp_config_table *mpc, char *oem,
-				char *productid);
+extern void numaq_mps_oem_check(struct mpc_table *, char *, char *);
+
 #endif /* _ASM_X86_MACH_GENERIC_MACH_MPSPEC_H */
diff --git a/arch/x86/include/asm/mpspec_def.h b/arch/x86/include/asm/mpspec_def.h
index e3ace7d1d35..e260e6543b4 100644
--- a/arch/x86/include/asm/mpspec_def.h
+++ b/arch/x86/include/asm/mpspec_def.h
@@ -39,7 +39,7 @@ struct intel_mp_floating {
 
 #define MPC_SIGNATURE "PCMP"
 
-struct mp_config_table {
+struct mpc_table {
 	char mpc_signature[4];
 	unsigned short mpc_length;	/* Size of table */
 	char mpc_spec;			/* 0x01 */
diff --git a/arch/x86/include/asm/numaq/mpparse.h b/arch/x86/include/asm/numaq/mpparse.h
index 252292e077b..a2eeefcd1cc 100644
--- a/arch/x86/include/asm/numaq/mpparse.h
+++ b/arch/x86/include/asm/numaq/mpparse.h
@@ -1,7 +1,6 @@
 #ifndef __ASM_NUMAQ_MPPARSE_H
 #define __ASM_NUMAQ_MPPARSE_H
 
-extern void numaq_mps_oem_check(struct mp_config_table *mpc, char *oem,
-				char *productid);
+extern void numaq_mps_oem_check(struct mpc_table *, char *, char *);
 
 #endif /* __ASM_NUMAQ_MPPARSE_H */
diff --git a/arch/x86/include/asm/summit/mpparse.h b/arch/x86/include/asm/summit/mpparse.h
index 013ce6fab2d..380e86c0236 100644
--- a/arch/x86/include/asm/summit/mpparse.h
+++ b/arch/x86/include/asm/summit/mpparse.h
@@ -11,7 +11,7 @@ extern void setup_summit(void);
 #define setup_summit()	{}
 #endif
 
-static inline int mps_oem_check(struct mp_config_table *mpc, char *oem,
+static inline int mps_oem_check(struct mpc_table *mpc, char *oem,
 		char *productid)
 {
 	if (!strncmp(oem, "IBM ENSW", 8) &&
diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c
index c5c5b8df1db..154de681e8b 100644
--- a/arch/x86/kernel/mpparse.c
+++ b/arch/x86/kernel/mpparse.c
@@ -246,8 +246,7 @@ static void __init MP_lintsrc_info(struct mpc_config_lintsrc *m)
  * Read/parse the MPC
  */
 
-static int __init smp_check_mpc(struct mp_config_table *mpc, char *oem,
-				char *str)
+static int __init smp_check_mpc(struct mpc_table *mpc, char *oem, char *str)
 {
 
 	if (memcmp(mpc->mpc_signature, MPC_SIGNATURE, 4)) {
@@ -283,7 +282,7 @@ static int __init smp_check_mpc(struct mp_config_table *mpc, char *oem,
 	return 1;
 }
 
-static int __init smp_read_mpc(struct mp_config_table *mpc, unsigned early)
+static int __init smp_read_mpc(struct mpc_table *mpc, unsigned early)
 {
 	char str[16];
 	char oem[10];
@@ -843,7 +842,7 @@ static int  __init get_MP_intsrc_index(struct mpc_config_intsrc *m)
 static struct mpc_config_intsrc __initdata *m_spare[SPARE_SLOT_NUM];
 #endif
 
-static int  __init replace_intsrc_all(struct mp_config_table *mpc,
+static int  __init replace_intsrc_all(struct mpc_table *mpc,
 					unsigned long mpc_new_phys,
 					unsigned long mpc_new_length)
 {
@@ -1014,8 +1013,7 @@ static int __init update_mp_table(void)
 	char str[16];
 	char oem[10];
 	struct intel_mp_floating *mpf;
-	struct mp_config_table *mpc;
-	struct mp_config_table *mpc_new;
+	struct mpc_table *mpc, *mpc_new;
 
 	if (!enable_update_mptable)
 		return 0;
diff --git a/arch/x86/kernel/numaq_32.c b/arch/x86/kernel/numaq_32.c
index 0deea37a53c..454c55b5674 100644
--- a/arch/x86/kernel/numaq_32.c
+++ b/arch/x86/kernel/numaq_32.c
@@ -260,8 +260,7 @@ static struct x86_quirks numaq_x86_quirks __initdata = {
 	.update_genapic		= numaq_update_genapic,
 };
 
-void numaq_mps_oem_check(struct mp_config_table *mpc, char *oem,
-				 char *productid)
+void numaq_mps_oem_check(struct mpc_table *mpc, char *oem, char *productid)
 {
 	if (strncmp(oem, "IBM NUMA", 8))
 		printk("Warning!  Not a NUMA-Q system!\n");
diff --git a/arch/x86/mach-generic/es7000.c b/arch/x86/mach-generic/es7000.c
index 4ba5ccaa158..3b580edf254 100644
--- a/arch/x86/mach-generic/es7000.c
+++ b/arch/x86/mach-generic/es7000.c
@@ -43,8 +43,8 @@ static void __init enable_apic_mode(void)
 	return;
 }
 
-static __init int mps_oem_check(struct mp_config_table *mpc, char *oem,
-		char *productid)
+static __init int
+mps_oem_check(struct mpc_table *mpc, char *oem, char *productid)
 {
 	if (mpc->mpc_oemptr) {
 		struct mp_config_oemtable *oem_table =
diff --git a/arch/x86/mach-generic/numaq.c b/arch/x86/mach-generic/numaq.c
index 511d7941364..3679e225564 100644
--- a/arch/x86/mach-generic/numaq.c
+++ b/arch/x86/mach-generic/numaq.c
@@ -19,8 +19,7 @@
 #include <asm/numaq/wakecpu.h>
 #include <asm/numaq.h>
 
-static int mps_oem_check(struct mp_config_table *mpc, char *oem,
-		char *productid)
+static int mps_oem_check(struct mpc_table *mpc, char *oem, char *productid)
 {
 	numaq_mps_oem_check(mpc, oem, productid);
 	return found_numaq;
diff --git a/arch/x86/mach-generic/probe.c b/arch/x86/mach-generic/probe.c
index c346d9d0226..15a38daef1a 100644
--- a/arch/x86/mach-generic/probe.c
+++ b/arch/x86/mach-generic/probe.c
@@ -110,8 +110,7 @@ void __init generic_apic_probe(void)
 
 /* These functions can switch the APIC even after the initial ->probe() */
 
-int __init mps_oem_check(struct mp_config_table *mpc, char *oem,
-				 char *productid)
+int __init mps_oem_check(struct mpc_table *mpc, char *oem, char *productid)
 {
 	int i;
 	for (i = 0; apic_probe[i]; ++i) {
-- 
cgit v1.2.3


From 00fb8606e5621a79ca2fe788e461fe8a59eb1450 Mon Sep 17 00:00:00 2001
From: Jaswinder Singh Rajput <jaswinder@infradead.org>
Date: Sat, 3 Jan 2009 15:47:32 +0530
Subject: x86: rename mpc_config_bus to mpc_bus

Impact: cleanup, solve 80 columns wrap problems

mpc_config_bus should be renamed to mpc_bus.
The reason: the 'c' in MPC already means 'config' -
no need to repeat that in the type name.

Signed-off-by: Jaswinder Singh Rajput <jaswinderrajput@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/include/asm/genapic_32.h |  2 +-
 arch/x86/include/asm/mpspec_def.h |  2 +-
 arch/x86/include/asm/setup.h      |  6 +++---
 arch/x86/kernel/mpparse.c         | 12 +++++-------
 arch/x86/kernel/numaq_32.c        |  4 ++--
 5 files changed, 12 insertions(+), 14 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/genapic_32.h b/arch/x86/include/asm/genapic_32.h
index db02be008f7..057e2c2360f 100644
--- a/arch/x86/include/asm/genapic_32.h
+++ b/arch/x86/include/asm/genapic_32.h
@@ -15,7 +15,7 @@
  * Copyright 2003 Andi Kleen, SuSE Labs.
  */
 
-struct mpc_config_bus;
+struct mpc_bus;
 struct mpc_table;
 struct mpc_config_processor;
 
diff --git a/arch/x86/include/asm/mpspec_def.h b/arch/x86/include/asm/mpspec_def.h
index e260e6543b4..daa3f4ab2ff 100644
--- a/arch/x86/include/asm/mpspec_def.h
+++ b/arch/x86/include/asm/mpspec_def.h
@@ -80,7 +80,7 @@ struct mpc_config_processor {
 	unsigned int mpc_reserved[2];
 };
 
-struct mpc_config_bus {
+struct mpc_bus {
 	unsigned char mpc_type;
 	unsigned char mpc_busid;
 	unsigned char mpc_bustype[6];
diff --git a/arch/x86/include/asm/setup.h b/arch/x86/include/asm/setup.h
index 4fcd53fd5f4..26754ca35a6 100644
--- a/arch/x86/include/asm/setup.h
+++ b/arch/x86/include/asm/setup.h
@@ -26,7 +26,7 @@ extern int wakeup_secondary_cpu_via_init(int apicid, unsigned long start_eip);
  * Any setup quirks to be performed?
  */
 struct mpc_config_processor;
-struct mpc_config_bus;
+struct mpc_bus;
 struct mp_config_oemtable;
 struct x86_quirks {
 	int (*arch_pre_time_init)(void);
@@ -40,8 +40,8 @@ struct x86_quirks {
 
 	int *mpc_record;
 	int (*mpc_apic_id)(struct mpc_config_processor *m);
-	void (*mpc_oem_bus_info)(struct mpc_config_bus *m, char *name);
-	void (*mpc_oem_pci_bus)(struct mpc_config_bus *m);
+	void (*mpc_oem_bus_info)(struct mpc_bus *m, char *name);
+	void (*mpc_oem_pci_bus)(struct mpc_bus *m);
 	void (*smp_read_mpc_oem)(struct mp_config_oemtable *oemtable,
                                     unsigned short oemsize);
 	int (*setup_ioapic_ids)(void);
diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c
index 154de681e8b..edbd2b19c60 100644
--- a/arch/x86/kernel/mpparse.c
+++ b/arch/x86/kernel/mpparse.c
@@ -74,7 +74,7 @@ static void __init MP_processor_info(struct mpc_config_processor *m)
 }
 
 #ifdef CONFIG_X86_IO_APIC
-static void __init MP_bus_info(struct mpc_config_bus *m)
+static void __init MP_bus_info(struct mpc_bus *m)
 {
 	char str[7];
 	memcpy(str, m->mpc_bustype, 6);
@@ -338,8 +338,7 @@ static int __init smp_read_mpc(struct mpc_table *mpc, unsigned early)
 			}
 		case MP_BUS:
 			{
-				struct mpc_config_bus *m =
-				    (struct mpc_config_bus *)mpt;
+				struct mpc_bus *m = (struct mpc_bus *)mpt;
 #ifdef CONFIG_X86_IO_APIC
 				MP_bus_info(m);
 #endif
@@ -488,7 +487,7 @@ static void __init construct_default_ioirq_mptable(int mpc_default_type)
 static void __init construct_ioapic_table(int mpc_default_type)
 {
 	struct mpc_config_ioapic ioapic;
-	struct mpc_config_bus bus;
+	struct mpc_bus bus;
 
 	bus.mpc_type = MP_BUS;
 	bus.mpc_busid = 0;
@@ -656,7 +655,7 @@ static void __init __get_smp_config(unsigned int early)
 		 * ISA defaults and hope it will work.
 		 */
 		if (!mp_irq_entries) {
-			struct mpc_config_bus bus;
+			struct mpc_bus bus;
 
 			printk(KERN_ERR "BIOS bug, no explicit IRQ entries, "
 			       "using default mptable. "
@@ -867,8 +866,7 @@ static int  __init replace_intsrc_all(struct mpc_table *mpc,
 			}
 		case MP_BUS:
 			{
-				struct mpc_config_bus *m =
-				    (struct mpc_config_bus *)mpt;
+				struct mpc_bus *m = (struct mpc_bus *)mpt;
 				mpt += sizeof(*m);
 				count += sizeof(*m);
 				break;
diff --git a/arch/x86/kernel/numaq_32.c b/arch/x86/kernel/numaq_32.c
index 454c55b5674..81354269277 100644
--- a/arch/x86/kernel/numaq_32.c
+++ b/arch/x86/kernel/numaq_32.c
@@ -135,7 +135,7 @@ int mp_bus_id_to_node[MAX_MP_BUSSES];
 int mp_bus_id_to_local[MAX_MP_BUSSES];
 
 /* x86_quirks member */
-static void mpc_oem_bus_info(struct mpc_config_bus *m, char *name)
+static void mpc_oem_bus_info(struct mpc_bus *m, char *name)
 {
 	int quad = translation_table[mpc_record]->trans_quad;
 	int local = translation_table[mpc_record]->trans_local;
@@ -149,7 +149,7 @@ static void mpc_oem_bus_info(struct mpc_config_bus *m, char *name)
 int quad_local_to_mp_bus_id [NR_CPUS/4][4];
 
 /* x86_quirks member */
-static void mpc_oem_pci_bus(struct mpc_config_bus *m)
+static void mpc_oem_pci_bus(struct mpc_bus *m)
 {
 	int quad = translation_table[mpc_record]->trans_quad;
 	int local = translation_table[mpc_record]->trans_local;
-- 
cgit v1.2.3


From f4f21b716bb997dcafca622d9e232f855a64c7b6 Mon Sep 17 00:00:00 2001
From: Jaswinder Singh Rajput <jaswinder@infradead.org>
Date: Sat, 3 Jan 2009 15:48:52 +0530
Subject: x86: rename mpc_config_processor to mpc_cpu

Impact: cleanup, solve 80 columns wrap problems

mpc_config_processor should be renamed to mpc_cpu.

The reason: the 'c' in MPC already means 'config' -
no need to repeat that in the type name.

Plus 'processor' is a lot longer than 'cpu' - so we try to use 'cpu' in all
type names, as much as possible.

Signed-off-by: Jaswinder Singh Rajput <jaswinderrajput@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/include/asm/genapic_32.h |  2 +-
 arch/x86/include/asm/mpspec_def.h |  2 +-
 arch/x86/include/asm/setup.h      |  4 ++--
 arch/x86/kernel/mpparse.c         | 10 ++++------
 arch/x86/kernel/numaq_32.c        |  2 +-
 arch/x86/kernel/visws_quirks.c    |  4 ++--
 6 files changed, 11 insertions(+), 13 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/genapic_32.h b/arch/x86/include/asm/genapic_32.h
index 057e2c2360f..2c05b737ee2 100644
--- a/arch/x86/include/asm/genapic_32.h
+++ b/arch/x86/include/asm/genapic_32.h
@@ -17,7 +17,7 @@
 
 struct mpc_bus;
 struct mpc_table;
-struct mpc_config_processor;
+struct mpc_cpu;
 
 struct genapic {
 	char *name;
diff --git a/arch/x86/include/asm/mpspec_def.h b/arch/x86/include/asm/mpspec_def.h
index daa3f4ab2ff..7e8ca5136b2 100644
--- a/arch/x86/include/asm/mpspec_def.h
+++ b/arch/x86/include/asm/mpspec_def.h
@@ -70,7 +70,7 @@ struct mpc_table {
 #define CPU_MODEL_MASK		0x00F0
 #define CPU_FAMILY_MASK		0x0F00
 
-struct mpc_config_processor {
+struct mpc_cpu {
 	unsigned char mpc_type;
 	unsigned char mpc_apicid;	/* Local APIC number */
 	unsigned char mpc_apicver;	/* Its versions */
diff --git a/arch/x86/include/asm/setup.h b/arch/x86/include/asm/setup.h
index 26754ca35a6..8cf77ec2088 100644
--- a/arch/x86/include/asm/setup.h
+++ b/arch/x86/include/asm/setup.h
@@ -25,7 +25,7 @@ extern int wakeup_secondary_cpu_via_init(int apicid, unsigned long start_eip);
 /*
  * Any setup quirks to be performed?
  */
-struct mpc_config_processor;
+struct mpc_cpu;
 struct mpc_bus;
 struct mp_config_oemtable;
 struct x86_quirks {
@@ -39,7 +39,7 @@ struct x86_quirks {
 	int (*mach_find_smp_config)(unsigned int reserve);
 
 	int *mpc_record;
-	int (*mpc_apic_id)(struct mpc_config_processor *m);
+	int (*mpc_apic_id)(struct mpc_cpu *m);
 	void (*mpc_oem_bus_info)(struct mpc_bus *m, char *name);
 	void (*mpc_oem_pci_bus)(struct mpc_bus *m);
 	void (*smp_read_mpc_oem)(struct mp_config_oemtable *oemtable,
diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c
index edbd2b19c60..1ec13bb1873 100644
--- a/arch/x86/kernel/mpparse.c
+++ b/arch/x86/kernel/mpparse.c
@@ -49,7 +49,7 @@ static int __init mpf_checksum(unsigned char *mp, int len)
 	return sum & 0xFF;
 }
 
-static void __init MP_processor_info(struct mpc_config_processor *m)
+static void __init MP_processor_info(struct mpc_cpu *m)
 {
 	int apicid;
 	char *bootup_cpu = "";
@@ -327,8 +327,7 @@ static int __init smp_read_mpc(struct mpc_table *mpc, unsigned early)
 		switch (*mpt) {
 		case MP_PROCESSOR:
 			{
-				struct mpc_config_processor *m =
-				    (struct mpc_config_processor *)mpt;
+				struct mpc_cpu *m = (struct mpc_cpu *)mpt;
 				/* ACPI may have already provided this data */
 				if (!acpi_lapic)
 					MP_processor_info(m);
@@ -534,7 +533,7 @@ static inline void __init construct_ioapic_table(int mpc_default_type) { }
 
 static inline void __init construct_default_ISA_mptable(int mpc_default_type)
 {
-	struct mpc_config_processor processor;
+	struct mpc_cpu processor;
 	struct mpc_config_lintsrc lintsrc;
 	int linttypes[2] = { mp_ExtINT, mp_NMI };
 	int i;
@@ -858,8 +857,7 @@ static int  __init replace_intsrc_all(struct mpc_table *mpc,
 		switch (*mpt) {
 		case MP_PROCESSOR:
 			{
-				struct mpc_config_processor *m =
-				    (struct mpc_config_processor *)mpt;
+				struct mpc_cpu *m = (struct mpc_cpu *)mpt;
 				mpt += sizeof(*m);
 				count += sizeof(*m);
 				break;
diff --git a/arch/x86/kernel/numaq_32.c b/arch/x86/kernel/numaq_32.c
index 81354269277..bcc3da1644f 100644
--- a/arch/x86/kernel/numaq_32.c
+++ b/arch/x86/kernel/numaq_32.c
@@ -117,7 +117,7 @@ static inline int generate_logical_apicid(int quad, int phys_apicid)
 }
 
 /* x86_quirks member */
-static int mpc_apic_id(struct mpc_config_processor *m)
+static int mpc_apic_id(struct mpc_cpu *m)
 {
 	int quad = translation_table[mpc_record]->trans_quad;
 	int logical_apicid = generate_logical_apicid(quad, m->mpc_apicid);
diff --git a/arch/x86/kernel/visws_quirks.c b/arch/x86/kernel/visws_quirks.c
index 0c9667f0752..ca12afa6347 100644
--- a/arch/x86/kernel/visws_quirks.c
+++ b/arch/x86/kernel/visws_quirks.c
@@ -176,7 +176,7 @@ static int __init visws_get_smp_config(unsigned int early)
  * No problem for Linux.
  */
 
-static void __init MP_processor_info(struct mpc_config_processor *m)
+static void __init MP_processor_info(struct mpc_cpu *m)
 {
 	int ver, logical_apicid;
 	physid_mask_t apic_cpus;
@@ -218,7 +218,7 @@ static void __init MP_processor_info(struct mpc_config_processor *m)
 
 static int __init visws_find_smp_config(unsigned int reserve)
 {
-	struct mpc_config_processor *mp = phys_to_virt(CO_CPU_TAB_PHYS);
+	struct mpc_cpu *mp = phys_to_virt(CO_CPU_TAB_PHYS);
 	unsigned short ncpus = readw(phys_to_virt(CO_CPU_NUM_PHYS));
 
 	if (ncpus > CO_CPU_MAX) {
-- 
cgit v1.2.3


From 2b85b5fb47c95cd4cbd15f0c0ef0242d8f36e2e0 Mon Sep 17 00:00:00 2001
From: Jaswinder Singh Rajput <jaswinder@infradead.org>
Date: Sat, 3 Jan 2009 15:49:57 +0530
Subject: x86: rename mpc_config_ioapic to mpc_ioapic

Impact: cleanup, solve 80 columns wrap problems

mpc_config_ioapic should be renamed to mpc_ioapic.

The reason: the 'c' in MPC already means 'config' -
no need to repeat that in the type name.

Signed-off-by: Jaswinder Singh Rajput <jaswinderrajput@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/include/asm/mpspec_def.h |  2 +-
 arch/x86/kernel/mpparse.c         | 15 +++++++--------
 2 files changed, 8 insertions(+), 9 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/mpspec_def.h b/arch/x86/include/asm/mpspec_def.h
index 7e8ca5136b2..79289c70afd 100644
--- a/arch/x86/include/asm/mpspec_def.h
+++ b/arch/x86/include/asm/mpspec_def.h
@@ -108,7 +108,7 @@ struct mpc_bus {
 
 #define MPC_APIC_USABLE		0x01
 
-struct mpc_config_ioapic {
+struct mpc_ioapic {
 	unsigned char mpc_type;
 	unsigned char mpc_apicid;
 	unsigned char mpc_apicver;
diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c
index 1ec13bb1873..7ec4b0da985 100644
--- a/arch/x86/kernel/mpparse.c
+++ b/arch/x86/kernel/mpparse.c
@@ -133,7 +133,7 @@ static int bad_ioapic(unsigned long address)
 	return 0;
 }
 
-static void __init MP_ioapic_info(struct mpc_config_ioapic *m)
+static void __init MP_ioapic_info(struct mpc_ioapic *m)
 {
 	if (!(m->mpc_flags & MPC_APIC_USABLE))
 		return;
@@ -348,12 +348,11 @@ static int __init smp_read_mpc(struct mpc_table *mpc, unsigned early)
 		case MP_IOAPIC:
 			{
 #ifdef CONFIG_X86_IO_APIC
-				struct mpc_config_ioapic *m =
-				    (struct mpc_config_ioapic *)mpt;
+				struct mpc_ioapic *m = (struct mpc_ioapic *)mpt;
 				MP_ioapic_info(m);
 #endif
-				mpt += sizeof(struct mpc_config_ioapic);
-				count += sizeof(struct mpc_config_ioapic);
+				mpt += sizeof(struct mpc_ioapic);
+				count += sizeof(struct mpc_ioapic);
 				break;
 			}
 		case MP_INTSRC:
@@ -485,7 +484,7 @@ static void __init construct_default_ioirq_mptable(int mpc_default_type)
 
 static void __init construct_ioapic_table(int mpc_default_type)
 {
-	struct mpc_config_ioapic ioapic;
+	struct mpc_ioapic ioapic;
 	struct mpc_bus bus;
 
 	bus.mpc_type = MP_BUS;
@@ -871,8 +870,8 @@ static int  __init replace_intsrc_all(struct mpc_table *mpc,
 			}
 		case MP_IOAPIC:
 			{
-				mpt += sizeof(struct mpc_config_ioapic);
-				count += sizeof(struct mpc_config_ioapic);
+				mpt += sizeof(struct mpc_ioapic);
+				count += sizeof(struct mpc_ioapic);
 				break;
 			}
 		case MP_INTSRC:
-- 
cgit v1.2.3


From 540d4e72e1ec6f0d07c252abc97616173aa0382b Mon Sep 17 00:00:00 2001
From: Jaswinder Singh Rajput <jaswinder@infradead.org>
Date: Sat, 3 Jan 2009 15:50:52 +0530
Subject: x86: rename mpc_config_intsrc to mpc_intsrc

Impact: cleanup, solve 80 columns wrap problems

mpc_config_intsrc should be renamed to mpc_intsrc.

The reason: the 'c' in MPC already means 'config' -
no need to repeat that in the type name.

Signed-off-by: Jaswinder Singh Rajput <jaswinderrajput@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/include/asm/mpspec_def.h |  2 +-
 arch/x86/kernel/mpparse.c         | 37 +++++++++++++++++--------------------
 2 files changed, 18 insertions(+), 21 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/mpspec_def.h b/arch/x86/include/asm/mpspec_def.h
index 79289c70afd..e5a8468c215 100644
--- a/arch/x86/include/asm/mpspec_def.h
+++ b/arch/x86/include/asm/mpspec_def.h
@@ -116,7 +116,7 @@ struct mpc_ioapic {
 	unsigned int mpc_apicaddr;
 };
 
-struct mpc_config_intsrc {
+struct mpc_intsrc {
 	unsigned char mpc_type;
 	unsigned char mpc_irqtype;
 	unsigned short mpc_irqflag;
diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c
index 7ec4b0da985..6b75d314358 100644
--- a/arch/x86/kernel/mpparse.c
+++ b/arch/x86/kernel/mpparse.c
@@ -152,7 +152,7 @@ static void __init MP_ioapic_info(struct mpc_ioapic *m)
 	nr_ioapics++;
 }
 
-static void print_MP_intsrc_info(struct mpc_config_intsrc *m)
+static void print_MP_intsrc_info(struct mpc_intsrc *m)
 {
 	apic_printk(APIC_VERBOSE, "Int: type %d, pol %d, trig %d, bus %02x,"
 		" IRQ %02x, APIC ID %x, APIC INT %02x\n",
@@ -170,7 +170,7 @@ static void __init print_mp_irq_info(struct mp_config_intsrc *mp_irq)
 		mp_irq->mp_srcbusirq, mp_irq->mp_dstapic, mp_irq->mp_dstirq);
 }
 
-static void __init assign_to_mp_irq(struct mpc_config_intsrc *m,
+static void __init assign_to_mp_irq(struct mpc_intsrc *m,
 				    struct mp_config_intsrc *mp_irq)
 {
 	mp_irq->mp_dstapic = m->mpc_dstapic;
@@ -183,7 +183,7 @@ static void __init assign_to_mp_irq(struct mpc_config_intsrc *m,
 }
 
 static void __init assign_to_mpc_intsrc(struct mp_config_intsrc *mp_irq,
-					struct mpc_config_intsrc *m)
+					struct mpc_intsrc *m)
 {
 	m->mpc_dstapic = mp_irq->mp_dstapic;
 	m->mpc_type = mp_irq->mp_type;
@@ -195,7 +195,7 @@ static void __init assign_to_mpc_intsrc(struct mp_config_intsrc *mp_irq,
 }
 
 static int __init mp_irq_mpc_intsrc_cmp(struct mp_config_intsrc *mp_irq,
-					struct mpc_config_intsrc *m)
+					struct mpc_intsrc *m)
 {
 	if (mp_irq->mp_dstapic != m->mpc_dstapic)
 		return 1;
@@ -215,7 +215,7 @@ static int __init mp_irq_mpc_intsrc_cmp(struct mp_config_intsrc *mp_irq,
 	return 0;
 }
 
-static void __init MP_intsrc_info(struct mpc_config_intsrc *m)
+static void __init MP_intsrc_info(struct mpc_intsrc *m)
 {
 	int i;
 
@@ -358,13 +358,12 @@ static int __init smp_read_mpc(struct mpc_table *mpc, unsigned early)
 		case MP_INTSRC:
 			{
 #ifdef CONFIG_X86_IO_APIC
-				struct mpc_config_intsrc *m =
-				    (struct mpc_config_intsrc *)mpt;
+				struct mpc_intsrc *m = (struct mpc_intsrc *)mpt;
 
 				MP_intsrc_info(m);
 #endif
-				mpt += sizeof(struct mpc_config_intsrc);
-				count += sizeof(struct mpc_config_intsrc);
+				mpt += sizeof(struct mpc_intsrc);
+				count += sizeof(struct mpc_intsrc);
 				break;
 			}
 		case MP_LINTSRC:
@@ -413,7 +412,7 @@ static int __init ELCR_trigger(unsigned int irq)
 
 static void __init construct_default_ioirq_mptable(int mpc_default_type)
 {
-	struct mpc_config_intsrc intsrc;
+	struct mpc_intsrc intsrc;
 	int i;
 	int ELCR_fallback = 0;
 
@@ -799,7 +798,7 @@ void __init find_smp_config(void)
 #ifdef CONFIG_X86_IO_APIC
 static u8 __initdata irq_used[MAX_IRQ_SOURCES];
 
-static int  __init get_MP_intsrc_index(struct mpc_config_intsrc *m)
+static int  __init get_MP_intsrc_index(struct mpc_intsrc *m)
 {
 	int i;
 
@@ -836,7 +835,7 @@ static int  __init get_MP_intsrc_index(struct mpc_config_intsrc *m)
 
 #define SPARE_SLOT_NUM 20
 
-static struct mpc_config_intsrc __initdata *m_spare[SPARE_SLOT_NUM];
+static struct mpc_intsrc __initdata *m_spare[SPARE_SLOT_NUM];
 #endif
 
 static int  __init replace_intsrc_all(struct mpc_table *mpc,
@@ -877,8 +876,7 @@ static int  __init replace_intsrc_all(struct mpc_table *mpc,
 		case MP_INTSRC:
 			{
 #ifdef CONFIG_X86_IO_APIC
-				struct mpc_config_intsrc *m =
-				    (struct mpc_config_intsrc *)mpt;
+				struct mpc_intsrc *m = (struct mpc_intsrc *)mpt;
 
 				printk(KERN_INFO "OLD ");
 				print_MP_intsrc_info(m);
@@ -899,8 +897,8 @@ static int  __init replace_intsrc_all(struct mpc_table *mpc,
 					nr_m_spare++;
 				}
 #endif
-				mpt += sizeof(struct mpc_config_intsrc);
-				count += sizeof(struct mpc_config_intsrc);
+				mpt += sizeof(struct mpc_intsrc);
+				count += sizeof(struct mpc_intsrc);
 				break;
 			}
 		case MP_LINTSRC:
@@ -938,9 +936,8 @@ static int  __init replace_intsrc_all(struct mpc_table *mpc,
 			assign_to_mpc_intsrc(&mp_irqs[i], m_spare[nr_m_spare]);
 			m_spare[nr_m_spare] = NULL;
 		} else {
-			struct mpc_config_intsrc *m =
-			    (struct mpc_config_intsrc *)mpt;
-			count += sizeof(struct mpc_config_intsrc);
+			struct mpc_intsrc *m = (struct mpc_intsrc *)mpt;
+			count += sizeof(struct mpc_intsrc);
 			if (!mpc_new_phys) {
 				printk(KERN_INFO "No spare slots, try to append...take your risk, new mpc_length %x\n", count);
 			} else {
@@ -953,7 +950,7 @@ static int  __init replace_intsrc_all(struct mpc_table *mpc,
 			}
 			assign_to_mpc_intsrc(&mp_irqs[i], m);
 			mpc->mpc_length = count;
-			mpt += sizeof(struct mpc_config_intsrc);
+			mpt += sizeof(struct mpc_intsrc);
 		}
 		print_mp_irq_info(&mp_irqs[i]);
 	}
-- 
cgit v1.2.3


From 8fb2952b8a8b8b3f982297f0fca288ce04f419a8 Mon Sep 17 00:00:00 2001
From: Jaswinder Singh Rajput <jaswinder@infradead.org>
Date: Sat, 3 Jan 2009 15:51:54 +0530
Subject: x86: rename mpc_config_lintsrc to mpc_lintsrc

Impact: cleanup, solve 80 columns wrap problems

mpc_config_lintsrc should be renamed to mpc_lintsrc.

The reason: the 'c' in MPC already means 'config' -
no need to repeat that in the type name.

Signed-off-by: Jaswinder Singh Rajput <jaswinderrajput@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/include/asm/mpspec_def.h |  2 +-
 arch/x86/kernel/mpparse.c         | 12 ++++++------
 2 files changed, 7 insertions(+), 7 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/mpspec_def.h b/arch/x86/include/asm/mpspec_def.h
index e5a8468c215..fabf970813b 100644
--- a/arch/x86/include/asm/mpspec_def.h
+++ b/arch/x86/include/asm/mpspec_def.h
@@ -139,7 +139,7 @@ enum mp_irq_source_types {
 
 #define MP_APIC_ALL	0xFF
 
-struct mpc_config_lintsrc {
+struct mpc_lintsrc {
 	unsigned char mpc_type;
 	unsigned char mpc_irqtype;
 	unsigned short mpc_irqflag;
diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c
index 6b75d314358..defe87e1205 100644
--- a/arch/x86/kernel/mpparse.c
+++ b/arch/x86/kernel/mpparse.c
@@ -233,7 +233,7 @@ static void __init MP_intsrc_info(struct mpc_intsrc *m)
 
 #endif
 
-static void __init MP_lintsrc_info(struct mpc_config_lintsrc *m)
+static void __init MP_lintsrc_info(struct mpc_lintsrc *m)
 {
 	apic_printk(APIC_VERBOSE, "Lint: type %d, pol %d, trig %d, bus %02x,"
 		" IRQ %02x, APIC ID %x, APIC LINT %02x\n",
@@ -368,8 +368,8 @@ static int __init smp_read_mpc(struct mpc_table *mpc, unsigned early)
 			}
 		case MP_LINTSRC:
 			{
-				struct mpc_config_lintsrc *m =
-				    (struct mpc_config_lintsrc *)mpt;
+				struct mpc_lintsrc *m =
+				    (struct mpc_lintsrc *)mpt;
 				MP_lintsrc_info(m);
 				mpt += sizeof(*m);
 				count += sizeof(*m);
@@ -532,7 +532,7 @@ static inline void __init construct_ioapic_table(int mpc_default_type) { }
 static inline void __init construct_default_ISA_mptable(int mpc_default_type)
 {
 	struct mpc_cpu processor;
-	struct mpc_config_lintsrc lintsrc;
+	struct mpc_lintsrc lintsrc;
 	int linttypes[2] = { mp_ExtINT, mp_NMI };
 	int i;
 
@@ -903,8 +903,8 @@ static int  __init replace_intsrc_all(struct mpc_table *mpc,
 			}
 		case MP_LINTSRC:
 			{
-				struct mpc_config_lintsrc *m =
-				    (struct mpc_config_lintsrc *)mpt;
+				struct mpc_lintsrc *m =
+				    (struct mpc_lintsrc *)mpt;
 				mpt += sizeof(*m);
 				count += sizeof(*m);
 				break;
-- 
cgit v1.2.3


From b0e239ffad5aff50823c91c10cf6b72a1a651c2f Mon Sep 17 00:00:00 2001
From: Jaswinder Singh Rajput <jaswinder@infradead.org>
Date: Sat, 3 Jan 2009 15:52:54 +0530
Subject: x86: rename mpc_config_oemtable to mpc_oemtable

Impact: cleanup, solve 80 columns wrap problems

mpc_config_oemtable should be renamed to mpc_oemtable.

The reason: the 'c' in MPC already means 'config' -
no need to repeat that in the type name.

Signed-off-by: Jaswinder Singh Rajput <jaswinderrajput@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/include/asm/mpspec_def.h | 2 +-
 arch/x86/include/asm/setup.h      | 4 ++--
 arch/x86/kernel/mpparse.c         | 2 +-
 arch/x86/kernel/numaq_32.c        | 2 +-
 arch/x86/mach-generic/es7000.c    | 4 ++--
 5 files changed, 7 insertions(+), 7 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/mpspec_def.h b/arch/x86/include/asm/mpspec_def.h
index fabf970813b..ba8ed0cf658 100644
--- a/arch/x86/include/asm/mpspec_def.h
+++ b/arch/x86/include/asm/mpspec_def.h
@@ -151,7 +151,7 @@ struct mpc_lintsrc {
 
 #define MPC_OEM_SIGNATURE "_OEM"
 
-struct mp_config_oemtable {
+struct mpc_oemtable {
 	char oem_signature[4];
 	unsigned short oem_length;	/* Size of table */
 	char  oem_rev;			/* 0x01 */
diff --git a/arch/x86/include/asm/setup.h b/arch/x86/include/asm/setup.h
index 8cf77ec2088..ebe858cdc8a 100644
--- a/arch/x86/include/asm/setup.h
+++ b/arch/x86/include/asm/setup.h
@@ -27,7 +27,7 @@ extern int wakeup_secondary_cpu_via_init(int apicid, unsigned long start_eip);
  */
 struct mpc_cpu;
 struct mpc_bus;
-struct mp_config_oemtable;
+struct mpc_oemtable;
 struct x86_quirks {
 	int (*arch_pre_time_init)(void);
 	int (*arch_time_init)(void);
@@ -42,7 +42,7 @@ struct x86_quirks {
 	int (*mpc_apic_id)(struct mpc_cpu *m);
 	void (*mpc_oem_bus_info)(struct mpc_bus *m, char *name);
 	void (*mpc_oem_pci_bus)(struct mpc_bus *m);
-	void (*smp_read_mpc_oem)(struct mp_config_oemtable *oemtable,
+	void (*smp_read_mpc_oem)(struct mpc_oemtable *oemtable,
                                     unsigned short oemsize);
 	int (*setup_ioapic_ids)(void);
 	int (*update_genapic)(void);
diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c
index defe87e1205..1ccb2537e3f 100644
--- a/arch/x86/kernel/mpparse.c
+++ b/arch/x86/kernel/mpparse.c
@@ -313,7 +313,7 @@ static int __init smp_read_mpc(struct mpc_table *mpc, unsigned early)
 		return 1;
 
 	if (mpc->mpc_oemptr && x86_quirks->smp_read_mpc_oem) {
-		struct mp_config_oemtable *oem_table = (struct mp_config_oemtable *)(unsigned long)mpc->mpc_oemptr;
+		struct mpc_oemtable *oem_table = (void *)(long)mpc->mpc_oemptr;
 		x86_quirks->smp_read_mpc_oem(oem_table, mpc->mpc_oemsize);
 	}
 
diff --git a/arch/x86/kernel/numaq_32.c b/arch/x86/kernel/numaq_32.c
index bcc3da1644f..aa1e5a06855 100644
--- a/arch/x86/kernel/numaq_32.c
+++ b/arch/x86/kernel/numaq_32.c
@@ -186,7 +186,7 @@ static int __init mpf_checksum(unsigned char *mp, int len)
  * Read/parse the MPC oem tables
  */
 
-static void __init smp_read_mpc_oem(struct mp_config_oemtable *oemtable,
+static void __init smp_read_mpc_oem(struct mpc_oemtable *oemtable,
 				    unsigned short oemsize)
 {
 	int count = sizeof(*oemtable);	/* the header size */
diff --git a/arch/x86/mach-generic/es7000.c b/arch/x86/mach-generic/es7000.c
index 3b580edf254..39243a796e1 100644
--- a/arch/x86/mach-generic/es7000.c
+++ b/arch/x86/mach-generic/es7000.c
@@ -47,8 +47,8 @@ static __init int
 mps_oem_check(struct mpc_table *mpc, char *oem, char *productid)
 {
 	if (mpc->mpc_oemptr) {
-		struct mp_config_oemtable *oem_table =
-			(struct mp_config_oemtable *)mpc->mpc_oemptr;
+		struct mpc_oemtable *oem_table =
+			(struct mpc_oemtable *)mpc->mpc_oemptr;
 		if (!strncmp(oem, "UNISYS", 6))
 			return parse_unisys_oem((char *)oem_table);
 	}
-- 
cgit v1.2.3


From e423e33ec100a4a21943f6535e9971bb3aacfe38 Mon Sep 17 00:00:00 2001
From: Jaswinder Singh Rajput <jaswinder@infradead.org>
Date: Sun, 4 Jan 2009 16:16:25 +0530
Subject: x86: apic.c fix style problems

Impact: cleanup

Fix:

 WARNING: Use #include <linux/smp.h> instead of <asm/smp.h>
 WARNING: Use #include <linux/nmi.h> instead of <asm/nmi.h>
 WARNING: Use #include <linux/timex.h> instead of <asm/timex.h>
 WARNING: line over 80 characters
 ERROR: else should follow close brace '}'

 total: 2 errors, 4 warnings

Signed-off-by: Jaswinder Singh Rajput <jaswinderrajput@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/apic.c | 14 ++++++--------
 1 file changed, 6 insertions(+), 8 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/apic.c b/arch/x86/kernel/apic.c
index d652515e285..f314304ed06 100644
--- a/arch/x86/kernel/apic.c
+++ b/arch/x86/kernel/apic.c
@@ -31,9 +31,11 @@
 #include <linux/dmi.h>
 #include <linux/dmar.h>
 #include <linux/ftrace.h>
+#include <linux/smp.h>
+#include <linux/nmi.h>
+#include <linux/timex.h>
 
 #include <asm/atomic.h>
-#include <asm/smp.h>
 #include <asm/mtrr.h>
 #include <asm/mpspec.h>
 #include <asm/desc.h>
@@ -41,10 +43,8 @@
 #include <asm/hpet.h>
 #include <asm/pgalloc.h>
 #include <asm/i8253.h>
-#include <asm/nmi.h>
 #include <asm/idle.h>
 #include <asm/proto.h>
-#include <asm/timex.h>
 #include <asm/apic.h>
 #include <asm/i8259.h>
 
@@ -687,7 +687,7 @@ static int __init calibrate_APIC_clock(void)
 		local_irq_enable();
 
 	if (levt->features & CLOCK_EVT_FEAT_DUMMY) {
-		pr_warning("APIC timer disabled due to verification failure.\n");
+		pr_warning("APIC timer disabled due to verification failure\n");
 			return -1;
 	}
 
@@ -2087,14 +2087,12 @@ __cpuinit int apic_is_clustered_box(void)
 		/* are we being called early in kernel startup? */
 		if (bios_cpu_apicid) {
 			id = bios_cpu_apicid[i];
-		}
-		else if (i < nr_cpu_ids) {
+		} else if (i < nr_cpu_ids) {
 			if (cpu_present(i))
 				id = per_cpu(x86_bios_cpu_apicid, i);
 			else
 				continue;
-		}
-		else
+		} else
 			break;
 
 		if (id != BAD_APICID)
-- 
cgit v1.2.3


From befa9e780d49f23b051a1ad96881274ceb275ae5 Mon Sep 17 00:00:00 2001
From: Jaswinder Singh Rajput <jaswinder@infradead.org>
Date: Sun, 4 Jan 2009 16:18:56 +0530
Subject: x86: process_32.c fix style problems

Impact: cleanup

Fix:

 WARNING: Use #include <linux/uaccess.h> instead of <asm/uaccess.h>
 WARNING: Use #include <linux/io.h> instead of <asm/io.h>
 WARNING: Use #include <linux/kdebug.h> instead of <asm/kdebug.h>
 WARNING: Use #include <linux/smp.h> instead of <asm/smp.h>
 ERROR: "foo * bar" should be "foo *bar"
 ERROR: trailing whitespace
 ERROR: spaces required around that ':' (ctx:WxO)
 ERROR: spaces required around that ':' (ctx:OxW)

 total: 7 errors, 4 warnings

Signed-off-by: Jaswinder Singh Rajput <jaswinderrajput@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/process_32.c | 19 +++++++++----------
 1 file changed, 9 insertions(+), 10 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 3ba155d2488..a546f55c77b 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -39,11 +39,12 @@
 #include <linux/prctl.h>
 #include <linux/dmi.h>
 #include <linux/ftrace.h>
+#include <linux/uaccess.h>
+#include <linux/io.h>
+#include <linux/kdebug.h>
 
-#include <asm/uaccess.h>
 #include <asm/pgtable.h>
 #include <asm/system.h>
-#include <asm/io.h>
 #include <asm/ldt.h>
 #include <asm/processor.h>
 #include <asm/i387.h>
@@ -56,10 +57,8 @@
 
 #include <asm/tlbflush.h>
 #include <asm/cpu.h>
-#include <asm/kdebug.h>
 #include <asm/idle.h>
 #include <asm/syscalls.h>
-#include <asm/smp.h>
 #include <asm/ds.h>
 
 asmlinkage void ret_from_fork(void) __asm__("ret_from_fork");
@@ -205,7 +204,7 @@ extern void kernel_thread_helper(void);
 /*
  * Create a kernel thread
  */
-int kernel_thread(int (*fn)(void *), void * arg, unsigned long flags)
+int kernel_thread(int (*fn)(void *), void *arg, unsigned long flags)
 {
 	struct pt_regs regs;
 
@@ -266,7 +265,7 @@ void flush_thread(void)
 	tsk->thread.debugreg3 = 0;
 	tsk->thread.debugreg6 = 0;
 	tsk->thread.debugreg7 = 0;
-	memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array));	
+	memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array));
 	clear_tsk_thread_flag(tsk, TIF_DEBUG);
 	/*
 	 * Forget coprocessor state..
@@ -293,9 +292,9 @@ void prepare_to_copy(struct task_struct *tsk)
 
 int copy_thread(int nr, unsigned long clone_flags, unsigned long sp,
 	unsigned long unused,
-	struct task_struct * p, struct pt_regs * regs)
+	struct task_struct *p, struct pt_regs *regs)
 {
-	struct pt_regs * childregs;
+	struct pt_regs *childregs;
 	struct task_struct *tsk;
 	int err;
 
@@ -347,7 +346,7 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long sp,
 void
 start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp)
 {
-	__asm__("movl %0, %%gs" :: "r"(0));
+	__asm__("movl %0, %%gs" : : "r"(0));
 	regs->fs		= 0;
 	set_fs(USER_DS);
 	regs->ds		= __USER_DS;
@@ -638,7 +637,7 @@ asmlinkage int sys_vfork(struct pt_regs regs)
 asmlinkage int sys_execve(struct pt_regs regs)
 {
 	int error;
-	char * filename;
+	char *filename;
 
 	filename = getname((char __user *) regs.bx);
 	error = PTR_ERR(filename);
-- 
cgit v1.2.3


From 60d53c305805fcb612d3982dfef5c9e678e27f42 Mon Sep 17 00:00:00 2001
From: Jaswinder Singh Rajput <jaswinder@infradead.org>
Date: Sun, 4 Jan 2009 16:20:40 +0530
Subject: x86: traps.c fix style problems

Impact: cleanup

Fix:

 WARNING: Use #include <linux/nmi.h> instead of <asm/nmi.h>
 WARNING: Use #include <linux/smp.h> instead of <asm/smp.h>
 WARNING: Use #include <linux/io.h> instead of <asm/io.h>

 total: 0 errors, 3 warnings

Signed-off-by: Jaswinder Singh Rajput <jaswinderrajput@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/traps.c | 3 ---
 1 file changed, 3 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index ce6650eb64e..25d5c307358 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -65,9 +65,6 @@
 #else
 #include <asm/processor-flags.h>
 #include <asm/arch_hooks.h>
-#include <asm/nmi.h>
-#include <asm/smp.h>
-#include <asm/io.h>
 #include <asm/traps.h>
 
 #include "cpu/mcheck/mce.h"
-- 
cgit v1.2.3


From 6a02e71099a87d13f2aa49cb491b5bfb5e662fae Mon Sep 17 00:00:00 2001
From: Jaswinder Singh Rajput <jaswinder@infradead.org>
Date: Sun, 4 Jan 2009 16:22:17 +0530
Subject: x86: irq.c fix style problems

Impact: cleanup

Fix:

 WARNING: Use #include <linux/smp.h> instead of <asm/smp.h>

 total: 0 errors, 1 warnings

Signed-off-by: Jaswinder Singh Rajput <jaswinderrajput@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/irq.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
index bce53e1352a..3973e2df7f8 100644
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -5,10 +5,10 @@
 #include <linux/interrupt.h>
 #include <linux/kernel_stat.h>
 #include <linux/seq_file.h>
+#include <linux/smp.h>
 
 #include <asm/apic.h>
 #include <asm/io_apic.h>
-#include <asm/smp.h>
 #include <asm/irq.h>
 
 atomic_t irq_err_count;
-- 
cgit v1.2.3


From 5f66b2a0d919e84ee921dc21c9dbfddd1215c0e9 Mon Sep 17 00:00:00 2001
From: Jaswinder Singh Rajput <jaswinder@infradead.org>
Date: Sun, 4 Jan 2009 16:25:19 +0530
Subject: x86: irq_64.c fix style problems

Impact: cleanup, fix style problems, more readable

Fix:

 WARNING: Use #include <linux/uaccess.h> instead of <asm/uaccess.h>
 WARNING: Use #include <linux/smp.h> instead of <asm/smp.h>
 ERROR: code indent should use tabs where possible

 total: 9 errors, 2 warnings

Signed-off-by: Jaswinder Singh Rajput <jaswinderrajput@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/irq_64.c | 22 +++++++++++-----------
 1 file changed, 11 insertions(+), 11 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c
index 6383d50f82e..63c88e6ec02 100644
--- a/arch/x86/kernel/irq_64.c
+++ b/arch/x86/kernel/irq_64.c
@@ -14,10 +14,10 @@
 #include <linux/module.h>
 #include <linux/delay.h>
 #include <linux/ftrace.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
+#include <linux/smp.h>
 #include <asm/io_apic.h>
 #include <asm/idle.h>
-#include <asm/smp.h>
 
 /*
  * Probabilistic stack overflow check:
@@ -142,18 +142,18 @@ extern void call_softirq(void);
 
 asmlinkage void do_softirq(void)
 {
- 	__u32 pending;
- 	unsigned long flags;
+	__u32 pending;
+	unsigned long flags;
 
- 	if (in_interrupt())
- 		return;
+	if (in_interrupt())
+		return;
 
- 	local_irq_save(flags);
- 	pending = local_softirq_pending();
- 	/* Switch to interrupt stack */
- 	if (pending) {
+	local_irq_save(flags);
+	pending = local_softirq_pending();
+	/* Switch to interrupt stack */
+	if (pending) {
 		call_softirq();
 		WARN_ON_ONCE(softirq_count());
 	}
- 	local_irq_restore(flags);
+	local_irq_restore(flags);
 }
-- 
cgit v1.2.3


From 69036c8cc29465f866f352199534be603acc79dd Mon Sep 17 00:00:00 2001
From: Jaswinder Singh Rajput <jaswinder@infradead.org>
Date: Sun, 4 Jan 2009 16:26:52 +0530
Subject: x86: time_32.c fix style problems

Impact: cleanup

Fix:

 ERROR: space prohibited after that open parenthesis '('
 ERROR: space prohibited before that close parenthesis ')'

total: 4 errors, 0 warnings

Signed-off-by: Jaswinder Singh Rajput <jaswinderrajput@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/time_32.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/time_32.c b/arch/x86/kernel/time_32.c
index 65309e4cb1c..3985cac0ed4 100644
--- a/arch/x86/kernel/time_32.c
+++ b/arch/x86/kernel/time_32.c
@@ -105,8 +105,8 @@ irqreturn_t timer_interrupt(int irq, void *dev_id)
 		high bit of the PPI port B (0x61).  Note that some PS/2s,
 		notably the 55SX, work fine if this is removed.  */
 
-		u8 irq_v = inb_p( 0x61 );	/* read the current state */
-		outb_p( irq_v|0x80, 0x61 );	/* reset the IRQ */
+		u8 irq_v = inb_p(0x61);		/* read the current state */
+		outb_p(irq_v | 0x80, 0x61);	/* reset the IRQ */
 	}
 #endif
 
-- 
cgit v1.2.3


From fe331184a33bf0458a04e8fb3d5c006e6520a871 Mon Sep 17 00:00:00 2001
From: Jaswinder Singh Rajput <jaswinder@infradead.org>
Date: Sun, 4 Jan 2009 16:28:22 +0530
Subject: x86: time_64.c fix style problems

Impact: cleanup

Fix:

 WARNING: Use #include <linux/nmi.h> instead of <asm/nmi.h>

total: 0 errors, 1 warnings

Signed-off-by: Jaswinder Singh Rajput <jaswinderrajput@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/time_64.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/time_64.c b/arch/x86/kernel/time_64.c
index 891e7a7c433..e6e695acd72 100644
--- a/arch/x86/kernel/time_64.c
+++ b/arch/x86/kernel/time_64.c
@@ -17,10 +17,10 @@
 #include <linux/module.h>
 #include <linux/time.h>
 #include <linux/mca.h>
+#include <linux/nmi.h>
 
 #include <asm/i8253.h>
 #include <asm/hpet.h>
-#include <asm/nmi.h>
 #include <asm/vgtod.h>
 #include <asm/time.h>
 #include <asm/timer.h>
-- 
cgit v1.2.3


From 5866e1b49dd2cdc9024e2269e7b1201a57bf9e19 Mon Sep 17 00:00:00 2001
From: Jaswinder Singh Rajput <jaswinder@infradead.org>
Date: Sun, 4 Jan 2009 16:29:32 +0530
Subject: x86: ioport.c fix style problems

Impact: cleanup

Fix:

 ERROR: "foo * bar" should be "foo *bar"

total: 2 errors, 0 warnings

Signed-off-by: Jaswinder Singh Rajput <jaswinderrajput@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/ioport.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/ioport.c b/arch/x86/kernel/ioport.c
index 19191430274..b12208f4dfe 100644
--- a/arch/x86/kernel/ioport.c
+++ b/arch/x86/kernel/ioport.c
@@ -35,8 +35,8 @@ static void set_bitmap(unsigned long *bitmap, unsigned int base,
  */
 asmlinkage long sys_ioperm(unsigned long from, unsigned long num, int turn_on)
 {
-	struct thread_struct * t = &current->thread;
-	struct tss_struct * tss;
+	struct thread_struct *t = &current->thread;
+	struct tss_struct *tss;
 	unsigned int i, max_long, bytes, bytes_updated;
 
 	if ((from + num <= from) || (from + num > IO_BITMAP_BITS))
-- 
cgit v1.2.3


From c2d1cec1c77f7714672c1efeae075424c929e0d5 Mon Sep 17 00:00:00 2001
From: Mike Travis <travis@sgi.com>
Date: Sun, 4 Jan 2009 05:18:03 -0800
Subject: x86: cleanup remaining cpumask_t ops in smpboot code

Impact: use new cpumask API to reduce memory and stack usage

Allocate the following local cpumasks based on the number of cpus that
are present.  References will use new cpumask API.  (Currently only
modified for x86_64, x86_32 continues to use the *_map variants.)

    cpu_callin_mask
    cpu_callout_mask
    cpu_initialized_mask
    cpu_sibling_setup_mask

Provide the following accessor functions:

    struct cpumask *cpu_sibling_mask(int cpu)
    struct cpumask *cpu_core_mask(int cpu)

Other changes are when setting or clearing the cpu online, possible
or present maps, use the accessor functions.

Signed-off-by: Mike Travis <travis@sgi.com>
Acked-by: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/include/asm/smp.h     |  32 ++++++++++-
 arch/x86/kernel/cpu/common.c   |  26 +++++++--
 arch/x86/kernel/setup_percpu.c |  25 +++++++-
 arch/x86/kernel/smp.c          |  17 ++++--
 arch/x86/kernel/smpboot.c      | 128 +++++++++++++++++++++--------------------
 5 files changed, 152 insertions(+), 76 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h
index 830b9fcb642..19953df61c5 100644
--- a/arch/x86/include/asm/smp.h
+++ b/arch/x86/include/asm/smp.h
@@ -18,9 +18,26 @@
 #include <asm/pda.h>
 #include <asm/thread_info.h>
 
+#ifdef CONFIG_X86_64
+
+extern cpumask_var_t cpu_callin_mask;
+extern cpumask_var_t cpu_callout_mask;
+extern cpumask_var_t cpu_initialized_mask;
+extern cpumask_var_t cpu_sibling_setup_mask;
+
+#else /* CONFIG_X86_32 */
+
+extern cpumask_t cpu_callin_map;
 extern cpumask_t cpu_callout_map;
 extern cpumask_t cpu_initialized;
-extern cpumask_t cpu_callin_map;
+extern cpumask_t cpu_sibling_setup_map;
+
+#define cpu_callin_mask		((struct cpumask *)&cpu_callin_map)
+#define cpu_callout_mask	((struct cpumask *)&cpu_callout_map)
+#define cpu_initialized_mask	((struct cpumask *)&cpu_initialized)
+#define cpu_sibling_setup_mask	((struct cpumask *)&cpu_sibling_setup_map)
+
+#endif /* CONFIG_X86_32 */
 
 extern void (*mtrr_hook)(void);
 extern void zap_low_mappings(void);
@@ -29,7 +46,6 @@ extern int __cpuinit get_local_pda(int cpu);
 
 extern int smp_num_siblings;
 extern unsigned int num_processors;
-extern cpumask_t cpu_initialized;
 
 DECLARE_PER_CPU(cpumask_t, cpu_sibling_map);
 DECLARE_PER_CPU(cpumask_t, cpu_core_map);
@@ -38,6 +54,16 @@ DECLARE_PER_CPU(u16, cpu_llc_id);
 DECLARE_PER_CPU(int, cpu_number);
 #endif
 
+static inline struct cpumask *cpu_sibling_mask(int cpu)
+{
+	return &per_cpu(cpu_sibling_map, cpu);
+}
+
+static inline struct cpumask *cpu_core_mask(int cpu)
+{
+	return &per_cpu(cpu_core_map, cpu);
+}
+
 DECLARE_EARLY_PER_CPU(u16, x86_cpu_to_apicid);
 DECLARE_EARLY_PER_CPU(u16, x86_bios_cpu_apicid);
 
@@ -149,7 +175,7 @@ void smp_store_cpu_info(int id);
 /* We don't mark CPUs online until __cpu_up(), so we need another measure */
 static inline int num_booting_cpus(void)
 {
-	return cpus_weight(cpu_callout_map);
+	return cpumask_weight(cpu_callout_mask);
 }
 #else
 static inline void prefill_possible_map(void)
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 3f95a40f718..83492b1f93b 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -40,6 +40,26 @@
 
 #include "cpu.h"
 
+#ifdef CONFIG_X86_64
+
+/* all of these masks are initialized in setup_cpu_local_masks() */
+cpumask_var_t cpu_callin_mask;
+cpumask_var_t cpu_callout_mask;
+cpumask_var_t cpu_initialized_mask;
+
+/* representing cpus for which sibling maps can be computed */
+cpumask_var_t cpu_sibling_setup_mask;
+
+#else /* CONFIG_X86_32 */
+
+cpumask_t cpu_callin_map;
+cpumask_t cpu_callout_map;
+cpumask_t cpu_initialized;
+cpumask_t cpu_sibling_setup_map;
+
+#endif /* CONFIG_X86_32 */
+
+
 static struct cpu_dev *this_cpu __cpuinitdata;
 
 #ifdef CONFIG_X86_64
@@ -856,8 +876,6 @@ static __init int setup_disablecpuid(char *arg)
 }
 __setup("clearcpuid=", setup_disablecpuid);
 
-cpumask_t cpu_initialized __cpuinitdata = CPU_MASK_NONE;
-
 #ifdef CONFIG_X86_64
 struct x8664_pda **_cpu_pda __read_mostly;
 EXPORT_SYMBOL(_cpu_pda);
@@ -976,7 +994,7 @@ void __cpuinit cpu_init(void)
 
 	me = current;
 
-	if (cpu_test_and_set(cpu, cpu_initialized))
+	if (cpumask_test_and_set_cpu(cpu, cpu_initialized_mask))
 		panic("CPU#%d already initialized!\n", cpu);
 
 	printk(KERN_INFO "Initializing CPU#%d\n", cpu);
@@ -1085,7 +1103,7 @@ void __cpuinit cpu_init(void)
 	struct tss_struct *t = &per_cpu(init_tss, cpu);
 	struct thread_struct *thread = &curr->thread;
 
-	if (cpu_test_and_set(cpu, cpu_initialized)) {
+	if (cpumask_test_and_set_cpu(cpu, cpu_initialized_mask)) {
 		printk(KERN_WARNING "CPU#%d already initialized!\n", cpu);
 		for (;;) local_irq_enable();
 	}
diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c
index a4b619c3310..aa55764602b 100644
--- a/arch/x86/kernel/setup_percpu.c
+++ b/arch/x86/kernel/setup_percpu.c
@@ -131,7 +131,27 @@ static void __init setup_cpu_pda_map(void)
 	/* point to new pointer table */
 	_cpu_pda = new_cpu_pda;
 }
-#endif
+
+#endif /* CONFIG_SMP && CONFIG_X86_64 */
+
+#ifdef CONFIG_X86_64
+
+/* correctly size the local cpu masks */
+static void setup_cpu_local_masks(void)
+{
+	alloc_bootmem_cpumask_var(&cpu_initialized_mask);
+	alloc_bootmem_cpumask_var(&cpu_callin_mask);
+	alloc_bootmem_cpumask_var(&cpu_callout_mask);
+	alloc_bootmem_cpumask_var(&cpu_sibling_setup_mask);
+}
+
+#else /* CONFIG_X86_32 */
+
+static inline void setup_cpu_local_masks(void)
+{
+}
+
+#endif /* CONFIG_X86_32 */
 
 /*
  * Great future plan:
@@ -187,6 +207,9 @@ void __init setup_per_cpu_areas(void)
 
 	/* Setup node to cpumask map */
 	setup_node_to_cpumask_map();
+
+	/* Setup cpu initialized, callin, callout masks */
+	setup_cpu_local_masks();
 }
 
 #endif
diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c
index beea2649a24..182135ba1ea 100644
--- a/arch/x86/kernel/smp.c
+++ b/arch/x86/kernel/smp.c
@@ -128,16 +128,23 @@ void native_send_call_func_single_ipi(int cpu)
 
 void native_send_call_func_ipi(const struct cpumask *mask)
 {
-	cpumask_t allbutself;
+	cpumask_var_t allbutself;
 
-	allbutself = cpu_online_map;
-	cpu_clear(smp_processor_id(), allbutself);
+	if (!alloc_cpumask_var(&allbutself, GFP_ATOMIC)) {
+		send_IPI_mask(mask, CALL_FUNCTION_VECTOR);
+		return;
+	}
 
-	if (cpus_equal(*mask, allbutself) &&
-	    cpus_equal(cpu_online_map, cpu_callout_map))
+	cpumask_copy(allbutself, cpu_online_mask);
+	cpumask_clear_cpu(smp_processor_id(), allbutself);
+
+	if (cpumask_equal(mask, allbutself) &&
+	    cpumask_equal(cpu_online_mask, cpu_callout_mask))
 		send_IPI_allbutself(CALL_FUNCTION_VECTOR);
 	else
 		send_IPI_mask(mask, CALL_FUNCTION_VECTOR);
+
+	free_cpumask_var(allbutself);
 }
 
 /*
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 6bd4d9b7387..00e17e58948 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -102,9 +102,6 @@ EXPORT_SYMBOL(smp_num_siblings);
 /* Last level cache ID of each logical CPU */
 DEFINE_PER_CPU(u16, cpu_llc_id) = BAD_APICID;
 
-cpumask_t cpu_callin_map;
-cpumask_t cpu_callout_map;
-
 /* representing HT siblings of each logical CPU */
 DEFINE_PER_CPU(cpumask_t, cpu_sibling_map);
 EXPORT_PER_CPU_SYMBOL(cpu_sibling_map);
@@ -120,9 +117,6 @@ EXPORT_PER_CPU_SYMBOL(cpu_info);
 static atomic_t init_deasserted;
 
 
-/* representing cpus for which sibling maps can be computed */
-static cpumask_t cpu_sibling_setup_map;
-
 /* Set if we find a B stepping CPU */
 static int __cpuinitdata smp_b_stepping;
 
@@ -140,7 +134,7 @@ EXPORT_SYMBOL(cpu_to_node_map);
 static void map_cpu_to_node(int cpu, int node)
 {
 	printk(KERN_INFO "Mapping cpu %d to node %d\n", cpu, node);
-	cpu_set(cpu, node_to_cpumask_map[node]);
+	cpumask_set_cpu(cpu, &node_to_cpumask_map[node]);
 	cpu_to_node_map[cpu] = node;
 }
 
@@ -151,7 +145,7 @@ static void unmap_cpu_to_node(int cpu)
 
 	printk(KERN_INFO "Unmapping cpu %d from all nodes\n", cpu);
 	for (node = 0; node < MAX_NUMNODES; node++)
-		cpu_clear(cpu, node_to_cpumask_map[node]);
+		cpumask_clear_cpu(cpu, &node_to_cpumask_map[node]);
 	cpu_to_node_map[cpu] = 0;
 }
 #else /* !(CONFIG_NUMA && CONFIG_X86_32) */
@@ -209,7 +203,7 @@ static void __cpuinit smp_callin(void)
 	 */
 	phys_id = read_apic_id();
 	cpuid = smp_processor_id();
-	if (cpu_isset(cpuid, cpu_callin_map)) {
+	if (cpumask_test_cpu(cpuid, cpu_callin_mask)) {
 		panic("%s: phys CPU#%d, CPU#%d already present??\n", __func__,
 					phys_id, cpuid);
 	}
@@ -231,7 +225,7 @@ static void __cpuinit smp_callin(void)
 		/*
 		 * Has the boot CPU finished it's STARTUP sequence?
 		 */
-		if (cpu_isset(cpuid, cpu_callout_map))
+		if (cpumask_test_cpu(cpuid, cpu_callout_mask))
 			break;
 		cpu_relax();
 	}
@@ -274,7 +268,7 @@ static void __cpuinit smp_callin(void)
 	/*
 	 * Allow the master to continue.
 	 */
-	cpu_set(cpuid, cpu_callin_map);
+	cpumask_set_cpu(cpuid, cpu_callin_mask);
 }
 
 static int __cpuinitdata unsafe_smp;
@@ -332,7 +326,7 @@ notrace static void __cpuinit start_secondary(void *unused)
 	ipi_call_lock();
 	lock_vector_lock();
 	__setup_vector_irq(smp_processor_id());
-	cpu_set(smp_processor_id(), cpu_online_map);
+	set_cpu_online(smp_processor_id(), true);
 	unlock_vector_lock();
 	ipi_call_unlock();
 	per_cpu(cpu_state, smp_processor_id()) = CPU_ONLINE;
@@ -438,50 +432,52 @@ void __cpuinit set_cpu_sibling_map(int cpu)
 	int i;
 	struct cpuinfo_x86 *c = &cpu_data(cpu);
 
-	cpu_set(cpu, cpu_sibling_setup_map);
+	cpumask_set_cpu(cpu, cpu_sibling_setup_mask);
 
 	if (smp_num_siblings > 1) {
-		for_each_cpu_mask_nr(i, cpu_sibling_setup_map) {
-			if (c->phys_proc_id == cpu_data(i).phys_proc_id &&
-			    c->cpu_core_id == cpu_data(i).cpu_core_id) {
-				cpu_set(i, per_cpu(cpu_sibling_map, cpu));
-				cpu_set(cpu, per_cpu(cpu_sibling_map, i));
-				cpu_set(i, per_cpu(cpu_core_map, cpu));
-				cpu_set(cpu, per_cpu(cpu_core_map, i));
-				cpu_set(i, c->llc_shared_map);
-				cpu_set(cpu, cpu_data(i).llc_shared_map);
+		for_each_cpu(i, cpu_sibling_setup_mask) {
+			struct cpuinfo_x86 *o = &cpu_data(i);
+
+			if (c->phys_proc_id == o->phys_proc_id &&
+			    c->cpu_core_id == o->cpu_core_id) {
+				cpumask_set_cpu(i, cpu_sibling_mask(cpu));
+				cpumask_set_cpu(cpu, cpu_sibling_mask(i));
+				cpumask_set_cpu(i, cpu_core_mask(cpu));
+				cpumask_set_cpu(cpu, cpu_core_mask(i));
+				cpumask_set_cpu(i, &c->llc_shared_map);
+				cpumask_set_cpu(cpu, &o->llc_shared_map);
 			}
 		}
 	} else {
-		cpu_set(cpu, per_cpu(cpu_sibling_map, cpu));
+		cpumask_set_cpu(cpu, cpu_sibling_mask(cpu));
 	}
 
-	cpu_set(cpu, c->llc_shared_map);
+	cpumask_set_cpu(cpu, &c->llc_shared_map);
 
 	if (current_cpu_data.x86_max_cores == 1) {
-		per_cpu(cpu_core_map, cpu) = per_cpu(cpu_sibling_map, cpu);
+		cpumask_copy(cpu_core_mask(cpu), cpu_sibling_mask(cpu));
 		c->booted_cores = 1;
 		return;
 	}
 
-	for_each_cpu_mask_nr(i, cpu_sibling_setup_map) {
+	for_each_cpu(i, cpu_sibling_setup_mask) {
 		if (per_cpu(cpu_llc_id, cpu) != BAD_APICID &&
 		    per_cpu(cpu_llc_id, cpu) == per_cpu(cpu_llc_id, i)) {
-			cpu_set(i, c->llc_shared_map);
-			cpu_set(cpu, cpu_data(i).llc_shared_map);
+			cpumask_set_cpu(i, &c->llc_shared_map);
+			cpumask_set_cpu(cpu, &cpu_data(i).llc_shared_map);
 		}
 		if (c->phys_proc_id == cpu_data(i).phys_proc_id) {
-			cpu_set(i, per_cpu(cpu_core_map, cpu));
-			cpu_set(cpu, per_cpu(cpu_core_map, i));
+			cpumask_set_cpu(i, cpu_core_mask(cpu));
+			cpumask_set_cpu(cpu, cpu_core_mask(i));
 			/*
 			 *  Does this new cpu bringup a new core?
 			 */
-			if (cpus_weight(per_cpu(cpu_sibling_map, cpu)) == 1) {
+			if (cpumask_weight(cpu_sibling_mask(cpu)) == 1) {
 				/*
 				 * for each core in package, increment
 				 * the booted_cores for this new cpu
 				 */
-				if (first_cpu(per_cpu(cpu_sibling_map, i)) == i)
+				if (cpumask_first(cpu_sibling_mask(i)) == i)
 					c->booted_cores++;
 				/*
 				 * increment the core count for all
@@ -504,7 +500,7 @@ const struct cpumask *cpu_coregroup_mask(int cpu)
 	 * And for power savings, we return cpu_core_map
 	 */
 	if (sched_mc_power_savings || sched_smt_power_savings)
-		return &per_cpu(cpu_core_map, cpu);
+		return cpu_core_mask(cpu);
 	else
 		return &c->llc_shared_map;
 }
@@ -523,7 +519,7 @@ static void impress_friends(void)
 	 */
 	pr_debug("Before bogomips.\n");
 	for_each_possible_cpu(cpu)
-		if (cpu_isset(cpu, cpu_callout_map))
+		if (cpumask_test_cpu(cpu, cpu_callout_mask))
 			bogosum += cpu_data(cpu).loops_per_jiffy;
 	printk(KERN_INFO
 		"Total of %d processors activated (%lu.%02lu BogoMIPS).\n",
@@ -904,19 +900,19 @@ do_rest:
 		 * allow APs to start initializing.
 		 */
 		pr_debug("Before Callout %d.\n", cpu);
-		cpu_set(cpu, cpu_callout_map);
+		cpumask_set_cpu(cpu, cpu_callout_mask);
 		pr_debug("After Callout %d.\n", cpu);
 
 		/*
 		 * Wait 5s total for a response
 		 */
 		for (timeout = 0; timeout < 50000; timeout++) {
-			if (cpu_isset(cpu, cpu_callin_map))
+			if (cpumask_test_cpu(cpu, cpu_callin_mask))
 				break;	/* It has booted */
 			udelay(100);
 		}
 
-		if (cpu_isset(cpu, cpu_callin_map)) {
+		if (cpumask_test_cpu(cpu, cpu_callin_mask)) {
 			/* number CPUs logically, starting from 1 (BSP is 0) */
 			pr_debug("OK.\n");
 			printk(KERN_INFO "CPU%d: ", cpu);
@@ -941,9 +937,14 @@ restore_state:
 	if (boot_error) {
 		/* Try to put things back the way they were before ... */
 		numa_remove_cpu(cpu); /* was set by numa_add_cpu */
-		cpu_clear(cpu, cpu_callout_map); /* was set by do_boot_cpu() */
-		cpu_clear(cpu, cpu_initialized); /* was set by cpu_init() */
-		cpu_clear(cpu, cpu_present_map);
+
+		/* was set by do_boot_cpu() */
+		cpumask_clear_cpu(cpu, cpu_callout_mask);
+
+		/* was set by cpu_init() */
+		cpumask_clear_cpu(cpu, cpu_initialized_mask);
+
+		set_cpu_present(cpu, false);
 		per_cpu(x86_cpu_to_apicid, cpu) = BAD_APICID;
 	}
 
@@ -977,7 +978,7 @@ int __cpuinit native_cpu_up(unsigned int cpu)
 	/*
 	 * Already booted CPU?
 	 */
-	if (cpu_isset(cpu, cpu_callin_map)) {
+	if (cpumask_test_cpu(cpu, cpu_callin_mask)) {
 		pr_debug("do_boot_cpu %d Already started\n", cpu);
 		return -ENOSYS;
 	}
@@ -1032,8 +1033,9 @@ int __cpuinit native_cpu_up(unsigned int cpu)
  */
 static __init void disable_smp(void)
 {
-	cpu_present_map = cpumask_of_cpu(0);
-	cpu_possible_map = cpumask_of_cpu(0);
+	/* use the read/write pointers to the present and possible maps */
+	cpumask_copy(&cpu_present_map, cpumask_of(0));
+	cpumask_copy(&cpu_possible_map, cpumask_of(0));
 	smpboot_clear_io_apic_irqs();
 
 	if (smp_found_config)
@@ -1041,8 +1043,8 @@ static __init void disable_smp(void)
 	else
 		physid_set_mask_of_physid(0, &phys_cpu_present_map);
 	map_cpu_to_logical_apicid();
-	cpu_set(0, per_cpu(cpu_sibling_map, 0));
-	cpu_set(0, per_cpu(cpu_core_map, 0));
+	cpumask_set_cpu(0, cpu_sibling_mask(0));
+	cpumask_set_cpu(0, cpu_core_mask(0));
 }
 
 /*
@@ -1064,14 +1066,14 @@ static int __init smp_sanity_check(unsigned max_cpus)
 		nr = 0;
 		for_each_present_cpu(cpu) {
 			if (nr >= 8)
-				cpu_clear(cpu, cpu_present_map);
+				set_cpu_present(cpu, false);
 			nr++;
 		}
 
 		nr = 0;
 		for_each_possible_cpu(cpu) {
 			if (nr >= 8)
-				cpu_clear(cpu, cpu_possible_map);
+				set_cpu_possible(cpu, false);
 			nr++;
 		}
 
@@ -1167,7 +1169,7 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus)
 	preempt_disable();
 	smp_cpu_index_default();
 	current_cpu_data = boot_cpu_data;
-	cpu_callin_map = cpumask_of_cpu(0);
+	cpumask_copy(cpu_callin_mask, cpumask_of(0));
 	mb();
 	/*
 	 * Setup boot CPU information
@@ -1242,8 +1244,8 @@ void __init native_smp_prepare_boot_cpu(void)
 	init_gdt(me);
 #endif
 	switch_to_new_gdt();
-	/* already set me in cpu_online_map in boot_cpu_init() */
-	cpu_set(me, cpu_callout_map);
+	/* already set me in cpu_online_mask in boot_cpu_init() */
+	cpumask_set_cpu(me, cpu_callout_mask);
 	per_cpu(cpu_state, me) = CPU_ONLINE;
 }
 
@@ -1311,7 +1313,7 @@ __init void prefill_possible_map(void)
 		possible, max_t(int, possible - num_processors, 0));
 
 	for (i = 0; i < possible; i++)
-		cpu_set(i, cpu_possible_map);
+		set_cpu_possible(i, true);
 
 	nr_cpu_ids = possible;
 }
@@ -1323,31 +1325,31 @@ static void remove_siblinginfo(int cpu)
 	int sibling;
 	struct cpuinfo_x86 *c = &cpu_data(cpu);
 
-	for_each_cpu_mask_nr(sibling, per_cpu(cpu_core_map, cpu)) {
-		cpu_clear(cpu, per_cpu(cpu_core_map, sibling));
+	for_each_cpu(sibling, cpu_core_mask(cpu)) {
+		cpumask_clear_cpu(cpu, cpu_core_mask(sibling));
 		/*/
 		 * last thread sibling in this cpu core going down
 		 */
-		if (cpus_weight(per_cpu(cpu_sibling_map, cpu)) == 1)
+		if (cpumask_weight(cpu_sibling_mask(cpu)) == 1)
 			cpu_data(sibling).booted_cores--;
 	}
 
-	for_each_cpu_mask_nr(sibling, per_cpu(cpu_sibling_map, cpu))
-		cpu_clear(cpu, per_cpu(cpu_sibling_map, sibling));
-	cpus_clear(per_cpu(cpu_sibling_map, cpu));
-	cpus_clear(per_cpu(cpu_core_map, cpu));
+	for_each_cpu(sibling, cpu_sibling_mask(cpu))
+		cpumask_clear_cpu(cpu, cpu_sibling_mask(sibling));
+	cpumask_clear(cpu_sibling_mask(cpu));
+	cpumask_clear(cpu_core_mask(cpu));
 	c->phys_proc_id = 0;
 	c->cpu_core_id = 0;
-	cpu_clear(cpu, cpu_sibling_setup_map);
+	cpumask_clear_cpu(cpu, cpu_sibling_setup_mask);
 }
 
 static void __ref remove_cpu_from_maps(int cpu)
 {
-	cpu_clear(cpu, cpu_online_map);
-	cpu_clear(cpu, cpu_callout_map);
-	cpu_clear(cpu, cpu_callin_map);
+	set_cpu_online(cpu, false);
+	cpumask_clear_cpu(cpu, cpu_callout_mask);
+	cpumask_clear_cpu(cpu, cpu_callin_mask);
 	/* was set by cpu_init() */
-	cpu_clear(cpu, cpu_initialized);
+	cpumask_clear_cpu(cpu, cpu_initialized_mask);
 	numa_remove_cpu(cpu);
 }
 
-- 
cgit v1.2.3


From 72ade5f9ca7b384a180ae6b42987f627acd9fe95 Mon Sep 17 00:00:00 2001
From: Jaswinder Singh Rajput <jaswinder@infradead.org>
Date: Sun, 4 Jan 2009 16:32:36 +0530
Subject: x86: irq_32.c fix style problems

Impact: cleanup

Fix:

 WARNING: Use #include <linux/uaccess.h> instead of <asm/uaccess.h>
 ERROR: "(foo*)" should be "(foo *)"
 ERROR: space required after that ',' (ctx:VxV)

total: 5 errors, 1 warnings

Signed-off-by: Jaswinder Singh Rajput <jaswinderrajput@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/irq_32.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c
index 9dc5588f336..74b9ff7341e 100644
--- a/arch/x86/kernel/irq_32.c
+++ b/arch/x86/kernel/irq_32.c
@@ -15,9 +15,9 @@
 #include <linux/notifier.h>
 #include <linux/cpu.h>
 #include <linux/delay.h>
+#include <linux/uaccess.h>
 
 #include <asm/apic.h>
-#include <asm/uaccess.h>
 
 DEFINE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat);
 EXPORT_PER_CPU_SYMBOL(irq_stat);
@@ -93,7 +93,7 @@ execute_on_irq_stack(int overflow, struct irq_desc *desc, int irq)
 		return 0;
 
 	/* build the stack frame on the IRQ stack */
-	isp = (u32 *) ((char*)irqctx + sizeof(*irqctx));
+	isp = (u32 *) ((char *)irqctx + sizeof(*irqctx));
 	irqctx->tinfo.task = curctx->tinfo.task;
 	irqctx->tinfo.previous_esp = current_stack_pointer;
 
@@ -137,7 +137,7 @@ void __cpuinit irq_ctx_init(int cpu)
 
 	hardirq_ctx[cpu] = irqctx;
 
-	irqctx = (union irq_ctx*) &softirq_stack[cpu*THREAD_SIZE];
+	irqctx = (union irq_ctx *) &softirq_stack[cpu*THREAD_SIZE];
 	irqctx->tinfo.task		= NULL;
 	irqctx->tinfo.exec_domain	= NULL;
 	irqctx->tinfo.cpu		= cpu;
@@ -147,7 +147,7 @@ void __cpuinit irq_ctx_init(int cpu)
 	softirq_ctx[cpu] = irqctx;
 
 	printk(KERN_DEBUG "CPU %u irqstacks, hard=%p soft=%p\n",
-	       cpu,hardirq_ctx[cpu],softirq_ctx[cpu]);
+	       cpu, hardirq_ctx[cpu], softirq_ctx[cpu]);
 }
 
 void irq_ctx_exit(int cpu)
@@ -174,7 +174,7 @@ asmlinkage void do_softirq(void)
 		irqctx->tinfo.previous_esp = current_stack_pointer;
 
 		/* build the stack frame on the softirq stack */
-		isp = (u32*) ((char*)irqctx + sizeof(*irqctx));
+		isp = (u32 *) ((char *)irqctx + sizeof(*irqctx));
 
 		call_on_stack(__do_softirq, isp);
 		/*
-- 
cgit v1.2.3


From 7bafaf306769348723471e133adaa57238770492 Mon Sep 17 00:00:00 2001
From: Jaswinder Singh Rajput <jaswinder@infradead.org>
Date: Sun, 4 Jan 2009 16:33:52 +0530
Subject: x86: i8259.c fix style problems

Impact: cleanup

Fix:

 WARNING: Use #include <linux/acpi.h> instead of <asm/acpi.h>
 WARNING: Use #include <linux/io.h> instead of <asm/io.h>
 WARNING: Use #include <linux/delay.h> instead of <asm/delay.h>
 ERROR: code indent should use tabs where possible

 total: 1 errors, 3 warnings

Signed-off-by: Jaswinder Singh Rajput <jaswinderrajput@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/i8259.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/i8259.c b/arch/x86/kernel/i8259.c
index 4b8a53d841f..11d5093eb28 100644
--- a/arch/x86/kernel/i8259.c
+++ b/arch/x86/kernel/i8259.c
@@ -11,15 +11,15 @@
 #include <linux/kernel_stat.h>
 #include <linux/sysdev.h>
 #include <linux/bitops.h>
+#include <linux/acpi.h>
+#include <linux/io.h>
+#include <linux/delay.h>
 
-#include <asm/acpi.h>
 #include <asm/atomic.h>
 #include <asm/system.h>
-#include <asm/io.h>
 #include <asm/timer.h>
 #include <asm/hw_irq.h>
 #include <asm/pgtable.h>
-#include <asm/delay.h>
 #include <asm/desc.h>
 #include <asm/apic.h>
 #include <asm/arch_hooks.h>
@@ -323,7 +323,7 @@ void init_8259A(int auto_eoi)
 	outb_pic(0x11, PIC_MASTER_CMD);	/* ICW1: select 8259A-1 init */
 
 	/* ICW2: 8259A-1 IR0-7 mapped to 0x30-0x37 on x86-64,
-	                       to 0x20-0x27 on i386 */
+	   to 0x20-0x27 on i386 */
 	outb_pic(IRQ0_VECTOR, PIC_MASTER_IMR);
 
 	/* 8259A-1 (the master) has a slave on IR2 */
-- 
cgit v1.2.3


From aa09e6cdae4c60c3070176498abf99b81e1b40fe Mon Sep 17 00:00:00 2001
From: Jaswinder Singh Rajput <jaswinder@infradead.org>
Date: Sun, 4 Jan 2009 16:35:17 +0530
Subject: x86: irqinit_32.c fix style problems

Impact: cleanup

Fix:

 WARNING: Use #include <linux/io.h> instead of <asm/io.h>
 WARNING: Use #include <linux/delay.h> instead of <asm/delay.h>
 ERROR: trailing whitespace
 WARNING: externs should be avoided in .c files
 ERROR: space required after that ',' (ctx:VxV)
 WARNING: space prohibited between function name and open parenthesis '('

 total: 2 errors, 4 warnings

Signed-off-by: Jaswinder Singh Rajput <jaswinderrajput@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/irqinit_32.c | 12 +++++-------
 1 file changed, 5 insertions(+), 7 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/irqinit_32.c b/arch/x86/kernel/irqinit_32.c
index 84723295f88..1507ad4e674 100644
--- a/arch/x86/kernel/irqinit_32.c
+++ b/arch/x86/kernel/irqinit_32.c
@@ -9,18 +9,18 @@
 #include <linux/kernel_stat.h>
 #include <linux/sysdev.h>
 #include <linux/bitops.h>
+#include <linux/io.h>
+#include <linux/delay.h>
 
 #include <asm/atomic.h>
 #include <asm/system.h>
-#include <asm/io.h>
 #include <asm/timer.h>
 #include <asm/pgtable.h>
-#include <asm/delay.h>
 #include <asm/desc.h>
 #include <asm/apic.h>
 #include <asm/arch_hooks.h>
 #include <asm/i8259.h>
-
+#include <asm/traps.h>
 
 
 /*
@@ -34,12 +34,10 @@
  * leads to races. IBM designers who came up with it should
  * be shot.
  */
- 
 
 static irqreturn_t math_error_irq(int cpl, void *dev_id)
 {
-	extern void math_error(void __user *);
-	outb(0,0xF0);
+	outb(0, 0xF0);
 	if (ignore_fpu_irq || !boot_cpu_data.hard_math)
 		return IRQ_NONE;
 	math_error((void __user *)get_irq_regs()->ip);
@@ -56,7 +54,7 @@ static struct irqaction fpu_irq = {
 	.name = "fpu",
 };
 
-void __init init_ISA_irqs (void)
+void __init init_ISA_irqs(void)
 {
 	int i;
 
-- 
cgit v1.2.3


From f50cec36403674ca39663f44b6a491daa1731920 Mon Sep 17 00:00:00 2001
From: Jaswinder Singh Rajput <jaswinder@infradead.org>
Date: Sun, 4 Jan 2009 16:36:25 +0530
Subject: x86: irqinit_64.c fix style problems

Impact: cleanup, fix style problems

Fix:

 WARNING: Use #include <linux/acpi.h> instead of <asm/acpi.h>
 WARNING: Use #include <linux/io.h> instead of <asm/io.h>
 WARNING: Use #include <linux/delay.h> instead of <asm/delay.h>

 total: 0 errors, 3 warnings

Signed-off-by: Jaswinder Singh Rajput <jaswinderrajput@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/irqinit_64.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/irqinit_64.c b/arch/x86/kernel/irqinit_64.c
index 31ebfe38e96..cb875a63867 100644
--- a/arch/x86/kernel/irqinit_64.c
+++ b/arch/x86/kernel/irqinit_64.c
@@ -11,14 +11,14 @@
 #include <linux/kernel_stat.h>
 #include <linux/sysdev.h>
 #include <linux/bitops.h>
+#include <linux/acpi.h>
+#include <linux/io.h>
+#include <linux/delay.h>
 
-#include <asm/acpi.h>
 #include <asm/atomic.h>
 #include <asm/system.h>
-#include <asm/io.h>
 #include <asm/hw_irq.h>
 #include <asm/pgtable.h>
-#include <asm/delay.h>
 #include <asm/desc.h>
 #include <asm/apic.h>
 #include <asm/i8259.h>
-- 
cgit v1.2.3


From 8a87dd9a20ca895527b039e64e54adcbc64b256a Mon Sep 17 00:00:00 2001
From: Jaswinder Singh Rajput <jaswinder@infradead.org>
Date: Sun, 4 Jan 2009 17:04:26 +0530
Subject: x86: setup_percpu.c fix style problems

Impact: cleanup

Fix:

 WARNING: Use #include <linux/smp.h> instead of <asm/smp.h>
 WARNING: Use #include <linux/percpu.h> instead of <asm/percpu.h>
 WARNING: Use #include <linux/topology.h> instead of <asm/topology.h>
 WARNING: EXPORT_SYMBOL(foo); should immediately follow its function/variable
 ERROR: spaces required around that '?' (ctx:VxW)
 ERROR: spaces required around that ':' (ctx:VxV)

 total: 2 errors, 4 warnings

Signed-off-by: Jaswinder Singh Rajput <jaswinderrajput@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/setup_percpu.c | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c
index 0b63b08e753..c73d9a815b6 100644
--- a/arch/x86/kernel/setup_percpu.c
+++ b/arch/x86/kernel/setup_percpu.c
@@ -5,12 +5,11 @@
 #include <linux/percpu.h>
 #include <linux/kexec.h>
 #include <linux/crash_dump.h>
-#include <asm/smp.h>
-#include <asm/percpu.h>
+#include <linux/smp.h>
+#include <linux/topology.h>
 #include <asm/sections.h>
 #include <asm/processor.h>
 #include <asm/setup.h>
-#include <asm/topology.h>
 #include <asm/mpspec.h>
 #include <asm/apicdef.h>
 #include <asm/highmem.h>
@@ -20,8 +19,8 @@ unsigned int num_processors;
 unsigned disabled_cpus __cpuinitdata;
 /* Processor that is doing the boot up */
 unsigned int boot_cpu_physical_apicid = -1U;
-unsigned int max_physical_apicid;
 EXPORT_SYMBOL(boot_cpu_physical_apicid);
+unsigned int max_physical_apicid;
 
 /* Bitmask of physically existing CPUs */
 physid_mask_t phys_cpu_present_map;
@@ -289,8 +288,8 @@ static void __cpuinit numa_set_cpumask(int cpu, int enable)
 
 	cpulist_scnprintf(buf, sizeof(buf), mask);
 	printk(KERN_DEBUG "%s cpu %d node %d: mask now %s\n",
-		enable? "numa_add_cpu":"numa_remove_cpu", cpu, node, buf);
- }
+		enable ? "numa_add_cpu" : "numa_remove_cpu", cpu, node, buf);
+}
 
 void __cpuinit numa_add_cpu(int cpu)
 {
-- 
cgit v1.2.3


From dd399dcb4830cd0e9f51d014ec098421171876d4 Mon Sep 17 00:00:00 2001
From: Jaswinder Singh Rajput <jaswinderlinux@gmail.com>
Date: Tue, 30 Dec 2008 21:10:13 +0530
Subject: x86: irqinit_64.c init_ISA_irqs should be static

Signed-off-by: Jaswinder Singh <jaswinder@infradead.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/irqinit_64.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/irqinit_64.c b/arch/x86/kernel/irqinit_64.c
index cb875a63867..da481a1e3f3 100644
--- a/arch/x86/kernel/irqinit_64.c
+++ b/arch/x86/kernel/irqinit_64.c
@@ -81,7 +81,7 @@ int vector_used_by_percpu_irq(unsigned int vector)
 	return 0;
 }
 
-void __init init_ISA_irqs(void)
+static void __init init_ISA_irqs(void)
 {
 	int i;
 
-- 
cgit v1.2.3


From 5df82c7d18f2dd51aeb763f2a3c1d7714a28218c Mon Sep 17 00:00:00 2001
From: Jaswinder Singh Rajput <jaswinder@infradead.org>
Date: Sun, 4 Jan 2009 21:54:39 +0530
Subject: x86: rename all fields of mpc_iopic mpc_X to X

Impact: cleanup, solve 80 columns wrap problems

It would be cleaner to rename all the mpc->mpc_X fields to
mpc->X - that alone would give 4 characters per usage site.
(we already know that it's an 'mpc' entity -
no need to duplicate that in the field too)

Signed-off-by: Jaswinder Singh Rajput <jaswinderrajput@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/include/asm/mpspec_def.h | 10 +++++-----
 arch/x86/kernel/mpparse.c         | 26 +++++++++++++-------------
 2 files changed, 18 insertions(+), 18 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/mpspec_def.h b/arch/x86/include/asm/mpspec_def.h
index ba8ed0cf658..f805682d80d 100644
--- a/arch/x86/include/asm/mpspec_def.h
+++ b/arch/x86/include/asm/mpspec_def.h
@@ -109,11 +109,11 @@ struct mpc_bus {
 #define MPC_APIC_USABLE		0x01
 
 struct mpc_ioapic {
-	unsigned char mpc_type;
-	unsigned char mpc_apicid;
-	unsigned char mpc_apicver;
-	unsigned char mpc_flags;
-	unsigned int mpc_apicaddr;
+	unsigned char type;
+	unsigned char apicid;
+	unsigned char apicver;
+	unsigned char flags;
+	unsigned int apicaddr;
 };
 
 struct mpc_intsrc {
diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c
index 1ccb2537e3f..f36d9daaf12 100644
--- a/arch/x86/kernel/mpparse.c
+++ b/arch/x86/kernel/mpparse.c
@@ -135,20 +135,20 @@ static int bad_ioapic(unsigned long address)
 
 static void __init MP_ioapic_info(struct mpc_ioapic *m)
 {
-	if (!(m->mpc_flags & MPC_APIC_USABLE))
+	if (!(m->flags & MPC_APIC_USABLE))
 		return;
 
 	printk(KERN_INFO "I/O APIC #%d Version %d at 0x%X.\n",
-	       m->mpc_apicid, m->mpc_apicver, m->mpc_apicaddr);
+	       m->apicid, m->apicver, m->apicaddr);
 
-	if (bad_ioapic(m->mpc_apicaddr))
+	if (bad_ioapic(m->apicaddr))
 		return;
 
-	mp_ioapics[nr_ioapics].mp_apicaddr = m->mpc_apicaddr;
-	mp_ioapics[nr_ioapics].mp_apicid = m->mpc_apicid;
-	mp_ioapics[nr_ioapics].mp_type = m->mpc_type;
-	mp_ioapics[nr_ioapics].mp_apicver = m->mpc_apicver;
-	mp_ioapics[nr_ioapics].mp_flags = m->mpc_flags;
+	mp_ioapics[nr_ioapics].mp_apicaddr = m->apicaddr;
+	mp_ioapics[nr_ioapics].mp_apicid = m->apicid;
+	mp_ioapics[nr_ioapics].mp_type = m->type;
+	mp_ioapics[nr_ioapics].mp_apicver = m->apicver;
+	mp_ioapics[nr_ioapics].mp_flags = m->flags;
 	nr_ioapics++;
 }
 
@@ -513,11 +513,11 @@ static void __init construct_ioapic_table(int mpc_default_type)
 		MP_bus_info(&bus);
 	}
 
-	ioapic.mpc_type = MP_IOAPIC;
-	ioapic.mpc_apicid = 2;
-	ioapic.mpc_apicver = mpc_default_type > 4 ? 0x10 : 0x01;
-	ioapic.mpc_flags = MPC_APIC_USABLE;
-	ioapic.mpc_apicaddr = 0xFEC00000;
+	ioapic.type = MP_IOAPIC;
+	ioapic.apicid = 2;
+	ioapic.apicver = mpc_default_type > 4 ? 0x10 : 0x01;
+	ioapic.flags = MPC_APIC_USABLE;
+	ioapic.apicaddr = 0xFEC00000;
 	MP_ioapic_info(&ioapic);
 
 	/*
-- 
cgit v1.2.3


From b5ced7cdb018b8728ddcfb175c26f30cc185cf66 Mon Sep 17 00:00:00 2001
From: Jaswinder Singh Rajput <jaswinder@infradead.org>
Date: Sun, 4 Jan 2009 21:55:53 +0530
Subject: x86: rename all fields of mpc_lintsrc mpc_X to X

Impact: cleanup, solve 80 columns wrap problems

It would be cleaner to rename all the mpc->mpc_X fields to
mpc->X - that alone would give 4 characters per usage site.
(we already know that it's an 'mpc' entity -
no need to duplicate that in the field too)

Signed-off-by: Jaswinder Singh Rajput <jaswinderrajput@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/include/asm/mpspec_def.h | 14 +++++++-------
 arch/x86/kernel/mpparse.c         | 19 +++++++++----------
 2 files changed, 16 insertions(+), 17 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/mpspec_def.h b/arch/x86/include/asm/mpspec_def.h
index f805682d80d..c8bbb3212bd 100644
--- a/arch/x86/include/asm/mpspec_def.h
+++ b/arch/x86/include/asm/mpspec_def.h
@@ -140,13 +140,13 @@ enum mp_irq_source_types {
 #define MP_APIC_ALL	0xFF
 
 struct mpc_lintsrc {
-	unsigned char mpc_type;
-	unsigned char mpc_irqtype;
-	unsigned short mpc_irqflag;
-	unsigned char mpc_srcbusid;
-	unsigned char mpc_srcbusirq;
-	unsigned char mpc_destapic;
-	unsigned char mpc_destapiclint;
+	unsigned char type;
+	unsigned char irqtype;
+	unsigned short irqflag;
+	unsigned char srcbusid;
+	unsigned char srcbusirq;
+	unsigned char destapic;
+	unsigned char destapiclint;
 };
 
 #define MPC_OEM_SIGNATURE "_OEM"
diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c
index f36d9daaf12..a5c728be41c 100644
--- a/arch/x86/kernel/mpparse.c
+++ b/arch/x86/kernel/mpparse.c
@@ -237,9 +237,8 @@ static void __init MP_lintsrc_info(struct mpc_lintsrc *m)
 {
 	apic_printk(APIC_VERBOSE, "Lint: type %d, pol %d, trig %d, bus %02x,"
 		" IRQ %02x, APIC ID %x, APIC LINT %02x\n",
-		m->mpc_irqtype, m->mpc_irqflag & 3,
-		(m->mpc_irqflag >> 2) & 3, m->mpc_srcbusid,
-		m->mpc_srcbusirq, m->mpc_destapic, m->mpc_destapiclint);
+		m->irqtype, m->irqflag & 3, (m->irqflag >> 2) & 3, m->srcbusid,
+		m->srcbusirq, m->destapic, m->destapiclint);
 }
 
 /*
@@ -560,14 +559,14 @@ static inline void __init construct_default_ISA_mptable(int mpc_default_type)
 
 	construct_ioapic_table(mpc_default_type);
 
-	lintsrc.mpc_type = MP_LINTSRC;
-	lintsrc.mpc_irqflag = 0;	/* conforming */
-	lintsrc.mpc_srcbusid = 0;
-	lintsrc.mpc_srcbusirq = 0;
-	lintsrc.mpc_destapic = MP_APIC_ALL;
+	lintsrc.type = MP_LINTSRC;
+	lintsrc.irqflag = 0;		/* conforming */
+	lintsrc.srcbusid = 0;
+	lintsrc.srcbusirq = 0;
+	lintsrc.destapic = MP_APIC_ALL;
 	for (i = 0; i < 2; i++) {
-		lintsrc.mpc_irqtype = linttypes[i];
-		lintsrc.mpc_destapiclint = i;
+		lintsrc.irqtype = linttypes[i];
+		lintsrc.destapiclint = i;
 		MP_lintsrc_info(&lintsrc);
 	}
 }
-- 
cgit v1.2.3


From e253b396b145311477b23c176e868e5af6f79174 Mon Sep 17 00:00:00 2001
From: Jaswinder Singh Rajput <jaswinder@infradead.org>
Date: Sun, 4 Jan 2009 21:56:44 +0530
Subject: x86: rename all fields of mpc_intsrc mpc_X to X

Impact: cleanup, solve 80 columns wrap problems

It would be cleaner to rename all the mpc->mpc_X fields to
mpc->X - that alone would give 4 characters per usage site.
(we already know that it's an 'mpc' entity -
no need to duplicate that in the field too)

Signed-off-by: Jaswinder Singh Rajput <jaswinderrajput@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/include/asm/mpspec_def.h | 14 +++----
 arch/x86/kernel/mpparse.c         | 79 +++++++++++++++++++--------------------
 2 files changed, 46 insertions(+), 47 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/mpspec_def.h b/arch/x86/include/asm/mpspec_def.h
index c8bbb3212bd..e24e1bc9c15 100644
--- a/arch/x86/include/asm/mpspec_def.h
+++ b/arch/x86/include/asm/mpspec_def.h
@@ -117,13 +117,13 @@ struct mpc_ioapic {
 };
 
 struct mpc_intsrc {
-	unsigned char mpc_type;
-	unsigned char mpc_irqtype;
-	unsigned short mpc_irqflag;
-	unsigned char mpc_srcbus;
-	unsigned char mpc_srcbusirq;
-	unsigned char mpc_dstapic;
-	unsigned char mpc_dstirq;
+	unsigned char type;
+	unsigned char irqtype;
+	unsigned short irqflag;
+	unsigned char srcbus;
+	unsigned char srcbusirq;
+	unsigned char dstapic;
+	unsigned char dstirq;
 };
 
 enum mp_irq_source_types {
diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c
index a5c728be41c..a804f107db8 100644
--- a/arch/x86/kernel/mpparse.c
+++ b/arch/x86/kernel/mpparse.c
@@ -156,9 +156,8 @@ static void print_MP_intsrc_info(struct mpc_intsrc *m)
 {
 	apic_printk(APIC_VERBOSE, "Int: type %d, pol %d, trig %d, bus %02x,"
 		" IRQ %02x, APIC ID %x, APIC INT %02x\n",
-		m->mpc_irqtype, m->mpc_irqflag & 3,
-		(m->mpc_irqflag >> 2) & 3, m->mpc_srcbus,
-		m->mpc_srcbusirq, m->mpc_dstapic, m->mpc_dstirq);
+		m->irqtype, m->irqflag & 3, (m->irqflag >> 2) & 3, m->srcbus,
+		m->srcbusirq, m->dstapic, m->dstirq);
 }
 
 static void __init print_mp_irq_info(struct mp_config_intsrc *mp_irq)
@@ -173,43 +172,43 @@ static void __init print_mp_irq_info(struct mp_config_intsrc *mp_irq)
 static void __init assign_to_mp_irq(struct mpc_intsrc *m,
 				    struct mp_config_intsrc *mp_irq)
 {
-	mp_irq->mp_dstapic = m->mpc_dstapic;
-	mp_irq->mp_type = m->mpc_type;
-	mp_irq->mp_irqtype = m->mpc_irqtype;
-	mp_irq->mp_irqflag = m->mpc_irqflag;
-	mp_irq->mp_srcbus = m->mpc_srcbus;
-	mp_irq->mp_srcbusirq = m->mpc_srcbusirq;
-	mp_irq->mp_dstirq = m->mpc_dstirq;
+	mp_irq->mp_dstapic = m->dstapic;
+	mp_irq->mp_type = m->type;
+	mp_irq->mp_irqtype = m->irqtype;
+	mp_irq->mp_irqflag = m->irqflag;
+	mp_irq->mp_srcbus = m->srcbus;
+	mp_irq->mp_srcbusirq = m->srcbusirq;
+	mp_irq->mp_dstirq = m->dstirq;
 }
 
 static void __init assign_to_mpc_intsrc(struct mp_config_intsrc *mp_irq,
 					struct mpc_intsrc *m)
 {
-	m->mpc_dstapic = mp_irq->mp_dstapic;
-	m->mpc_type = mp_irq->mp_type;
-	m->mpc_irqtype = mp_irq->mp_irqtype;
-	m->mpc_irqflag = mp_irq->mp_irqflag;
-	m->mpc_srcbus = mp_irq->mp_srcbus;
-	m->mpc_srcbusirq = mp_irq->mp_srcbusirq;
-	m->mpc_dstirq = mp_irq->mp_dstirq;
+	m->dstapic = mp_irq->mp_dstapic;
+	m->type = mp_irq->mp_type;
+	m->irqtype = mp_irq->mp_irqtype;
+	m->irqflag = mp_irq->mp_irqflag;
+	m->srcbus = mp_irq->mp_srcbus;
+	m->srcbusirq = mp_irq->mp_srcbusirq;
+	m->dstirq = mp_irq->mp_dstirq;
 }
 
 static int __init mp_irq_mpc_intsrc_cmp(struct mp_config_intsrc *mp_irq,
 					struct mpc_intsrc *m)
 {
-	if (mp_irq->mp_dstapic != m->mpc_dstapic)
+	if (mp_irq->mp_dstapic != m->dstapic)
 		return 1;
-	if (mp_irq->mp_type != m->mpc_type)
+	if (mp_irq->mp_type != m->type)
 		return 2;
-	if (mp_irq->mp_irqtype != m->mpc_irqtype)
+	if (mp_irq->mp_irqtype != m->irqtype)
 		return 3;
-	if (mp_irq->mp_irqflag != m->mpc_irqflag)
+	if (mp_irq->mp_irqflag != m->irqflag)
 		return 4;
-	if (mp_irq->mp_srcbus != m->mpc_srcbus)
+	if (mp_irq->mp_srcbus != m->srcbus)
 		return 5;
-	if (mp_irq->mp_srcbusirq != m->mpc_srcbusirq)
+	if (mp_irq->mp_srcbusirq != m->srcbusirq)
 		return 6;
-	if (mp_irq->mp_dstirq != m->mpc_dstirq)
+	if (mp_irq->mp_dstirq != m->dstirq)
 		return 7;
 
 	return 0;
@@ -415,12 +414,12 @@ static void __init construct_default_ioirq_mptable(int mpc_default_type)
 	int i;
 	int ELCR_fallback = 0;
 
-	intsrc.mpc_type = MP_INTSRC;
-	intsrc.mpc_irqflag = 0;	/* conforming */
-	intsrc.mpc_srcbus = 0;
-	intsrc.mpc_dstapic = mp_ioapics[0].mp_apicid;
+	intsrc.type = MP_INTSRC;
+	intsrc.irqflag = 0;	/* conforming */
+	intsrc.srcbus = 0;
+	intsrc.dstapic = mp_ioapics[0].mp_apicid;
 
-	intsrc.mpc_irqtype = mp_INT;
+	intsrc.irqtype = mp_INT;
 
 	/*
 	 *  If true, we have an ISA/PCI system with no IRQ entries
@@ -463,19 +462,19 @@ static void __init construct_default_ioirq_mptable(int mpc_default_type)
 			 *  irqflag field (level sensitive, active high polarity).
 			 */
 			if (ELCR_trigger(i))
-				intsrc.mpc_irqflag = 13;
+				intsrc.irqflag = 13;
 			else
-				intsrc.mpc_irqflag = 0;
+				intsrc.irqflag = 0;
 		}
 
-		intsrc.mpc_srcbusirq = i;
-		intsrc.mpc_dstirq = i ? i : 2;	/* IRQ0 to INTIN2 */
+		intsrc.srcbusirq = i;
+		intsrc.dstirq = i ? i : 2;	/* IRQ0 to INTIN2 */
 		MP_intsrc_info(&intsrc);
 	}
 
-	intsrc.mpc_irqtype = mp_ExtINT;
-	intsrc.mpc_srcbusirq = 0;
-	intsrc.mpc_dstirq = 0;	/* 8259A to INTIN0 */
+	intsrc.irqtype = mp_ExtINT;
+	intsrc.srcbusirq = 0;
+	intsrc.dstirq = 0;	/* 8259A to INTIN0 */
 	MP_intsrc_info(&intsrc);
 }
 
@@ -801,10 +800,10 @@ static int  __init get_MP_intsrc_index(struct mpc_intsrc *m)
 {
 	int i;
 
-	if (m->mpc_irqtype != mp_INT)
+	if (m->irqtype != mp_INT)
 		return 0;
 
-	if (m->mpc_irqflag != 0x0f)
+	if (m->irqflag != 0x0f)
 		return 0;
 
 	/* not legacy */
@@ -816,9 +815,9 @@ static int  __init get_MP_intsrc_index(struct mpc_intsrc *m)
 		if (mp_irqs[i].mp_irqflag != 0x0f)
 			continue;
 
-		if (mp_irqs[i].mp_srcbus != m->mpc_srcbus)
+		if (mp_irqs[i].mp_srcbus != m->srcbus)
 			continue;
-		if (mp_irqs[i].mp_srcbusirq != m->mpc_srcbusirq)
+		if (mp_irqs[i].mp_srcbusirq != m->srcbusirq)
 			continue;
 		if (irq_used[i]) {
 			/* already claimed */
-- 
cgit v1.2.3


From c4563826b72b0d12500260093179a660e79bf412 Mon Sep 17 00:00:00 2001
From: Jaswinder Singh Rajput <jaswinder@infradead.org>
Date: Sun, 4 Jan 2009 21:58:25 +0530
Subject: x86: rename all fields of mpc_cpu mpc_X to X

Impact: cleanup, solve 80 columns wrap problems

It would be cleaner to rename all the mpc->mpc_X fields to
mpc->X - that alone would give 4 characters per usage site.
(we already know that it's an 'mpc' entity -
no need to duplicate that in the field too)

Signed-off-by: Jaswinder Singh Rajput <jaswinderrajput@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/include/asm/mpspec_def.h | 14 +++++++-------
 arch/x86/kernel/mpparse.c         | 28 ++++++++++++++--------------
 arch/x86/kernel/numaq_32.c        |  9 ++++-----
 arch/x86/kernel/visws_quirks.c    | 28 +++++++++++++---------------
 4 files changed, 38 insertions(+), 41 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/mpspec_def.h b/arch/x86/include/asm/mpspec_def.h
index e24e1bc9c15..78057aaba25 100644
--- a/arch/x86/include/asm/mpspec_def.h
+++ b/arch/x86/include/asm/mpspec_def.h
@@ -71,13 +71,13 @@ struct mpc_table {
 #define CPU_FAMILY_MASK		0x0F00
 
 struct mpc_cpu {
-	unsigned char mpc_type;
-	unsigned char mpc_apicid;	/* Local APIC number */
-	unsigned char mpc_apicver;	/* Its versions */
-	unsigned char mpc_cpuflag;
-	unsigned int mpc_cpufeature;
-	unsigned int mpc_featureflag;	/* CPUID feature value */
-	unsigned int mpc_reserved[2];
+	unsigned char type;
+	unsigned char apicid;		/* Local APIC number */
+	unsigned char apicver;		/* Its versions */
+	unsigned char cpuflag;
+	unsigned int cpufeature;
+	unsigned int featureflag;	/* CPUID feature value */
+	unsigned int reserved[2];
 };
 
 struct mpc_bus {
diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c
index a804f107db8..a8b9ba9709f 100644
--- a/arch/x86/kernel/mpparse.c
+++ b/arch/x86/kernel/mpparse.c
@@ -54,7 +54,7 @@ static void __init MP_processor_info(struct mpc_cpu *m)
 	int apicid;
 	char *bootup_cpu = "";
 
-	if (!(m->mpc_cpuflag & CPU_ENABLED)) {
+	if (!(m->cpuflag & CPU_ENABLED)) {
 		disabled_cpus++;
 		return;
 	}
@@ -62,15 +62,15 @@ static void __init MP_processor_info(struct mpc_cpu *m)
 	if (x86_quirks->mpc_apic_id)
 		apicid = x86_quirks->mpc_apic_id(m);
 	else
-		apicid = m->mpc_apicid;
+		apicid = m->apicid;
 
-	if (m->mpc_cpuflag & CPU_BOOTPROCESSOR) {
+	if (m->cpuflag & CPU_BOOTPROCESSOR) {
 		bootup_cpu = " (Bootup-CPU)";
-		boot_cpu_physical_apicid = m->mpc_apicid;
+		boot_cpu_physical_apicid = m->apicid;
 	}
 
-	printk(KERN_INFO "Processor #%d%s\n", m->mpc_apicid, bootup_cpu);
-	generic_processor_info(apicid, m->mpc_apicver);
+	printk(KERN_INFO "Processor #%d%s\n", m->apicid, bootup_cpu);
+	generic_processor_info(apicid, m->apicver);
 }
 
 #ifdef CONFIG_X86_IO_APIC
@@ -542,17 +542,17 @@ static inline void __init construct_default_ISA_mptable(int mpc_default_type)
 	/*
 	 * 2 CPUs, numbered 0 & 1.
 	 */
-	processor.mpc_type = MP_PROCESSOR;
+	processor.type = MP_PROCESSOR;
 	/* Either an integrated APIC or a discrete 82489DX. */
-	processor.mpc_apicver = mpc_default_type > 4 ? 0x10 : 0x01;
-	processor.mpc_cpuflag = CPU_ENABLED;
-	processor.mpc_cpufeature = (boot_cpu_data.x86 << 8) |
+	processor.apicver = mpc_default_type > 4 ? 0x10 : 0x01;
+	processor.cpuflag = CPU_ENABLED;
+	processor.cpufeature = (boot_cpu_data.x86 << 8) |
 	    (boot_cpu_data.x86_model << 4) | boot_cpu_data.x86_mask;
-	processor.mpc_featureflag = boot_cpu_data.x86_capability[0];
-	processor.mpc_reserved[0] = 0;
-	processor.mpc_reserved[1] = 0;
+	processor.featureflag = boot_cpu_data.x86_capability[0];
+	processor.reserved[0] = 0;
+	processor.reserved[1] = 0;
 	for (i = 0; i < 2; i++) {
-		processor.mpc_apicid = i;
+		processor.apicid = i;
 		MP_processor_info(&processor);
 	}
 
diff --git a/arch/x86/kernel/numaq_32.c b/arch/x86/kernel/numaq_32.c
index aa1e5a06855..a53831c64c5 100644
--- a/arch/x86/kernel/numaq_32.c
+++ b/arch/x86/kernel/numaq_32.c
@@ -120,13 +120,12 @@ static inline int generate_logical_apicid(int quad, int phys_apicid)
 static int mpc_apic_id(struct mpc_cpu *m)
 {
 	int quad = translation_table[mpc_record]->trans_quad;
-	int logical_apicid = generate_logical_apicid(quad, m->mpc_apicid);
+	int logical_apicid = generate_logical_apicid(quad, m->apicid);
 
 	printk(KERN_DEBUG "Processor #%d %u:%u APIC version %d (quad %d, apic %d)\n",
-	       m->mpc_apicid,
-	       (m->mpc_cpufeature & CPU_FAMILY_MASK) >> 8,
-	       (m->mpc_cpufeature & CPU_MODEL_MASK) >> 4,
-	       m->mpc_apicver, quad, logical_apicid);
+	       m->apicid, (m->cpufeature & CPU_FAMILY_MASK) >> 8,
+	       (m->cpufeature & CPU_MODEL_MASK) >> 4,
+	       m->apicver, quad, logical_apicid);
 	return logical_apicid;
 }
 
diff --git a/arch/x86/kernel/visws_quirks.c b/arch/x86/kernel/visws_quirks.c
index ca12afa6347..d801d06af06 100644
--- a/arch/x86/kernel/visws_quirks.c
+++ b/arch/x86/kernel/visws_quirks.c
@@ -181,28 +181,26 @@ static void __init MP_processor_info(struct mpc_cpu *m)
 	int ver, logical_apicid;
 	physid_mask_t apic_cpus;
 
-	if (!(m->mpc_cpuflag & CPU_ENABLED))
+	if (!(m->cpuflag & CPU_ENABLED))
 		return;
 
-	logical_apicid = m->mpc_apicid;
+	logical_apicid = m->apicid;
 	printk(KERN_INFO "%sCPU #%d %u:%u APIC version %d\n",
-	       m->mpc_cpuflag & CPU_BOOTPROCESSOR ? "Bootup " : "",
-	       m->mpc_apicid,
-	       (m->mpc_cpufeature & CPU_FAMILY_MASK) >> 8,
-	       (m->mpc_cpufeature & CPU_MODEL_MASK) >> 4,
-	       m->mpc_apicver);
+	       m->cpuflag & CPU_BOOTPROCESSOR ? "Bootup " : "",
+	       m->apicid, (m->cpufeature & CPU_FAMILY_MASK) >> 8,
+	       (m->cpufeature & CPU_MODEL_MASK) >> 4, m->apicver);
 
-	if (m->mpc_cpuflag & CPU_BOOTPROCESSOR)
-		boot_cpu_physical_apicid = m->mpc_apicid;
+	if (m->cpuflag & CPU_BOOTPROCESSOR)
+		boot_cpu_physical_apicid = m->apicid;
 
-	ver = m->mpc_apicver;
-	if ((ver >= 0x14 && m->mpc_apicid >= 0xff) || m->mpc_apicid >= 0xf) {
+	ver = m->apicver;
+	if ((ver >= 0x14 && m->apicid >= 0xff) || m->apicid >= 0xf) {
 		printk(KERN_ERR "Processor #%d INVALID. (Max ID: %d).\n",
-			m->mpc_apicid, MAX_APICS);
+			m->apicid, MAX_APICS);
 		return;
 	}
 
-	apic_cpus = apicid_to_cpu_present(m->mpc_apicid);
+	apic_cpus = apicid_to_cpu_present(m->apicid);
 	physids_or(phys_cpu_present_map, phys_cpu_present_map, apic_cpus);
 	/*
 	 * Validate version
@@ -210,10 +208,10 @@ static void __init MP_processor_info(struct mpc_cpu *m)
 	if (ver == 0x0) {
 		printk(KERN_ERR "BIOS bug, APIC version is 0 for CPU#%d! "
 			"fixing up to 0x10. (tell your hw vendor)\n",
-			m->mpc_apicid);
+			m->apicid);
 		ver = 0x10;
 	}
-	apic_version[m->mpc_apicid] = ver;
+	apic_version[m->apicid] = ver;
 }
 
 static int __init visws_find_smp_config(unsigned int reserve)
-- 
cgit v1.2.3


From d4c715fad5604f25ea1e5c90f280cb818851c10b Mon Sep 17 00:00:00 2001
From: Jaswinder Singh Rajput <jaswinder@infradead.org>
Date: Sun, 4 Jan 2009 21:59:26 +0530
Subject: x86: rename all fields of mpc_bus mpc_X to X

Impact: cleanup, solve 80 columns wrap problems

It would be cleaner to rename all the mpc->mpc_X fields to
mpc->X - that alone would give 4 characters per usage site.
(we already know that it's an 'mpc' entity -
no need to duplicate that in the field too)

Signed-off-by: Jaswinder Singh Rajput <jaswinderrajput@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/include/asm/mpspec_def.h |  6 +++---
 arch/x86/kernel/mpparse.c         | 40 +++++++++++++++++++--------------------
 arch/x86/kernel/numaq_32.c        |  8 ++++----
 3 files changed, 27 insertions(+), 27 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/mpspec_def.h b/arch/x86/include/asm/mpspec_def.h
index 78057aaba25..60ba5439e9f 100644
--- a/arch/x86/include/asm/mpspec_def.h
+++ b/arch/x86/include/asm/mpspec_def.h
@@ -81,9 +81,9 @@ struct mpc_cpu {
 };
 
 struct mpc_bus {
-	unsigned char mpc_type;
-	unsigned char mpc_busid;
-	unsigned char mpc_bustype[6];
+	unsigned char type;
+	unsigned char busid;
+	unsigned char bustype[6];
 };
 
 /* List of Bus Type string values, Intel MP Spec. */
diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c
index a8b9ba9709f..d780ddddbc3 100644
--- a/arch/x86/kernel/mpparse.c
+++ b/arch/x86/kernel/mpparse.c
@@ -77,39 +77,39 @@ static void __init MP_processor_info(struct mpc_cpu *m)
 static void __init MP_bus_info(struct mpc_bus *m)
 {
 	char str[7];
-	memcpy(str, m->mpc_bustype, 6);
+	memcpy(str, m->bustype, 6);
 	str[6] = 0;
 
 	if (x86_quirks->mpc_oem_bus_info)
 		x86_quirks->mpc_oem_bus_info(m, str);
 	else
-		apic_printk(APIC_VERBOSE, "Bus #%d is %s\n", m->mpc_busid, str);
+		apic_printk(APIC_VERBOSE, "Bus #%d is %s\n", m->busid, str);
 
 #if MAX_MP_BUSSES < 256
-	if (m->mpc_busid >= MAX_MP_BUSSES) {
+	if (m->busid >= MAX_MP_BUSSES) {
 		printk(KERN_WARNING "MP table busid value (%d) for bustype %s "
 		       " is too large, max. supported is %d\n",
-		       m->mpc_busid, str, MAX_MP_BUSSES - 1);
+		       m->busid, str, MAX_MP_BUSSES - 1);
 		return;
 	}
 #endif
 
 	if (strncmp(str, BUSTYPE_ISA, sizeof(BUSTYPE_ISA) - 1) == 0) {
-		set_bit(m->mpc_busid, mp_bus_not_pci);
+		set_bit(m->busid, mp_bus_not_pci);
 #if defined(CONFIG_EISA) || defined(CONFIG_MCA)
-		mp_bus_id_to_type[m->mpc_busid] = MP_BUS_ISA;
+		mp_bus_id_to_type[m->busid] = MP_BUS_ISA;
 #endif
 	} else if (strncmp(str, BUSTYPE_PCI, sizeof(BUSTYPE_PCI) - 1) == 0) {
 		if (x86_quirks->mpc_oem_pci_bus)
 			x86_quirks->mpc_oem_pci_bus(m);
 
-		clear_bit(m->mpc_busid, mp_bus_not_pci);
+		clear_bit(m->busid, mp_bus_not_pci);
 #if defined(CONFIG_EISA) || defined(CONFIG_MCA)
-		mp_bus_id_to_type[m->mpc_busid] = MP_BUS_PCI;
+		mp_bus_id_to_type[m->busid] = MP_BUS_PCI;
 	} else if (strncmp(str, BUSTYPE_EISA, sizeof(BUSTYPE_EISA) - 1) == 0) {
-		mp_bus_id_to_type[m->mpc_busid] = MP_BUS_EISA;
+		mp_bus_id_to_type[m->busid] = MP_BUS_EISA;
 	} else if (strncmp(str, BUSTYPE_MCA, sizeof(BUSTYPE_MCA) - 1) == 0) {
-		mp_bus_id_to_type[m->mpc_busid] = MP_BUS_MCA;
+		mp_bus_id_to_type[m->busid] = MP_BUS_MCA;
 #endif
 	} else
 		printk(KERN_WARNING "Unknown bustype %s - ignoring\n", str);
@@ -484,8 +484,8 @@ static void __init construct_ioapic_table(int mpc_default_type)
 	struct mpc_ioapic ioapic;
 	struct mpc_bus bus;
 
-	bus.mpc_type = MP_BUS;
-	bus.mpc_busid = 0;
+	bus.type = MP_BUS;
+	bus.busid = 0;
 	switch (mpc_default_type) {
 	default:
 		printk(KERN_ERR "???\nUnknown standard configuration %d\n",
@@ -493,21 +493,21 @@ static void __init construct_ioapic_table(int mpc_default_type)
 		/* fall through */
 	case 1:
 	case 5:
-		memcpy(bus.mpc_bustype, "ISA   ", 6);
+		memcpy(bus.bustype, "ISA   ", 6);
 		break;
 	case 2:
 	case 6:
 	case 3:
-		memcpy(bus.mpc_bustype, "EISA  ", 6);
+		memcpy(bus.bustype, "EISA  ", 6);
 		break;
 	case 4:
 	case 7:
-		memcpy(bus.mpc_bustype, "MCA   ", 6);
+		memcpy(bus.bustype, "MCA   ", 6);
 	}
 	MP_bus_info(&bus);
 	if (mpc_default_type > 4) {
-		bus.mpc_busid = 1;
-		memcpy(bus.mpc_bustype, "PCI   ", 6);
+		bus.busid = 1;
+		memcpy(bus.bustype, "PCI   ", 6);
 		MP_bus_info(&bus);
 	}
 
@@ -656,9 +656,9 @@ static void __init __get_smp_config(unsigned int early)
 			       "using default mptable. "
 			       "(tell your hw vendor)\n");
 
-			bus.mpc_type = MP_BUS;
-			bus.mpc_busid = 0;
-			memcpy(bus.mpc_bustype, "ISA   ", 6);
+			bus.type = MP_BUS;
+			bus.busid = 0;
+			memcpy(bus.bustype, "ISA   ", 6);
 			MP_bus_info(&bus);
 
 			construct_default_ioirq_mptable(0);
diff --git a/arch/x86/kernel/numaq_32.c b/arch/x86/kernel/numaq_32.c
index a53831c64c5..8242fef6d0e 100644
--- a/arch/x86/kernel/numaq_32.c
+++ b/arch/x86/kernel/numaq_32.c
@@ -139,10 +139,10 @@ static void mpc_oem_bus_info(struct mpc_bus *m, char *name)
 	int quad = translation_table[mpc_record]->trans_quad;
 	int local = translation_table[mpc_record]->trans_local;
 
-	mp_bus_id_to_node[m->mpc_busid] = quad;
-	mp_bus_id_to_local[m->mpc_busid] = local;
+	mp_bus_id_to_node[m->busid] = quad;
+	mp_bus_id_to_local[m->busid] = local;
 	printk(KERN_INFO "Bus #%d is %s (node %d)\n",
-	       m->mpc_busid, name, quad);
+	       m->busid, name, quad);
 }
 
 int quad_local_to_mp_bus_id [NR_CPUS/4][4];
@@ -153,7 +153,7 @@ static void mpc_oem_pci_bus(struct mpc_bus *m)
 	int quad = translation_table[mpc_record]->trans_quad;
 	int local = translation_table[mpc_record]->trans_local;
 
-	quad_local_to_mp_bus_id[quad][local] = m->mpc_busid;
+	quad_local_to_mp_bus_id[quad][local] = m->busid;
 }
 
 static void __init MP_translation_info(struct mpc_config_translation *m)
-- 
cgit v1.2.3


From a1d0272a4676460787bcd7352414e0286763968d Mon Sep 17 00:00:00 2001
From: Jaswinder Singh Rajput <jaswinder@infradead.org>
Date: Sun, 4 Jan 2009 22:00:46 +0530
Subject: x86: rename all fields of mpc_oemtable oem_X to X

Impact: cleanup, solve 80 columns wrap problems

It would be cleaner to rename all the mpc->oem_X fields to
mpc->X - that alone would give 4 characters per usage site.
(we already know that it's an 'oem' entity -
no need to duplicate that in the field too)

Signed-off-by: Jaswinder Singh Rajput <jaswinderrajput@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/include/asm/mpspec_def.h | 10 +++++-----
 arch/x86/kernel/numaq_32.c        | 10 +++++-----
 2 files changed, 10 insertions(+), 10 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/mpspec_def.h b/arch/x86/include/asm/mpspec_def.h
index 60ba5439e9f..6ea62ea2dc2 100644
--- a/arch/x86/include/asm/mpspec_def.h
+++ b/arch/x86/include/asm/mpspec_def.h
@@ -152,11 +152,11 @@ struct mpc_lintsrc {
 #define MPC_OEM_SIGNATURE "_OEM"
 
 struct mpc_oemtable {
-	char oem_signature[4];
-	unsigned short oem_length;	/* Size of table */
-	char  oem_rev;			/* 0x01 */
-	char  oem_checksum;
-	char  mpc_oem[8];
+	char signature[4];
+	unsigned short length;		/* Size of table */
+	char  rev;			/* 0x01 */
+	char  checksum;
+	char  mpc[8];
 };
 
 /*
diff --git a/arch/x86/kernel/numaq_32.c b/arch/x86/kernel/numaq_32.c
index 8242fef6d0e..f2191d4f271 100644
--- a/arch/x86/kernel/numaq_32.c
+++ b/arch/x86/kernel/numaq_32.c
@@ -194,18 +194,18 @@ static void __init smp_read_mpc_oem(struct mpc_oemtable *oemtable,
 	mpc_record = 0;
 	printk(KERN_INFO "Found an OEM MPC table at %8p - parsing it ... \n",
 	       oemtable);
-	if (memcmp(oemtable->oem_signature, MPC_OEM_SIGNATURE, 4)) {
+	if (memcmp(oemtable->signature, MPC_OEM_SIGNATURE, 4)) {
 		printk(KERN_WARNING
 		       "SMP mpc oemtable: bad signature [%c%c%c%c]!\n",
-		       oemtable->oem_signature[0], oemtable->oem_signature[1],
-		       oemtable->oem_signature[2], oemtable->oem_signature[3]);
+		       oemtable->signature[0], oemtable->signature[1],
+		       oemtable->signature[2], oemtable->signature[3]);
 		return;
 	}
-	if (mpf_checksum((unsigned char *)oemtable, oemtable->oem_length)) {
+	if (mpf_checksum((unsigned char *)oemtable, oemtable->length)) {
 		printk(KERN_WARNING "SMP oem mptable: checksum error!\n");
 		return;
 	}
-	while (count < oemtable->oem_length) {
+	while (count < oemtable->length) {
 		switch (*oemptr) {
 		case MP_TRANSLATION:
 			{
-- 
cgit v1.2.3


From 6c65da50bd4589b6b627d4842b8e6705a0ccaab5 Mon Sep 17 00:00:00 2001
From: Jaswinder Singh Rajput <jaswinder@infradead.org>
Date: Sun, 4 Jan 2009 22:22:56 +0530
Subject: x86: rename all fields of mpc_table mpc_X to X

Impact: cleanup, solve 80 columns wrap problems

It would be cleaner to rename all the mpc->mpc_X fields to
mpc->X - that alone would give 4 characters per usage site.
(we already know that it's an 'mpc' entity -
no need to duplicate that in the field too)

Signed-off-by: Jaswinder Singh Rajput <jaswinderrajput@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/include/asm/mpspec_def.h | 20 ++++++-------
 arch/x86/kernel/mpparse.c         | 59 +++++++++++++++++++--------------------
 arch/x86/mach-generic/es7000.c    |  4 +--
 3 files changed, 41 insertions(+), 42 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/mpspec_def.h b/arch/x86/include/asm/mpspec_def.h
index 6ea62ea2dc2..59568bc4767 100644
--- a/arch/x86/include/asm/mpspec_def.h
+++ b/arch/x86/include/asm/mpspec_def.h
@@ -40,16 +40,16 @@ struct intel_mp_floating {
 #define MPC_SIGNATURE "PCMP"
 
 struct mpc_table {
-	char mpc_signature[4];
-	unsigned short mpc_length;	/* Size of table */
-	char mpc_spec;			/* 0x01 */
-	char mpc_checksum;
-	char mpc_oem[8];
-	char mpc_productid[12];
-	unsigned int mpc_oemptr;	/* 0 if not present */
-	unsigned short mpc_oemsize;	/* 0 if not present */
-	unsigned short mpc_oemcount;
-	unsigned int mpc_lapic;	/* APIC address */
+	char signature[4];
+	unsigned short length;		/* Size of table */
+	char spec;			/* 0x01 */
+	char checksum;
+	char oem[8];
+	char productid[12];
+	unsigned int oemptr;		/* 0 if not present */
+	unsigned short oemsize;		/* 0 if not present */
+	unsigned short oemcount;
+	unsigned int lapic;		/* APIC address */
 	unsigned int reserved;
 };
 
diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c
index d780ddddbc3..e6a9724fefc 100644
--- a/arch/x86/kernel/mpparse.c
+++ b/arch/x86/kernel/mpparse.c
@@ -247,35 +247,35 @@ static void __init MP_lintsrc_info(struct mpc_lintsrc *m)
 static int __init smp_check_mpc(struct mpc_table *mpc, char *oem, char *str)
 {
 
-	if (memcmp(mpc->mpc_signature, MPC_SIGNATURE, 4)) {
+	if (memcmp(mpc->signature, MPC_SIGNATURE, 4)) {
 		printk(KERN_ERR "MPTABLE: bad signature [%c%c%c%c]!\n",
-		       mpc->mpc_signature[0], mpc->mpc_signature[1],
-		       mpc->mpc_signature[2], mpc->mpc_signature[3]);
+		       mpc->signature[0], mpc->signature[1],
+		       mpc->signature[2], mpc->signature[3]);
 		return 0;
 	}
-	if (mpf_checksum((unsigned char *)mpc, mpc->mpc_length)) {
+	if (mpf_checksum((unsigned char *)mpc, mpc->length)) {
 		printk(KERN_ERR "MPTABLE: checksum error!\n");
 		return 0;
 	}
-	if (mpc->mpc_spec != 0x01 && mpc->mpc_spec != 0x04) {
+	if (mpc->spec != 0x01 && mpc->spec != 0x04) {
 		printk(KERN_ERR "MPTABLE: bad table version (%d)!!\n",
-		       mpc->mpc_spec);
+		       mpc->spec);
 		return 0;
 	}
-	if (!mpc->mpc_lapic) {
+	if (!mpc->lapic) {
 		printk(KERN_ERR "MPTABLE: null local APIC address!\n");
 		return 0;
 	}
-	memcpy(oem, mpc->mpc_oem, 8);
+	memcpy(oem, mpc->oem, 8);
 	oem[8] = 0;
 	printk(KERN_INFO "MPTABLE: OEM ID: %s\n", oem);
 
-	memcpy(str, mpc->mpc_productid, 12);
+	memcpy(str, mpc->productid, 12);
 	str[12] = 0;
 
 	printk(KERN_INFO "MPTABLE: Product ID: %s\n", str);
 
-	printk(KERN_INFO "MPTABLE: APIC at: 0x%X\n", mpc->mpc_lapic);
+	printk(KERN_INFO "MPTABLE: APIC at: 0x%X\n", mpc->lapic);
 
 	return 1;
 }
@@ -305,14 +305,14 @@ static int __init smp_read_mpc(struct mpc_table *mpc, unsigned early)
 #endif
 	/* save the local APIC address, it might be non-default */
 	if (!acpi_lapic)
-		mp_lapic_addr = mpc->mpc_lapic;
+		mp_lapic_addr = mpc->lapic;
 
 	if (early)
 		return 1;
 
-	if (mpc->mpc_oemptr && x86_quirks->smp_read_mpc_oem) {
-		struct mpc_oemtable *oem_table = (void *)(long)mpc->mpc_oemptr;
-		x86_quirks->smp_read_mpc_oem(oem_table, mpc->mpc_oemsize);
+	if (mpc->oemptr && x86_quirks->smp_read_mpc_oem) {
+		struct mpc_oemtable *oem_table = (void *)(long)mpc->oemptr;
+		x86_quirks->smp_read_mpc_oem(oem_table, mpc->oemsize);
 	}
 
 	/*
@@ -321,7 +321,7 @@ static int __init smp_read_mpc(struct mpc_table *mpc, unsigned early)
 	if (x86_quirks->mpc_record)
 		*x86_quirks->mpc_record = 0;
 
-	while (count < mpc->mpc_length) {
+	while (count < mpc->length) {
 		switch (*mpt) {
 		case MP_PROCESSOR:
 			{
@@ -378,8 +378,8 @@ static int __init smp_read_mpc(struct mpc_table *mpc, unsigned early)
 			printk(KERN_ERR "Your mptable is wrong, contact your HW vendor!\n");
 			printk(KERN_ERR "type %x\n", *mpt);
 			print_hex_dump(KERN_ERR, "  ", DUMP_PREFIX_ADDRESS, 16,
-					1, mpc, mpc->mpc_length, 1);
-			count = mpc->mpc_length;
+					1, mpc, mpc->length, 1);
+			count = mpc->length;
 			break;
 		}
 		if (x86_quirks->mpc_record)
@@ -848,8 +848,8 @@ static int  __init replace_intsrc_all(struct mpc_table *mpc,
 	int count = sizeof(*mpc);
 	unsigned char *mpt = ((unsigned char *)mpc) + count;
 
-	printk(KERN_INFO "mpc_length %x\n", mpc->mpc_length);
-	while (count < mpc->mpc_length) {
+	printk(KERN_INFO "mpc_length %x\n", mpc->length);
+	while (count < mpc->length) {
 		switch (*mpt) {
 		case MP_PROCESSOR:
 			{
@@ -912,7 +912,7 @@ static int  __init replace_intsrc_all(struct mpc_table *mpc,
 			printk(KERN_ERR "Your mptable is wrong, contact your HW vendor!\n");
 			printk(KERN_ERR "type %x\n", *mpt);
 			print_hex_dump(KERN_ERR, "  ", DUMP_PREFIX_ADDRESS, 16,
-					1, mpc, mpc->mpc_length, 1);
+					1, mpc, mpc->length, 1);
 			goto out;
 		}
 	}
@@ -947,7 +947,7 @@ static int  __init replace_intsrc_all(struct mpc_table *mpc,
 				}
 			}
 			assign_to_mpc_intsrc(&mp_irqs[i], m);
-			mpc->mpc_length = count;
+			mpc->length = count;
 			mpt += sizeof(struct mpc_intsrc);
 		}
 		print_mp_irq_info(&mp_irqs[i]);
@@ -955,9 +955,8 @@ static int  __init replace_intsrc_all(struct mpc_table *mpc,
 #endif
 out:
 	/* update checksum */
-	mpc->mpc_checksum = 0;
-	mpc->mpc_checksum -= mpf_checksum((unsigned char *)mpc,
-					   mpc->mpc_length);
+	mpc->checksum = 0;
+	mpc->checksum -= mpf_checksum((unsigned char *)mpc, mpc->length);
 
 	return 0;
 }
@@ -1029,7 +1028,7 @@ static int __init update_mp_table(void)
 	printk(KERN_INFO "mpf: %lx\n", virt_to_phys(mpf));
 	printk(KERN_INFO "mpf_physptr: %x\n", mpf->mpf_physptr);
 
-	if (mpc_new_phys && mpc->mpc_length > mpc_new_length) {
+	if (mpc_new_phys && mpc->length > mpc_new_length) {
 		mpc_new_phys = 0;
 		printk(KERN_INFO "mpc_new_length is %ld, please use alloc_mptable=8k\n",
 			 mpc_new_length);
@@ -1038,10 +1037,10 @@ static int __init update_mp_table(void)
 	if (!mpc_new_phys) {
 		unsigned char old, new;
 		/* check if we can change the postion */
-		mpc->mpc_checksum = 0;
-		old = mpf_checksum((unsigned char *)mpc, mpc->mpc_length);
-		mpc->mpc_checksum = 0xff;
-		new = mpf_checksum((unsigned char *)mpc, mpc->mpc_length);
+		mpc->checksum = 0;
+		old = mpf_checksum((unsigned char *)mpc, mpc->length);
+		mpc->checksum = 0xff;
+		new = mpf_checksum((unsigned char *)mpc, mpc->length);
 		if (old == new) {
 			printk(KERN_INFO "mpc is readonly, please try alloc_mptable instead\n");
 			return 0;
@@ -1050,7 +1049,7 @@ static int __init update_mp_table(void)
 	} else {
 		mpf->mpf_physptr = mpc_new_phys;
 		mpc_new = phys_to_virt(mpc_new_phys);
-		memcpy(mpc_new, mpc, mpc->mpc_length);
+		memcpy(mpc_new, mpc, mpc->length);
 		mpc = mpc_new;
 		/* check if we can modify that */
 		if (mpc_new_phys - mpf->mpf_physptr) {
diff --git a/arch/x86/mach-generic/es7000.c b/arch/x86/mach-generic/es7000.c
index 39243a796e1..c2ded144802 100644
--- a/arch/x86/mach-generic/es7000.c
+++ b/arch/x86/mach-generic/es7000.c
@@ -46,9 +46,9 @@ static void __init enable_apic_mode(void)
 static __init int
 mps_oem_check(struct mpc_table *mpc, char *oem, char *productid)
 {
-	if (mpc->mpc_oemptr) {
+	if (mpc->oemptr) {
 		struct mpc_oemtable *oem_table =
-			(struct mpc_oemtable *)mpc->mpc_oemptr;
+			(struct mpc_oemtable *)mpc->oemptr;
 		if (!strncmp(oem, "UNISYS", 6))
 			return parse_unisys_oem((char *)oem_table);
 	}
-- 
cgit v1.2.3


From 87c6fe26186d734e932426cc8ab9fd8cf9aeed94 Mon Sep 17 00:00:00 2001
From: Alan Cox <alan@lxorguk.ukuu.org.uk>
Date: Mon, 5 Jan 2009 14:08:04 +0000
Subject: x86: update Alan Cox's email addresses

Signed-off-by: Alan Cox <alan@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/apm_32.c             | 4 ++--
 arch/x86/kernel/cpu/mcheck/mce_32.c  | 2 +-
 arch/x86/kernel/cpu/mcheck/p5.c      | 2 +-
 arch/x86/kernel/cpu/mcheck/p6.c      | 2 +-
 arch/x86/kernel/cpu/mcheck/winchip.c | 2 +-
 arch/x86/kernel/mpparse.c            | 2 +-
 arch/x86/kernel/smp.c                | 2 +-
 arch/x86/kernel/smpboot.c            | 2 +-
 8 files changed, 9 insertions(+), 9 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c
index 3a26525a3f3..98807bb095a 100644
--- a/arch/x86/kernel/apm_32.c
+++ b/arch/x86/kernel/apm_32.c
@@ -160,9 +160,9 @@
  *         Work around byte swap bug in one of the Vaio's BIOS's
  *         (Marc Boucher <marc@mbsi.ca>).
  *         Exposed the disable flag to dmi so that we can handle known
- *         broken APM (Alan Cox <alan@redhat.com>).
+ *         broken APM (Alan Cox <alan@lxorguk.ukuu.org.uk>).
  *   1.14ac: If the BIOS says "I slowed the CPU down" then don't spin
- *         calling it - instead idle. (Alan Cox <alan@redhat.com>)
+ *         calling it - instead idle. (Alan Cox <alan@lxorguk.ukuu.org.uk>)
  *         If an APM idle fails log it and idle sensibly
  *   1.15: Don't queue events to clients who open the device O_WRONLY.
  *         Don't expect replies from clients who open the device O_RDONLY.
diff --git a/arch/x86/kernel/cpu/mcheck/mce_32.c b/arch/x86/kernel/cpu/mcheck/mce_32.c
index 0ebf3fc6a61..dfaebce3633 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_32.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_32.c
@@ -1,6 +1,6 @@
 /*
  * mce.c - x86 Machine Check Exception Reporting
- * (c) 2002 Alan Cox <alan@redhat.com>, Dave Jones <davej@redhat.com>
+ * (c) 2002 Alan Cox <alan@lxorguk.ukuu.org.uk>, Dave Jones <davej@redhat.com>
  */
 
 #include <linux/init.h>
diff --git a/arch/x86/kernel/cpu/mcheck/p5.c b/arch/x86/kernel/cpu/mcheck/p5.c
index bfa5817afdd..c9f77ea69ed 100644
--- a/arch/x86/kernel/cpu/mcheck/p5.c
+++ b/arch/x86/kernel/cpu/mcheck/p5.c
@@ -1,6 +1,6 @@
 /*
  * P5 specific Machine Check Exception Reporting
- * (C) Copyright 2002 Alan Cox <alan@redhat.com>
+ * (C) Copyright 2002 Alan Cox <alan@lxorguk.ukuu.org.uk>
  */
 
 #include <linux/init.h>
diff --git a/arch/x86/kernel/cpu/mcheck/p6.c b/arch/x86/kernel/cpu/mcheck/p6.c
index 62efc9c2b3a..2ac52d7b434 100644
--- a/arch/x86/kernel/cpu/mcheck/p6.c
+++ b/arch/x86/kernel/cpu/mcheck/p6.c
@@ -1,6 +1,6 @@
 /*
  * P6 specific Machine Check Exception Reporting
- * (C) Copyright 2002 Alan Cox <alan@redhat.com>
+ * (C) Copyright 2002 Alan Cox <alan@lxorguk.ukuu.org.uk>
  */
 
 #include <linux/init.h>
diff --git a/arch/x86/kernel/cpu/mcheck/winchip.c b/arch/x86/kernel/cpu/mcheck/winchip.c
index f2be3e190c6..2a043d89811 100644
--- a/arch/x86/kernel/cpu/mcheck/winchip.c
+++ b/arch/x86/kernel/cpu/mcheck/winchip.c
@@ -1,6 +1,6 @@
 /*
  * IDT Winchip specific Machine Check Exception Reporting
- * (C) Copyright 2002 Alan Cox <alan@redhat.com>
+ * (C) Copyright 2002 Alan Cox <alan@lxorguk.ukuu.org.uk>
  */
 
 #include <linux/init.h>
diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c
index c5c5b8df1db..cead7e27585 100644
--- a/arch/x86/kernel/mpparse.c
+++ b/arch/x86/kernel/mpparse.c
@@ -2,7 +2,7 @@
  *	Intel Multiprocessor Specification 1.1 and 1.4
  *	compliant MP-table parsing routines.
  *
- *	(c) 1995 Alan Cox, Building #3 <alan@redhat.com>
+ *	(c) 1995 Alan Cox, Building #3 <alan@lxorguk.ukuu.org.uk>
  *	(c) 1998, 1999, 2000 Ingo Molnar <mingo@redhat.com>
  *      (c) 2008 Alexey Starikovskiy <astarikovskiy@suse.de>
  */
diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c
index beea2649a24..cf1f075886b 100644
--- a/arch/x86/kernel/smp.c
+++ b/arch/x86/kernel/smp.c
@@ -1,7 +1,7 @@
 /*
  *	Intel SMP support routines.
  *
- *	(c) 1995 Alan Cox, Building #3 <alan@redhat.com>
+ *	(c) 1995 Alan Cox, Building #3 <alan@lxorguk.ukuu.org.uk>
  *	(c) 1998-99, 2000 Ingo Molnar <mingo@redhat.com>
  *      (c) 2002,2003 Andi Kleen, SuSE Labs.
  *
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 31869bf5fab..c628f9178cd 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -1,7 +1,7 @@
 /*
  *	x86 SMP booting functions
  *
- *	(c) 1995 Alan Cox, Building #3 <alan@redhat.com>
+ *	(c) 1995 Alan Cox, Building #3 <alan@lxorguk.ukuu.org.uk>
  *	(c) 1998, 1999, 2000 Ingo Molnar <mingo@redhat.com>
  *	Copyright 2001 Andi Kleen, SuSE Labs.
  *
-- 
cgit v1.2.3


From 5641f1fde074651ce2488e93944cf05dedd9bf74 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@ftp.linux.org.uk>
Date: Mon, 5 Jan 2009 17:19:02 +0000
Subject: X86_DEBUGCTLMSR won't work on uml

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/x86/Kconfig.cpu | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/x86')

diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu
index 85a78575956..8078955845a 100644
--- a/arch/x86/Kconfig.cpu
+++ b/arch/x86/Kconfig.cpu
@@ -408,7 +408,7 @@ config X86_MINIMUM_CPU_FAMILY
 
 config X86_DEBUGCTLMSR
 	def_bool y
-	depends on !(MK6 || MWINCHIPC6 || MWINCHIP3D || MCYRIXIII || M586MMX || M586TSC || M586 || M486 || M386)
+	depends on !(MK6 || MWINCHIPC6 || MWINCHIP3D || MCYRIXIII || M586MMX || M586TSC || M586 || M486 || M386) && !UML
 
 menuconfig PROCESSOR_SELECT
 	bool "Supported processor vendors" if EMBEDDED
-- 
cgit v1.2.3


From 5cb0535f1713b51610f2881b17d0fe3656114364 Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Sun, 4 Jan 2009 05:18:05 -0800
Subject: cpumask: replace CPUMASK_ALLOC etc with cpumask_var_t

Impact: cleanup

There's only one user, and it's a fairly easy conversion.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Mike Travis <travis@sgi.com>
Acked-by: Dave Jones <davej@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c | 21 +++++++++------------
 1 file changed, 9 insertions(+), 12 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c b/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c
index f0ea6fa2f53..d2cc4991cba 100644
--- a/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c
+++ b/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c
@@ -458,11 +458,6 @@ static int centrino_verify (struct cpufreq_policy *policy)
  *
  * Sets a new CPUFreq policy.
  */
-struct allmasks {
-	cpumask_t		saved_mask;
-	cpumask_t		covered_cpus;
-};
-
 static int centrino_target (struct cpufreq_policy *policy,
 			    unsigned int target_freq,
 			    unsigned int relation)
@@ -472,12 +467,15 @@ static int centrino_target (struct cpufreq_policy *policy,
 	struct cpufreq_freqs	freqs;
 	int			retval = 0;
 	unsigned int		j, k, first_cpu, tmp;
-	CPUMASK_ALLOC(allmasks);
-	CPUMASK_PTR(saved_mask, allmasks);
-	CPUMASK_PTR(covered_cpus, allmasks);
+	cpumask_var_t saved_mask, covered_cpus;
 
-	if (unlikely(allmasks == NULL))
+	if (unlikely(!alloc_cpumask_var(&saved_mask, GFP_KERNEL)))
 		return -ENOMEM;
+	if (unlikely(!alloc_cpumask_var(&covered_cpus, GFP_KERNEL))) {
+		free_cpumask_var(saved_mask);
+		return -ENOMEM;
+	}
+	cpumask_copy(saved_mask, &current->cpus_allowed);
 
 	if (unlikely(per_cpu(centrino_model, cpu) == NULL)) {
 		retval = -ENODEV;
@@ -493,9 +491,7 @@ static int centrino_target (struct cpufreq_policy *policy,
 		goto out;
 	}
 
-	*saved_mask = current->cpus_allowed;
 	first_cpu = 1;
-	cpus_clear(*covered_cpus);
 	for_each_cpu_mask_nr(j, policy->cpus) {
 		const cpumask_t *mask;
 
@@ -605,7 +601,8 @@ migrate_end:
 	preempt_enable();
 	set_cpus_allowed_ptr(current, saved_mask);
 out:
-	CPUMASK_FREE(allmasks);
+	free_cpumask_var(saved_mask);
+	free_cpumask_var(covered_cpus);
 	return retval;
 }
 
-- 
cgit v1.2.3


From 835481d9bcd65720b473db6b38746a74a3964218 Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Sun, 4 Jan 2009 05:18:06 -0800
Subject: cpumask: convert struct cpufreq_policy to cpumask_var_t

Impact: use new cpumask API to reduce memory usage

This is part of an effort to reduce structure sizes for machines
configured with large NR_CPUS.  cpumask_t gets replaced by
cpumask_var_t, which is either struct cpumask[1] (small NR_CPUS) or
struct cpumask * (large NR_CPUS).

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Mike Travis <travis@sgi.com>
Acked-by: Dave Jones <davej@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c       | 10 +++++-----
 arch/x86/kernel/cpu/cpufreq/p4-clockmod.c        |  8 ++++----
 arch/x86/kernel/cpu/cpufreq/powernow-k8.c        |  6 +++---
 arch/x86/kernel/cpu/cpufreq/powernow-k8.h        |  2 +-
 arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c | 14 +++++++-------
 arch/x86/kernel/cpu/cpufreq/speedstep-ich.c      | 18 +++++++++---------
 6 files changed, 29 insertions(+), 29 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
index 28102ad1a36..0b31939862d 100644
--- a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
+++ b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
@@ -411,7 +411,7 @@ static int acpi_cpufreq_target(struct cpufreq_policy *policy,
 
 #ifdef CONFIG_HOTPLUG_CPU
 	/* cpufreq holds the hotplug lock, so we are safe from here on */
-	cpus_and(online_policy_cpus, cpu_online_map, policy->cpus);
+	cpumask_and(&online_policy_cpus, cpu_online_mask, policy->cpus);
 #else
 	online_policy_cpus = policy->cpus;
 #endif
@@ -626,15 +626,15 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy)
 	 */
 	if (policy->shared_type == CPUFREQ_SHARED_TYPE_ALL ||
 	    policy->shared_type == CPUFREQ_SHARED_TYPE_ANY) {
-		cpumask_copy(&policy->cpus, perf->shared_cpu_map);
+		cpumask_copy(policy->cpus, perf->shared_cpu_map);
 	}
-	cpumask_copy(&policy->related_cpus, perf->shared_cpu_map);
+	cpumask_copy(policy->related_cpus, perf->shared_cpu_map);
 
 #ifdef CONFIG_SMP
 	dmi_check_system(sw_any_bug_dmi_table);
-	if (bios_with_sw_any_bug && cpus_weight(policy->cpus) == 1) {
+	if (bios_with_sw_any_bug && cpumask_weight(policy->cpus) == 1) {
 		policy->shared_type = CPUFREQ_SHARED_TYPE_ALL;
-		policy->cpus = per_cpu(cpu_core_map, cpu);
+		cpumask_copy(policy->cpus, cpu_core_mask(cpu));
 	}
 #endif
 
diff --git a/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c b/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c
index beea4466b06..b585e04cbc9 100644
--- a/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c
+++ b/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c
@@ -122,7 +122,7 @@ static int cpufreq_p4_target(struct cpufreq_policy *policy,
 		return 0;
 
 	/* notifiers */
-	for_each_cpu_mask_nr(i, policy->cpus) {
+	for_each_cpu(i, policy->cpus) {
 		freqs.cpu = i;
 		cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
 	}
@@ -130,11 +130,11 @@ static int cpufreq_p4_target(struct cpufreq_policy *policy,
 	/* run on each logical CPU, see section 13.15.3 of IA32 Intel Architecture Software
 	 * Developer's Manual, Volume 3
 	 */
-	for_each_cpu_mask_nr(i, policy->cpus)
+	for_each_cpu(i, policy->cpus)
 		cpufreq_p4_setdc(i, p4clockmod_table[newstate].index);
 
 	/* notifiers */
-	for_each_cpu_mask_nr(i, policy->cpus) {
+	for_each_cpu(i, policy->cpus) {
 		freqs.cpu = i;
 		cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
 	}
@@ -203,7 +203,7 @@ static int cpufreq_p4_cpu_init(struct cpufreq_policy *policy)
 	unsigned int i;
 
 #ifdef CONFIG_SMP
-	policy->cpus = per_cpu(cpu_sibling_map, policy->cpu);
+	cpumask_copy(policy->cpus, &per_cpu(cpu_sibling_map, policy->cpu));
 #endif
 
 	/* Errata workaround */
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
index c3c9adbaa26..5c28b37dea1 100644
--- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
+++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
@@ -1199,10 +1199,10 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol)
 	set_cpus_allowed_ptr(current, &oldmask);
 
 	if (cpu_family == CPU_HW_PSTATE)
-		pol->cpus = cpumask_of_cpu(pol->cpu);
+		cpumask_copy(pol->cpus, cpumask_of(pol->cpu));
 	else
-		pol->cpus = per_cpu(cpu_core_map, pol->cpu);
-	data->available_cores = &(pol->cpus);
+		cpumask_copy(pol->cpus, &per_cpu(cpu_core_map, pol->cpu));
+	data->available_cores = pol->cpus;
 
 	/* Take a crude guess here.
 	 * That guess was in microseconds, so multiply with 1000 */
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.h b/arch/x86/kernel/cpu/cpufreq/powernow-k8.h
index 65cfb5d7f77..8ecc75b6c7c 100644
--- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.h
+++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.h
@@ -53,7 +53,7 @@ struct powernow_k8_data {
 	/* we need to keep track of associated cores, but let cpufreq
 	 * handle hotplug events - so just point at cpufreq pol->cpus
 	 * structure */
-	cpumask_t *available_cores;
+	struct cpumask *available_cores;
 };
 
 
diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c b/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c
index d2cc4991cba..f08998278a3 100644
--- a/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c
+++ b/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c
@@ -492,8 +492,8 @@ static int centrino_target (struct cpufreq_policy *policy,
 	}
 
 	first_cpu = 1;
-	for_each_cpu_mask_nr(j, policy->cpus) {
-		const cpumask_t *mask;
+	for_each_cpu(j, policy->cpus) {
+		const struct cpumask *mask;
 
 		/* cpufreq holds the hotplug lock, so we are safe here */
 		if (!cpu_online(j))
@@ -504,9 +504,9 @@ static int centrino_target (struct cpufreq_policy *policy,
 		 * Make sure we are running on CPU that wants to change freq
 		 */
 		if (policy->shared_type == CPUFREQ_SHARED_TYPE_ANY)
-			mask = &policy->cpus;
+			mask = policy->cpus;
 		else
-			mask = &cpumask_of_cpu(j);
+			mask = cpumask_of(j);
 
 		set_cpus_allowed_ptr(current, mask);
 		preempt_disable();
@@ -538,7 +538,7 @@ static int centrino_target (struct cpufreq_policy *policy,
 			dprintk("target=%dkHz old=%d new=%d msr=%04x\n",
 				target_freq, freqs.old, freqs.new, msr);
 
-			for_each_cpu_mask_nr(k, policy->cpus) {
+			for_each_cpu(k, policy->cpus) {
 				if (!cpu_online(k))
 					continue;
 				freqs.cpu = k;
@@ -563,7 +563,7 @@ static int centrino_target (struct cpufreq_policy *policy,
 		preempt_enable();
 	}
 
-	for_each_cpu_mask_nr(k, policy->cpus) {
+	for_each_cpu(k, policy->cpus) {
 		if (!cpu_online(k))
 			continue;
 		freqs.cpu = k;
@@ -586,7 +586,7 @@ static int centrino_target (struct cpufreq_policy *policy,
 		tmp = freqs.new;
 		freqs.new = freqs.old;
 		freqs.old = tmp;
-		for_each_cpu_mask_nr(j, policy->cpus) {
+		for_each_cpu(j, policy->cpus) {
 			if (!cpu_online(j))
 				continue;
 			cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c b/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c
index 04d0376b64b..dedc1e98f16 100644
--- a/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c
+++ b/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c
@@ -229,7 +229,7 @@ static unsigned int speedstep_detect_chipset (void)
 	return 0;
 }
 
-static unsigned int _speedstep_get(const cpumask_t *cpus)
+static unsigned int _speedstep_get(const struct cpumask *cpus)
 {
 	unsigned int speed;
 	cpumask_t cpus_allowed;
@@ -244,7 +244,7 @@ static unsigned int _speedstep_get(const cpumask_t *cpus)
 
 static unsigned int speedstep_get(unsigned int cpu)
 {
-	return _speedstep_get(&cpumask_of_cpu(cpu));
+	return _speedstep_get(cpumask_of(cpu));
 }
 
 /**
@@ -267,7 +267,7 @@ static int speedstep_target (struct cpufreq_policy *policy,
 	if (cpufreq_frequency_table_target(policy, &speedstep_freqs[0], target_freq, relation, &newstate))
 		return -EINVAL;
 
-	freqs.old = _speedstep_get(&policy->cpus);
+	freqs.old = _speedstep_get(policy->cpus);
 	freqs.new = speedstep_freqs[newstate].frequency;
 	freqs.cpu = policy->cpu;
 
@@ -279,20 +279,20 @@ static int speedstep_target (struct cpufreq_policy *policy,
 
 	cpus_allowed = current->cpus_allowed;
 
-	for_each_cpu_mask_nr(i, policy->cpus) {
+	for_each_cpu(i, policy->cpus) {
 		freqs.cpu = i;
 		cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
 	}
 
 	/* switch to physical CPU where state is to be changed */
-	set_cpus_allowed_ptr(current, &policy->cpus);
+	set_cpus_allowed_ptr(current, policy->cpus);
 
 	speedstep_set_state(newstate);
 
 	/* allow to be run on all CPUs */
 	set_cpus_allowed_ptr(current, &cpus_allowed);
 
-	for_each_cpu_mask_nr(i, policy->cpus) {
+	for_each_cpu(i, policy->cpus) {
 		freqs.cpu = i;
 		cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
 	}
@@ -322,11 +322,11 @@ static int speedstep_cpu_init(struct cpufreq_policy *policy)
 
 	/* only run on CPU to be set, or on its sibling */
 #ifdef CONFIG_SMP
-	policy->cpus = per_cpu(cpu_sibling_map, policy->cpu);
+	cpumask_copy(policy->cpus, &per_cpu(cpu_sibling_map, policy->cpu));
 #endif
 
 	cpus_allowed = current->cpus_allowed;
-	set_cpus_allowed_ptr(current, &policy->cpus);
+	set_cpus_allowed_ptr(current, policy->cpus);
 
 	/* detect low and high frequency and transition latency */
 	result = speedstep_get_freqs(speedstep_processor,
@@ -339,7 +339,7 @@ static int speedstep_cpu_init(struct cpufreq_policy *policy)
 		return result;
 
 	/* get current speed setting */
-	speed = _speedstep_get(&policy->cpus);
+	speed = _speedstep_get(policy->cpus);
 	if (!speed)
 		return -EIO;
 
-- 
cgit v1.2.3


From c74f31c035f46a095a0c72f80246a65b314205a5 Mon Sep 17 00:00:00 2001
From: Mike Travis <travis@sgi.com>
Date: Sun, 4 Jan 2009 05:18:07 -0800
Subject: cpumask: use work_on_cpu in acpi/cstate.c

Impact: use new cpumask API to reduce stack usage

Replace the saving of current->cpus_allowed and set_cpus_allowed_ptr() with
a work_on_cpu function for the acpi_processor_ffh_cstate_probe() function.

Basically splits acpi_processor_ffh_cstate_probe() into two functions, the
other being acpi_processor_ffh_cstate_probe_cpu which is the work function
run on the designated cpu.

Signed-off-by: Mike Travis <travis@sgi.com>
Acked-by: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/acpi/cstate.c | 70 +++++++++++++++++++++++--------------------
 1 file changed, 37 insertions(+), 33 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/acpi/cstate.c b/arch/x86/kernel/acpi/cstate.c
index c2502eb9aa8..cf5ec586f22 100644
--- a/arch/x86/kernel/acpi/cstate.c
+++ b/arch/x86/kernel/acpi/cstate.c
@@ -66,35 +66,15 @@ static short mwait_supported[ACPI_PROCESSOR_MAX_POWER];
 
 #define NATIVE_CSTATE_BEYOND_HALT	(2)
 
-int acpi_processor_ffh_cstate_probe(unsigned int cpu,
-		struct acpi_processor_cx *cx, struct acpi_power_register *reg)
+static long acpi_processor_ffh_cstate_probe_cpu(void *_cx)
 {
-	struct cstate_entry *percpu_entry;
-	struct cpuinfo_x86 *c = &cpu_data(cpu);
-
-	cpumask_t saved_mask;
-	int retval;
+	struct acpi_processor_cx *cx = _cx;
+	long retval;
 	unsigned int eax, ebx, ecx, edx;
 	unsigned int edx_part;
 	unsigned int cstate_type; /* C-state type and not ACPI C-state type */
 	unsigned int num_cstate_subtype;
 
-	if (!cpu_cstate_entry || c->cpuid_level < CPUID_MWAIT_LEAF )
-		return -1;
-
-	if (reg->bit_offset != NATIVE_CSTATE_BEYOND_HALT)
-		return -1;
-
-	percpu_entry = per_cpu_ptr(cpu_cstate_entry, cpu);
-	percpu_entry->states[cx->index].eax = 0;
-	percpu_entry->states[cx->index].ecx = 0;
-
-	/* Make sure we are running on right CPU */
-	saved_mask = current->cpus_allowed;
-	retval = set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu));
-	if (retval)
-		return -1;
-
 	cpuid(CPUID_MWAIT_LEAF, &eax, &ebx, &ecx, &edx);
 
 	/* Check whether this particular cx_type (in CST) is supported or not */
@@ -114,21 +94,45 @@ int acpi_processor_ffh_cstate_probe(unsigned int cpu,
 		retval = -1;
 		goto out;
 	}
-	percpu_entry->states[cx->index].ecx = MWAIT_ECX_INTERRUPT_BREAK;
-
-	/* Use the hint in CST */
-	percpu_entry->states[cx->index].eax = cx->address;
 
 	if (!mwait_supported[cstate_type]) {
 		mwait_supported[cstate_type] = 1;
-		printk(KERN_DEBUG "Monitor-Mwait will be used to enter C-%d "
-		       "state\n", cx->type);
+		printk(KERN_DEBUG
+			"Monitor-Mwait will be used to enter C-%d "
+			"state\n", cx->type);
 	}
-	snprintf(cx->desc, ACPI_CX_DESC_LEN, "ACPI FFH INTEL MWAIT 0x%x",
-		 cx->address);
-
+	snprintf(cx->desc,
+			ACPI_CX_DESC_LEN, "ACPI FFH INTEL MWAIT 0x%x",
+			cx->address);
 out:
-	set_cpus_allowed_ptr(current, &saved_mask);
+	return retval;
+}
+
+int acpi_processor_ffh_cstate_probe(unsigned int cpu,
+		struct acpi_processor_cx *cx, struct acpi_power_register *reg)
+{
+	struct cstate_entry *percpu_entry;
+	struct cpuinfo_x86 *c = &cpu_data(cpu);
+	long retval;
+
+	if (!cpu_cstate_entry || c->cpuid_level < CPUID_MWAIT_LEAF)
+		return -1;
+
+	if (reg->bit_offset != NATIVE_CSTATE_BEYOND_HALT)
+		return -1;
+
+	percpu_entry = per_cpu_ptr(cpu_cstate_entry, cpu);
+	percpu_entry->states[cx->index].eax = 0;
+	percpu_entry->states[cx->index].ecx = 0;
+
+	/* Make sure we are running on right CPU */
+
+	retval = work_on_cpu(cpu, acpi_processor_ffh_cstate_probe_cpu, cx);
+	if (retval == 0) {
+		/* Use the hint in CST */
+		percpu_entry->states[cx->index].eax = cx->address;
+		percpu_entry->states[cx->index].ecx = MWAIT_ECX_INTERRUPT_BREAK;
+	}
 	return retval;
 }
 EXPORT_SYMBOL_GPL(acpi_processor_ffh_cstate_probe);
-- 
cgit v1.2.3


From 4d8bb5374924fc736ce275bfd1619b87a2106860 Mon Sep 17 00:00:00 2001
From: Mike Travis <travis@sgi.com>
Date: Sun, 4 Jan 2009 05:18:08 -0800
Subject: cpumask: use cpumask_var_t in acpi-cpufreq.c

Impact: cleanup, reduce stack usage, use new cpumask API.

Replace the cpumask_t in struct drv_cmd with a cpumask_var_t.  Remove unneeded
online_policy_cpus cpumask_t in acpi_cpufreq_target.  Update refs to use
new cpumask API.

Signed-off-by: Mike Travis <travis@sgi.com>
Acked-by: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c | 58 +++++++++++++++---------------
 1 file changed, 29 insertions(+), 29 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
index 0b31939862d..fb594170dc5 100644
--- a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
+++ b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
@@ -145,7 +145,7 @@ typedef union {
 
 struct drv_cmd {
 	unsigned int type;
-	cpumask_t mask;
+	cpumask_var_t mask;
 	drv_addr_union addr;
 	u32 val;
 };
@@ -193,7 +193,7 @@ static void drv_read(struct drv_cmd *cmd)
 	cpumask_t saved_mask = current->cpus_allowed;
 	cmd->val = 0;
 
-	set_cpus_allowed_ptr(current, &cmd->mask);
+	set_cpus_allowed_ptr(current, cmd->mask);
 	do_drv_read(cmd);
 	set_cpus_allowed_ptr(current, &saved_mask);
 }
@@ -203,8 +203,8 @@ static void drv_write(struct drv_cmd *cmd)
 	cpumask_t saved_mask = current->cpus_allowed;
 	unsigned int i;
 
-	for_each_cpu_mask_nr(i, cmd->mask) {
-		set_cpus_allowed_ptr(current, &cpumask_of_cpu(i));
+	for_each_cpu(i, cmd->mask) {
+		set_cpus_allowed_ptr(current, cpumask_of(i));
 		do_drv_write(cmd);
 	}
 
@@ -212,22 +212,22 @@ static void drv_write(struct drv_cmd *cmd)
 	return;
 }
 
-static u32 get_cur_val(const cpumask_t *mask)
+static u32 get_cur_val(const struct cpumask *mask)
 {
 	struct acpi_processor_performance *perf;
 	struct drv_cmd cmd;
 
-	if (unlikely(cpus_empty(*mask)))
+	if (unlikely(cpumask_empty(mask)))
 		return 0;
 
-	switch (per_cpu(drv_data, first_cpu(*mask))->cpu_feature) {
+	switch (per_cpu(drv_data, cpumask_first(mask))->cpu_feature) {
 	case SYSTEM_INTEL_MSR_CAPABLE:
 		cmd.type = SYSTEM_INTEL_MSR_CAPABLE;
 		cmd.addr.msr.reg = MSR_IA32_PERF_STATUS;
 		break;
 	case SYSTEM_IO_CAPABLE:
 		cmd.type = SYSTEM_IO_CAPABLE;
-		perf = per_cpu(drv_data, first_cpu(*mask))->acpi_data;
+		perf = per_cpu(drv_data, cpumask_first(mask))->acpi_data;
 		cmd.addr.io.port = perf->control_register.address;
 		cmd.addr.io.bit_width = perf->control_register.bit_width;
 		break;
@@ -235,7 +235,7 @@ static u32 get_cur_val(const cpumask_t *mask)
 		return 0;
 	}
 
-	cmd.mask = *mask;
+	cpumask_copy(cmd.mask, mask);
 
 	drv_read(&cmd);
 
@@ -386,7 +386,6 @@ static int acpi_cpufreq_target(struct cpufreq_policy *policy,
 	struct acpi_cpufreq_data *data = per_cpu(drv_data, policy->cpu);
 	struct acpi_processor_performance *perf;
 	struct cpufreq_freqs freqs;
-	cpumask_t online_policy_cpus;
 	struct drv_cmd cmd;
 	unsigned int next_state = 0; /* Index into freq_table */
 	unsigned int next_perf_state = 0; /* Index into perf table */
@@ -401,20 +400,18 @@ static int acpi_cpufreq_target(struct cpufreq_policy *policy,
 		return -ENODEV;
 	}
 
+	if (unlikely(!alloc_cpumask_var(&cmd.mask, GFP_KERNEL)))
+		return -ENOMEM;
+
 	perf = data->acpi_data;
 	result = cpufreq_frequency_table_target(policy,
 						data->freq_table,
 						target_freq,
 						relation, &next_state);
-	if (unlikely(result))
-		return -ENODEV;
-
-#ifdef CONFIG_HOTPLUG_CPU
-	/* cpufreq holds the hotplug lock, so we are safe from here on */
-	cpumask_and(&online_policy_cpus, cpu_online_mask, policy->cpus);
-#else
-	online_policy_cpus = policy->cpus;
-#endif
+	if (unlikely(result)) {
+		result = -ENODEV;
+		goto out;
+	}
 
 	next_perf_state = data->freq_table[next_state].index;
 	if (perf->state == next_perf_state) {
@@ -425,7 +422,7 @@ static int acpi_cpufreq_target(struct cpufreq_policy *policy,
 		} else {
 			dprintk("Already at target state (P%d)\n",
 				next_perf_state);
-			return 0;
+			goto out;
 		}
 	}
 
@@ -444,19 +441,19 @@ static int acpi_cpufreq_target(struct cpufreq_policy *policy,
 		cmd.val = (u32) perf->states[next_perf_state].control;
 		break;
 	default:
-		return -ENODEV;
+		result = -ENODEV;
+		goto out;
 	}
 
-	cpus_clear(cmd.mask);
-
+	/* cpufreq holds the hotplug lock, so we are safe from here on */
 	if (policy->shared_type != CPUFREQ_SHARED_TYPE_ANY)
-		cmd.mask = online_policy_cpus;
+		cpumask_and(cmd.mask, cpu_online_mask, policy->cpus);
 	else
-		cpu_set(policy->cpu, cmd.mask);
+		cpumask_copy(cmd.mask, cpumask_of(policy->cpu));
 
 	freqs.old = perf->states[perf->state].core_frequency * 1000;
 	freqs.new = data->freq_table[next_state].frequency;
-	for_each_cpu_mask_nr(i, cmd.mask) {
+	for_each_cpu(i, cmd.mask) {
 		freqs.cpu = i;
 		cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
 	}
@@ -464,19 +461,22 @@ static int acpi_cpufreq_target(struct cpufreq_policy *policy,
 	drv_write(&cmd);
 
 	if (acpi_pstate_strict) {
-		if (!check_freqs(&cmd.mask, freqs.new, data)) {
+		if (!check_freqs(cmd.mask, freqs.new, data)) {
 			dprintk("acpi_cpufreq_target failed (%d)\n",
 				policy->cpu);
-			return -EAGAIN;
+			result = -EAGAIN;
+			goto out;
 		}
 	}
 
-	for_each_cpu_mask_nr(i, cmd.mask) {
+	for_each_cpu(i, cmd.mask) {
 		freqs.cpu = i;
 		cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
 	}
 	perf->state = next_perf_state;
 
+out:
+	free_cpumask_var(cmd.mask);
 	return result;
 }
 
-- 
cgit v1.2.3


From 7503bfbae89eba07b46441a5d1594647f6b8ab7d Mon Sep 17 00:00:00 2001
From: Mike Travis <travis@sgi.com>
Date: Sun, 4 Jan 2009 05:18:09 -0800
Subject: cpumask: use work_on_cpu in acpi-cpufreq.c for drv_read and drv_write

Impact: use new cpumask API to reduce stack usage

Replace the saving of current->cpus_allowed and set_cpus_allowed_ptr() with
a work_on_cpu function for drv_read() and drv_write().

Basically converts do_drv_{read,write} into "work_on_cpu" functions that
are now called by drv_read and drv_write.

Signed-off-by: Mike Travis <travis@sgi.com>
Acked-by: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c | 25 +++++++++++++------------
 1 file changed, 13 insertions(+), 12 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
index fb594170dc5..4e4f2b04dac 100644
--- a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
+++ b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
@@ -150,8 +150,9 @@ struct drv_cmd {
 	u32 val;
 };
 
-static void do_drv_read(struct drv_cmd *cmd)
+static long do_drv_read(void *_cmd)
 {
+	struct drv_cmd *cmd = _cmd;
 	u32 h;
 
 	switch (cmd->type) {
@@ -166,10 +167,12 @@ static void do_drv_read(struct drv_cmd *cmd)
 	default:
 		break;
 	}
+	return 0;
 }
 
-static void do_drv_write(struct drv_cmd *cmd)
+static long do_drv_write(void *_cmd)
 {
+	struct drv_cmd *cmd = _cmd;
 	u32 lo, hi;
 
 	switch (cmd->type) {
@@ -186,30 +189,23 @@ static void do_drv_write(struct drv_cmd *cmd)
 	default:
 		break;
 	}
+	return 0;
 }
 
 static void drv_read(struct drv_cmd *cmd)
 {
-	cpumask_t saved_mask = current->cpus_allowed;
 	cmd->val = 0;
 
-	set_cpus_allowed_ptr(current, cmd->mask);
-	do_drv_read(cmd);
-	set_cpus_allowed_ptr(current, &saved_mask);
+	work_on_cpu(cpumask_any(cmd->mask), do_drv_read, cmd);
 }
 
 static void drv_write(struct drv_cmd *cmd)
 {
-	cpumask_t saved_mask = current->cpus_allowed;
 	unsigned int i;
 
 	for_each_cpu(i, cmd->mask) {
-		set_cpus_allowed_ptr(current, cpumask_of(i));
-		do_drv_write(cmd);
+		work_on_cpu(i, do_drv_write, cmd);
 	}
-
-	set_cpus_allowed_ptr(current, &saved_mask);
-	return;
 }
 
 static u32 get_cur_val(const struct cpumask *mask)
@@ -235,10 +231,15 @@ static u32 get_cur_val(const struct cpumask *mask)
 		return 0;
 	}
 
+	if (unlikely(!alloc_cpumask_var(&cmd.mask, GFP_KERNEL)))
+		return 0;
+
 	cpumask_copy(cmd.mask, mask);
 
 	drv_read(&cmd);
 
+	free_cpumask_var(cmd.mask);
+
 	dprintk("get_cur_val = %u\n", cmd.val);
 
 	return cmd.val;
-- 
cgit v1.2.3


From e39ad415ac15116df213dfa2aa2a4f1b0857af9c Mon Sep 17 00:00:00 2001
From: Mike Travis <travis@sgi.com>
Date: Sun, 4 Jan 2009 05:18:10 -0800
Subject: cpumask: use work_on_cpu in acpi-cpufreq.c for
 read_measured_perf_ctrs

Impact: use new cpumask API to reduce stack usage

Replace the saving of current->cpus_allowed and set_cpus_allowed_ptr() with
a work_on_cpu function for read_measured_perf_ctrs().

Basically splits off the work function from get_measured_perf which is
run on the designated cpu.  Moves definition of struct perf_cur out of
function local namespace, and is used as the work function argument.
References in get_measured_perf use values in the perf_cur struct.

Signed-off-by: Mike Travis <travis@sgi.com>
Acked-by: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c | 83 ++++++++++++++++--------------
 1 file changed, 43 insertions(+), 40 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
index 4e4f2b04dac..06fcd8f9323 100644
--- a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
+++ b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
@@ -245,6 +245,30 @@ static u32 get_cur_val(const struct cpumask *mask)
 	return cmd.val;
 }
 
+struct perf_cur {
+	union {
+		struct {
+			u32 lo;
+			u32 hi;
+		} split;
+		u64 whole;
+	} aperf_cur, mperf_cur;
+};
+
+
+static long read_measured_perf_ctrs(void *_cur)
+{
+	struct perf_cur *cur = _cur;
+
+	rdmsr(MSR_IA32_APERF, cur->aperf_cur.split.lo, cur->aperf_cur.split.hi);
+	rdmsr(MSR_IA32_MPERF, cur->mperf_cur.split.lo, cur->mperf_cur.split.hi);
+
+	wrmsr(MSR_IA32_APERF, 0, 0);
+	wrmsr(MSR_IA32_MPERF, 0, 0);
+
+	return 0;
+}
+
 /*
  * Return the measured active (C0) frequency on this CPU since last call
  * to this function.
@@ -261,31 +285,12 @@ static u32 get_cur_val(const struct cpumask *mask)
 static unsigned int get_measured_perf(struct cpufreq_policy *policy,
 				      unsigned int cpu)
 {
-	union {
-		struct {
-			u32 lo;
-			u32 hi;
-		} split;
-		u64 whole;
-	} aperf_cur, mperf_cur;
-
-	cpumask_t saved_mask;
+	struct perf_cur cur;
 	unsigned int perf_percent;
 	unsigned int retval;
 
-	saved_mask = current->cpus_allowed;
-	set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu));
-	if (get_cpu() != cpu) {
-		/* We were not able to run on requested processor */
-		put_cpu();
+	if (!work_on_cpu(cpu, read_measured_perf_ctrs, &cur))
 		return 0;
-	}
-
-	rdmsr(MSR_IA32_APERF, aperf_cur.split.lo, aperf_cur.split.hi);
-	rdmsr(MSR_IA32_MPERF, mperf_cur.split.lo, mperf_cur.split.hi);
-
-	wrmsr(MSR_IA32_APERF, 0,0);
-	wrmsr(MSR_IA32_MPERF, 0,0);
 
 #ifdef __i386__
 	/*
@@ -293,37 +298,39 @@ static unsigned int get_measured_perf(struct cpufreq_policy *policy,
 	 * Get an approximate value. Return failure in case we cannot get
 	 * an approximate value.
 	 */
-	if (unlikely(aperf_cur.split.hi || mperf_cur.split.hi)) {
+	if (unlikely(cur.aperf_cur.split.hi || cur.mperf_cur.split.hi)) {
 		int shift_count;
 		u32 h;
 
-		h = max_t(u32, aperf_cur.split.hi, mperf_cur.split.hi);
+		h = max_t(u32, cur.aperf_cur.split.hi, cur.mperf_cur.split.hi);
 		shift_count = fls(h);
 
-		aperf_cur.whole >>= shift_count;
-		mperf_cur.whole >>= shift_count;
+		cur.aperf_cur.whole >>= shift_count;
+		cur.mperf_cur.whole >>= shift_count;
 	}
 
-	if (((unsigned long)(-1) / 100) < aperf_cur.split.lo) {
+	if (((unsigned long)(-1) / 100) < cur.aperf_cur.split.lo) {
 		int shift_count = 7;
-		aperf_cur.split.lo >>= shift_count;
-		mperf_cur.split.lo >>= shift_count;
+		cur.aperf_cur.split.lo >>= shift_count;
+		cur.mperf_cur.split.lo >>= shift_count;
 	}
 
-	if (aperf_cur.split.lo && mperf_cur.split.lo)
-		perf_percent = (aperf_cur.split.lo * 100) / mperf_cur.split.lo;
+	if (cur.aperf_cur.split.lo && cur.mperf_cur.split.lo)
+		perf_percent = (cur.aperf_cur.split.lo * 100) /
+				cur.mperf_cur.split.lo;
 	else
 		perf_percent = 0;
 
 #else
-	if (unlikely(((unsigned long)(-1) / 100) < aperf_cur.whole)) {
+	if (unlikely(((unsigned long)(-1) / 100) < cur.aperf_cur.whole)) {
 		int shift_count = 7;
-		aperf_cur.whole >>= shift_count;
-		mperf_cur.whole >>= shift_count;
+		cur.aperf_cur.whole >>= shift_count;
+		cur.mperf_cur.whole >>= shift_count;
 	}
 
-	if (aperf_cur.whole && mperf_cur.whole)
-		perf_percent = (aperf_cur.whole * 100) / mperf_cur.whole;
+	if (cur.aperf_cur.whole && cur.mperf_cur.whole)
+		perf_percent = (cur.aperf_cur.whole * 100) /
+				cur.mperf_cur.whole;
 	else
 		perf_percent = 0;
 
@@ -331,10 +338,6 @@ static unsigned int get_measured_perf(struct cpufreq_policy *policy,
 
 	retval = per_cpu(drv_data, policy->cpu)->max_freq * perf_percent / 100;
 
-	put_cpu();
-	set_cpus_allowed_ptr(current, &saved_mask);
-
-	dprintk("cpu %d: performance percent %d\n", cpu, perf_percent);
 	return retval;
 }
 
@@ -352,7 +355,7 @@ static unsigned int get_cur_freq_on_cpu(unsigned int cpu)
 	}
 
 	cached_freq = data->freq_table[data->acpi_data->state].frequency;
-	freq = extract_freq(get_cur_val(&cpumask_of_cpu(cpu)), data);
+	freq = extract_freq(get_cur_val(cpumask_of(cpu)), data);
 	if (freq != cached_freq) {
 		/*
 		 * The dreaded BIOS frequency change behind our back.
-- 
cgit v1.2.3


From 025dfdafe77f20b3890981a394774baab7b9c827 Mon Sep 17 00:00:00 2001
From: Frederik Schwarzer <schwarzerf@gmail.com>
Date: Thu, 16 Oct 2008 19:02:37 +0200
Subject: trivial: fix then -> than typos in comments and documentation

- (better, more, bigger ...) then -> (...) than

Signed-off-by: Frederik Schwarzer <schwarzerf@gmail.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 arch/x86/kernel/kprobes.c  | 2 +-
 arch/x86/kernel/mfgpt_32.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c
index 6c27679ec6a..a116e6d5726 100644
--- a/arch/x86/kernel/kprobes.c
+++ b/arch/x86/kernel/kprobes.c
@@ -694,7 +694,7 @@ static __used __kprobes void *trampoline_handler(struct pt_regs *regs)
 	/*
 	 * It is possible to have multiple instances associated with a given
 	 * task either because multiple functions in the call path have
-	 * return probes installed on them, and/or more then one
+	 * return probes installed on them, and/or more than one
 	 * return probe was registered for a target function.
 	 *
 	 * We can handle this because:
diff --git a/arch/x86/kernel/mfgpt_32.c b/arch/x86/kernel/mfgpt_32.c
index c12314c9e86..8815f3c7fec 100644
--- a/arch/x86/kernel/mfgpt_32.c
+++ b/arch/x86/kernel/mfgpt_32.c
@@ -252,7 +252,7 @@ EXPORT_SYMBOL_GPL(geode_mfgpt_alloc_timer);
 /*
  * The MFPGT timers on the CS5536 provide us with suitable timers to use
  * as clock event sources - not as good as a HPET or APIC, but certainly
- * better then the PIT.  This isn't a general purpose MFGPT driver, but
+ * better than the PIT.  This isn't a general purpose MFGPT driver, but
  * a simplified one designed specifically to act as a clock event source.
  * For full details about the MFGPT, please consult the CS5536 data sheet.
  */
-- 
cgit v1.2.3


From 40bcc69b399ddbcd2d5c52b277a1b27398339b27 Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yinghai@kernel.org>
Date: Mon, 5 Jan 2009 18:39:01 -0800
Subject: x86: k8 numa register active regions later

Impact: cleanup

don't register early, so we don't need to clear actived regions if it fail
to get node hash shift or wild set in nb config.

also remove nodeids array that is not needed

Signed-off-by: Yinghai Lu <yinghai@kernel.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/mm/k8topology_64.c | 20 +++++++++-----------
 1 file changed, 9 insertions(+), 11 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/mm/k8topology_64.c b/arch/x86/mm/k8topology_64.c
index 41f1b5c00a1..268f8255280 100644
--- a/arch/x86/mm/k8topology_64.c
+++ b/arch/x86/mm/k8topology_64.c
@@ -81,7 +81,6 @@ int __init k8_scan_nodes(unsigned long start, unsigned long end)
 	unsigned numnodes, cores, bits, apicid_base;
 	unsigned long prevbase;
 	struct bootnode nodes[8];
-	unsigned char nodeids[8];
 	int i, j, nb, found = 0;
 	u32 nodeid, reg;
 
@@ -110,7 +109,6 @@ int __init k8_scan_nodes(unsigned long start, unsigned long end)
 		limit = read_pci_config(0, nb, 1, 0x44 + i*8);
 
 		nodeid = limit & 7;
-		nodeids[i] = nodeid;
 		if ((base & 3) == 0) {
 			if (i < numnodes)
 				printk("Skipping disabled node %d\n", i);
@@ -179,9 +177,6 @@ int __init k8_scan_nodes(unsigned long start, unsigned long end)
 
 		nodes[nodeid].start = base;
 		nodes[nodeid].end = limit;
-		e820_register_active_regions(nodeid,
-				nodes[nodeid].start >> PAGE_SHIFT,
-				nodes[nodeid].end >> PAGE_SHIFT);
 
 		prevbase = base;
 
@@ -211,12 +206,15 @@ int __init k8_scan_nodes(unsigned long start, unsigned long end)
 	}
 
 	for (i = 0; i < 8; i++) {
-		if (nodes[i].start != nodes[i].end) {
-			nodeid = nodeids[i];
-			for (j = apicid_base; j < cores + apicid_base; j++)
-				apicid_to_node[(nodeid << bits) + j] = i;
-			setup_node_bootmem(i, nodes[i].start, nodes[i].end);
-		}
+		if (nodes[i].start == nodes[i].end)
+			continue;
+
+		e820_register_active_regions(i,
+				nodes[i].start >> PAGE_SHIFT,
+				nodes[i].end >> PAGE_SHIFT);
+		for (j = apicid_base; j < cores + apicid_base; j++)
+			apicid_to_node[(i << bits) + j] = i;
+		setup_node_bootmem(i, nodes[i].start, nodes[i].end);
 	}
 
 	numa_init_array();
-- 
cgit v1.2.3


From 4d9f94319c485f5af6717dfd7a333949636aed16 Mon Sep 17 00:00:00 2001
From: Mike Travis <travis@sgi.com>
Date: Mon, 5 Jan 2009 17:09:41 -0800
Subject: x86: fix x86_32 builds for summit and es7000 arch's

Fix the following build errors reported by Yinghai Lu:

| In file included from arch/x86/mach-generic/summit.c:16:
| tip/linux-2.6/arch/x86/include/asm/summit/apic.h:
| In function 'cpu_mask_to_apicid_and':
| tip/linux-2.6/arch/x86/include/asm/summit/apic.h:179:
| error: 'GFP_ATOMIC' undeclared (first use in this function)

Reported-by: Yinghai Lu <Yinghai.Lu@Sun.COM>
Signed-off-by: Mike Travis <travis@sgi.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/include/asm/es7000/apic.h | 2 ++
 arch/x86/include/asm/summit/apic.h | 1 +
 2 files changed, 3 insertions(+)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/es7000/apic.h b/arch/x86/include/asm/es7000/apic.h
index bc53d5ef138..c58b9cc7446 100644
--- a/arch/x86/include/asm/es7000/apic.h
+++ b/arch/x86/include/asm/es7000/apic.h
@@ -1,6 +1,8 @@
 #ifndef __ASM_ES7000_APIC_H
 #define __ASM_ES7000_APIC_H
 
+#include <linux/gfp.h>
+
 #define xapic_phys_to_log_apicid(cpu) per_cpu(x86_bios_cpu_apicid, cpu)
 #define esr_disable (1)
 
diff --git a/arch/x86/include/asm/summit/apic.h b/arch/x86/include/asm/summit/apic.h
index 4bb5fb34f03..93d2c8667cf 100644
--- a/arch/x86/include/asm/summit/apic.h
+++ b/arch/x86/include/asm/summit/apic.h
@@ -2,6 +2,7 @@
 #define __ASM_SUMMIT_APIC_H
 
 #include <asm/smp.h>
+#include <linux/gfp.h>
 
 #define esr_disable (1)
 #define NO_BALANCE_IRQ (0)
-- 
cgit v1.2.3


From 9e9197370dafa7ebc7191d835f0403b13855ca35 Mon Sep 17 00:00:00 2001
From: Huang Weiyi <weiyi.huang@gmail.com>
Date: Tue, 6 Jan 2009 06:58:39 +0800
Subject: x86: remove duplicated #include's

Removed duplicated #include's in:

  arch/x86/kernel/mpparse.c
  arch/x86/kernel/nmi.c

Signed-off-by: Huang Weiyi <weiyi.huang@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/mpparse.c | 1 -
 arch/x86/kernel/nmi.c     | 1 -
 2 files changed, 2 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c
index e6a9724fefc..fc971973f03 100644
--- a/arch/x86/kernel/mpparse.c
+++ b/arch/x86/kernel/mpparse.c
@@ -17,7 +17,6 @@
 #include <linux/acpi.h>
 #include <linux/module.h>
 #include <linux/smp.h>
-#include <linux/acpi.h>
 
 #include <asm/mtrr.h>
 #include <asm/mpspec.h>
diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c
index 45a09ccdc21..7228979f1e7 100644
--- a/arch/x86/kernel/nmi.c
+++ b/arch/x86/kernel/nmi.c
@@ -26,7 +26,6 @@
 #include <linux/kernel_stat.h>
 #include <linux/kdebug.h>
 #include <linux/smp.h>
-#include <linux/nmi.h>
 
 #include <asm/i8259.h>
 #include <asm/io_apic.h>
-- 
cgit v1.2.3


From 1c0fe6e3bda0464728c23c8d84aa47567e8b716c Mon Sep 17 00:00:00 2001
From: Nick Piggin <npiggin@suse.de>
Date: Tue, 6 Jan 2009 14:38:59 -0800
Subject: mm: invoke oom-killer from page fault

Rather than have the pagefault handler kill a process directly if it gets
a VM_FAULT_OOM, have it call into the OOM killer.

With increasingly sophisticated oom behaviour (cpusets, memory cgroups,
oom killing throttling, oom priority adjustment or selective disabling,
panic on oom, etc), it's silly to unconditionally kill the faulting
process at page fault time.  Create a hook for pagefault oom path to call
into instead.

Only converted x86 and uml so far.

[akpm@linux-foundation.org: make __out_of_memory() static]
[akpm@linux-foundation.org: fix comment]
Signed-off-by: Nick Piggin <npiggin@suse.de>
Cc: Jeff Dike <jdike@addtoit.com>
Acked-by: Ingo Molnar <mingo@elte.hu>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/x86/mm/fault.c | 24 ++++++------------------
 1 file changed, 6 insertions(+), 18 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 57ec8c86a87..9e268b6b204 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -667,7 +667,6 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code)
 	if (unlikely(in_atomic() || !mm))
 		goto bad_area_nosemaphore;
 
-again:
 	/*
 	 * When running in the kernel we expect faults to occur only to
 	 * addresses in user space.  All other faults represent errors in the
@@ -859,25 +858,14 @@ no_context:
 	oops_end(flags, regs, sig);
 #endif
 
-/*
- * We ran out of memory, or some other thing happened to us that made
- * us unable to handle the page fault gracefully.
- */
 out_of_memory:
+	/*
+	 * We ran out of memory, call the OOM killer, and return the userspace
+	 * (which will retry the fault, or kill us if we got oom-killed).
+	 */
 	up_read(&mm->mmap_sem);
-	if (is_global_init(tsk)) {
-		yield();
-		/*
-		 * Re-lookup the vma - in theory the vma tree might
-		 * have changed:
-		 */
-		goto again;
-	}
-
-	printk("VM: killing process %s\n", tsk->comm);
-	if (error_code & PF_USER)
-		do_group_exit(SIGKILL);
-	goto no_context;
+	pagefault_out_of_memory();
+	return;
 
 do_sigbus:
 	up_read(&mm->mmap_sem);
-- 
cgit v1.2.3


From c04fc586c1a480ba198f03ae7b6cbd7b57380b91 Mon Sep 17 00:00:00 2001
From: Gary Hade <garyhade@us.ibm.com>
Date: Tue, 6 Jan 2009 14:39:14 -0800
Subject: mm: show node to memory section relationship with symlinks in sysfs

Show node to memory section relationship with symlinks in sysfs

Add /sys/devices/system/node/nodeX/memoryY symlinks for all
the memory sections located on nodeX.  For example:
/sys/devices/system/node/node1/memory135 -> ../../memory/memory135
indicates that memory section 135 resides on node1.

Also revises documentation to cover this change as well as updating
Documentation/ABI/testing/sysfs-devices-memory to include descriptions
of memory hotremove files 'phys_device', 'phys_index', and 'state'
that were previously not described there.

In addition to it always being a good policy to provide users with
the maximum possible amount of physical location information for
resources that can be hot-added and/or hot-removed, the following
are some (but likely not all) of the user benefits provided by
this change.
Immediate:
  - Provides information needed to determine the specific node
    on which a defective DIMM is located.  This will reduce system
    downtime when the node or defective DIMM is swapped out.
  - Prevents unintended onlining of a memory section that was
    previously offlined due to a defective DIMM.  This could happen
    during node hot-add when the user or node hot-add assist script
    onlines _all_ offlined sections due to user or script inability
    to identify the specific memory sections located on the hot-added
    node.  The consequences of reintroducing the defective memory
    could be ugly.
  - Provides information needed to vary the amount and distribution
    of memory on specific nodes for testing or debugging purposes.
Future:
  - Will provide information needed to identify the memory
    sections that need to be offlined prior to physical removal
    of a specific node.

Symlink creation during boot was tested on 2-node x86_64, 2-node
ppc64, and 2-node ia64 systems.  Symlink creation during physical
memory hot-add tested on a 2-node x86_64 system.

Signed-off-by: Gary Hade <garyhade@us.ibm.com>
Signed-off-by: Badari Pulavarty <pbadari@us.ibm.com>
Acked-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/x86/mm/init_32.c | 2 +-
 arch/x86/mm/init_64.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index f99a6c6c432..544d724caee 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -1079,7 +1079,7 @@ int arch_add_memory(int nid, u64 start, u64 size)
 	unsigned long start_pfn = start >> PAGE_SHIFT;
 	unsigned long nr_pages = size >> PAGE_SHIFT;
 
-	return __add_pages(zone, start_pfn, nr_pages);
+	return __add_pages(nid, zone, start_pfn, nr_pages);
 }
 #endif
 
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 9f7a0d24d42..54c437e9654 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -857,7 +857,7 @@ int arch_add_memory(int nid, u64 start, u64 size)
 	if (last_mapped_pfn > max_pfn_mapped)
 		max_pfn_mapped = last_mapped_pfn;
 
-	ret = __add_pages(zone, start_pfn, nr_pages);
+	ret = __add_pages(nid, zone, start_pfn, nr_pages);
 	WARN_ON_ONCE(ret);
 
 	return ret;
-- 
cgit v1.2.3


From ea435467500612636f8f4fb639ff6e76b2496e4b Mon Sep 17 00:00:00 2001
From: Matthew Wilcox <matthew@wil.cx>
Date: Tue, 6 Jan 2009 14:40:39 -0800
Subject: atomic_t: unify all arch definitions

The atomic_t type cannot currently be used in some header files because it
would create an include loop with asm/atomic.h.  Move the type definition
to linux/types.h to break the loop.

Signed-off-by: Matthew Wilcox <willy@linux.intel.com>
Cc: Huang Ying <ying.huang@intel.com>
Cc: <linux-arch@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/x86/include/asm/atomic_32.h | 10 +---------
 arch/x86/include/asm/atomic_64.h | 18 ++----------------
 2 files changed, 3 insertions(+), 25 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/atomic_32.h b/arch/x86/include/asm/atomic_32.h
index ad5b9f6ecdd..85b46fba422 100644
--- a/arch/x86/include/asm/atomic_32.h
+++ b/arch/x86/include/asm/atomic_32.h
@@ -2,6 +2,7 @@
 #define _ASM_X86_ATOMIC_32_H
 
 #include <linux/compiler.h>
+#include <linux/types.h>
 #include <asm/processor.h>
 #include <asm/cmpxchg.h>
 
@@ -10,15 +11,6 @@
  * resource counting etc..
  */
 
-/*
- * Make sure gcc doesn't try to be clever and move things around
- * on us. We need to use _exactly_ the address the user gave us,
- * not some alias that contains the same information.
- */
-typedef struct {
-	int counter;
-} atomic_t;
-
 #define ATOMIC_INIT(i)	{ (i) }
 
 /**
diff --git a/arch/x86/include/asm/atomic_64.h b/arch/x86/include/asm/atomic_64.h
index 279d2a731f3..8c21731984d 100644
--- a/arch/x86/include/asm/atomic_64.h
+++ b/arch/x86/include/asm/atomic_64.h
@@ -1,25 +1,15 @@
 #ifndef _ASM_X86_ATOMIC_64_H
 #define _ASM_X86_ATOMIC_64_H
 
+#include <linux/types.h>
 #include <asm/alternative.h>
 #include <asm/cmpxchg.h>
 
-/* atomic_t should be 32 bit signed type */
-
 /*
  * Atomic operations that C can't guarantee us.  Useful for
  * resource counting etc..
  */
 
-/*
- * Make sure gcc doesn't try to be clever and move things around
- * on us. We need to use _exactly_ the address the user gave us,
- * not some alias that contains the same information.
- */
-typedef struct {
-	int counter;
-} atomic_t;
-
 #define ATOMIC_INIT(i)	{ (i) }
 
 /**
@@ -191,11 +181,7 @@ static inline int atomic_sub_return(int i, atomic_t *v)
 #define atomic_inc_return(v)  (atomic_add_return(1, v))
 #define atomic_dec_return(v)  (atomic_sub_return(1, v))
 
-/* An 64bit atomic type */
-
-typedef struct {
-	long counter;
-} atomic64_t;
+/* The 64-bit atomic type */
 
 #define ATOMIC64_INIT(i)	{ (i) }
 
-- 
cgit v1.2.3


From f1883f86dea84fe47a71a39fc1afccc005915ed8 Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Tue, 6 Jan 2009 14:40:45 -0800
Subject: Remove remaining unwinder code

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Cc: Gabor Gombas <gombasg@sztaki.hu>
Cc: Jan Beulich <jbeulich@novell.com>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: Ingo Molnar <mingo@elte.hu>,
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/x86/include/asm/unwind.h | 13 -------------
 arch/x86/kernel/traps.c       |  2 --
 2 files changed, 15 deletions(-)
 delete mode 100644 arch/x86/include/asm/unwind.h

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/unwind.h b/arch/x86/include/asm/unwind.h
deleted file mode 100644
index 8b064bd9c55..00000000000
--- a/arch/x86/include/asm/unwind.h
+++ /dev/null
@@ -1,13 +0,0 @@
-#ifndef _ASM_X86_UNWIND_H
-#define _ASM_X86_UNWIND_H
-
-#define UNW_PC(frame) ((void)(frame), 0UL)
-#define UNW_SP(frame) ((void)(frame), 0UL)
-#define UNW_FP(frame) ((void)(frame), 0UL)
-
-static inline int arch_unw_user_mode(const void *info)
-{
-	return 0;
-}
-
-#endif /* _ASM_X86_UNWIND_H */
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index ce6650eb64e..c9a666cdd3d 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -20,7 +20,6 @@
 #include <linux/module.h>
 #include <linux/ptrace.h>
 #include <linux/string.h>
-#include <linux/unwind.h>
 #include <linux/delay.h>
 #include <linux/errno.h>
 #include <linux/kexec.h>
@@ -51,7 +50,6 @@
 #include <asm/debugreg.h>
 #include <asm/atomic.h>
 #include <asm/system.h>
-#include <asm/unwind.h>
 #include <asm/traps.h>
 #include <asm/desc.h>
 #include <asm/i387.h>
-- 
cgit v1.2.3


From 129415607845d4daea11ddcba706005c69dcb942 Mon Sep 17 00:00:00 2001
From: Masami Hiramatsu <mhiramat@redhat.com>
Date: Tue, 6 Jan 2009 14:41:50 -0800
Subject: kprobes: add kprobe_insn_mutex and cleanup arch_remove_kprobe()

Add kprobe_insn_mutex for protecting kprobe_insn_pages hlist, and remove
kprobe_mutex from architecture dependent code.

This allows us to call arch_remove_kprobe() (and free_insn_slot) while
holding kprobe_mutex.

Signed-off-by: Masami Hiramatsu <mhiramat@redhat.com>
Acked-by: Ananth N Mavinakayanahalli <ananth@in.ibm.com>
Cc: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
Cc: Russell King <rmk@arm.linux.org.uk>
Cc: "Luck, Tony" <tony.luck@intel.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/x86/kernel/kprobes.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c
index 6c27679ec6a..eead6f8f921 100644
--- a/arch/x86/kernel/kprobes.c
+++ b/arch/x86/kernel/kprobes.c
@@ -376,9 +376,10 @@ void __kprobes arch_disarm_kprobe(struct kprobe *p)
 
 void __kprobes arch_remove_kprobe(struct kprobe *p)
 {
-	mutex_lock(&kprobe_mutex);
-	free_insn_slot(p->ainsn.insn, (p->ainsn.boostable == 1));
-	mutex_unlock(&kprobe_mutex);
+	if (p->ainsn.insn) {
+		free_insn_slot(p->ainsn.insn, (p->ainsn.boostable == 1));
+		p->ainsn.insn = NULL;
+	}
 }
 
 static void __kprobes save_previous_kprobe(struct kprobe_ctlblk *kcb)
-- 
cgit v1.2.3


From 5d30a683888c60b8f93bef3ddc139d1a91ca58f4 Mon Sep 17 00:00:00 2001
From: Harvey Harrison <harvey.harrison@gmail.com>
Date: Tue, 6 Jan 2009 14:56:28 -0800
Subject: x86: introduce asm/swab.h

Signed-off-by: Harvey Harrison <harvey.harrison@gmail.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/x86/include/asm/Kbuild      |  1 +
 arch/x86/include/asm/byteorder.h | 62 ++--------------------------------------
 arch/x86/include/asm/swab.h      | 61 +++++++++++++++++++++++++++++++++++++++
 3 files changed, 64 insertions(+), 60 deletions(-)
 create mode 100644 arch/x86/include/asm/swab.h

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/Kbuild b/arch/x86/include/asm/Kbuild
index 4a8e80cdcfa..a9f8a814a1f 100644
--- a/arch/x86/include/asm/Kbuild
+++ b/arch/x86/include/asm/Kbuild
@@ -22,3 +22,4 @@ unifdef-y += unistd_32.h
 unifdef-y += unistd_64.h
 unifdef-y += vm86.h
 unifdef-y += vsyscall.h
+unifdef-y += swab.h
diff --git a/arch/x86/include/asm/byteorder.h b/arch/x86/include/asm/byteorder.h
index f110ad417df..7c49917e3d9 100644
--- a/arch/x86/include/asm/byteorder.h
+++ b/arch/x86/include/asm/byteorder.h
@@ -1,65 +1,7 @@
 #ifndef _ASM_X86_BYTEORDER_H
 #define _ASM_X86_BYTEORDER_H
 
-#include <asm/types.h>
-#include <linux/compiler.h>
-
-#define __LITTLE_ENDIAN
-
-static inline __attribute_const__ __u32 __arch_swab32(__u32 val)
-{
-#ifdef __i386__
-# ifdef CONFIG_X86_BSWAP
-	asm("bswap %0" : "=r" (val) : "0" (val));
-# else
-	asm("xchgb %b0,%h0\n\t"	/* swap lower bytes	*/
-	    "rorl $16,%0\n\t"	/* swap words		*/
-	    "xchgb %b0,%h0"	/* swap higher bytes	*/
-	    : "=q" (val)
-	    : "0" (val));
-# endif
-
-#else /* __i386__ */
-	asm("bswapl %0"
-	    : "=r" (val)
-	    : "0" (val));
-#endif
-	return val;
-}
-#define __arch_swab32 __arch_swab32
-
-static inline __attribute_const__ __u64 __arch_swab64(__u64 val)
-{
-#ifdef __i386__
-	union {
-		struct {
-			__u32 a;
-			__u32 b;
-		} s;
-		__u64 u;
-	} v;
-	v.u = val;
-# ifdef CONFIG_X86_BSWAP
-	asm("bswapl %0 ; bswapl %1 ; xchgl %0,%1"
-	    : "=r" (v.s.a), "=r" (v.s.b)
-	    : "0" (v.s.a), "1" (v.s.b));
-# else
-	v.s.a = __arch_swab32(v.s.a);
-	v.s.b = __arch_swab32(v.s.b);
-	asm("xchgl %0,%1"
-	    : "=r" (v.s.a), "=r" (v.s.b)
-	    : "0" (v.s.a), "1" (v.s.b));
-# endif
-	return v.u;
-#else /* __i386__ */
-	asm("bswapq %0"
-	    : "=r" (val)
-	    : "0" (val));
-	return val;
-#endif
-}
-#define __arch_swab64 __arch_swab64
-
-#include <linux/byteorder.h>
+#include <asm/swab.h>
+#include <linux/byteorder/little_endian.h>
 
 #endif /* _ASM_X86_BYTEORDER_H */
diff --git a/arch/x86/include/asm/swab.h b/arch/x86/include/asm/swab.h
new file mode 100644
index 00000000000..306d4178ffc
--- /dev/null
+++ b/arch/x86/include/asm/swab.h
@@ -0,0 +1,61 @@
+#ifndef _ASM_X86_SWAB_H
+#define _ASM_X86_SWAB_H
+
+#include <asm/types.h>
+#include <linux/compiler.h>
+
+static inline __attribute_const__ __u32 __arch_swab32(__u32 val)
+{
+#ifdef __i386__
+# ifdef CONFIG_X86_BSWAP
+	asm("bswap %0" : "=r" (val) : "0" (val));
+# else
+	asm("xchgb %b0,%h0\n\t"	/* swap lower bytes	*/
+	    "rorl $16,%0\n\t"	/* swap words		*/
+	    "xchgb %b0,%h0"	/* swap higher bytes	*/
+	    : "=q" (val)
+	    : "0" (val));
+# endif
+
+#else /* __i386__ */
+	asm("bswapl %0"
+	    : "=r" (val)
+	    : "0" (val));
+#endif
+	return val;
+}
+#define __arch_swab32 __arch_swab32
+
+static inline __attribute_const__ __u64 __arch_swab64(__u64 val)
+{
+#ifdef __i386__
+	union {
+		struct {
+			__u32 a;
+			__u32 b;
+		} s;
+		__u64 u;
+	} v;
+	v.u = val;
+# ifdef CONFIG_X86_BSWAP
+	asm("bswapl %0 ; bswapl %1 ; xchgl %0,%1"
+	    : "=r" (v.s.a), "=r" (v.s.b)
+	    : "0" (v.s.a), "1" (v.s.b));
+# else
+	v.s.a = __arch_swab32(v.s.a);
+	v.s.b = __arch_swab32(v.s.b);
+	asm("xchgl %0,%1"
+	    : "=r" (v.s.a), "=r" (v.s.b)
+	    : "0" (v.s.a), "1" (v.s.b));
+# endif
+	return v.u;
+#else /* __i386__ */
+	asm("bswapq %0"
+	    : "=r" (val)
+	    : "0" (val));
+	return val;
+#endif
+}
+#define __arch_swab64 __arch_swab64
+
+#endif /* _ASM_X86_SWAB_H */
-- 
cgit v1.2.3


From da4276b8299a6544dc41ac2485d3ffca5811b3fb Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Wed, 7 Jan 2009 11:05:10 +0100
Subject: x86: offer frame pointers in all build modes

CONFIG_FRAME_POINTERS=y results in much better debug info for the
kernel (clear and precise backtraces), with the only drawback being
a ~1% increase in kernel size.

So offer it unconditionally and enable it by default.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/Kconfig | 1 +
 1 file changed, 1 insertion(+)

(limited to 'arch/x86')

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 862adb9bf0d..73f7fe8fd4d 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -27,6 +27,7 @@ config X86
 	select HAVE_IOREMAP_PROT
 	select HAVE_KPROBES
 	select ARCH_WANT_OPTIONAL_GPIOLIB
+	select ARCH_WANT_FRAME_POINTERS
 	select HAVE_KRETPROBES
 	select HAVE_FTRACE_MCOUNT_RECORD
 	select HAVE_DYNAMIC_FTRACE
-- 
cgit v1.2.3


From 51d7a1398d1851e892504e97ca20521610dfcece Mon Sep 17 00:00:00 2001
From: Leonardo Potenza <lpotenza@inwind.it>
Date: Tue, 6 Jan 2009 23:58:29 +0100
Subject: x86: fix section mismatch warnings in mcheck/mce_amd_64.c

Mark the function local_allocate_threshold_blocks() with __cpuinit,
in order to remove the following section mismatch messages:

WARNING: arch/x86/kernel/cpu/mcheck/built-in.o(.text+0x1363): Section mismatch in reference from the function local_allocate_threshold_blocks() to the function .cpuinit.text:allocate_threshold_blocks()
The function local_allocate_threshold_blocks() references
the function __cpuinit allocate_threshold_blocks().
This is often because local_allocate_threshold_blocks lacks a __cpuinit
annotation or the annotation of allocate_threshold_blocks is wrong.

WARNING: arch/x86/kernel/cpu/built-in.o(.text+0x1def): Section mismatch in reference from the function local_allocate_threshold_blocks() to the function .cpuinit.text:allocate_threshold_blocks()
The function local_allocate_threshold_blocks() references
the function __cpuinit allocate_threshold_blocks().
This is often because local_allocate_threshold_blocks lacks a __cpuinit
annotation or the annotation of allocate_threshold_blocks is wrong.

WARNING: arch/x86/kernel/built-in.o(.text+0xef2b): Section mismatch in reference from the function local_allocate_threshold_blocks() to the function .cpuinit.text:allocate_threshold_blocks()
The function local_allocate_threshold_blocks() references
the function __cpuinit allocate_threshold_blocks().
This is often because local_allocate_threshold_blocks lacks a __cpuinit
annotation or the annotation of allocate_threshold_blocks is wrong.

All the callsites of this function are __cpuinit already, and all the
functions it calls are __cpuinit as well.

Signed-off-by: Leonardo Potenza <lpotenza@inwind.it>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/cpu/mcheck/mce_amd_64.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd_64.c b/arch/x86/kernel/cpu/mcheck/mce_amd_64.c
index a5a5e053037..8ae8c4ff094 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_amd_64.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd_64.c
@@ -462,7 +462,7 @@ out_free:
 	return err;
 }
 
-static long local_allocate_threshold_blocks(void *_bank)
+static __cpuinit long local_allocate_threshold_blocks(void *_bank)
 {
 	unsigned int *bank = _bank;
 
-- 
cgit v1.2.3


From 1a9271331ab663f3c7cda78d86b884f2ea86d4d7 Mon Sep 17 00:00:00 2001
From: Kay Sievers <kay.sievers@vrfy.org>
Date: Thu, 30 Oct 2008 02:17:49 +0100
Subject: PCI: struct device - replace bus_id with dev_name(), dev_set_name()

This patch is part of a larger patch series which will remove
the "char bus_id[20]" name string from struct device. The device
name is managed in the kobject anyway, and without any size
limitation, and just needlessly copied into "struct device".

To set and read the device name dev_name(dev) and dev_set_name(dev)
must be used. If your code uses static kobjects, which it shouldn't
do, "const char *init_name" can be used to statically provide the
name the registered device should have. At registration time, the
init_name field is cleared, to enforce the use of dev_name(dev) to
access the device name at a later time.

We need to get rid of all occurrences of bus_id in the entire tree
to be able to enable the new interface. Please apply this patch,
and possibly convert any remaining remaining occurrences of bus_id.

Acked-by: Greg Kroah-Hartman <gregkh@suse.de>
Signed-Off-By: Kay Sievers <kay.sievers@vrfy.org>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 arch/x86/kernel/pci-dma.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c
index 19a1044a0cd..b2542853314 100644
--- a/arch/x86/kernel/pci-dma.c
+++ b/arch/x86/kernel/pci-dma.c
@@ -38,7 +38,7 @@ EXPORT_SYMBOL(bad_dma_address);
    be probably a smaller DMA mask, but this is bug-to-bug compatible
    to older i386. */
 struct device x86_dma_fallback_dev = {
-	.bus_id = "fallback device",
+	.init_name = "fallback device",
 	.coherent_dma_mask = DMA_32BIT_MASK,
 	.dma_mask = &x86_dma_fallback_dev.coherent_dma_mask,
 };
-- 
cgit v1.2.3


From 0ef5f8f6159e44b4faa997be08d1a3bcbf44ad08 Mon Sep 17 00:00:00 2001
From: Andrew Patterson <andrew.patterson@hp.com>
Date: Mon, 10 Nov 2008 15:30:50 -0700
Subject: ACPI/PCI: PCI extended config _OSC support called when root bridge
 added

The _OSC capability OSC_EXT_PCI_CONFIG_SUPPORT is set when the root
bridge is added with pci_acpi_osc_support() if we can access PCI
extended config space.

This adds the function pci_ext_cfg_avail which returns true if we can
access PCI extended config space (offset greater than 0xff). It
currently only returns false if arch=x86 and raw_pci_ext_ops is not set
(which might happen if pci=nommcfg is set on the kernel command-line).

Signed-off-by: Andrew Patterson <andrew.patterson@hp.com>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 arch/x86/pci/common.c | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'arch/x86')

diff --git a/arch/x86/pci/common.c b/arch/x86/pci/common.c
index 62ddb73e09e..9ab8509f7b1 100644
--- a/arch/x86/pci/common.c
+++ b/arch/x86/pci/common.c
@@ -562,6 +562,14 @@ void pcibios_disable_device (struct pci_dev *dev)
 		pcibios_disable_irq(dev);
 }
 
+int pci_ext_cfg_avail(struct pci_dev *dev)
+{
+	if (raw_pci_ext_ops)
+		return 1;
+	else
+		return 0;
+}
+
 struct pci_bus * __devinit pci_scan_bus_on_node(int busno, struct pci_ops *ops, int node)
 {
 	struct pci_bus *bus = NULL;
-- 
cgit v1.2.3


From e8de1481fd7126ee9e93d6889da6f00c05e1e019 Mon Sep 17 00:00:00 2001
From: Arjan van de Ven <arjan@linux.intel.com>
Date: Wed, 22 Oct 2008 19:55:31 -0700
Subject: resource: allow MMIO exclusivity for device drivers

Device drivers that use pci_request_regions() (and similar APIs) have a
reasonable expectation that they are the only ones accessing their device.
As part of the e1000e hunt, we were afraid that some userland (X or some
bootsplash stuff) was mapping the MMIO region that the driver thought it
had exclusively via /dev/mem or via various sysfs resource mappings.

This patch adds the option for device drivers to cause their reserved
regions to the "banned from /dev/mem use" list, so now both kernel memory
and device-exclusive MMIO regions are banned.
NOTE: This is only active when CONFIG_STRICT_DEVMEM is set.

In addition to the config option, a kernel parameter iomem=relaxed is
provided for the cases where developers want to diagnose, in the field,
drivers issues from userspace.

Reviewed-by: Matthew Wilcox <willy@linux.intel.com>
Signed-off-by: Arjan van de Ven <arjan@linux.intel.com>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 arch/x86/mm/init_32.c | 2 ++
 arch/x86/mm/init_64.c | 2 ++
 2 files changed, 4 insertions(+)

(limited to 'arch/x86')

diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index 544d724caee..88f1b10de3b 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -328,6 +328,8 @@ int devmem_is_allowed(unsigned long pagenr)
 {
 	if (pagenr <= 256)
 		return 1;
+	if (iomem_is_exclusive(pagenr << PAGE_SHIFT))
+		return 0;
 	if (!page_is_ram(pagenr))
 		return 1;
 	return 0;
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 54c437e9654..23f68e77ad1 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -888,6 +888,8 @@ int devmem_is_allowed(unsigned long pagenr)
 {
 	if (pagenr <= 256)
 		return 1;
+	if (iomem_is_exclusive(pagenr << PAGE_SHIFT))
+		return 0;
 	if (!page_is_ram(pagenr))
 		return 1;
 	return 0;
-- 
cgit v1.2.3


From 104bafcfab7ce3031399e60069949f10acecc022 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Fri, 12 Dec 2008 06:49:40 +0100
Subject: PCI: Don't carp about BAR allocation failures in quiet boot

These are easy to trigger (more or less harmlessly) with multiple video
cards, since the ROM BAR will typically not be given any space by the
BIOS bridge setup.  No reason to punish quiet boot for this.

Signed-off-by: Adam Jackson <ajax@redhat.com>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 arch/x86/pci/i386.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/pci/i386.c b/arch/x86/pci/i386.c
index e51bf2cda4b..f884740da31 100644
--- a/arch/x86/pci/i386.c
+++ b/arch/x86/pci/i386.c
@@ -129,7 +129,7 @@ static void __init pcibios_allocate_bus_resources(struct list_head *bus_list)
 				pr = pci_find_parent_resource(dev, r);
 				if (!r->start || !pr ||
 				    request_resource(pr, r) < 0) {
-					dev_err(&dev->dev, "BAR %d: can't allocate resource\n", idx);
+					dev_info(&dev->dev, "BAR %d: can't allocate resource\n", idx);
 					/*
 					 * Something is wrong with the region.
 					 * Invalidate the resource to prevent
@@ -170,7 +170,7 @@ static void __init pcibios_allocate_resources(int pass)
 					r->flags, disabled, pass);
 				pr = pci_find_parent_resource(dev, r);
 				if (!pr || request_resource(pr, r) < 0) {
-					dev_err(&dev->dev, "BAR %d: can't allocate resource\n", idx);
+					dev_info(&dev->dev, "BAR %d: can't allocate resource\n", idx);
 					/* We'll assign a new address later */
 					r->end -= r->start;
 					r->start = 0;
-- 
cgit v1.2.3


From 0663a36284586ac9a9781be8aa7e8ca9fff16d06 Mon Sep 17 00:00:00 2001
From: Bjorn Helgaas <bjorn.helgaas@hp.com>
Date: Wed, 10 Dec 2008 13:12:00 -0700
Subject: x86/PCI: make PCI bus locality messages more meaningful

Change PCI bus locality messages so they have a bit more context
and look like the rest of PCI, e.g.,

    - bus 01 -> node 0
    - bus 04 -> node 0
    + pci 0000:01: bus on NUMA node 0
    + pci 0000:04: bus on NUMA node 0

Signed-off-by: Bjorn Helgaas <bjorn.helgaas@hp.com>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 arch/x86/pci/acpi.c | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/pci/acpi.c b/arch/x86/pci/acpi.c
index 9e5752fe4d1..6c7683709c4 100644
--- a/arch/x86/pci/acpi.c
+++ b/arch/x86/pci/acpi.c
@@ -210,11 +210,12 @@ struct pci_bus * __devinit pci_acpi_scan_root(struct acpi_device *device, int do
 	if (bus && node != -1) {
 #ifdef CONFIG_ACPI_NUMA
 		if (pxm >= 0)
-			printk(KERN_DEBUG "bus %02x -> pxm %d -> node %d\n",
-				busnum, pxm, node);
+			printk(KERN_DEBUG
+			       "pci %04x:%02x: bus on NUMA node %d (pxm %d)\n",
+				domain, busnum, node, pxm);
 #else
-		printk(KERN_DEBUG "bus %02x -> node %d\n",
-			busnum, node);
+		printk(KERN_DEBUG "pci %04x:%02x: bus on NUMA node %d\n",
+			domain, busnum, node);
 #endif
 	}
 
-- 
cgit v1.2.3


From 23a36002742bca87510201770a7d931c4aa63e97 Mon Sep 17 00:00:00 2001
From: Jacob Pan <jacob.jun.pan@intel.com>
Date: Mon, 8 Dec 2008 09:44:16 -0800
Subject: PCI: avoid early PCI mmconfig init if pci=noearly is given in cmdline

Early type 1 accesses can cause problems on some platforms, and
pci=noearly is supposed to prevent them from occurring.  However, early
mcfg probing code uses type 1 and  isn't protected by a check for
noearly.  This patch fixes that problem.

Signed-off-by: Jacob Pan <jacob.jun.pan@intel.com>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 arch/x86/pci/init.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'arch/x86')

diff --git a/arch/x86/pci/init.c b/arch/x86/pci/init.c
index bec3b048e72..25a1f8efed4 100644
--- a/arch/x86/pci/init.c
+++ b/arch/x86/pci/init.c
@@ -12,7 +12,8 @@ static __init int pci_arch_init(void)
 	type = pci_direct_probe();
 #endif
 
-	pci_mmcfg_early_init();
+	if (!(pci_probe & PCI_PROBE_NOEARLY))
+		pci_mmcfg_early_init();
 
 #ifdef CONFIG_PCI_OLPC
 	if (!pci_olpc_init())
-- 
cgit v1.2.3


From 878f2e50fd1cfea575cdca5bf019c2175dc64131 Mon Sep 17 00:00:00 2001
From: Bjorn Helgaas <bjorn.helgaas@hp.com>
Date: Tue, 9 Dec 2008 16:11:46 -0700
Subject: PCI: use config space encoding in pci_get_interrupt_pin()

This patch makes pci_get_interrupt_pin() return values encoded
the same way as the "Interrupt Pin" value in PCI config space,
i.e., 1=INTA, ..., 4=INTD.

pirq_bios_set() is the only in-tree caller of pci_get_interrupt_pin()
and pci_get_interrupt_pin() is not exported.

Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: hpa@zytor.com
Signed-off-by: Bjorn Helgaas <bjorn.helgaas@hp.com>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 arch/x86/pci/irq.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/x86')

diff --git a/arch/x86/pci/irq.c b/arch/x86/pci/irq.c
index 373b9afe6d4..399a172f047 100644
--- a/arch/x86/pci/irq.c
+++ b/arch/x86/pci/irq.c
@@ -533,7 +533,7 @@ static int pirq_bios_set(struct pci_dev *router, struct pci_dev *dev, int pirq,
 {
 	struct pci_dev *bridge;
 	int pin = pci_get_interrupt_pin(dev, &bridge);
-	return pcibios_set_irq_routing(bridge, pin, irq);
+	return pcibios_set_irq_routing(bridge, pin - 1, irq);
 }
 
 #endif
-- 
cgit v1.2.3


From f672c392b9c61bcdfb1247561d404b2c3ed4b0b3 Mon Sep 17 00:00:00 2001
From: Bjorn Helgaas <bjorn.helgaas@hp.com>
Date: Tue, 9 Dec 2008 16:11:51 -0700
Subject: x86/PCI: use config space encoding for interrupt pins

Keep "pin" encoded as it is in the "Interrupt Pin" value in PCI config
space, i.e., 0=device doesn't use interrupts, 1=INTA, ..., 4=INTD.

This makes the bridge INTx swizzle match other architectures.

Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: hpa@zytor.com
Signed-off-by: Bjorn Helgaas <bjorn.helgaas@hp.com>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 arch/x86/pci/irq.c | 44 ++++++++++++++++++++------------------------
 1 file changed, 20 insertions(+), 24 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/pci/irq.c b/arch/x86/pci/irq.c
index 399a172f047..cc9d5e25406 100644
--- a/arch/x86/pci/irq.c
+++ b/arch/x86/pci/irq.c
@@ -887,7 +887,6 @@ static int pcibios_lookup_irq(struct pci_dev *dev, int assign)
 		dev_dbg(&dev->dev, "no interrupt pin\n");
 		return 0;
 	}
-	pin = pin - 1;
 
 	/* Find IRQ routing entry */
 
@@ -897,17 +896,17 @@ static int pcibios_lookup_irq(struct pci_dev *dev, int assign)
 	info = pirq_get_info(dev);
 	if (!info) {
 		dev_dbg(&dev->dev, "PCI INT %c not found in routing table\n",
-			'A' + pin);
+			'A' + pin - 1);
 		return 0;
 	}
-	pirq = info->irq[pin].link;
-	mask = info->irq[pin].bitmap;
+	pirq = info->irq[pin - 1].link;
+	mask = info->irq[pin - 1].bitmap;
 	if (!pirq) {
-		dev_dbg(&dev->dev, "PCI INT %c not routed\n", 'A' + pin);
+		dev_dbg(&dev->dev, "PCI INT %c not routed\n", 'A' + pin - 1);
 		return 0;
 	}
 	dev_dbg(&dev->dev, "PCI INT %c -> PIRQ %02x, mask %04x, excl %04x",
-		'A' + pin, pirq, mask, pirq_table->exclusive_irqs);
+		'A' + pin - 1, pirq, mask, pirq_table->exclusive_irqs);
 	mask &= pcibios_irq_mask;
 
 	/* Work around broken HP Pavilion Notebooks which assign USB to
@@ -949,7 +948,7 @@ static int pcibios_lookup_irq(struct pci_dev *dev, int assign)
 				newirq = i;
 		}
 	}
-	dev_dbg(&dev->dev, "PCI INT %c -> newirq %d", 'A' + pin, newirq);
+	dev_dbg(&dev->dev, "PCI INT %c -> newirq %d", 'A' + pin - 1, newirq);
 
 	/* Check if it is hardcoded */
 	if ((pirq & 0xf0) == 0xf0) {
@@ -977,18 +976,18 @@ static int pcibios_lookup_irq(struct pci_dev *dev, int assign)
 			return 0;
 		}
 	}
-	dev_info(&dev->dev, "%s PCI INT %c -> IRQ %d\n", msg, 'A' + pin, irq);
+	dev_info(&dev->dev, "%s PCI INT %c -> IRQ %d\n", msg, 'A' + pin - 1, irq);
 
 	/* Update IRQ for all devices with the same pirq value */
 	while ((dev2 = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev2)) != NULL) {
 		pci_read_config_byte(dev2, PCI_INTERRUPT_PIN, &pin);
 		if (!pin)
 			continue;
-		pin--;
+
 		info = pirq_get_info(dev2);
 		if (!info)
 			continue;
-		if (info->irq[pin].link == pirq) {
+		if (info->irq[pin - 1].link == pirq) {
 			/*
 			 * We refuse to override the dev->irq
 			 * information. Give a warning!
@@ -1055,9 +1054,8 @@ static void __init pcibios_fixup_irqs(void)
 			/*
 			 * interrupt pins are numbered starting from 1
 			 */
-			pin--;
 			irq = IO_APIC_get_PCI_irq_vector(dev->bus->number,
-				PCI_SLOT(dev->devfn), pin);
+				PCI_SLOT(dev->devfn), pin - 1);
 			/*
 			 * Busses behind bridges are typically not listed in the
 			 * MP-table.  In this case we have to look up the IRQ
@@ -1070,22 +1068,22 @@ static void __init pcibios_fixup_irqs(void)
 				struct pci_dev *bridge = dev->bus->self;
 				int bus;
 
-				pin = (pin + PCI_SLOT(dev->devfn)) % 4;
+				pin = (((pin - 1) + PCI_SLOT(dev->devfn)) % 4) + 1;
 				bus = bridge->bus->number;
 				irq = IO_APIC_get_PCI_irq_vector(bus,
-						PCI_SLOT(bridge->devfn), pin);
+						PCI_SLOT(bridge->devfn), pin - 1);
 				if (irq >= 0)
 					dev_warn(&dev->dev,
 						"using bridge %s INT %c to "
 							"get IRQ %d\n",
 						 pci_name(bridge),
-						 'A' + pin, irq);
+						 'A' + pin - 1, irq);
 			}
 			if (irq >= 0) {
 				dev_info(&dev->dev,
 					"PCI->APIC IRQ transform: INT %c "
 						"-> IRQ %d\n",
-					'A' + pin, irq);
+					'A' + pin - 1, irq);
 				dev->irq = irq;
 			}
 		}
@@ -1220,12 +1218,10 @@ static int pirq_enable_irq(struct pci_dev *dev)
 	if (pin && !pcibios_lookup_irq(dev, 1) && !dev->irq) {
 		char *msg = "";
 
-		pin--; /* interrupt pins are numbered starting from 1 */
-
 		if (io_apic_assign_pci_irqs) {
 			int irq;
 
-			irq = IO_APIC_get_PCI_irq_vector(dev->bus->number, PCI_SLOT(dev->devfn), pin);
+			irq = IO_APIC_get_PCI_irq_vector(dev->bus->number, PCI_SLOT(dev->devfn), pin - 1);
 			/*
 			 * Busses behind bridges are typically not listed in the MP-table.
 			 * In this case we have to look up the IRQ based on the parent bus,
@@ -1236,20 +1232,20 @@ static int pirq_enable_irq(struct pci_dev *dev)
 			while (irq < 0 && dev->bus->parent) { /* go back to the bridge */
 				struct pci_dev *bridge = dev->bus->self;
 
-				pin = (pin + PCI_SLOT(dev->devfn)) % 4;
+				pin = (((pin - 1) + PCI_SLOT(dev->devfn)) % 4) + 1;
 				irq = IO_APIC_get_PCI_irq_vector(bridge->bus->number,
-						PCI_SLOT(bridge->devfn), pin);
+						PCI_SLOT(bridge->devfn), pin - 1);
 				if (irq >= 0)
 					dev_warn(&dev->dev, "using bridge %s "
 						 "INT %c to get IRQ %d\n",
-						 pci_name(bridge), 'A' + pin,
+						 pci_name(bridge), 'A' + pin - 1,
 						 irq);
 				dev = bridge;
 			}
 			dev = temp_dev;
 			if (irq >= 0) {
 				dev_info(&dev->dev, "PCI->APIC IRQ transform: "
-					 "INT %c -> IRQ %d\n", 'A' + pin, irq);
+					 "INT %c -> IRQ %d\n", 'A' + pin - 1, irq);
 				dev->irq = irq;
 				return 0;
 			} else
@@ -1268,7 +1264,7 @@ static int pirq_enable_irq(struct pci_dev *dev)
 			return 0;
 
 		dev_warn(&dev->dev, "can't find IRQ for PCI INT %c%s\n",
-			 'A' + pin, msg);
+			 'A' + pin - 1, msg);
 	}
 	return 0;
 }
-- 
cgit v1.2.3


From 12b955ff63db0b75cfc2d4939696c57b31891ec6 Mon Sep 17 00:00:00 2001
From: Bjorn Helgaas <bjorn.helgaas@hp.com>
Date: Tue, 9 Dec 2008 16:11:57 -0700
Subject: x86/PCI: minor logic simplications

Test "pin" immediately to simplify the subsequent code.

Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: hpa@zytor.com
Signed-off-by: Bjorn Helgaas <bjorn.helgaas@hp.com>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 arch/x86/pci/irq.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/pci/irq.c b/arch/x86/pci/irq.c
index cc9d5e25406..e1d605bfeb4 100644
--- a/arch/x86/pci/irq.c
+++ b/arch/x86/pci/irq.c
@@ -1041,6 +1041,9 @@ static void __init pcibios_fixup_irqs(void)
 	dev = NULL;
 	while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) {
 		pci_read_config_byte(dev, PCI_INTERRUPT_PIN, &pin);
+		if (!pin)
+			continue;
+
 #ifdef CONFIG_X86_IO_APIC
 		/*
 		 * Recalculate IRQ numbers if we use the I/O APIC.
@@ -1048,9 +1051,6 @@ static void __init pcibios_fixup_irqs(void)
 		if (io_apic_assign_pci_irqs) {
 			int irq;
 
-			if (!pin)
-				continue;
-
 			/*
 			 * interrupt pins are numbered starting from 1
 			 */
@@ -1091,7 +1091,7 @@ static void __init pcibios_fixup_irqs(void)
 		/*
 		 * Still no IRQ? Try to lookup one...
 		 */
-		if (pin && !dev->irq)
+		if (!dev->irq)
 			pcibios_lookup_irq(dev, 0);
 	}
 }
-- 
cgit v1.2.3


From b1c86792a0f3cf24a12c1ac7d452d665d90284b1 Mon Sep 17 00:00:00 2001
From: Bjorn Helgaas <bjorn.helgaas@hp.com>
Date: Tue, 9 Dec 2008 16:12:37 -0700
Subject: PCI: x86: use generic pci_swizzle_interrupt_pin()

Use the generic pci_swizzle_interrupt_pin() instead of arch-specific code.

Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: hpa@zytor.com
Signed-off-by: Bjorn Helgaas <bjorn.helgaas@hp.com>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 arch/x86/pci/irq.c   | 4 ++--
 arch/x86/pci/visws.c | 7 +------
 2 files changed, 3 insertions(+), 8 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/pci/irq.c b/arch/x86/pci/irq.c
index e1d605bfeb4..4064345cf14 100644
--- a/arch/x86/pci/irq.c
+++ b/arch/x86/pci/irq.c
@@ -1068,7 +1068,7 @@ static void __init pcibios_fixup_irqs(void)
 				struct pci_dev *bridge = dev->bus->self;
 				int bus;
 
-				pin = (((pin - 1) + PCI_SLOT(dev->devfn)) % 4) + 1;
+				pin = pci_swizzle_interrupt_pin(dev, pin);
 				bus = bridge->bus->number;
 				irq = IO_APIC_get_PCI_irq_vector(bus,
 						PCI_SLOT(bridge->devfn), pin - 1);
@@ -1232,7 +1232,7 @@ static int pirq_enable_irq(struct pci_dev *dev)
 			while (irq < 0 && dev->bus->parent) { /* go back to the bridge */
 				struct pci_dev *bridge = dev->bus->self;
 
-				pin = (((pin - 1) + PCI_SLOT(dev->devfn)) % 4) + 1;
+				pin = pci_swizzle_interrupt_pin(dev, pin);
 				irq = IO_APIC_get_PCI_irq_vector(bridge->bus->number,
 						PCI_SLOT(bridge->devfn), pin - 1);
 				if (irq >= 0)
diff --git a/arch/x86/pci/visws.c b/arch/x86/pci/visws.c
index 16d0c0eb0d1..2c54e7e03f5 100644
--- a/arch/x86/pci/visws.c
+++ b/arch/x86/pci/visws.c
@@ -24,17 +24,12 @@ static void pci_visws_disable_irq(struct pci_dev *dev) { }
 
 unsigned int pci_bus0, pci_bus1;
 
-static inline u8 bridge_swizzle(u8 pin, u8 slot) 
-{
-	return (((pin - 1) + slot) % 4) + 1;
-}
-
 static u8 __init visws_swizzle(struct pci_dev *dev, u8 *pinp)
 {
 	u8 pin = *pinp;
 
 	while (dev->bus->self) {	/* Move up the chain of bridges. */
-		pin = bridge_swizzle(pin, PCI_SLOT(dev->devfn));
+		pin = pci_swizzle_interrupt_pin(dev, pin);
 		dev = dev->bus->self;
 	}
 	*pinp = pin;
-- 
cgit v1.2.3


From 904d6a303361a85bfa4c8181ef62a24edb8da0a8 Mon Sep 17 00:00:00 2001
From: Bjorn Helgaas <bjorn.helgaas@hp.com>
Date: Tue, 16 Dec 2008 21:37:20 -0700
Subject: PCI: x86/visws: use generic INTx swizzle from PCI core

Use the generic pci_common_swizzle() instead of arch-specific code.

Signed-off-by: Bjorn Helgaas <bjorn.helgaas@hp.com>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 arch/x86/pci/visws.c | 15 +--------------
 1 file changed, 1 insertion(+), 14 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/pci/visws.c b/arch/x86/pci/visws.c
index 2c54e7e03f5..bcead7a4687 100644
--- a/arch/x86/pci/visws.c
+++ b/arch/x86/pci/visws.c
@@ -24,19 +24,6 @@ static void pci_visws_disable_irq(struct pci_dev *dev) { }
 
 unsigned int pci_bus0, pci_bus1;
 
-static u8 __init visws_swizzle(struct pci_dev *dev, u8 *pinp)
-{
-	u8 pin = *pinp;
-
-	while (dev->bus->self) {	/* Move up the chain of bridges. */
-		pin = pci_swizzle_interrupt_pin(dev, pin);
-		dev = dev->bus->self;
-	}
-	*pinp = pin;
-
-	return PCI_SLOT(dev->devfn);
-}
-
 static int __init visws_map_irq(struct pci_dev *dev, u8 slot, u8 pin)
 {
 	int irq, bus = dev->bus->number;
@@ -101,7 +88,7 @@ int __init pci_visws_init(void)
 	raw_pci_ops = &pci_direct_conf1;
 	pci_scan_bus_with_sysdata(pci_bus0);
 	pci_scan_bus_with_sysdata(pci_bus1);
-	pci_fixup_irqs(visws_swizzle, visws_map_irq);
+	pci_fixup_irqs(pci_common_swizzle, visws_map_irq);
 	pcibios_resource_survey();
 	return 0;
 }
-- 
cgit v1.2.3


From 2b8c2efe44ed897fc958131d70addc89876d806b Mon Sep 17 00:00:00 2001
From: Bjorn Helgaas <bjorn.helgaas@hp.com>
Date: Thu, 18 Dec 2008 16:34:51 -0700
Subject: x86/PCI: use dev_printk for PCI bus locality messages

Since pci_bus has a struct device, use dev_printk directly instead
of faking it by hand.

Signed-off-by: Bjorn Helgaas <bjorn.helgaas@hp.com>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 arch/x86/pci/acpi.c | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/pci/acpi.c b/arch/x86/pci/acpi.c
index 6c7683709c4..c0ecf250fe5 100644
--- a/arch/x86/pci/acpi.c
+++ b/arch/x86/pci/acpi.c
@@ -210,12 +210,10 @@ struct pci_bus * __devinit pci_acpi_scan_root(struct acpi_device *device, int do
 	if (bus && node != -1) {
 #ifdef CONFIG_ACPI_NUMA
 		if (pxm >= 0)
-			printk(KERN_DEBUG
-			       "pci %04x:%02x: bus on NUMA node %d (pxm %d)\n",
-				domain, busnum, node, pxm);
+			dev_printk(KERN_DEBUG, &bus->dev,
+				   "on NUMA node %d (pxm %d)\n", node, pxm);
 #else
-		printk(KERN_DEBUG "pci %04x:%02x: bus on NUMA node %d\n",
-			domain, busnum, node);
+		dev_printk(KERN_DEBUG, &bus->dev, "on NUMA node %d\n", node);
 #endif
 	}
 
-- 
cgit v1.2.3


From 16cf0ebc35dd63f72628ba1246132a6fd17bced2 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rjw@sisk.pl>
Date: Mon, 5 Jan 2009 14:50:27 +0100
Subject: x86/PCI: Do not use interrupt links for devices using MSI-X

pcibios_enable_device() and pcibios_disable_device() don't handle
IRQs for devices that have MSI enabled and it should treat the
devices with MSI-X enabled in the same way.

Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
Acked-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 arch/x86/pci/common.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/pci/common.c b/arch/x86/pci/common.c
index 9ab8509f7b1..82d22fc601a 100644
--- a/arch/x86/pci/common.c
+++ b/arch/x86/pci/common.c
@@ -551,14 +551,14 @@ int pcibios_enable_device(struct pci_dev *dev, int mask)
 	if ((err = pci_enable_resources(dev, mask)) < 0)
 		return err;
 
-	if (!dev->msi_enabled)
+	if (!pci_dev_msi_enabled(dev))
 		return pcibios_enable_irq(dev);
 	return 0;
 }
 
 void pcibios_disable_device (struct pci_dev *dev)
 {
-	if (!dev->msi_enabled && pcibios_disable_irq)
+	if (!pci_dev_msi_enabled(dev) && pcibios_disable_irq)
 		pcibios_disable_irq(dev);
 }
 
-- 
cgit v1.2.3


From fc81be8ca29e28bfb89aa23359036a8ad4118d0f Mon Sep 17 00:00:00 2001
From: Robert Richter <robert.richter@amd.com>
Date: Thu, 18 Dec 2008 00:28:27 +0100
Subject: oprofile: rename variable ibs_allowed to has_ibs in op_model_amd.c

This patch renames ibs_allowed to has_ibs. Varible name fits better
now.

Signed-off-by: Robert Richter <robert.richter@amd.com>
---
 arch/x86/oprofile/op_model_amd.c | 24 ++++++++++++------------
 1 file changed, 12 insertions(+), 12 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/oprofile/op_model_amd.c b/arch/x86/oprofile/op_model_amd.c
index c5b5c7fb3ce..423a95438cb 100644
--- a/arch/x86/oprofile/op_model_amd.c
+++ b/arch/x86/oprofile/op_model_amd.c
@@ -106,7 +106,7 @@ struct ibs_op_sample {
 	unsigned int ibs_dc_phys_high;
 };
 
-static int ibs_allowed;	/* AMD Family10h and later */
+static int has_ibs;	/* AMD Family10h and later */
 
 struct op_ibs_config {
 	unsigned long op_enabled;
@@ -201,7 +201,7 @@ op_amd_handle_ibs(struct pt_regs * const regs,
 	struct ibs_fetch_sample ibs_fetch;
 	struct ibs_op_sample ibs_op;
 
-	if (!ibs_allowed)
+	if (!has_ibs)
 		return 1;
 
 	if (ibs_config.fetch_enabled) {
@@ -305,14 +305,14 @@ static void op_amd_start(struct op_msrs const * const msrs)
 	}
 
 #ifdef CONFIG_OPROFILE_IBS
-	if (ibs_allowed && ibs_config.fetch_enabled) {
+	if (has_ibs && ibs_config.fetch_enabled) {
 		low = (ibs_config.max_cnt_fetch >> 4) & 0xFFFF;
 		high = ((ibs_config.rand_en & 0x1) << 25) /* bit 57 */
 			+ IBS_FETCH_HIGH_ENABLE;
 		wrmsr(MSR_AMD64_IBSFETCHCTL, low, high);
 	}
 
-	if (ibs_allowed && ibs_config.op_enabled) {
+	if (has_ibs && ibs_config.op_enabled) {
 		low = ((ibs_config.max_cnt_op >> 4) & 0xFFFF)
 			+ ((ibs_config.dispatched_ops & 0x1) << 19) /* bit 19 */
 			+ IBS_OP_LOW_ENABLE;
@@ -341,14 +341,14 @@ static void op_amd_stop(struct op_msrs const * const msrs)
 	}
 
 #ifdef CONFIG_OPROFILE_IBS
-	if (ibs_allowed && ibs_config.fetch_enabled) {
+	if (has_ibs && ibs_config.fetch_enabled) {
 		/* clear max count and enable */
 		low = 0;
 		high = 0;
 		wrmsr(MSR_AMD64_IBSFETCHCTL, low, high);
 	}
 
-	if (ibs_allowed && ibs_config.op_enabled) {
+	if (has_ibs && ibs_config.op_enabled) {
 		/* clear max count and enable */
 		low = 0;
 		high = 0;
@@ -437,20 +437,20 @@ static int init_ibs_nmi(void)
 /* uninitialize the APIC for the IBS interrupts if needed */
 static void clear_ibs_nmi(void)
 {
-	if (ibs_allowed)
+	if (has_ibs)
 		on_each_cpu(apic_clear_ibs_nmi_per_cpu, NULL, 1);
 }
 
 /* initialize the APIC for the IBS interrupts if available */
 static void ibs_init(void)
 {
-	ibs_allowed = boot_cpu_has(X86_FEATURE_IBS);
+	has_ibs = boot_cpu_has(X86_FEATURE_IBS);
 
-	if (!ibs_allowed)
+	if (!has_ibs)
 		return;
 
 	if (init_ibs_nmi()) {
-		ibs_allowed = 0;
+		has_ibs = 0;
 		return;
 	}
 
@@ -459,7 +459,7 @@ static void ibs_init(void)
 
 static void ibs_exit(void)
 {
-	if (!ibs_allowed)
+	if (!has_ibs)
 		return;
 
 	clear_ibs_nmi();
@@ -479,7 +479,7 @@ static int setup_ibs_files(struct super_block *sb, struct dentry *root)
 	if (ret)
 		return ret;
 
-	if (!ibs_allowed)
+	if (!has_ibs)
 		return ret;
 
 	/* model specific files */
-- 
cgit v1.2.3


From ae735e9964b4584923f2997d98a8d80ae9c1a75c Mon Sep 17 00:00:00 2001
From: Robert Richter <robert.richter@amd.com>
Date: Thu, 25 Dec 2008 17:26:07 +0100
Subject: oprofile: rework implementation of cpu buffer events

Special events such as task or context switches are marked with an
escape code in the cpu buffer followed by an event code or a task
identifier. There is one escape code per event. To make escape
sequences also available for data samples the internal cpu buffer
format must be changed. The current implementation does not allow the
extension of event codes since this would lead to collisions with the
task identifiers. To avoid this, this patch introduces an event mask
that allows the storage of multiple events with one escape code. Now,
task identifiers are stored in the data section of the sample. The
implementation also allows the usage of custom data in a sample. As a
side effect the new code is much more readable and easier to
understand.

Signed-off-by: Robert Richter <robert.richter@amd.com>
---
 arch/x86/oprofile/op_model_amd.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/oprofile/op_model_amd.c b/arch/x86/oprofile/op_model_amd.c
index 423a95438cb..f101724db80 100644
--- a/arch/x86/oprofile/op_model_amd.c
+++ b/arch/x86/oprofile/op_model_amd.c
@@ -2,7 +2,7 @@
  * @file op_model_amd.c
  * athlon / K7 / K8 / Family 10h model-specific MSR operations
  *
- * @remark Copyright 2002-2008 OProfile authors
+ * @remark Copyright 2002-2009 OProfile authors
  * @remark Read the file COPYING
  *
  * @author John Levon
@@ -10,7 +10,7 @@
  * @author Graydon Hoare
  * @author Robert Richter <robert.richter@amd.com>
  * @author Barry Kasindorf
-*/
+ */
 
 #include <linux/oprofile.h>
 #include <linux/device.h>
@@ -62,8 +62,8 @@ static unsigned long reset_value[NUM_COUNTERS];
 
 /* Codes used in cpu_buffer.c */
 /* This produces duplicate code, need to be fixed */
-#define IBS_FETCH_BEGIN 3
-#define IBS_OP_BEGIN    4
+#define IBS_FETCH_BEGIN         (1UL << 4)
+#define IBS_OP_BEGIN            (1UL << 5)
 
 /*
  * The function interface needs to be fixed, something like add
-- 
cgit v1.2.3


From 1acda878e20ea0cd3708ba66dca67d52eaafdd2b Mon Sep 17 00:00:00 2001
From: Robert Richter <robert.richter@amd.com>
Date: Mon, 5 Jan 2009 10:35:31 +0100
Subject: oprofile: use new data sample format for ibs

The new ring buffer implementation allows the storage of samples with
different size. This patch implements the usage of the new sample
format to store ibs samples in the cpu buffer. Until now, writing to
the cpu buffer could lead to incomplete sampling sequences since IBS
samples were transfered in multiple samples. Due to a full buffer,
data could be lost at any time. This can't happen any more since the
complete data is reserved in advance and then stored in a single
sample.

Signed-off-by: Robert Richter <robert.richter@amd.com>
---
 arch/x86/oprofile/op_model_amd.c | 119 ++++++++++++++-------------------------
 1 file changed, 41 insertions(+), 78 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/oprofile/op_model_amd.c b/arch/x86/oprofile/op_model_amd.c
index f101724db80..cf310aeb462 100644
--- a/arch/x86/oprofile/op_model_amd.c
+++ b/arch/x86/oprofile/op_model_amd.c
@@ -22,6 +22,7 @@
 
 #include "op_x86_model.h"
 #include "op_counter.h"
+#include "../../../drivers/oprofile/cpu_buffer.h"
 
 #define NUM_COUNTERS 4
 #define NUM_CONTROLS 4
@@ -60,51 +61,16 @@ static unsigned long reset_value[NUM_COUNTERS];
 #define IBS_OP_LOW_VALID_BIT		(1ULL<<18)	/* bit 18 */
 #define IBS_OP_LOW_ENABLE		(1ULL<<17)	/* bit 17 */
 
-/* Codes used in cpu_buffer.c */
-/* This produces duplicate code, need to be fixed */
-#define IBS_FETCH_BEGIN         (1UL << 4)
-#define IBS_OP_BEGIN            (1UL << 5)
-
 /*
  * The function interface needs to be fixed, something like add
  * data. Should then be added to linux/oprofile.h.
  */
-extern void
-oprofile_add_ibs_sample(struct pt_regs * const regs,
-			unsigned int * const ibs_sample, int ibs_code);
-
-struct ibs_fetch_sample {
-	/* MSRC001_1031 IBS Fetch Linear Address Register */
-	unsigned int ibs_fetch_lin_addr_low;
-	unsigned int ibs_fetch_lin_addr_high;
-	/* MSRC001_1030 IBS Fetch Control Register */
-	unsigned int ibs_fetch_ctl_low;
-	unsigned int ibs_fetch_ctl_high;
-	/* MSRC001_1032 IBS Fetch Physical Address Register */
-	unsigned int ibs_fetch_phys_addr_low;
-	unsigned int ibs_fetch_phys_addr_high;
-};
+extern
+void oprofile_add_data(struct op_entry *entry, struct pt_regs * const regs,
+		       unsigned long pc, int code, int size);
 
-struct ibs_op_sample {
-	/* MSRC001_1034 IBS Op Logical Address Register (IbsRIP) */
-	unsigned int ibs_op_rip_low;
-	unsigned int ibs_op_rip_high;
-	/* MSRC001_1035 IBS Op Data Register */
-	unsigned int ibs_op_data1_low;
-	unsigned int ibs_op_data1_high;
-	/* MSRC001_1036 IBS Op Data 2 Register */
-	unsigned int ibs_op_data2_low;
-	unsigned int ibs_op_data2_high;
-	/* MSRC001_1037 IBS Op Data 3 Register */
-	unsigned int ibs_op_data3_low;
-	unsigned int ibs_op_data3_high;
-	/* MSRC001_1038 IBS DC Linear Address Register (IbsDcLinAd) */
-	unsigned int ibs_dc_linear_low;
-	unsigned int ibs_dc_linear_high;
-	/* MSRC001_1039 IBS DC Physical Address Register (IbsDcPhysAd) */
-	unsigned int ibs_dc_phys_low;
-	unsigned int ibs_dc_phys_high;
-};
+#define IBS_FETCH_SIZE	6
+#define IBS_OP_SIZE	12
 
 static int has_ibs;	/* AMD Family10h and later */
 
@@ -197,9 +163,9 @@ static inline int
 op_amd_handle_ibs(struct pt_regs * const regs,
 		  struct op_msrs const * const msrs)
 {
-	unsigned int low, high;
-	struct ibs_fetch_sample ibs_fetch;
-	struct ibs_op_sample ibs_op;
+	u32 low, high;
+	u64 msr;
+	struct op_entry entry;
 
 	if (!has_ibs)
 		return 1;
@@ -207,21 +173,19 @@ op_amd_handle_ibs(struct pt_regs * const regs,
 	if (ibs_config.fetch_enabled) {
 		rdmsr(MSR_AMD64_IBSFETCHCTL, low, high);
 		if (high & IBS_FETCH_HIGH_VALID_BIT) {
-			ibs_fetch.ibs_fetch_ctl_high = high;
-			ibs_fetch.ibs_fetch_ctl_low = low;
-			rdmsr(MSR_AMD64_IBSFETCHLINAD, low, high);
-			ibs_fetch.ibs_fetch_lin_addr_high = high;
-			ibs_fetch.ibs_fetch_lin_addr_low = low;
-			rdmsr(MSR_AMD64_IBSFETCHPHYSAD, low, high);
-			ibs_fetch.ibs_fetch_phys_addr_high = high;
-			ibs_fetch.ibs_fetch_phys_addr_low = low;
-
-			oprofile_add_ibs_sample(regs,
-						(unsigned int *)&ibs_fetch,
-						IBS_FETCH_BEGIN);
+			rdmsrl(MSR_AMD64_IBSFETCHLINAD, msr);
+			oprofile_add_data(&entry, regs, msr, IBS_FETCH_CODE,
+					  IBS_FETCH_SIZE);
+			op_cpu_buffer_add_data(&entry, (u32)msr);
+			op_cpu_buffer_add_data(&entry, (u32)(msr >> 32));
+			op_cpu_buffer_add_data(&entry, low);
+			op_cpu_buffer_add_data(&entry, high);
+			rdmsrl(MSR_AMD64_IBSFETCHPHYSAD, msr);
+			op_cpu_buffer_add_data(&entry, (u32)msr);
+			op_cpu_buffer_add_data(&entry, (u32)(msr >> 32));
+			op_cpu_buffer_write_commit(&entry);
 
 			/* reenable the IRQ */
-			rdmsr(MSR_AMD64_IBSFETCHCTL, low, high);
 			high &= ~IBS_FETCH_HIGH_VALID_BIT;
 			high |= IBS_FETCH_HIGH_ENABLE;
 			low &= IBS_FETCH_LOW_MAX_CNT_MASK;
@@ -232,30 +196,29 @@ op_amd_handle_ibs(struct pt_regs * const regs,
 	if (ibs_config.op_enabled) {
 		rdmsr(MSR_AMD64_IBSOPCTL, low, high);
 		if (low & IBS_OP_LOW_VALID_BIT) {
-			rdmsr(MSR_AMD64_IBSOPRIP, low, high);
-			ibs_op.ibs_op_rip_low = low;
-			ibs_op.ibs_op_rip_high = high;
-			rdmsr(MSR_AMD64_IBSOPDATA, low, high);
-			ibs_op.ibs_op_data1_low = low;
-			ibs_op.ibs_op_data1_high = high;
-			rdmsr(MSR_AMD64_IBSOPDATA2, low, high);
-			ibs_op.ibs_op_data2_low = low;
-			ibs_op.ibs_op_data2_high = high;
-			rdmsr(MSR_AMD64_IBSOPDATA3, low, high);
-			ibs_op.ibs_op_data3_low = low;
-			ibs_op.ibs_op_data3_high = high;
-			rdmsr(MSR_AMD64_IBSDCLINAD, low, high);
-			ibs_op.ibs_dc_linear_low = low;
-			ibs_op.ibs_dc_linear_high = high;
-			rdmsr(MSR_AMD64_IBSDCPHYSAD, low, high);
-			ibs_op.ibs_dc_phys_low = low;
-			ibs_op.ibs_dc_phys_high = high;
+			rdmsrl(MSR_AMD64_IBSOPRIP, msr);
+			oprofile_add_data(&entry, regs, msr, IBS_OP_CODE,
+					  IBS_OP_SIZE);
+			op_cpu_buffer_add_data(&entry, (u32)msr);
+			op_cpu_buffer_add_data(&entry, (u32)(msr >> 32));
+			rdmsrl(MSR_AMD64_IBSOPDATA, msr);
+			op_cpu_buffer_add_data(&entry, (u32)msr);
+			op_cpu_buffer_add_data(&entry, (u32)(msr >> 32));
+			rdmsrl(MSR_AMD64_IBSOPDATA2, msr);
+			op_cpu_buffer_add_data(&entry, (u32)msr);
+			op_cpu_buffer_add_data(&entry, (u32)(msr >> 32));
+			rdmsrl(MSR_AMD64_IBSOPDATA3, msr);
+			op_cpu_buffer_add_data(&entry, (u32)msr);
+			op_cpu_buffer_add_data(&entry, (u32)(msr >> 32));
+			rdmsrl(MSR_AMD64_IBSDCLINAD, msr);
+			op_cpu_buffer_add_data(&entry, (u32)msr);
+			op_cpu_buffer_add_data(&entry, (u32)(msr >> 32));
+			rdmsrl(MSR_AMD64_IBSDCPHYSAD, msr);
+			op_cpu_buffer_add_data(&entry, (u32)msr);
+			op_cpu_buffer_add_data(&entry, (u32)(msr >> 32));
+			op_cpu_buffer_write_commit(&entry);
 
 			/* reenable the IRQ */
-			oprofile_add_ibs_sample(regs,
-						(unsigned int *)&ibs_op,
-						IBS_OP_BEGIN);
-			rdmsr(MSR_AMD64_IBSOPCTL, low, high);
 			high = 0;
 			low &= ~IBS_OP_LOW_VALID_BIT;
 			low |= IBS_OP_LOW_ENABLE;
-- 
cgit v1.2.3


From 14f0ca8eaea42a5b5a69cfcb699665dd2618db5f Mon Sep 17 00:00:00 2001
From: Robert Richter <robert.richter@amd.com>
Date: Wed, 7 Jan 2009 21:50:22 +0100
Subject: oprofile: make new cpu buffer functions part of the api

This patch creates the new functions

 oprofile_write_reserve()
 oprofile_add_data()
 oprofile_write_commit()

and makes them part of the oprofile api.

Signed-off-by: Robert Richter <robert.richter@amd.com>
---
 arch/x86/oprofile/op_model_amd.c | 57 +++++++++++++++++-----------------------
 1 file changed, 24 insertions(+), 33 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/oprofile/op_model_amd.c b/arch/x86/oprofile/op_model_amd.c
index cf310aeb462..8fdf06e4edf 100644
--- a/arch/x86/oprofile/op_model_amd.c
+++ b/arch/x86/oprofile/op_model_amd.c
@@ -22,7 +22,6 @@
 
 #include "op_x86_model.h"
 #include "op_counter.h"
-#include "../../../drivers/oprofile/cpu_buffer.h"
 
 #define NUM_COUNTERS 4
 #define NUM_CONTROLS 4
@@ -61,14 +60,6 @@ static unsigned long reset_value[NUM_COUNTERS];
 #define IBS_OP_LOW_VALID_BIT		(1ULL<<18)	/* bit 18 */
 #define IBS_OP_LOW_ENABLE		(1ULL<<17)	/* bit 17 */
 
-/*
- * The function interface needs to be fixed, something like add
- * data. Should then be added to linux/oprofile.h.
- */
-extern
-void oprofile_add_data(struct op_entry *entry, struct pt_regs * const regs,
-		       unsigned long pc, int code, int size);
-
 #define IBS_FETCH_SIZE	6
 #define IBS_OP_SIZE	12
 
@@ -174,16 +165,16 @@ op_amd_handle_ibs(struct pt_regs * const regs,
 		rdmsr(MSR_AMD64_IBSFETCHCTL, low, high);
 		if (high & IBS_FETCH_HIGH_VALID_BIT) {
 			rdmsrl(MSR_AMD64_IBSFETCHLINAD, msr);
-			oprofile_add_data(&entry, regs, msr, IBS_FETCH_CODE,
-					  IBS_FETCH_SIZE);
-			op_cpu_buffer_add_data(&entry, (u32)msr);
-			op_cpu_buffer_add_data(&entry, (u32)(msr >> 32));
-			op_cpu_buffer_add_data(&entry, low);
-			op_cpu_buffer_add_data(&entry, high);
+			oprofile_write_reserve(&entry, regs, msr,
+					       IBS_FETCH_CODE, IBS_FETCH_SIZE);
+			oprofile_add_data(&entry, (u32)msr);
+			oprofile_add_data(&entry, (u32)(msr >> 32));
+			oprofile_add_data(&entry, low);
+			oprofile_add_data(&entry, high);
 			rdmsrl(MSR_AMD64_IBSFETCHPHYSAD, msr);
-			op_cpu_buffer_add_data(&entry, (u32)msr);
-			op_cpu_buffer_add_data(&entry, (u32)(msr >> 32));
-			op_cpu_buffer_write_commit(&entry);
+			oprofile_add_data(&entry, (u32)msr);
+			oprofile_add_data(&entry, (u32)(msr >> 32));
+			oprofile_write_commit(&entry);
 
 			/* reenable the IRQ */
 			high &= ~IBS_FETCH_HIGH_VALID_BIT;
@@ -197,26 +188,26 @@ op_amd_handle_ibs(struct pt_regs * const regs,
 		rdmsr(MSR_AMD64_IBSOPCTL, low, high);
 		if (low & IBS_OP_LOW_VALID_BIT) {
 			rdmsrl(MSR_AMD64_IBSOPRIP, msr);
-			oprofile_add_data(&entry, regs, msr, IBS_OP_CODE,
-					  IBS_OP_SIZE);
-			op_cpu_buffer_add_data(&entry, (u32)msr);
-			op_cpu_buffer_add_data(&entry, (u32)(msr >> 32));
+			oprofile_write_reserve(&entry, regs, msr,
+					       IBS_OP_CODE, IBS_OP_SIZE);
+			oprofile_add_data(&entry, (u32)msr);
+			oprofile_add_data(&entry, (u32)(msr >> 32));
 			rdmsrl(MSR_AMD64_IBSOPDATA, msr);
-			op_cpu_buffer_add_data(&entry, (u32)msr);
-			op_cpu_buffer_add_data(&entry, (u32)(msr >> 32));
+			oprofile_add_data(&entry, (u32)msr);
+			oprofile_add_data(&entry, (u32)(msr >> 32));
 			rdmsrl(MSR_AMD64_IBSOPDATA2, msr);
-			op_cpu_buffer_add_data(&entry, (u32)msr);
-			op_cpu_buffer_add_data(&entry, (u32)(msr >> 32));
+			oprofile_add_data(&entry, (u32)msr);
+			oprofile_add_data(&entry, (u32)(msr >> 32));
 			rdmsrl(MSR_AMD64_IBSOPDATA3, msr);
-			op_cpu_buffer_add_data(&entry, (u32)msr);
-			op_cpu_buffer_add_data(&entry, (u32)(msr >> 32));
+			oprofile_add_data(&entry, (u32)msr);
+			oprofile_add_data(&entry, (u32)(msr >> 32));
 			rdmsrl(MSR_AMD64_IBSDCLINAD, msr);
-			op_cpu_buffer_add_data(&entry, (u32)msr);
-			op_cpu_buffer_add_data(&entry, (u32)(msr >> 32));
+			oprofile_add_data(&entry, (u32)msr);
+			oprofile_add_data(&entry, (u32)(msr >> 32));
 			rdmsrl(MSR_AMD64_IBSDCPHYSAD, msr);
-			op_cpu_buffer_add_data(&entry, (u32)msr);
-			op_cpu_buffer_add_data(&entry, (u32)(msr >> 32));
-			op_cpu_buffer_write_commit(&entry);
+			oprofile_add_data(&entry, (u32)msr);
+			oprofile_add_data(&entry, (u32)(msr >> 32));
+			oprofile_write_commit(&entry);
 
 			/* reenable the IRQ */
 			high = 0;
-- 
cgit v1.2.3


From 9b4778f680aa79d838ae2be6ab958938f744ce5f Mon Sep 17 00:00:00 2001
From: Harvey Harrison <harvey.harrison@gmail.com>
Date: Wed, 7 Jan 2009 14:42:41 -0800
Subject: trivial: replace last usages of __FUNCTION__ in kernel

__FUNCTION__ is gcc-specific, use __func__

Signed-off-by: Harvey Harrison <harvey.harrison@gmail.com>
Acked-by: Mauro Carvalho Chehab <mchehab@redhat.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/x86/mm/numa_32.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/mm/numa_32.c b/arch/x86/mm/numa_32.c
index 8518c678d83..d1f7439d173 100644
--- a/arch/x86/mm/numa_32.c
+++ b/arch/x86/mm/numa_32.c
@@ -239,7 +239,7 @@ void resume_map_numa_kva(pgd_t *pgd_base)
 		start_pfn = node_remap_start_pfn[node];
 		size = node_remap_size[node];
 
-		printk(KERN_DEBUG "%s: node %d\n", __FUNCTION__, node);
+		printk(KERN_DEBUG "%s: node %d\n", __func__, node);
 
 		for (pfn = 0; pfn < size; pfn += PTRS_PER_PTE) {
 			unsigned long vaddr = start_va + (pfn << PAGE_SHIFT);
@@ -251,7 +251,7 @@ void resume_map_numa_kva(pgd_t *pgd_base)
 						PAGE_KERNEL_LARGE_EXEC));
 
 			printk(KERN_DEBUG "%s: %08lx -> pfn %08lx\n",
-				__FUNCTION__, vaddr, start_pfn + pfn);
+				__func__, vaddr, start_pfn + pfn);
 		}
 	}
 }
-- 
cgit v1.2.3


From c19a28e1193a6c854738d609ae9b2fe2f6e6bea4 Mon Sep 17 00:00:00 2001
From: Fernando Carrijo <fcarrijo@yahoo.com.br>
Date: Wed, 7 Jan 2009 18:09:08 -0800
Subject: remove lots of double-semicolons

Cc: Ingo Molnar <mingo@elte.hu>
Cc: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Theodore Ts'o <tytso@mit.edu>
Acked-by: Mark Fasheh <mfasheh@suse.com>
Acked-by: David S. Miller <davem@davemloft.net>
Cc: James Morris <jmorris@namei.org>
Acked-by: Casey Schaufler <casey@schaufler-ca.com>
Acked-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/x86/kernel/cpu/cpufreq/longhaul.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/cpu/cpufreq/longhaul.c b/arch/x86/kernel/cpu/cpufreq/longhaul.c
index b0461856acf..a4cff5d6e38 100644
--- a/arch/x86/kernel/cpu/cpufreq/longhaul.c
+++ b/arch/x86/kernel/cpu/cpufreq/longhaul.c
@@ -982,7 +982,7 @@ static int __init longhaul_init(void)
 	case 10:
 		printk(KERN_ERR PFX "Use acpi-cpufreq driver for VIA C7\n");
 	default:
-		;;
+		;
 	}
 
 	return -ENODEV;
-- 
cgit v1.2.3


From 13b40a1a065824d2d4e55c8b48ea9f3f9d162929 Mon Sep 17 00:00:00 2001
From: Zhao Yakui <yakui.zhao@intel.com>
Date: Sun, 4 Jan 2009 12:04:21 +0800
Subject: ACPI: Avoid array address overflow when _CST MWAIT hint bits are set

The Cx Register address obtained from the _CST object is used as the MWAIT
hints if the register type is FFixedHW. And it is used to check whether
the Cx type is supported or not.

On some boxes the following Cx state package is obtained from _CST object:
    >{
                ResourceTemplate ()
                {
                    Register (FFixedHW,
                        0x01,               // Bit Width
                        0x02,               // Bit Offset
                        0x0000000000889759, // Address
                        0x03,               // Access Size
                        )
                },

                0x03,
                0xF5,
                0x015E }

   In such case we should use the bit[7:4] of Cx address to check whether
the Cx type is supported or not.

mask the MWAIT hint to avoid array address overflow

Signed-off-by: Zhao Yakui <yakui.zhao@intel.com>
Acked-by:Venki Pallipadi <venkatesh.pallipadi@intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>
---
 arch/x86/kernel/acpi/cstate.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/acpi/cstate.c b/arch/x86/kernel/acpi/cstate.c
index c2502eb9aa8..a4805b3b409 100644
--- a/arch/x86/kernel/acpi/cstate.c
+++ b/arch/x86/kernel/acpi/cstate.c
@@ -56,6 +56,7 @@ static struct cstate_entry *cpu_cstate_entry;	/* per CPU ptr */
 static short mwait_supported[ACPI_PROCESSOR_MAX_POWER];
 
 #define MWAIT_SUBSTATE_MASK	(0xf)
+#define MWAIT_CSTATE_MASK	(0xf)
 #define MWAIT_SUBSTATE_SIZE	(4)
 
 #define CPUID_MWAIT_LEAF (5)
@@ -98,7 +99,8 @@ int acpi_processor_ffh_cstate_probe(unsigned int cpu,
 	cpuid(CPUID_MWAIT_LEAF, &eax, &ebx, &ecx, &edx);
 
 	/* Check whether this particular cx_type (in CST) is supported or not */
-	cstate_type = (cx->address >> MWAIT_SUBSTATE_SIZE) + 1;
+	cstate_type = ((cx->address >> MWAIT_SUBSTATE_SIZE) &
+			MWAIT_CSTATE_MASK) + 1;
 	edx_part = edx >> (cstate_type * MWAIT_SUBSTATE_SIZE);
 	num_cstate_subtype = edx_part & MWAIT_SUBSTATE_MASK;
 
-- 
cgit v1.2.3


From 237889bf0a62f1399fb2ba0c2a259e6a96597131 Mon Sep 17 00:00:00 2001
From: Zhao Yakui <yakui.zhao@intel.com>
Date: Wed, 17 Dec 2008 16:55:18 +0800
Subject: ACPI : Use RSDT instead of XSDT by adding boot option of "acpi=rsdt"

On some boxes there exist both RSDT and XSDT table. But unfortunately
sometimes there exists the following error when XSDT table is used:
   a. 32/64X address mismatch
   b. The 32/64X FACS address mismatch

   In such case the boot option of "acpi=rsdt" is provided so that
RSDT is tried instead of XSDT table when the system can't work well.

http://bugzilla.kernel.org/show_bug.cgi?id=8246

Signed-off-by: Zhao Yakui <yakui.zhao@intel.com>
cc:Thomas Renninger <trenn@suse.de>
Signed-off-by: Len Brown <len.brown@intel.com>
---
 arch/x86/kernel/acpi/boot.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index 4c51a2f8fd3..db1a90a76b3 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -47,7 +47,7 @@
 #endif
 
 static int __initdata acpi_force = 0;
-
+u32 acpi_rsdt_forced;
 #ifdef	CONFIG_ACPI
 int acpi_disabled = 0;
 #else
@@ -1783,6 +1783,10 @@ static int __init parse_acpi(char *arg)
 			disable_acpi();
 		acpi_ht = 1;
 	}
+	/* acpi=rsdt use RSDT instead of XSDT */
+	else if (strcmp(arg, "rsdt") == 0) {
+		acpi_rsdt_forced = 1;
+	}
 	/* "acpi=noirq" disables ACPI interrupt routing */
 	else if (strcmp(arg, "noirq") == 0) {
 		acpi_noirq_set();
-- 
cgit v1.2.3


From 8659c406ade32f47da2c95889094801921d6330a Mon Sep 17 00:00:00 2001
From: Andi Kleen <andi@firstfloor.org>
Date: Fri, 9 Jan 2009 12:17:39 -0800
Subject: x86: only scan the root bus in early PCI quirks

We found a situation on Linus' machine that the Nvidia timer quirk hit on
a Intel chipset system.  The problem is that the system has a fancy Nvidia
card with an own PCI bridge, and the early-quirks code looking for any
NVidia bridge triggered on it incorrectly.  This didn't lead a boot
failure by luck, but the timer routing code selecting the wrong timer
first and some ugly messages.  It might lead to real problems on other
systems.

I checked all the devices which are currently checked for by early_quirks
and it turns out they are all located in the root bus zero.

So change the early-quirks loop to only scan bus 0.  This incidently also
saves quite some unnecessary scanning work, because early_quirks doesn't
go through all the non root busses.

The graphics card is not on bus 0, so it is not matched anymore.

Signed-off-by: Andi Kleen <ak@linux.intel.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Jesse Barnes <jbarnes@virtuousgeek.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/x86/kernel/early-quirks.c | 22 ++++++++++++++--------
 1 file changed, 14 insertions(+), 8 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/early-quirks.c b/arch/x86/kernel/early-quirks.c
index 744aa7fc49d..76b8cd953de 100644
--- a/arch/x86/kernel/early-quirks.c
+++ b/arch/x86/kernel/early-quirks.c
@@ -201,6 +201,12 @@ struct chipset {
 	void (*f)(int num, int slot, int func);
 };
 
+/*
+ * Only works for devices on the root bus. If you add any devices
+ * not on bus 0 readd another loop level in early_quirks(). But
+ * be careful because at least the Nvidia quirk here relies on
+ * only matching on bus 0.
+ */
 static struct chipset early_qrk[] __initdata = {
 	{ PCI_VENDOR_ID_NVIDIA, PCI_ANY_ID,
 	  PCI_CLASS_BRIDGE_PCI, PCI_ANY_ID, QFLAG_APPLY_ONCE, nvidia_bugs },
@@ -267,17 +273,17 @@ static int __init check_dev_quirk(int num, int slot, int func)
 
 void __init early_quirks(void)
 {
-	int num, slot, func;
+	int slot, func;
 
 	if (!early_pci_allowed())
 		return;
 
 	/* Poor man's PCI discovery */
-	for (num = 0; num < 32; num++)
-		for (slot = 0; slot < 32; slot++)
-			for (func = 0; func < 8; func++) {
-				/* Only probe function 0 on single fn devices */
-				if (check_dev_quirk(num, slot, func))
-					break;
-			}
+	/* Only scan the root bus */
+	for (slot = 0; slot < 32; slot++)
+		for (func = 0; func < 8; func++) {
+			/* Only probe function 0 on single fn devices */
+			if (check_dev_quirk(0, slot, func))
+				break;
+		}
 }
-- 
cgit v1.2.3


From c4295fbb6048d85f0b41c5ced5cbf63f6811c46c Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Fri, 9 Jan 2009 12:49:50 -0800
Subject: x86: make 'constant_test_bit()' take an unsigned bit number

Ingo noticed that using signed arithmetic seems to confuse the gcc
inliner, and make it potentially decide that it's all too complicated.

(Yeah, yeah, it's a constant. It's always positive. Still..)

Based-on: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/x86/include/asm/bitops.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/bitops.h b/arch/x86/include/asm/bitops.h
index 9fa9dcdf344..e02a359d2aa 100644
--- a/arch/x86/include/asm/bitops.h
+++ b/arch/x86/include/asm/bitops.h
@@ -300,7 +300,7 @@ static inline int test_and_change_bit(int nr, volatile unsigned long *addr)
 	return oldbit;
 }
 
-static inline int constant_test_bit(int nr, const volatile unsigned long *addr)
+static inline int constant_test_bit(unsigned int nr, const volatile unsigned long *addr)
 {
 	return ((1UL << (nr % BITS_PER_LONG)) &
 		(((unsigned long *)addr)[nr / BITS_PER_LONG])) != 0;
-- 
cgit v1.2.3