Diffstat (limited to 'arch/x86_64')
-rw-r--r--  arch/x86_64/boot/Makefile               2
-rw-r--r--  arch/x86_64/boot/compressed/misc.c      6
-rw-r--r--  arch/x86_64/defconfig                  65
-rw-r--r--  arch/x86_64/ia32/ia32entry.S           51
-rw-r--r--  arch/x86_64/ia32/sys_ia32.c             2
-rw-r--r--  arch/x86_64/kernel/Makefile             1
-rw-r--r--  arch/x86_64/kernel/acpi/sleep.c         1
-rw-r--r--  arch/x86_64/kernel/aperture.c           2
-rw-r--r--  arch/x86_64/kernel/apic.c              42
-rw-r--r--  arch/x86_64/kernel/asm-offsets.c        1
-rw-r--r--  arch/x86_64/kernel/crash.c              1
-rw-r--r--  arch/x86_64/kernel/e820.c               2
-rw-r--r--  arch/x86_64/kernel/early_printk.c      46
-rw-r--r--  arch/x86_64/kernel/entry.S            222
-rw-r--r--  arch/x86_64/kernel/genapic.c           18
-rw-r--r--  arch/x86_64/kernel/genapic_cluster.c    4
-rw-r--r--  arch/x86_64/kernel/genapic_flat.c      12
-rw-r--r--  arch/x86_64/kernel/head.S               2
-rw-r--r--  arch/x86_64/kernel/i8259.c             23
-rw-r--r--  arch/x86_64/kernel/init_task.c          2
-rw-r--r--  arch/x86_64/kernel/io_apic.c           34
-rw-r--r--  arch/x86_64/kernel/irq.c                1
-rw-r--r--  arch/x86_64/kernel/mce.c               42
-rw-r--r--  arch/x86_64/kernel/mpparse.c            5
-rw-r--r--  arch/x86_64/kernel/msr.c              279
-rw-r--r--  arch/x86_64/kernel/nmi.c                5
-rw-r--r--  arch/x86_64/kernel/pci-gart.c           8
-rw-r--r--  arch/x86_64/kernel/process.c           38
-rw-r--r--  arch/x86_64/kernel/setup.c             87
-rw-r--r--  arch/x86_64/kernel/setup64.c            3
-rw-r--r--  arch/x86_64/kernel/smp.c              129
-rw-r--r--  arch/x86_64/kernel/smpboot.c           47
-rw-r--r--  arch/x86_64/kernel/suspend.c           17
-rw-r--r--  arch/x86_64/kernel/time.c               2
-rw-r--r--  arch/x86_64/kernel/traps.c             57
-rw-r--r--  arch/x86_64/kernel/vsyscall.c           8
-rw-r--r--  arch/x86_64/mm/fault.c                 10
-rw-r--r--  arch/x86_64/mm/init.c                  79
-rw-r--r--  arch/x86_64/mm/k8topology.c            13
-rw-r--r--  arch/x86_64/mm/numa.c                   9
-rw-r--r--  arch/x86_64/mm/srat.c                  68
-rw-r--r--  arch/x86_64/pci/k8-bus.c               10
-rw-r--r--  arch/x86_64/pci/mmconfig.c              7
43 files changed, 714 insertions, 749 deletions
diff --git a/arch/x86_64/boot/Makefile b/arch/x86_64/boot/Makefile
index f4399c701b7..18c6e915d69 100644
--- a/arch/x86_64/boot/Makefile
+++ b/arch/x86_64/boot/Makefile
@@ -46,7 +46,7 @@ cmd_image = $(obj)/tools/build $(BUILDFLAGS) $(obj)/bootsect $(obj)/setup \
$(obj)/bzImage: $(obj)/bootsect $(obj)/setup \
$(obj)/vmlinux.bin $(obj)/tools/build FORCE
$(call if_changed,image)
- @echo 'Kernel: $@ is ready'
+ @echo 'Kernel: $@ is ready' ' (#'`cat .version`')'
$(obj)/vmlinux.bin: $(obj)/compressed/vmlinux FORCE
$(call if_changed,objcopy)
diff --git a/arch/x86_64/boot/compressed/misc.c b/arch/x86_64/boot/compressed/misc.c
index b38d5b8b5fb..0e10fd84c7c 100644
--- a/arch/x86_64/boot/compressed/misc.c
+++ b/arch/x86_64/boot/compressed/misc.c
@@ -83,7 +83,7 @@ static unsigned char *real_mode; /* Pointer to real-mode data */
#endif
#define SCREEN_INFO (*(struct screen_info *)(real_mode+0))
-extern char input_data[];
+extern unsigned char input_data[];
extern int input_len;
static long bytes_out = 0;
@@ -288,7 +288,7 @@ void setup_normal_output_buffer(void)
#else
if ((ALT_MEM_K > EXT_MEM_K ? ALT_MEM_K : EXT_MEM_K) < 1024) error("Less than 2MB of memory");
#endif
- output_data = (char *)__PHYSICAL_START; /* Normally Points to 1M */
+ output_data = (unsigned char *)__PHYSICAL_START; /* Normally Points to 1M */
free_mem_end_ptr = (long)real_mode;
}
@@ -305,7 +305,7 @@ void setup_output_buffer_if_we_run_high(struct moveparams *mv)
#else
if ((ALT_MEM_K > EXT_MEM_K ? ALT_MEM_K : EXT_MEM_K) < (3*1024)) error("Less than 4MB of memory");
#endif
- mv->low_buffer_start = output_data = (char *)LOW_BUFFER_START;
+ mv->low_buffer_start = output_data = (unsigned char *)LOW_BUFFER_START;
low_buffer_end = ((unsigned int)real_mode > LOW_BUFFER_MAX
? LOW_BUFFER_MAX : (unsigned int)real_mode) & ~0xfff;
low_buffer_size = low_buffer_end - LOW_BUFFER_START;
diff --git a/arch/x86_64/defconfig b/arch/x86_64/defconfig
index bf57e2362bf..f8db7e500fb 100644
--- a/arch/x86_64/defconfig
+++ b/arch/x86_64/defconfig
@@ -1,11 +1,12 @@
#
# Automatically generated make config: don't edit
-# Linux kernel version: 2.6.13-rc6-git3
-# Fri Aug 12 16:40:34 2005
+# Linux kernel version: 2.6.13-git11
+# Mon Sep 12 16:16:16 2005
#
CONFIG_X86_64=y
CONFIG_64BIT=y
CONFIG_X86=y
+CONFIG_SEMAPHORE_SLEEPERS=y
CONFIG_MMU=y
CONFIG_RWSEM_GENERIC_SPINLOCK=y
CONFIG_GENERIC_CALIBRATE_DELAY=y
@@ -13,6 +14,7 @@ CONFIG_X86_CMPXCHG=y
CONFIG_EARLY_PRINTK=y
CONFIG_GENERIC_ISA_DMA=y
CONFIG_GENERIC_IOMAP=y
+CONFIG_ARCH_MAY_HAVE_PC_FDC=y
#
# Code maturity level options
@@ -26,6 +28,7 @@ CONFIG_INIT_ENV_ARG_LIMIT=32
# General setup
#
CONFIG_LOCALVERSION=""
+CONFIG_LOCALVERSION_AUTO=y
CONFIG_SWAP=y
CONFIG_SYSVIPC=y
CONFIG_POSIX_MQUEUE=y
@@ -37,6 +40,7 @@ CONFIG_KOBJECT_UEVENT=y
CONFIG_IKCONFIG=y
CONFIG_IKCONFIG_PROC=y
# CONFIG_CPUSETS is not set
+CONFIG_INITRAMFS_SOURCE=""
# CONFIG_EMBEDDED is not set
CONFIG_KALLSYMS=y
CONFIG_KALLSYMS_ALL=y
@@ -102,6 +106,7 @@ CONFIG_DISCONTIGMEM_MANUAL=y
CONFIG_DISCONTIGMEM=y
CONFIG_FLAT_NODE_MEM_MAP=y
CONFIG_NEED_MULTIPLE_NODES=y
+# CONFIG_SPARSEMEM_STATIC is not set
CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID=y
CONFIG_HAVE_DEC_LOCK=y
CONFIG_NR_CPUS=32
@@ -122,6 +127,7 @@ CONFIG_HZ=250
CONFIG_GENERIC_HARDIRQS=y
CONFIG_GENERIC_IRQ_PROBE=y
CONFIG_ISA_DMA_API=y
+CONFIG_GENERIC_PENDING_IRQ=y
#
# Power management options
@@ -194,7 +200,6 @@ CONFIG_UNORDERED_IO=y
# CONFIG_PCIEPORTBUS is not set
CONFIG_PCI_MSI=y
# CONFIG_PCI_LEGACY_PROC is not set
-# CONFIG_PCI_NAMES is not set
# CONFIG_PCI_DEBUG is not set
#
@@ -234,7 +239,10 @@ CONFIG_INET=y
CONFIG_IP_MULTICAST=y
# CONFIG_IP_ADVANCED_ROUTER is not set
CONFIG_IP_FIB_HASH=y
-# CONFIG_IP_PNP is not set
+CONFIG_IP_PNP=y
+CONFIG_IP_PNP_DHCP=y
+# CONFIG_IP_PNP_BOOTP is not set
+# CONFIG_IP_PNP_RARP is not set
# CONFIG_NET_IPIP is not set
# CONFIG_NET_IPGRE is not set
# CONFIG_IP_MROUTE is not set
@@ -244,8 +252,8 @@ CONFIG_IP_FIB_HASH=y
# CONFIG_INET_ESP is not set
# CONFIG_INET_IPCOMP is not set
# CONFIG_INET_TUNNEL is not set
-CONFIG_IP_TCPDIAG=y
-CONFIG_IP_TCPDIAG_IPV6=y
+CONFIG_INET_DIAG=y
+CONFIG_INET_TCP_DIAG=y
# CONFIG_TCP_CONG_ADVANCED is not set
CONFIG_TCP_CONG_BIC=y
CONFIG_IPV6=y
@@ -258,6 +266,11 @@ CONFIG_IPV6=y
# CONFIG_NETFILTER is not set
#
+# DCCP Configuration (EXPERIMENTAL)
+#
+# CONFIG_IP_DCCP is not set
+
+#
# SCTP Configuration (EXPERIMENTAL)
#
# CONFIG_IP_SCTP is not set
@@ -280,9 +293,11 @@ CONFIG_IPV6=y
# Network testing
#
# CONFIG_NET_PKTGEN is not set
+# CONFIG_NETFILTER_NETLINK is not set
# CONFIG_HAMRADIO is not set
# CONFIG_IRDA is not set
# CONFIG_BT is not set
+# CONFIG_IEEE80211 is not set
#
# Device Drivers
@@ -329,7 +344,6 @@ CONFIG_BLK_DEV_RAM=y
CONFIG_BLK_DEV_RAM_COUNT=16
CONFIG_BLK_DEV_RAM_SIZE=4096
CONFIG_BLK_DEV_INITRD=y
-CONFIG_INITRAMFS_SOURCE=""
CONFIG_LBD=y
# CONFIG_CDROM_PKTCDVD is not set
@@ -409,6 +423,7 @@ CONFIG_IDEDMA_AUTO=y
#
# SCSI device support
#
+# CONFIG_RAID_ATTRS is not set
CONFIG_SCSI=y
# CONFIG_SCSI_PROC_FS is not set
@@ -432,7 +447,7 @@ CONFIG_BLK_DEV_SD=y
#
# SCSI Transport Attributes
#
-# CONFIG_SCSI_SPI_ATTRS is not set
+CONFIG_SCSI_SPI_ATTRS=y
# CONFIG_SCSI_FC_ATTRS is not set
# CONFIG_SCSI_ISCSI_ATTRS is not set
@@ -458,6 +473,7 @@ CONFIG_SCSI_SATA=y
# CONFIG_SCSI_SATA_AHCI is not set
# CONFIG_SCSI_SATA_SVW is not set
CONFIG_SCSI_ATA_PIIX=y
+# CONFIG_SCSI_SATA_MV is not set
# CONFIG_SCSI_SATA_NV is not set
# CONFIG_SCSI_SATA_PROMISE is not set
# CONFIG_SCSI_SATA_QSTOR is not set
@@ -537,6 +553,11 @@ CONFIG_TUN=y
# CONFIG_ARCNET is not set
#
+# PHY device support
+#
+# CONFIG_PHYLIB is not set
+
+#
# Ethernet (10 or 100Mbit)
#
CONFIG_NET_ETHERNET=y
@@ -586,6 +607,7 @@ CONFIG_E1000=y
# CONFIG_HAMACHI is not set
# CONFIG_YELLOWFIN is not set
# CONFIG_R8169 is not set
+# CONFIG_SIS190 is not set
# CONFIG_SKGE is not set
# CONFIG_SK98LIN is not set
# CONFIG_VIA_VELOCITY is not set
@@ -595,6 +617,7 @@ CONFIG_TIGON3=y
#
# Ethernet (10000 Mbit)
#
+# CONFIG_CHELSIO_T1 is not set
# CONFIG_IXGB is not set
CONFIG_S2IO=m
# CONFIG_S2IO_NAPI is not set
@@ -749,7 +772,6 @@ CONFIG_MAX_RAW_DEVS=256
# I2C support
#
# CONFIG_I2C is not set
-# CONFIG_I2C_SENSOR is not set
#
# Dallas's 1-wire bus
@@ -760,6 +782,7 @@ CONFIG_MAX_RAW_DEVS=256
# Hardware Monitoring support
#
CONFIG_HWMON=y
+# CONFIG_HWMON_VID is not set
# CONFIG_HWMON_DEBUG_CHIP is not set
#
@@ -768,6 +791,10 @@ CONFIG_HWMON=y
# CONFIG_IBM_ASM is not set
#
+# Multimedia Capabilities Port drivers
+#
+
+#
# Multimedia devices
#
# CONFIG_VIDEO_DEV is not set
@@ -858,9 +885,8 @@ CONFIG_USB_UHCI_HCD=y
#
# USB Device Class drivers
#
-# CONFIG_USB_AUDIO is not set
+# CONFIG_OBSOLETE_OSS_USB_DRIVER is not set
# CONFIG_USB_BLUETOOTH_TTY is not set
-# CONFIG_USB_MIDI is not set
# CONFIG_USB_ACM is not set
CONFIG_USB_PRINTER=y
@@ -877,6 +903,7 @@ CONFIG_USB_STORAGE=y
# CONFIG_USB_STORAGE_SDDR09 is not set
# CONFIG_USB_STORAGE_SDDR55 is not set
# CONFIG_USB_STORAGE_JUMPSHOT is not set
+# CONFIG_USB_STORAGE_ONETOUCH is not set
#
# USB Input Devices
@@ -893,6 +920,7 @@ CONFIG_USB_HIDINPUT=y
# CONFIG_USB_MTOUCH is not set
# CONFIG_USB_ITMTOUCH is not set
# CONFIG_USB_EGALAX is not set
+# CONFIG_USB_YEALINK is not set
# CONFIG_USB_XPAD is not set
# CONFIG_USB_ATI_REMOTE is not set
# CONFIG_USB_KEYSPAN_REMOTE is not set
@@ -976,6 +1004,8 @@ CONFIG_USB_MON=y
# Firmware Drivers
#
# CONFIG_EDD is not set
+# CONFIG_DELL_RBU is not set
+CONFIG_DCDBAS=m
#
# File systems
@@ -1000,10 +1030,6 @@ CONFIG_REISERFS_FS_POSIX_ACL=y
# CONFIG_REISERFS_FS_SECURITY is not set
# CONFIG_JFS_FS is not set
CONFIG_FS_POSIX_ACL=y
-
-#
-# XFS support
-#
# CONFIG_XFS_FS is not set
# CONFIG_MINIX_FS is not set
# CONFIG_ROMFS_FS is not set
@@ -1012,6 +1038,7 @@ CONFIG_INOTIFY=y
CONFIG_DNOTIFY=y
CONFIG_AUTOFS_FS=y
# CONFIG_AUTOFS4_FS is not set
+# CONFIG_FUSE_FS is not set
#
# CD-ROM/DVD Filesystems
@@ -1037,12 +1064,11 @@ CONFIG_FAT_DEFAULT_IOCHARSET="iso8859-1"
CONFIG_PROC_FS=y
CONFIG_PROC_KCORE=y
CONFIG_SYSFS=y
-# CONFIG_DEVPTS_FS_XATTR is not set
CONFIG_TMPFS=y
-# CONFIG_TMPFS_XATTR is not set
CONFIG_HUGETLBFS=y
CONFIG_HUGETLB_PAGE=y
CONFIG_RAMFS=y
+# CONFIG_RELAYFS_FS is not set
#
# Miscellaneous filesystems
@@ -1074,6 +1100,7 @@ CONFIG_NFSD_V3=y
# CONFIG_NFSD_V3_ACL is not set
# CONFIG_NFSD_V4 is not set
CONFIG_NFSD_TCP=y
+CONFIG_ROOT_NFS=y
CONFIG_LOCKD=y
CONFIG_LOCKD_V4=y
CONFIG_EXPORTFS=y
@@ -1086,6 +1113,7 @@ CONFIG_SUNRPC=y
# CONFIG_NCP_FS is not set
# CONFIG_CODA_FS is not set
# CONFIG_AFS_FS is not set
+# CONFIG_9P_FS is not set
#
# Partition Types
@@ -1150,6 +1178,7 @@ CONFIG_OPROFILE=y
CONFIG_DEBUG_KERNEL=y
CONFIG_MAGIC_SYSRQ=y
CONFIG_LOG_BUF_SHIFT=18
+CONFIG_DETECT_SOFTLOCKUP=y
# CONFIG_SCHEDSTATS is not set
# CONFIG_DEBUG_SLAB is not set
# CONFIG_DEBUG_SPINLOCK is not set
@@ -1157,6 +1186,7 @@ CONFIG_LOG_BUF_SHIFT=18
# CONFIG_DEBUG_KOBJECT is not set
# CONFIG_DEBUG_INFO is not set
CONFIG_DEBUG_FS=y
+# CONFIG_FRAME_POINTER is not set
CONFIG_INIT_DEBUG=y
# CONFIG_IOMMU_DEBUG is not set
CONFIG_KPROBES=y
@@ -1180,5 +1210,6 @@ CONFIG_KPROBES=y
# Library routines
#
# CONFIG_CRC_CCITT is not set
+# CONFIG_CRC16 is not set
CONFIG_CRC32=y
# CONFIG_LIBCRC32C is not set
diff --git a/arch/x86_64/ia32/ia32entry.S b/arch/x86_64/ia32/ia32entry.S
index 5244f803203..e0eb0c712fe 100644
--- a/arch/x86_64/ia32/ia32entry.S
+++ b/arch/x86_64/ia32/ia32entry.S
@@ -55,20 +55,34 @@
* with the int 0x80 path.
*/
ENTRY(ia32_sysenter_target)
- CFI_STARTPROC
+ CFI_STARTPROC simple
+ CFI_DEF_CFA rsp,0
+ CFI_REGISTER rsp,rbp
swapgs
movq %gs:pda_kernelstack, %rsp
addq $(PDA_STACKOFFSET),%rsp
sti
movl %ebp,%ebp /* zero extension */
pushq $__USER32_DS
+ CFI_ADJUST_CFA_OFFSET 8
+ /*CFI_REL_OFFSET ss,0*/
pushq %rbp
+ CFI_ADJUST_CFA_OFFSET 8
+ CFI_REL_OFFSET rsp,0
pushfq
+ CFI_ADJUST_CFA_OFFSET 8
+ /*CFI_REL_OFFSET rflags,0*/
movl $VSYSCALL32_SYSEXIT, %r10d
+ CFI_REGISTER rip,r10
pushq $__USER32_CS
+ CFI_ADJUST_CFA_OFFSET 8
+ /*CFI_REL_OFFSET cs,0*/
movl %eax, %eax
pushq %r10
+ CFI_ADJUST_CFA_OFFSET 8
+ CFI_REL_OFFSET rip,0
pushq %rax
+ CFI_ADJUST_CFA_OFFSET 8
cld
SAVE_ARGS 0,0,1
/* no need to do an access_ok check here because rbp has been
@@ -79,6 +93,7 @@ ENTRY(ia32_sysenter_target)
.previous
GET_THREAD_INFO(%r10)
testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP),threadinfo_flags(%r10)
+ CFI_REMEMBER_STATE
jnz sysenter_tracesys
sysenter_do_call:
cmpl $(IA32_NR_syscalls),%eax
@@ -94,14 +109,20 @@ sysenter_do_call:
andl $~0x200,EFLAGS-R11(%rsp)
RESTORE_ARGS 1,24,1,1,1,1
popfq
+ CFI_ADJUST_CFA_OFFSET -8
+ /*CFI_RESTORE rflags*/
popq %rcx /* User %esp */
+ CFI_ADJUST_CFA_OFFSET -8
+ CFI_REGISTER rsp,rcx
movl $VSYSCALL32_SYSEXIT,%edx /* User %eip */
+ CFI_REGISTER rip,rdx
swapgs
sti /* sti only takes effect after the next instruction */
/* sysexit */
.byte 0xf, 0x35
sysenter_tracesys:
+ CFI_RESTORE_STATE
SAVE_REST
CLEAR_RREGS
movq $-ENOSYS,RAX(%rsp) /* really needed? */
@@ -140,21 +161,28 @@ sysenter_tracesys:
* with the int 0x80 path.
*/
ENTRY(ia32_cstar_target)
- CFI_STARTPROC
+ CFI_STARTPROC simple
+ CFI_DEF_CFA rsp,0
+ CFI_REGISTER rip,rcx
+ /*CFI_REGISTER rflags,r11*/
swapgs
movl %esp,%r8d
+ CFI_REGISTER rsp,r8
movq %gs:pda_kernelstack,%rsp
sti
SAVE_ARGS 8,1,1
movl %eax,%eax /* zero extension */
movq %rax,ORIG_RAX-ARGOFFSET(%rsp)
movq %rcx,RIP-ARGOFFSET(%rsp)
+ CFI_REL_OFFSET rip,RIP-ARGOFFSET
movq %rbp,RCX-ARGOFFSET(%rsp) /* this lies slightly to ptrace */
movl %ebp,%ecx
movq $__USER32_CS,CS-ARGOFFSET(%rsp)
movq $__USER32_DS,SS-ARGOFFSET(%rsp)
movq %r11,EFLAGS-ARGOFFSET(%rsp)
+ /*CFI_REL_OFFSET rflags,EFLAGS-ARGOFFSET*/
movq %r8,RSP-ARGOFFSET(%rsp)
+ CFI_REL_OFFSET rsp,RSP-ARGOFFSET
/* no need to do an access_ok check here because r8 has been
32bit zero extended */
/* hardware stack frame is complete now */
@@ -164,6 +192,7 @@ ENTRY(ia32_cstar_target)
.previous
GET_THREAD_INFO(%r10)
testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP),threadinfo_flags(%r10)
+ CFI_REMEMBER_STATE
jnz cstar_tracesys
cstar_do_call:
cmpl $IA32_NR_syscalls,%eax
@@ -177,12 +206,16 @@ cstar_do_call:
jnz int_ret_from_sys_call
RESTORE_ARGS 1,-ARG_SKIP,1,1,1
movl RIP-ARGOFFSET(%rsp),%ecx
+ CFI_REGISTER rip,rcx
movl EFLAGS-ARGOFFSET(%rsp),%r11d
+ /*CFI_REGISTER rflags,r11*/
movl RSP-ARGOFFSET(%rsp),%esp
+ CFI_RESTORE rsp
swapgs
sysretl
cstar_tracesys:
+ CFI_RESTORE_STATE
SAVE_REST
CLEAR_RREGS
movq $-ENOSYS,RAX(%rsp) /* really needed? */
@@ -226,11 +259,18 @@ ia32_badarg:
*/
ENTRY(ia32_syscall)
- CFI_STARTPROC
+ CFI_STARTPROC simple
+ CFI_DEF_CFA rsp,SS+8-RIP
+ /*CFI_REL_OFFSET ss,SS-RIP*/
+ CFI_REL_OFFSET rsp,RSP-RIP
+ /*CFI_REL_OFFSET rflags,EFLAGS-RIP*/
+ /*CFI_REL_OFFSET cs,CS-RIP*/
+ CFI_REL_OFFSET rip,RIP-RIP
swapgs
sti
movl %eax,%eax
pushq %rax
+ CFI_ADJUST_CFA_OFFSET 8
cld
/* note the registers are not zero extended to the sf.
this could be a problem. */
@@ -278,6 +318,8 @@ quiet_ni_syscall:
jmp ia32_ptregs_common
.endm
+ CFI_STARTPROC
+
PTREGSCALL stub32_rt_sigreturn, sys32_rt_sigreturn, %rdi
PTREGSCALL stub32_sigreturn, sys32_sigreturn, %rdi
PTREGSCALL stub32_sigaltstack, sys32_sigaltstack, %rdx
@@ -290,8 +332,9 @@ quiet_ni_syscall:
PTREGSCALL stub32_rt_sigsuspend, sys_rt_sigsuspend, %rdx
ENTRY(ia32_ptregs_common)
- CFI_STARTPROC
popq %r11
+ CFI_ADJUST_CFA_OFFSET -8
+ CFI_REGISTER rip, r11
SAVE_REST
call *%rax
RESTORE_REST
diff --git a/arch/x86_64/ia32/sys_ia32.c b/arch/x86_64/ia32/sys_ia32.c
index 04d80406ce4..5389df610e7 100644
--- a/arch/x86_64/ia32/sys_ia32.c
+++ b/arch/x86_64/ia32/sys_ia32.c
@@ -751,7 +751,7 @@ sys32_sendfile(int out_fd, int in_fd, compat_off_t __user *offset, s32 count)
ret = sys_sendfile(out_fd, in_fd, offset ? &of : NULL, count);
set_fs(old_fs);
- if (!ret && offset && put_user(of, offset))
+ if (offset && put_user(of, offset))
return -EFAULT;
return ret;
diff --git a/arch/x86_64/kernel/Makefile b/arch/x86_64/kernel/Makefile
index 1579bdd0adc..bcdd0a805fe 100644
--- a/arch/x86_64/kernel/Makefile
+++ b/arch/x86_64/kernel/Makefile
@@ -46,3 +46,4 @@ microcode-$(subst m,y,$(CONFIG_MICROCODE)) += ../../i386/kernel/microcode.o
intel_cacheinfo-y += ../../i386/kernel/cpu/intel_cacheinfo.o
quirks-y += ../../i386/kernel/quirks.o
i8237-y += ../../i386/kernel/i8237.o
+msr-$(subst m,y,$(CONFIG_X86_MSR)) += ../../i386/kernel/msr.o
diff --git a/arch/x86_64/kernel/acpi/sleep.c b/arch/x86_64/kernel/acpi/sleep.c
index 148f6f7ea31..867a0ebee17 100644
--- a/arch/x86_64/kernel/acpi/sleep.c
+++ b/arch/x86_64/kernel/acpi/sleep.c
@@ -34,7 +34,6 @@
#include <linux/slab.h>
#include <linux/pci.h>
#include <linux/bootmem.h>
-#include <linux/irq.h>
#include <linux/acpi.h>
#include <asm/mpspec.h>
#include <asm/io.h>
diff --git a/arch/x86_64/kernel/aperture.c b/arch/x86_64/kernel/aperture.c
index c9a6b812e92..962ad4823b6 100644
--- a/arch/x86_64/kernel/aperture.c
+++ b/arch/x86_64/kernel/aperture.c
@@ -245,6 +245,8 @@ void __init iommu_hole_init(void)
if (aper_alloc) {
/* Got the aperture from the AGP bridge */
+ } else if (swiotlb && !valid_agp) {
+ /* Do nothing */
} else if ((!no_iommu && end_pfn >= 0xffffffff>>PAGE_SHIFT) ||
force_iommu ||
valid_agp ||
diff --git a/arch/x86_64/kernel/apic.c b/arch/x86_64/kernel/apic.c
index 375d369570c..b6e7715d877 100644
--- a/arch/x86_64/kernel/apic.c
+++ b/arch/x86_64/kernel/apic.c
@@ -18,7 +18,6 @@
#include <linux/init.h>
#include <linux/mm.h>
-#include <linux/irq.h>
#include <linux/delay.h>
#include <linux/bootmem.h>
#include <linux/smp_lock.h>
@@ -109,11 +108,8 @@ void clear_local_APIC(void)
if (maxlvt >= 4)
apic_write_around(APIC_LVTPC, APIC_LVT_MASKED);
v = GET_APIC_VERSION(apic_read(APIC_LVR));
- if (APIC_INTEGRATED(v)) { /* !82489DX */
- if (maxlvt > 3) /* Due to Pentium errata 3AP and 11AP. */
- apic_write(APIC_ESR, 0);
- apic_read(APIC_ESR);
- }
+ apic_write(APIC_ESR, 0);
+ apic_read(APIC_ESR);
}
void __init connect_bsp_APIC(void)
@@ -316,8 +312,6 @@ void __init init_bsp_APIC(void)
*/
apic_write_around(APIC_LVT0, APIC_DM_EXTINT);
value = APIC_DM_NMI;
- if (!APIC_INTEGRATED(ver)) /* 82489DX */
- value |= APIC_LVT_LEVEL_TRIGGER;
apic_write_around(APIC_LVT1, value);
}
@@ -325,14 +319,6 @@ void __cpuinit setup_local_APIC (void)
{
unsigned int value, ver, maxlvt;
- /* Pound the ESR really hard over the head with a big hammer - mbligh */
- if (esr_disable) {
- apic_write(APIC_ESR, 0);
- apic_write(APIC_ESR, 0);
- apic_write(APIC_ESR, 0);
- apic_write(APIC_ESR, 0);
- }
-
value = apic_read(APIC_LVR);
ver = GET_APIC_VERSION(value);
@@ -430,15 +416,11 @@ void __cpuinit setup_local_APIC (void)
value = APIC_DM_NMI;
else
value = APIC_DM_NMI | APIC_LVT_MASKED;
- if (!APIC_INTEGRATED(ver)) /* 82489DX */
- value |= APIC_LVT_LEVEL_TRIGGER;
apic_write_around(APIC_LVT1, value);
- if (APIC_INTEGRATED(ver) && !esr_disable) { /* !82489DX */
+ {
unsigned oldvalue;
maxlvt = get_maxlvt();
- if (maxlvt > 3) /* Due to the Pentium erratum 3AP. */
- apic_write(APIC_ESR, 0);
oldvalue = apic_read(APIC_ESR);
value = ERROR_APIC_VECTOR; // enables sending errors
apic_write_around(APIC_LVTERR, value);
@@ -452,17 +434,6 @@ void __cpuinit setup_local_APIC (void)
apic_printk(APIC_VERBOSE,
"ESR value after enabling vector: %08x, after %08x\n",
oldvalue, value);
- } else {
- if (esr_disable)
- /*
- * Something untraceble is creating bad interrupts on
- * secondary quads ... for the moment, just leave the
- * ESR disabled - we can't do anything useful with the
- * errors anyway - mbligh
- */
- apic_printk(APIC_DEBUG, "Leaving ESR disabled.\n");
- else
- apic_printk(APIC_DEBUG, "No ESR for 82489DX.\n");
}
nmi_watchdog_default();
@@ -650,8 +621,7 @@ void __init init_apic_mappings(void)
* Fetch the APIC ID of the BSP in case we have a
* default configuration (or the MP table is broken).
*/
- if (boot_cpu_id == -1U)
- boot_cpu_id = GET_APIC_ID(apic_read(APIC_ID));
+ boot_cpu_id = GET_APIC_ID(apic_read(APIC_ID));
#ifdef CONFIG_X86_IO_APIC
{
@@ -693,8 +663,6 @@ static void __setup_APIC_LVTT(unsigned int clocks)
ver = GET_APIC_VERSION(apic_read(APIC_LVR));
lvtt_value = APIC_LVT_TIMER_PERIODIC | LOCAL_TIMER_VECTOR;
- if (!APIC_INTEGRATED(ver))
- lvtt_value |= SET_APIC_TIMER_BASE(APIC_TIMER_BASE_DIV);
apic_write_around(APIC_LVTT, lvtt_value);
/*
@@ -1081,7 +1049,7 @@ int __init APIC_init_uniprocessor (void)
connect_bsp_APIC();
- phys_cpu_present_map = physid_mask_of_physid(0);
+ phys_cpu_present_map = physid_mask_of_physid(boot_cpu_id);
apic_write_around(APIC_ID, boot_cpu_id);
setup_local_APIC();
diff --git a/arch/x86_64/kernel/asm-offsets.c b/arch/x86_64/kernel/asm-offsets.c
index 35b4c3fcbb3..aaa6d383351 100644
--- a/arch/x86_64/kernel/asm-offsets.c
+++ b/arch/x86_64/kernel/asm-offsets.c
@@ -39,7 +39,6 @@ int main(void)
ENTRY(kernelstack);
ENTRY(oldrsp);
ENTRY(pcurrent);
- ENTRY(irqrsp);
ENTRY(irqcount);
ENTRY(cpunumber);
ENTRY(irqstackptr);
diff --git a/arch/x86_64/kernel/crash.c b/arch/x86_64/kernel/crash.c
index d7fa4248501..535e0446607 100644
--- a/arch/x86_64/kernel/crash.c
+++ b/arch/x86_64/kernel/crash.c
@@ -11,7 +11,6 @@
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/smp.h>
-#include <linux/irq.h>
#include <linux/reboot.h>
#include <linux/kexec.h>
diff --git a/arch/x86_64/kernel/e820.c b/arch/x86_64/kernel/e820.c
index bb0ae18ec02..eb7929eea7b 100644
--- a/arch/x86_64/kernel/e820.c
+++ b/arch/x86_64/kernel/e820.c
@@ -131,7 +131,7 @@ void __init e820_bootmem_free(pg_data_t *pgdat, unsigned long start,unsigned lon
if (ei->type != E820_RAM ||
ei->addr+ei->size <= start ||
- ei->addr > end)
+ ei->addr >= end)
continue;
addr = round_up(ei->addr, PAGE_SIZE);
diff --git a/arch/x86_64/kernel/early_printk.c b/arch/x86_64/kernel/early_printk.c
index 9631c747c5e..9cd968dd0f5 100644
--- a/arch/x86_64/kernel/early_printk.c
+++ b/arch/x86_64/kernel/early_printk.c
@@ -5,6 +5,7 @@
#include <linux/tty.h>
#include <asm/io.h>
#include <asm/processor.h>
+#include <asm/fcntl.h>
/* Simple VGA output */
@@ -158,6 +159,47 @@ static struct console early_serial_console = {
.index = -1,
};
+/* Console interface to a host file on AMD's SimNow! */
+
+static int simnow_fd;
+
+enum {
+ MAGIC1 = 0xBACCD00A,
+ MAGIC2 = 0xCA110000,
+ XOPEN = 5,
+ XWRITE = 4,
+};
+
+static noinline long simnow(long cmd, long a, long b, long c)
+{
+ long ret;
+ asm volatile("cpuid" :
+ "=a" (ret) :
+ "b" (a), "c" (b), "d" (c), "0" (MAGIC1), "D" (cmd + MAGIC2));
+ return ret;
+}
+
+void __init simnow_init(char *str)
+{
+ char *fn = "klog";
+ if (*str == '=')
+ fn = ++str;
+ /* error ignored */
+ simnow_fd = simnow(XOPEN, (unsigned long)fn, O_WRONLY|O_APPEND|O_CREAT, 0644);
+}
+
+static void simnow_write(struct console *con, const char *s, unsigned n)
+{
+ simnow(XWRITE, simnow_fd, (unsigned long)s, n);
+}
+
+static struct console simnow_console = {
+ .name = "simnow",
+ .write = simnow_write,
+ .flags = CON_PRINTBUFFER,
+ .index = -1,
+};
+
/* Direct interface for emergencies */
struct console *early_console = &early_vga_console;
static int early_console_initialized = 0;
@@ -205,6 +247,10 @@ int __init setup_early_printk(char *opt)
max_xpos = SCREEN_INFO.orig_video_cols;
max_ypos = SCREEN_INFO.orig_video_lines;
early_console = &early_vga_console;
+ } else if (!strncmp(buf, "simnow", 6)) {
+ simnow_init(buf + 6);
+ early_console = &simnow_console;
+ keep_early = 1;
}
early_console_initialized = 1;
register_console(early_console);
diff --git a/arch/x86_64/kernel/entry.S b/arch/x86_64/kernel/entry.S
index 3620508c8bd..7937971d185 100644
--- a/arch/x86_64/kernel/entry.S
+++ b/arch/x86_64/kernel/entry.S
@@ -79,16 +79,19 @@
xorl %eax, %eax
pushq %rax /* ss */
CFI_ADJUST_CFA_OFFSET 8
+ /*CFI_REL_OFFSET ss,0*/
pushq %rax /* rsp */
CFI_ADJUST_CFA_OFFSET 8
- CFI_OFFSET rip,0
+ CFI_REL_OFFSET rsp,0
pushq $(1<<9) /* eflags - interrupts on */
CFI_ADJUST_CFA_OFFSET 8
+ /*CFI_REL_OFFSET rflags,0*/
pushq $__KERNEL_CS /* cs */
CFI_ADJUST_CFA_OFFSET 8
+ /*CFI_REL_OFFSET cs,0*/
pushq \child_rip /* rip */
CFI_ADJUST_CFA_OFFSET 8
- CFI_OFFSET rip,0
+ CFI_REL_OFFSET rip,0
pushq %rax /* orig rax */
CFI_ADJUST_CFA_OFFSET 8
.endm
@@ -98,32 +101,39 @@
CFI_ADJUST_CFA_OFFSET -(6*8)
.endm
- .macro CFI_DEFAULT_STACK
- CFI_ADJUST_CFA_OFFSET (SS)
- CFI_OFFSET r15,R15-SS
- CFI_OFFSET r14,R14-SS
- CFI_OFFSET r13,R13-SS
- CFI_OFFSET r12,R12-SS
- CFI_OFFSET rbp,RBP-SS
- CFI_OFFSET rbx,RBX-SS
- CFI_OFFSET r11,R11-SS
- CFI_OFFSET r10,R10-SS
- CFI_OFFSET r9,R9-SS
- CFI_OFFSET r8,R8-SS
- CFI_OFFSET rax,RAX-SS
- CFI_OFFSET rcx,RCX-SS
- CFI_OFFSET rdx,RDX-SS
- CFI_OFFSET rsi,RSI-SS
- CFI_OFFSET rdi,RDI-SS
- CFI_OFFSET rsp,RSP-SS
- CFI_OFFSET rip,RIP-SS
+ .macro CFI_DEFAULT_STACK start=1
+ .if \start
+ CFI_STARTPROC simple
+ CFI_DEF_CFA rsp,SS+8
+ .else
+ CFI_DEF_CFA_OFFSET SS+8
+ .endif
+ CFI_REL_OFFSET r15,R15
+ CFI_REL_OFFSET r14,R14
+ CFI_REL_OFFSET r13,R13
+ CFI_REL_OFFSET r12,R12
+ CFI_REL_OFFSET rbp,RBP
+ CFI_REL_OFFSET rbx,RBX
+ CFI_REL_OFFSET r11,R11
+ CFI_REL_OFFSET r10,R10
+ CFI_REL_OFFSET r9,R9
+ CFI_REL_OFFSET r8,R8
+ CFI_REL_OFFSET rax,RAX
+ CFI_REL_OFFSET rcx,RCX
+ CFI_REL_OFFSET rdx,RDX
+ CFI_REL_OFFSET rsi,RSI
+ CFI_REL_OFFSET rdi,RDI
+ CFI_REL_OFFSET rip,RIP
+ /*CFI_REL_OFFSET cs,CS*/
+ /*CFI_REL_OFFSET rflags,EFLAGS*/
+ CFI_REL_OFFSET rsp,RSP
+ /*CFI_REL_OFFSET ss,SS*/
.endm
/*
* A newly forked process directly context switches into this.
*/
/* rdi: prev */
ENTRY(ret_from_fork)
- CFI_STARTPROC
CFI_DEFAULT_STACK
call schedule_tail
GET_THREAD_INFO(%rcx)
@@ -172,16 +182,21 @@ rff_trace:
*/
ENTRY(system_call)
- CFI_STARTPROC
+ CFI_STARTPROC simple
+ CFI_DEF_CFA rsp,0
+ CFI_REGISTER rip,rcx
+ /*CFI_REGISTER rflags,r11*/
swapgs
movq %rsp,%gs:pda_oldrsp
movq %gs:pda_kernelstack,%rsp
sti
SAVE_ARGS 8,1
movq %rax,ORIG_RAX-ARGOFFSET(%rsp)
- movq %rcx,RIP-ARGOFFSET(%rsp)
+ movq %rcx,RIP-ARGOFFSET(%rsp)
+ CFI_REL_OFFSET rip,RIP-ARGOFFSET
GET_THREAD_INFO(%rcx)
testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP),threadinfo_flags(%rcx)
+ CFI_REMEMBER_STATE
jnz tracesys
cmpq $__NR_syscall_max,%rax
ja badsys
@@ -201,9 +216,12 @@ sysret_check:
cli
movl threadinfo_flags(%rcx),%edx
andl %edi,%edx
+ CFI_REMEMBER_STATE
jnz sysret_careful
movq RIP-ARGOFFSET(%rsp),%rcx
+ CFI_REGISTER rip,rcx
RESTORE_ARGS 0,-ARG_SKIP,1
+ /*CFI_REGISTER rflags,r11*/
movq %gs:pda_oldrsp,%rsp
swapgs
sysretq
@@ -211,12 +229,15 @@ sysret_check:
/* Handle reschedules */
/* edx: work, edi: workmask */
sysret_careful:
+ CFI_RESTORE_STATE
bt $TIF_NEED_RESCHED,%edx
jnc sysret_signal
sti
pushq %rdi
+ CFI_ADJUST_CFA_OFFSET 8
call schedule
popq %rdi
+ CFI_ADJUST_CFA_OFFSET -8
jmp sysret_check
/* Handle a signal */
@@ -234,8 +255,13 @@ sysret_signal:
1: movl $_TIF_NEED_RESCHED,%edi
jmp sysret_check
+badsys:
+ movq $-ENOSYS,RAX-ARGOFFSET(%rsp)
+ jmp ret_from_sys_call
+
/* Do syscall tracing */
tracesys:
+ CFI_RESTORE_STATE
SAVE_REST
movq $-ENOSYS,RAX(%rsp)
FIXUP_TOP_OF_STACK %rdi
@@ -254,16 +280,29 @@ tracesys:
RESTORE_TOP_OF_STACK %rbx
RESTORE_REST
jmp ret_from_sys_call
+ CFI_ENDPROC
-badsys:
- movq $-ENOSYS,RAX-ARGOFFSET(%rsp)
- jmp ret_from_sys_call
-
/*
* Syscall return path ending with IRET.
* Has correct top of stack, but partial stack frame.
*/
-ENTRY(int_ret_from_sys_call)
+ENTRY(int_ret_from_sys_call)
+ CFI_STARTPROC simple
+ CFI_DEF_CFA rsp,SS+8-ARGOFFSET
+ /*CFI_REL_OFFSET ss,SS-ARGOFFSET*/
+ CFI_REL_OFFSET rsp,RSP-ARGOFFSET
+ /*CFI_REL_OFFSET rflags,EFLAGS-ARGOFFSET*/
+ /*CFI_REL_OFFSET cs,CS-ARGOFFSET*/
+ CFI_REL_OFFSET rip,RIP-ARGOFFSET
+ CFI_REL_OFFSET rdx,RDX-ARGOFFSET
+ CFI_REL_OFFSET rcx,RCX-ARGOFFSET
+ CFI_REL_OFFSET rax,RAX-ARGOFFSET
+ CFI_REL_OFFSET rdi,RDI-ARGOFFSET
+ CFI_REL_OFFSET rsi,RSI-ARGOFFSET
+ CFI_REL_OFFSET r8,R8-ARGOFFSET
+ CFI_REL_OFFSET r9,R9-ARGOFFSET
+ CFI_REL_OFFSET r10,R10-ARGOFFSET
+ CFI_REL_OFFSET r11,R11-ARGOFFSET
cli
testl $3,CS-ARGOFFSET(%rsp)
je retint_restore_args
@@ -284,8 +323,10 @@ int_careful:
jnc int_very_careful
sti
pushq %rdi
+ CFI_ADJUST_CFA_OFFSET 8
call schedule
popq %rdi
+ CFI_ADJUST_CFA_OFFSET -8
cli
jmp int_with_check
@@ -297,9 +338,11 @@ int_very_careful:
testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP),%edx
jz int_signal
pushq %rdi
+ CFI_ADJUST_CFA_OFFSET 8
leaq 8(%rsp),%rdi # &ptregs -> arg1
call syscall_trace_leave
popq %rdi
+ CFI_ADJUST_CFA_OFFSET -8
andl $~(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP),%edi
cli
jmp int_restore_rest
@@ -329,6 +372,8 @@ int_restore_rest:
jmp ptregscall_common
.endm
+ CFI_STARTPROC
+
PTREGSCALL stub_clone, sys_clone, %r8
PTREGSCALL stub_fork, sys_fork, %rdi
PTREGSCALL stub_vfork, sys_vfork, %rdi
@@ -337,40 +382,49 @@ int_restore_rest:
PTREGSCALL stub_iopl, sys_iopl, %rsi
ENTRY(ptregscall_common)
- CFI_STARTPROC
popq %r11
- CFI_ADJUST_CFA_OFFSET -8
+ CFI_ADJUST_CFA_OFFSET -8
+ CFI_REGISTER rip, r11
SAVE_REST
movq %r11, %r15
+ CFI_REGISTER rip, r15
FIXUP_TOP_OF_STACK %r11
call *%rax
RESTORE_TOP_OF_STACK %r11
movq %r15, %r11
+ CFI_REGISTER rip, r11
RESTORE_REST
pushq %r11
- CFI_ADJUST_CFA_OFFSET 8
+ CFI_ADJUST_CFA_OFFSET 8
+ CFI_REL_OFFSET rip, 0
ret
CFI_ENDPROC
ENTRY(stub_execve)
CFI_STARTPROC
popq %r11
- CFI_ADJUST_CFA_OFFSET -8
+ CFI_ADJUST_CFA_OFFSET -8
+ CFI_REGISTER rip, r11
SAVE_REST
movq %r11, %r15
+ CFI_REGISTER rip, r15
FIXUP_TOP_OF_STACK %r11
call sys_execve
GET_THREAD_INFO(%rcx)
bt $TIF_IA32,threadinfo_flags(%rcx)
+ CFI_REMEMBER_STATE
jc exec_32bit
RESTORE_TOP_OF_STACK %r11
movq %r15, %r11
+ CFI_REGISTER rip, r11
RESTORE_REST
- push %r11
+ pushq %r11
+ CFI_ADJUST_CFA_OFFSET 8
+ CFI_REL_OFFSET rip, 0
ret
exec_32bit:
- CFI_ADJUST_CFA_OFFSET REST_SKIP
+ CFI_RESTORE_STATE
movq %rax,RAX(%rsp)
RESTORE_REST
jmp int_ret_from_sys_call
@@ -382,7 +436,8 @@ exec_32bit:
*/
ENTRY(stub_rt_sigreturn)
CFI_STARTPROC
- addq $8, %rsp
+ addq $8, %rsp
+ CFI_ADJUST_CFA_OFFSET -8
SAVE_REST
movq %rsp,%rdi
FIXUP_TOP_OF_STACK %r11
@@ -392,6 +447,25 @@ ENTRY(stub_rt_sigreturn)
jmp int_ret_from_sys_call
CFI_ENDPROC
+/*
+ * initial frame state for interrupts and exceptions
+ */
+ .macro _frame ref
+ CFI_STARTPROC simple
+ CFI_DEF_CFA rsp,SS+8-\ref
+ /*CFI_REL_OFFSET ss,SS-\ref*/
+ CFI_REL_OFFSET rsp,RSP-\ref
+ /*CFI_REL_OFFSET rflags,EFLAGS-\ref*/
+ /*CFI_REL_OFFSET cs,CS-\ref*/
+ CFI_REL_OFFSET rip,RIP-\ref
+ .endm
+
+/* initial frame state for interrupts (and exceptions without error code) */
+#define INTR_FRAME _frame RIP
+/* initial frame state for exceptions with error code (and interrupts with
+ vector already pushed) */
+#define XCPT_FRAME _frame ORIG_RAX
+
/*
* Interrupt entry/exit.
*
@@ -402,10 +476,6 @@ ENTRY(stub_rt_sigreturn)
/* 0(%rsp): interrupt number */
.macro interrupt func
- CFI_STARTPROC simple
- CFI_DEF_CFA rsp,(SS-RDI)
- CFI_REL_OFFSET rsp,(RSP-ORIG_RAX)
- CFI_REL_OFFSET rip,(RIP-ORIG_RAX)
cld
#ifdef CONFIG_DEBUG_INFO
SAVE_ALL
@@ -425,23 +495,27 @@ ENTRY(stub_rt_sigreturn)
swapgs
1: incl %gs:pda_irqcount # RED-PEN should check preempt count
movq %gs:pda_irqstackptr,%rax
- cmoveq %rax,%rsp
+ cmoveq %rax,%rsp /*todo This needs CFI annotation! */
pushq %rdi # save old stack
+ CFI_ADJUST_CFA_OFFSET 8
call \func
.endm
ENTRY(common_interrupt)
+ XCPT_FRAME
interrupt do_IRQ
/* 0(%rsp): oldrsp-ARGOFFSET */
-ret_from_intr:
+ret_from_intr:
popq %rdi
+ CFI_ADJUST_CFA_OFFSET -8
cli
decl %gs:pda_irqcount
#ifdef CONFIG_DEBUG_INFO
movq RBP(%rdi),%rbp
+ CFI_DEF_CFA_REGISTER rsp
#endif
- leaq ARGOFFSET(%rdi),%rsp
-exit_intr:
+ leaq ARGOFFSET(%rdi),%rsp /*todo This needs CFI annotation! */
+exit_intr:
GET_THREAD_INFO(%rcx)
testl $3,CS-ARGOFFSET(%rsp)
je retint_kernel
@@ -453,9 +527,10 @@ exit_intr:
*/
retint_with_reschedule:
movl $_TIF_WORK_MASK,%edi
-retint_check:
+retint_check:
movl threadinfo_flags(%rcx),%edx
andl %edi,%edx
+ CFI_REMEMBER_STATE
jnz retint_careful
retint_swapgs:
swapgs
@@ -476,14 +551,17 @@ bad_iret:
jmp do_exit
.previous
- /* edi: workmask, edx: work */
+ /* edi: workmask, edx: work */
retint_careful:
+ CFI_RESTORE_STATE
bt $TIF_NEED_RESCHED,%edx
jnc retint_signal
sti
pushq %rdi
+ CFI_ADJUST_CFA_OFFSET 8
call schedule
popq %rdi
+ CFI_ADJUST_CFA_OFFSET -8
GET_THREAD_INFO(%rcx)
cli
jmp retint_check
@@ -523,7 +601,9 @@ retint_kernel:
* APIC interrupts.
*/
.macro apicinterrupt num,func
+ INTR_FRAME
pushq $\num-256
+ CFI_ADJUST_CFA_OFFSET 8
interrupt \func
jmp ret_from_intr
CFI_ENDPROC
@@ -536,8 +616,19 @@ ENTRY(thermal_interrupt)
ENTRY(reschedule_interrupt)
apicinterrupt RESCHEDULE_VECTOR,smp_reschedule_interrupt
-ENTRY(invalidate_interrupt)
- apicinterrupt INVALIDATE_TLB_VECTOR,smp_invalidate_interrupt
+ .macro INVALIDATE_ENTRY num
+ENTRY(invalidate_interrupt\num)
+ apicinterrupt INVALIDATE_TLB_VECTOR_START+\num,smp_invalidate_interrupt
+ .endm
+
+ INVALIDATE_ENTRY 0
+ INVALIDATE_ENTRY 1
+ INVALIDATE_ENTRY 2
+ INVALIDATE_ENTRY 3
+ INVALIDATE_ENTRY 4
+ INVALIDATE_ENTRY 5
+ INVALIDATE_ENTRY 6
+ INVALIDATE_ENTRY 7
ENTRY(call_function_interrupt)
apicinterrupt CALL_FUNCTION_VECTOR,smp_call_function_interrupt
@@ -558,16 +649,23 @@ ENTRY(spurious_interrupt)
* Exception entry points.
*/
.macro zeroentry sym
+ INTR_FRAME
pushq $0 /* push error code/oldrax */
+ CFI_ADJUST_CFA_OFFSET 8
pushq %rax /* push real oldrax to the rdi slot */
+ CFI_ADJUST_CFA_OFFSET 8
leaq \sym(%rip),%rax
jmp error_entry
+ CFI_ENDPROC
.endm
.macro errorentry sym
+ XCPT_FRAME
pushq %rax
+ CFI_ADJUST_CFA_OFFSET 8
leaq \sym(%rip),%rax
jmp error_entry
+ CFI_ENDPROC
.endm
/* error code is on the stack already */
@@ -594,10 +692,7 @@ ENTRY(spurious_interrupt)
* and the exception handler in %rax.
*/
ENTRY(error_entry)
- CFI_STARTPROC simple
- CFI_DEF_CFA rsp,(SS-RDI)
- CFI_REL_OFFSET rsp,(RSP-RDI)
- CFI_REL_OFFSET rip,(RIP-RDI)
+ _frame RDI
/* rdi slot contains rax, oldrax contains error code */
cld
subq $14*8,%rsp
@@ -679,7 +774,9 @@ error_kernelspace:
/* Reload gs selector with exception handling */
/* edi: new selector */
ENTRY(load_gs_index)
+ CFI_STARTPROC
pushf
+ CFI_ADJUST_CFA_OFFSET 8
cli
swapgs
gs_change:
@@ -687,7 +784,9 @@ gs_change:
2: mfence /* workaround */
swapgs
popf
+ CFI_ADJUST_CFA_OFFSET -8
ret
+ CFI_ENDPROC
.section __ex_table,"a"
.align 8
@@ -799,7 +898,7 @@ ENTRY(device_not_available)
/* runs on exception stack */
KPROBE_ENTRY(debug)
- CFI_STARTPROC
+ INTR_FRAME
pushq $0
CFI_ADJUST_CFA_OFFSET 8
paranoidentry do_debug
@@ -809,9 +908,9 @@ KPROBE_ENTRY(debug)
/* runs on exception stack */
ENTRY(nmi)
- CFI_STARTPROC
+ INTR_FRAME
pushq $-1
- CFI_ADJUST_CFA_OFFSET 8
+ CFI_ADJUST_CFA_OFFSET 8
paranoidentry do_nmi
/*
* "Paranoid" exit path from exception stack.
@@ -877,7 +976,7 @@ ENTRY(reserved)
/* runs on exception stack */
ENTRY(double_fault)
- CFI_STARTPROC
+ XCPT_FRAME
paranoidentry do_double_fault
jmp paranoid_exit
CFI_ENDPROC
@@ -890,7 +989,7 @@ ENTRY(segment_not_present)
/* runs on exception stack */
ENTRY(stack_segment)
- CFI_STARTPROC
+ XCPT_FRAME
paranoidentry do_stack_segment
jmp paranoid_exit
CFI_ENDPROC
@@ -911,7 +1010,7 @@ ENTRY(spurious_interrupt_bug)
#ifdef CONFIG_X86_MCE
/* runs on exception stack */
ENTRY(machine_check)
- CFI_STARTPROC
+ INTR_FRAME
pushq $0
CFI_ADJUST_CFA_OFFSET 8
paranoidentry do_machine_check
@@ -923,14 +1022,19 @@ ENTRY(call_debug)
zeroentry do_call_debug
ENTRY(call_softirq)
+ CFI_STARTPROC
movq %gs:pda_irqstackptr,%rax
pushq %r15
+ CFI_ADJUST_CFA_OFFSET 8
movq %rsp,%r15
+ CFI_DEF_CFA_REGISTER r15
incl %gs:pda_irqcount
cmove %rax,%rsp
call __do_softirq
movq %r15,%rsp
+ CFI_DEF_CFA_REGISTER rsp
decl %gs:pda_irqcount
popq %r15
+ CFI_ADJUST_CFA_OFFSET -8
ret
-
+ CFI_ENDPROC
diff --git a/arch/x86_64/kernel/genapic.c b/arch/x86_64/kernel/genapic.c
index b1c144f7314..7a64ea18178 100644
--- a/arch/x86_64/kernel/genapic.c
+++ b/arch/x86_64/kernel/genapic.c
@@ -45,7 +45,7 @@ void __init clustered_apic_check(void)
u8 clusters, max_cluster;
u8 id;
u8 cluster_cnt[NUM_APIC_CLUSTERS];
- int num_cpus = 0;
+ int max_apic = 0;
#if defined(CONFIG_ACPI)
/*
@@ -64,14 +64,15 @@ void __init clustered_apic_check(void)
id = bios_cpu_apicid[i];
if (id == BAD_APICID)
continue;
- num_cpus++;
+ if (id > max_apic)
+ max_apic = id;
cluster_cnt[APIC_CLUSTERID(id)]++;
}
/* Don't use clustered mode on AMD platforms. */
if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) {
genapic = &apic_physflat;
-#ifndef CONFIG_CPU_HOTPLUG
+#ifndef CONFIG_HOTPLUG_CPU
/* In the CPU hotplug case we cannot use broadcast mode
because that opens a race when a CPU is removed.
Stay at physflat mode in this case.
@@ -79,7 +80,7 @@ void __init clustered_apic_check(void)
we have ACPI platform support for CPU hotplug
we should detect hotplug capablity from ACPI tables and
only do this when really needed. -AK */
- if (num_cpus <= 8)
+ if (max_apic <= 8)
genapic = &apic_flat;
#endif
goto print;
@@ -103,9 +104,14 @@ void __init clustered_apic_check(void)
* (We don't use lowest priority delivery + HW APIC IRQ steering, so
* can ignore the clustered logical case and go straight to physical.)
*/
- if (clusters <= 1 && max_cluster <= 8 && cluster_cnt[0] == max_cluster)
+ if (clusters <= 1 && max_cluster <= 8 && cluster_cnt[0] == max_cluster) {
+#ifdef CONFIG_HOTPLUG_CPU
+ /* Don't use APIC shortcuts in CPU hotplug to avoid races */
+ genapic = &apic_physflat;
+#else
genapic = &apic_flat;
- else
+#endif
+ } else
genapic = &apic_cluster;
print:
diff --git a/arch/x86_64/kernel/genapic_cluster.c b/arch/x86_64/kernel/genapic_cluster.c
index f6523dd1bc0..a472d62f899 100644
--- a/arch/x86_64/kernel/genapic_cluster.c
+++ b/arch/x86_64/kernel/genapic_cluster.c
@@ -51,10 +51,10 @@ static void cluster_init_apic_ldr(void)
count = 3;
id = my_cluster | (1UL << count);
x86_cpu_to_log_apicid[smp_processor_id()] = id;
- apic_write_around(APIC_DFR, APIC_DFR_CLUSTER);
+ apic_write(APIC_DFR, APIC_DFR_CLUSTER);
val = apic_read(APIC_LDR) & ~APIC_LDR_MASK;
val |= SET_APIC_LOGICAL_ID(id);
- apic_write_around(APIC_LDR, val);
+ apic_write(APIC_LDR, val);
}
/* Start with all IRQs pointing to boot CPU. IRQ balancing will shift them. */
diff --git a/arch/x86_64/kernel/genapic_flat.c b/arch/x86_64/kernel/genapic_flat.c
index 6d57da96bf8..9da3edb799e 100644
--- a/arch/x86_64/kernel/genapic_flat.c
+++ b/arch/x86_64/kernel/genapic_flat.c
@@ -38,10 +38,10 @@ static void flat_init_apic_ldr(void)
num = smp_processor_id();
id = 1UL << num;
x86_cpu_to_log_apicid[num] = id;
- apic_write_around(APIC_DFR, APIC_DFR_FLAT);
+ apic_write(APIC_DFR, APIC_DFR_FLAT);
val = apic_read(APIC_LDR) & ~APIC_LDR_MASK;
val |= SET_APIC_LOGICAL_ID(id);
- apic_write_around(APIC_LDR, val);
+ apic_write(APIC_LDR, val);
}
static void flat_send_IPI_mask(cpumask_t cpumask, int vector)
@@ -62,7 +62,7 @@ static void flat_send_IPI_mask(cpumask_t cpumask, int vector)
* prepare target chip field
*/
cfg = __prepare_ICR2(mask);
- apic_write_around(APIC_ICR2, cfg);
+ apic_write(APIC_ICR2, cfg);
/*
* program the ICR
@@ -72,7 +72,7 @@ static void flat_send_IPI_mask(cpumask_t cpumask, int vector)
/*
* Send the IPI. The write to APIC_ICR fires this off.
*/
- apic_write_around(APIC_ICR, cfg);
+ apic_write(APIC_ICR, cfg);
local_irq_restore(flags);
}
@@ -177,9 +177,9 @@ static unsigned int physflat_cpu_mask_to_apicid(cpumask_t cpumask)
struct genapic apic_physflat = {
.name = "physical flat",
- .int_delivery_mode = dest_LowestPrio,
+ .int_delivery_mode = dest_Fixed,
.int_dest_mode = (APIC_DEST_PHYSICAL != 0),
- .int_delivery_dest = APIC_DEST_PHYSICAL | APIC_DM_LOWEST,
+ .int_delivery_dest = APIC_DEST_PHYSICAL | APIC_DM_FIXED,
.target_cpus = physflat_target_cpus,
.apic_id_registered = flat_apic_id_registered,
.init_apic_ldr = flat_init_apic_ldr,/*not needed, but shouldn't hurt*/
diff --git a/arch/x86_64/kernel/head.S b/arch/x86_64/kernel/head.S
index 98ff5eb32b9..4592bf21fca 100644
--- a/arch/x86_64/kernel/head.S
+++ b/arch/x86_64/kernel/head.S
@@ -270,7 +270,7 @@ ENTRY(level3_kernel_pgt)
.org 0x4000
ENTRY(level2_ident_pgt)
/* 40MB for bootup. */
- .quad 0x0000000000000283
+ .quad 0x0000000000000183
.quad 0x0000000000200183
.quad 0x0000000000400183
.quad 0x0000000000600183
diff --git a/arch/x86_64/kernel/i8259.c b/arch/x86_64/kernel/i8259.c
index a8916909512..b2a238b5a17 100644
--- a/arch/x86_64/kernel/i8259.c
+++ b/arch/x86_64/kernel/i8259.c
@@ -18,15 +18,12 @@
#include <asm/atomic.h>
#include <asm/system.h>
#include <asm/io.h>
-#include <asm/irq.h>
#include <asm/hw_irq.h>
#include <asm/pgtable.h>
#include <asm/delay.h>
#include <asm/desc.h>
#include <asm/apic.h>
-#include <linux/irq.h>
-
/*
* Common place to define all x86 IRQ vectors
*
@@ -486,7 +483,14 @@ void spurious_interrupt(void);
void error_interrupt(void);
void reschedule_interrupt(void);
void call_function_interrupt(void);
-void invalidate_interrupt(void);
+void invalidate_interrupt0(void);
+void invalidate_interrupt1(void);
+void invalidate_interrupt2(void);
+void invalidate_interrupt3(void);
+void invalidate_interrupt4(void);
+void invalidate_interrupt5(void);
+void invalidate_interrupt6(void);
+void invalidate_interrupt7(void);
void thermal_interrupt(void);
void i8254_timer_resume(void);
@@ -562,8 +566,15 @@ void __init init_IRQ(void)
*/
set_intr_gate(RESCHEDULE_VECTOR, reschedule_interrupt);
- /* IPI for invalidation */
- set_intr_gate(INVALIDATE_TLB_VECTOR, invalidate_interrupt);
+ /* IPIs for invalidation */
+ set_intr_gate(INVALIDATE_TLB_VECTOR_START+0, invalidate_interrupt0);
+ set_intr_gate(INVALIDATE_TLB_VECTOR_START+1, invalidate_interrupt1);
+ set_intr_gate(INVALIDATE_TLB_VECTOR_START+2, invalidate_interrupt2);
+ set_intr_gate(INVALIDATE_TLB_VECTOR_START+3, invalidate_interrupt3);
+ set_intr_gate(INVALIDATE_TLB_VECTOR_START+4, invalidate_interrupt4);
+ set_intr_gate(INVALIDATE_TLB_VECTOR_START+5, invalidate_interrupt5);
+ set_intr_gate(INVALIDATE_TLB_VECTOR_START+6, invalidate_interrupt6);
+ set_intr_gate(INVALIDATE_TLB_VECTOR_START+7, invalidate_interrupt7);
/* IPI for generic function call */
set_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt);
diff --git a/arch/x86_64/kernel/init_task.c b/arch/x86_64/kernel/init_task.c
index c4dc9149115..e0ba5c1043f 100644
--- a/arch/x86_64/kernel/init_task.c
+++ b/arch/x86_64/kernel/init_task.c
@@ -44,6 +44,6 @@ EXPORT_SYMBOL(init_task);
* section. Since TSS's are completely CPU-local, we want them
* on exact cacheline boundaries, to eliminate cacheline ping-pong.
*/
-DEFINE_PER_CPU(struct tss_struct, init_tss) ____cacheline_maxaligned_in_smp;
+DEFINE_PER_CPU(struct tss_struct, init_tss) ____cacheline_maxaligned_in_smp = INIT_TSS;
#define ALIGN_TO_4K __attribute__((section(".data.init_task")))
diff --git a/arch/x86_64/kernel/io_apic.c b/arch/x86_64/kernel/io_apic.c
index 5f1529be123..522944a000a 100644
--- a/arch/x86_64/kernel/io_apic.c
+++ b/arch/x86_64/kernel/io_apic.c
@@ -21,7 +21,6 @@
*/
#include <linux/mm.h>
-#include <linux/irq.h>
#include <linux/interrupt.h>
#include <linux/init.h>
#include <linux/delay.h>
@@ -45,6 +44,8 @@ int sis_apic_bug; /* not actually supported, dummy for compile */
static int no_timer_check;
+int disable_timer_pin_1 __initdata;
+
static DEFINE_SPINLOCK(ioapic_lock);
/*
@@ -298,6 +299,15 @@ void __init check_ioapic(void)
#endif
/* RED-PEN skip them on mptables too? */
return;
+ case PCI_VENDOR_ID_ATI:
+ /* All timer interrupts on atiixp
+ are doubled. Disable one. */
+ if (disable_timer_pin_1 == 0) {
+ disable_timer_pin_1 = 1;
+ printk(KERN_INFO
+ "ATI board detected. Disabling timer pin 1.\n");
+ }
+ return;
}
/* No multi-function device? */
@@ -1022,13 +1032,11 @@ void __apicdebuginit print_local_APIC(void * dummy)
v = apic_read(APIC_TASKPRI);
printk(KERN_DEBUG "... APIC TASKPRI: %08x (%02x)\n", v, v & APIC_TPRI_MASK);
- if (APIC_INTEGRATED(ver)) { /* !82489DX */
- v = apic_read(APIC_ARBPRI);
- printk(KERN_DEBUG "... APIC ARBPRI: %08x (%02x)\n", v,
- v & APIC_ARBPRI_MASK);
- v = apic_read(APIC_PROCPRI);
- printk(KERN_DEBUG "... APIC PROCPRI: %08x\n", v);
- }
+ v = apic_read(APIC_ARBPRI);
+ printk(KERN_DEBUG "... APIC ARBPRI: %08x (%02x)\n", v,
+ v & APIC_ARBPRI_MASK);
+ v = apic_read(APIC_PROCPRI);
+ printk(KERN_DEBUG "... APIC PROCPRI: %08x\n", v);
v = apic_read(APIC_EOI);
printk(KERN_DEBUG "... APIC EOI: %08x\n", v);
@@ -1048,12 +1056,8 @@ void __apicdebuginit print_local_APIC(void * dummy)
printk(KERN_DEBUG "... APIC IRR field:\n");
print_APIC_bitfield(APIC_IRR);
- if (APIC_INTEGRATED(ver)) { /* !82489DX */
- if (maxlvt > 3) /* Due to the Pentium erratum 3AP. */
- apic_write(APIC_ESR, 0);
- v = apic_read(APIC_ESR);
- printk(KERN_DEBUG "... APIC ESR: %08x\n", v);
- }
+ v = apic_read(APIC_ESR);
+ printk(KERN_DEBUG "... APIC ESR: %08x\n", v);
v = apic_read(APIC_ICR);
printk(KERN_DEBUG "... APIC ICR: %08x\n", v);
@@ -1665,6 +1669,8 @@ static inline void check_timer(void)
setup_nmi();
enable_8259A_irq(0);
}
+ if (disable_timer_pin_1 > 0)
+ clear_IO_APIC_pin(0, pin1);
return;
}
clear_IO_APIC_pin(0, pin1);
diff --git a/arch/x86_64/kernel/irq.c b/arch/x86_64/kernel/irq.c
index 849a20aec7c..d6a04a8320a 100644
--- a/arch/x86_64/kernel/irq.c
+++ b/arch/x86_64/kernel/irq.c
@@ -99,7 +99,6 @@ asmlinkage unsigned int do_IRQ(struct pt_regs *regs)
unsigned irq = regs->orig_rax & 0xff;
irq_enter();
- BUG_ON(irq > 256);
__do_IRQ(irq, regs);
irq_exit();
diff --git a/arch/x86_64/kernel/mce.c b/arch/x86_64/kernel/mce.c
index 8aa56736cde..969365c0771 100644
--- a/arch/x86_64/kernel/mce.c
+++ b/arch/x86_64/kernel/mce.c
@@ -17,6 +17,7 @@
#include <linux/fs.h>
#include <linux/cpu.h>
#include <linux/percpu.h>
+#include <linux/ctype.h>
#include <asm/processor.h>
#include <asm/msr.h>
#include <asm/mce.h>
@@ -56,15 +57,19 @@ void mce_log(struct mce *mce)
smp_wmb();
for (;;) {
entry = rcu_dereference(mcelog.next);
- /* When the buffer fills up discard new entries. Assume
- that the earlier errors are the more interesting. */
- if (entry >= MCE_LOG_LEN) {
- set_bit(MCE_OVERFLOW, &mcelog.flags);
- return;
+ for (;;) {
+ /* When the buffer fills up discard new entries. Assume
+ that the earlier errors are the more interesting. */
+ if (entry >= MCE_LOG_LEN) {
+ set_bit(MCE_OVERFLOW, &mcelog.flags);
+ return;
+ }
+ /* Old left over entry. Skip. */
+ if (mcelog.entry[entry].finished) {
+ entry++;
+ continue;
+ }
}
- /* Old left over entry. Skip. */
- if (mcelog.entry[entry].finished)
- continue;
smp_rmb();
next = entry + 1;
if (cmpxchg(&mcelog.next, entry, next) == entry)
@@ -404,9 +409,15 @@ static ssize_t mce_read(struct file *filp, char __user *ubuf, size_t usize, loff
}
err = 0;
- for (i = 0; i < next; i++) {
- if (!mcelog.entry[i].finished)
- continue;
+ for (i = 0; i < next; i++) {
+ unsigned long start = jiffies;
+ while (!mcelog.entry[i].finished) {
+ if (!time_before(jiffies, start + 2)) {
+ memset(mcelog.entry + i,0, sizeof(struct mce));
+ continue;
+ }
+ cpu_relax();
+ }
smp_rmb();
err |= copy_to_user(buf, mcelog.entry + i, sizeof(struct mce));
buf += sizeof(struct mce);
@@ -479,6 +490,7 @@ static int __init mcheck_disable(char *str)
/* mce=off disables machine check. Note you can reenable it later
using sysfs.
+ mce=TOLERANCELEVEL (number, see above)
mce=bootlog Log MCEs from before booting. Disabled by default to work
around buggy BIOS that leave bogus MCEs. */
static int __init mcheck_enable(char *str)
@@ -489,6 +501,8 @@ static int __init mcheck_enable(char *str)
mce_dont_init = 1;
else if (!strcmp(str, "bootlog"))
mce_bootlog = 1;
+ else if (isdigit(str[0]))
+ get_option(&str, &tolerant);
else
printk("mce= argument %s ignored. Please use /sys", str);
return 0;
@@ -501,10 +515,12 @@ __setup("mce", mcheck_enable);
* Sysfs support
*/
-/* On resume clear all MCE state. Don't want to see leftovers from the BIOS. */
+/* On resume clear all MCE state. Don't want to see leftovers from the BIOS.
+ Only one CPU is active at this time, the others get readded later using
+ CPU hotplug. */
static int mce_resume(struct sys_device *dev)
{
- on_each_cpu(mce_init, NULL, 1, 1);
+ mce_init(NULL);
return 0;
}
diff --git a/arch/x86_64/kernel/mpparse.c b/arch/x86_64/kernel/mpparse.c
index 8d8ed6ae1d0..f16d38d09da 100644
--- a/arch/x86_64/kernel/mpparse.c
+++ b/arch/x86_64/kernel/mpparse.c
@@ -14,7 +14,6 @@
*/
#include <linux/mm.h>
-#include <linux/irq.h>
#include <linux/init.h>
#include <linux/delay.h>
#include <linux/config.h>
@@ -46,8 +45,6 @@ int acpi_found_madt;
int apic_version [MAX_APICS];
unsigned char mp_bus_id_to_type [MAX_MP_BUSSES] = { [0 ... MAX_MP_BUSSES-1] = -1 };
int mp_bus_id_to_pci_bus [MAX_MP_BUSSES] = { [0 ... MAX_MP_BUSSES-1] = -1 };
-unsigned char pci_bus_to_node [256];
-EXPORT_SYMBOL(pci_bus_to_node);
static int mp_current_pci_id = 0;
/* I/O APIC entries */
@@ -705,7 +702,7 @@ void __init mp_register_lapic (
processor.mpc_type = MP_PROCESSOR;
processor.mpc_apicid = id;
- processor.mpc_apicver = 0x10; /* TBD: lapic version */
+ processor.mpc_apicver = GET_APIC_VERSION(apic_read(APIC_LVR));
processor.mpc_cpuflag = (enabled ? CPU_ENABLED : 0);
processor.mpc_cpuflag |= (boot_cpu ? CPU_BOOTPROCESSOR : 0);
processor.mpc_cpufeature = (boot_cpu_data.x86 << 8) |
diff --git a/arch/x86_64/kernel/msr.c b/arch/x86_64/kernel/msr.c
deleted file mode 100644
index 598953ab015..00000000000
--- a/arch/x86_64/kernel/msr.c
+++ /dev/null
@@ -1,279 +0,0 @@
-/* ----------------------------------------------------------------------- *
- *
- * Copyright 2000 H. Peter Anvin - All Rights Reserved
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA 02139,
- * USA; either version 2 of the License, or (at your option) any later
- * version; incorporated herein by reference.
- *
- * ----------------------------------------------------------------------- */
-
-/*
- * msr.c
- *
- * x86 MSR access device
- *
- * This device is accessed by lseek() to the appropriate register number
- * and then read/write in chunks of 8 bytes. A larger size means multiple
- * reads or writes of the same register.
- *
- * This driver uses /dev/cpu/%d/msr where %d is the minor number, and on
- * an SMP box will direct the access to CPU %d.
- */
-
-#include <linux/module.h>
-#include <linux/config.h>
-
-#include <linux/types.h>
-#include <linux/errno.h>
-#include <linux/fcntl.h>
-#include <linux/init.h>
-#include <linux/poll.h>
-#include <linux/smp.h>
-#include <linux/smp_lock.h>
-#include <linux/major.h>
-#include <linux/fs.h>
-
-#include <asm/processor.h>
-#include <asm/msr.h>
-#include <asm/uaccess.h>
-#include <asm/system.h>
-
-/* Note: "err" is handled in a funny way below. Otherwise one version
- of gcc or another breaks. */
-
-static inline int wrmsr_eio(u32 reg, u32 eax, u32 edx)
-{
- int err;
-
- asm volatile ("1: wrmsr\n"
- "2:\n"
- ".section .fixup,\"ax\"\n"
- "3: movl %4,%0\n"
- " jmp 2b\n"
- ".previous\n"
- ".section __ex_table,\"a\"\n"
- " .align 8\n" " .quad 1b,3b\n" ".previous":"=&bDS" (err)
- :"a"(eax), "d"(edx), "c"(reg), "i"(-EIO), "0"(0));
-
- return err;
-}
-
-static inline int rdmsr_eio(u32 reg, u32 *eax, u32 *edx)
-{
- int err;
-
- asm volatile ("1: rdmsr\n"
- "2:\n"
- ".section .fixup,\"ax\"\n"
- "3: movl %4,%0\n"
- " jmp 2b\n"
- ".previous\n"
- ".section __ex_table,\"a\"\n"
- " .align 8\n"
- " .quad 1b,3b\n"
- ".previous":"=&bDS" (err), "=a"(*eax), "=d"(*edx)
- :"c"(reg), "i"(-EIO), "0"(0));
-
- return err;
-}
-
-#ifdef CONFIG_SMP
-
-struct msr_command {
- int cpu;
- int err;
- u32 reg;
- u32 data[2];
-};
-
-static void msr_smp_wrmsr(void *cmd_block)
-{
- struct msr_command *cmd = (struct msr_command *)cmd_block;
-
- if (cmd->cpu == smp_processor_id())
- cmd->err = wrmsr_eio(cmd->reg, cmd->data[0], cmd->data[1]);
-}
-
-static void msr_smp_rdmsr(void *cmd_block)
-{
- struct msr_command *cmd = (struct msr_command *)cmd_block;
-
- if (cmd->cpu == smp_processor_id())
- cmd->err = rdmsr_eio(cmd->reg, &cmd->data[0], &cmd->data[1]);
-}
-
-static inline int do_wrmsr(int cpu, u32 reg, u32 eax, u32 edx)
-{
- struct msr_command cmd;
- int ret;
-
- preempt_disable();
- if (cpu == smp_processor_id()) {
- ret = wrmsr_eio(reg, eax, edx);
- } else {
- cmd.cpu = cpu;
- cmd.reg = reg;
- cmd.data[0] = eax;
- cmd.data[1] = edx;
-
- smp_call_function(msr_smp_wrmsr, &cmd, 1, 1);
- ret = cmd.err;
- }
- preempt_enable();
- return ret;
-}
-
-static inline int do_rdmsr(int cpu, u32 reg, u32 * eax, u32 * edx)
-{
- struct msr_command cmd;
- int ret;
-
- preempt_disable();
- if (cpu == smp_processor_id()) {
- ret = rdmsr_eio(reg, eax, edx);
- } else {
- cmd.cpu = cpu;
- cmd.reg = reg;
-
- smp_call_function(msr_smp_rdmsr, &cmd, 1, 1);
-
- *eax = cmd.data[0];
- *edx = cmd.data[1];
-
- ret = cmd.err;
- }
- preempt_enable();
- return ret;
-}
-
-#else /* ! CONFIG_SMP */
-
-static inline int do_wrmsr(int cpu, u32 reg, u32 eax, u32 edx)
-{
- return wrmsr_eio(reg, eax, edx);
-}
-
-static inline int do_rdmsr(int cpu, u32 reg, u32 *eax, u32 *edx)
-{
- return rdmsr_eio(reg, eax, edx);
-}
-
-#endif /* ! CONFIG_SMP */
-
-static loff_t msr_seek(struct file *file, loff_t offset, int orig)
-{
- loff_t ret = -EINVAL;
-
- lock_kernel();
- switch (orig) {
- case 0:
- file->f_pos = offset;
- ret = file->f_pos;
- break;
- case 1:
- file->f_pos += offset;
- ret = file->f_pos;
- }
- unlock_kernel();
- return ret;
-}
-
-static ssize_t msr_read(struct file *file, char __user * buf,
- size_t count, loff_t * ppos)
-{
- u32 __user *tmp = (u32 __user *) buf;
- u32 data[2];
- size_t rv;
- u32 reg = *ppos;
- int cpu = iminor(file->f_dentry->d_inode);
- int err;
-
- if (count % 8)
- return -EINVAL; /* Invalid chunk size */
-
- for (rv = 0; count; count -= 8) {
- err = do_rdmsr(cpu, reg, &data[0], &data[1]);
- if (err)
- return err;
- if (copy_to_user(tmp, &data, 8))
- return -EFAULT;
- tmp += 2;
- }
-
- return ((char __user *)tmp) - buf;
-}
-
-static ssize_t msr_write(struct file *file, const char __user *buf,
- size_t count, loff_t *ppos)
-{
- const u32 __user *tmp = (const u32 __user *)buf;
- u32 data[2];
- size_t rv;
- u32 reg = *ppos;
- int cpu = iminor(file->f_dentry->d_inode);
- int err;
-
- if (count % 8)
- return -EINVAL; /* Invalid chunk size */
-
- for (rv = 0; count; count -= 8) {
- if (copy_from_user(&data, tmp, 8))
- return -EFAULT;
- err = do_wrmsr(cpu, reg, data[0], data[1]);
- if (err)
- return err;
- tmp += 2;
- }
-
- return ((char __user *)tmp) - buf;
-}
-
-static int msr_open(struct inode *inode, struct file *file)
-{
- unsigned int cpu = iminor(file->f_dentry->d_inode);
- struct cpuinfo_x86 *c = &(cpu_data)[cpu];
-
- if (cpu >= NR_CPUS || !cpu_online(cpu))
- return -ENXIO; /* No such CPU */
- if (!cpu_has(c, X86_FEATURE_MSR))
- return -EIO; /* MSR not supported */
-
- return 0;
-}
-
-/*
- * File operations we support
- */
-static struct file_operations msr_fops = {
- .owner = THIS_MODULE,
- .llseek = msr_seek,
- .read = msr_read,
- .write = msr_write,
- .open = msr_open,
-};
-
-static int __init msr_init(void)
-{
- if (register_chrdev(MSR_MAJOR, "cpu/msr", &msr_fops)) {
- printk(KERN_ERR "msr: unable to get major %d for msr\n",
- MSR_MAJOR);
- return -EBUSY;
- }
-
- return 0;
-}
-
-static void __exit msr_exit(void)
-{
- unregister_chrdev(MSR_MAJOR, "cpu/msr");
-}
-
-module_init(msr_init);
-module_exit(msr_exit)
-
-MODULE_AUTHOR("H. Peter Anvin <hpa@zytor.com>");
-MODULE_DESCRIPTION("x86 generic MSR driver");
-MODULE_LICENSE("GPL");
diff --git a/arch/x86_64/kernel/nmi.c b/arch/x86_64/kernel/nmi.c
index caf164959e1..4388b8a5bae 100644
--- a/arch/x86_64/kernel/nmi.c
+++ b/arch/x86_64/kernel/nmi.c
@@ -14,7 +14,6 @@
#include <linux/config.h>
#include <linux/mm.h>
-#include <linux/irq.h>
#include <linux/delay.h>
#include <linux/bootmem.h>
#include <linux/smp_lock.h>
@@ -488,8 +487,8 @@ void nmi_watchdog_tick (struct pt_regs * regs, unsigned reason)
== NOTIFY_STOP) {
local_set(&__get_cpu_var(alert_counter), 0);
return;
- }
- die_nmi("NMI Watchdog detected LOCKUP on CPU%d", regs);
+ }
+ die_nmi("NMI Watchdog detected LOCKUP on CPU %d\n", regs);
}
} else {
__get_cpu_var(last_irq_sum) = sum;
diff --git a/arch/x86_64/kernel/pci-gart.c b/arch/x86_64/kernel/pci-gart.c
index 57f35c68aa3..cf0a0315d58 100644
--- a/arch/x86_64/kernel/pci-gart.c
+++ b/arch/x86_64/kernel/pci-gart.c
@@ -191,11 +191,9 @@ static void *dma_alloc_pages(struct device *dev, unsigned gfp, unsigned order)
{
struct page *page;
int node;
- if (dev->bus == &pci_bus_type) {
- cpumask_t mask;
- mask = pcibus_to_cpumask(to_pci_dev(dev)->bus);
- node = cpu_to_node(first_cpu(mask));
- } else
+ if (dev->bus == &pci_bus_type)
+ node = pcibus_to_node(to_pci_dev(dev)->bus);
+ else
node = numa_node_id();
page = alloc_pages_node(node, gfp, order);
return page ? page_address(page) : NULL;
diff --git a/arch/x86_64/kernel/process.c b/arch/x86_64/kernel/process.c
index 8661f82ac70..b5a89c0bdf5 100644
--- a/arch/x86_64/kernel/process.c
+++ b/arch/x86_64/kernel/process.c
@@ -32,7 +32,6 @@
#include <linux/a.out.h>
#include <linux/interrupt.h>
#include <linux/delay.h>
-#include <linux/irq.h>
#include <linux/ptrace.h>
#include <linux/utsname.h>
#include <linux/random.h>
@@ -123,6 +122,7 @@ static void poll_idle (void)
: :
"i" (_TIF_NEED_RESCHED),
"m" (current_thread_info()->flags));
+ clear_thread_flag(TIF_POLLING_NRFLAG);
} else {
set_need_resched();
}
@@ -271,8 +271,11 @@ void __show_regs(struct pt_regs * regs)
printk("\n");
print_modules();
- printk("Pid: %d, comm: %.20s %s %s\n",
- current->pid, current->comm, print_tainted(), system_utsname.release);
+ printk("Pid: %d, comm: %.20s %s %s %.*s\n",
+ current->pid, current->comm, print_tainted(),
+ system_utsname.release,
+ (int)strcspn(system_utsname.version, " "),
+ system_utsname.version);
printk("RIP: %04lx:[<%016lx>] ", regs->cs & 0xffff, regs->rip);
printk_address(regs->rip);
printk("\nRSP: %04lx:%016lx EFLAGS: %08lx\n", regs->ss, regs->rsp, regs->eflags);
@@ -483,33 +486,6 @@ out:
}
/*
- * This function selects if the context switch from prev to next
- * has to tweak the TSC disable bit in the cr4.
- */
-static inline void disable_tsc(struct task_struct *prev_p,
- struct task_struct *next_p)
-{
- struct thread_info *prev, *next;
-
- /*
- * gcc should eliminate the ->thread_info dereference if
- * has_secure_computing returns 0 at compile time (SECCOMP=n).
- */
- prev = prev_p->thread_info;
- next = next_p->thread_info;
-
- if (has_secure_computing(prev) || has_secure_computing(next)) {
- /* slow path here */
- if (has_secure_computing(prev) &&
- !has_secure_computing(next)) {
- write_cr4(read_cr4() & ~X86_CR4_TSD);
- } else if (!has_secure_computing(prev) &&
- has_secure_computing(next))
- write_cr4(read_cr4() | X86_CR4_TSD);
- }
-}
-
-/*
* This special macro can be used to load a debugging register
*/
#define loaddebug(thread,r) set_debug(thread->debugreg ## r, r)
@@ -627,8 +603,6 @@ struct task_struct *__switch_to(struct task_struct *prev_p, struct task_struct *
}
}
- disable_tsc(prev_p, next_p);
-
return prev_p;
}
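
The __show_regs() change above prints only the first whitespace-delimited word of system_utsname.version by feeding strcspn() into a "%.*s" precision argument. As a side note, here is a minimal userspace sketch of that idiom, not part of the patch; the version string below is invented for illustration:

    #include <stdio.h>
    #include <string.h>

    int main(void)
    {
            /* Stand-in for system_utsname.version; the string is invented. */
            const char *version = "#1 SMP Mon Jan 1 00:00:00 UTC 2007";

            /* strcspn() gives the length of the initial run containing no
             * space, so "%.*s" prints just the first word ("#1"). */
            printf("%.*s\n", (int)strcspn(version, " "), version);
            return 0;
    }
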
diff --git a/arch/x86_64/kernel/setup.c b/arch/x86_64/kernel/setup.c
index 9aec524be3e..351d8d64c2f 100644
--- a/arch/x86_64/kernel/setup.c
+++ b/arch/x86_64/kernel/setup.c
@@ -336,6 +336,11 @@ static __init void parse_cmdline_early (char ** cmdline_p)
#endif
#endif
+ if (!memcmp(from, "disable_timer_pin_1", 19))
+ disable_timer_pin_1 = 1;
+ if (!memcmp(from, "enable_timer_pin_1", 18))
+ disable_timer_pin_1 = -1;
+
if (!memcmp(from, "nolapic", 7) ||
!memcmp(from, "disableapic", 11))
disable_apic = 1;
@@ -755,6 +760,24 @@ static void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c)
}
}
+#ifdef CONFIG_NUMA
+static int nearby_node(int apicid)
+{
+ int i;
+ for (i = apicid - 1; i >= 0; i--) {
+ int node = apicid_to_node[i];
+ if (node != NUMA_NO_NODE && node_online(node))
+ return node;
+ }
+ for (i = apicid + 1; i < MAX_LOCAL_APIC; i++) {
+ int node = apicid_to_node[i];
+ if (node != NUMA_NO_NODE && node_online(node))
+ return node;
+ }
+ return first_node(node_online_map); /* Shouldn't happen */
+}
+#endif
+
/*
 * On an AMD dual core setup the lower bits of the APIC id distinguish the cores.
* Assumes number of cores is a power of two.
@@ -763,8 +786,11 @@ static void __init amd_detect_cmp(struct cpuinfo_x86 *c)
{
#ifdef CONFIG_SMP
int cpu = smp_processor_id();
- int node = 0;
unsigned bits;
+#ifdef CONFIG_NUMA
+ int node = 0;
+ unsigned apicid = phys_proc_id[cpu];
+#endif
bits = 0;
while ((1 << bits) < c->x86_num_cores)
@@ -776,20 +802,32 @@ static void __init amd_detect_cmp(struct cpuinfo_x86 *c)
phys_proc_id[cpu] >>= bits;
#ifdef CONFIG_NUMA
- /* When an ACPI SRAT table is available use the mappings from SRAT
- instead. */
- if (acpi_numa <= 0) {
- node = phys_proc_id[cpu];
- if (!node_online(node))
- node = first_node(node_online_map);
- cpu_to_node[cpu] = node;
- } else {
- node = cpu_to_node[cpu];
- }
+ node = phys_proc_id[cpu];
+ if (apicid_to_node[apicid] != NUMA_NO_NODE)
+ node = apicid_to_node[apicid];
+ if (!node_online(node)) {
+ /* Two possibilities here:
+ - The CPU is missing memory and no node was created.
+ In that case try picking one from a nearby CPU
+ - The APIC IDs differ from the HyperTransport node IDs
+ which the K8 northbridge parsing fills in.
+ Assume they are all increased by a constant offset,
+ but in the same order as the HT nodeids.
+ If that doesn't result in a usable node fall back to the
+ path for the previous case. */
+ int ht_nodeid = apicid - (phys_proc_id[0] << bits);
+ if (ht_nodeid >= 0 &&
+ apicid_to_node[ht_nodeid] != NUMA_NO_NODE)
+ node = apicid_to_node[ht_nodeid];
+ /* Pick a nearby node */
+ if (!node_online(node))
+ node = nearby_node(apicid);
+ }
+ cpu_to_node[cpu] = node;
+
+ printk(KERN_INFO "CPU %d(%d) -> Node %d -> Core %d\n",
+ cpu, c->x86_num_cores, node, cpu_core_id[cpu]);
#endif
-
- printk(KERN_INFO "CPU %d(%d) -> Node %d -> Core %d\n",
- cpu, c->x86_num_cores, node, cpu_core_id[cpu]);
#endif
}
@@ -909,6 +947,25 @@ static int __cpuinit intel_num_cpu_cores(struct cpuinfo_x86 *c)
return 1;
}
+static void srat_detect_node(void)
+{
+#ifdef CONFIG_NUMA
+ unsigned apicid, node;
+ int cpu = smp_processor_id();
+
+ /* Don't do the funky fallback heuristics the AMD version employs
+ for now. */
+ apicid = phys_proc_id[cpu];
+ node = apicid_to_node[apicid];
+ if (node == NUMA_NO_NODE)
+ node = 0;
+ cpu_to_node[cpu] = node;
+
+ if (acpi_numa > 0)
+ printk(KERN_INFO "CPU %d -> Node %d\n", cpu, node);
+#endif
+}
+
static void __cpuinit init_intel(struct cpuinfo_x86 *c)
{
/* Cache sizes */
@@ -927,6 +984,8 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c)
if (c->x86 >= 15)
set_bit(X86_FEATURE_CONSTANT_TSC, &c->x86_capability);
c->x86_num_cores = intel_num_cpu_cores(c);
+
+ srat_detect_node();
}
static void __cpuinit get_cpu_vendor(struct cpuinfo_x86 *c)
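
The amd_detect_cmp() rework above falls back to nearby_node(), which scans outward from a CPU's APIC ID for the closest APIC ID that already has a node assigned. The following is an illustrative userspace sketch of that search, not part of the patch: the table contents are invented and the node_online() check is omitted.

    #include <stdio.h>

    #define MAX_APIC 16
    #define NO_NODE  (-1)            /* stand-in for NUMA_NO_NODE */

    static int apicid_to_node[MAX_APIC];

    /* Walk downwards from the given APIC ID, then upwards, and return the
     * first neighbouring APIC ID that already has a node assigned. */
    static int nearby_node(int apicid)
    {
            int i;

            for (i = apicid - 1; i >= 0; i--)
                    if (apicid_to_node[i] != NO_NODE)
                            return apicid_to_node[i];
            for (i = apicid + 1; i < MAX_APIC; i++)
                    if (apicid_to_node[i] != NO_NODE)
                            return apicid_to_node[i];
            return 0;       /* shouldn't happen if any node exists at all */
    }

    int main(void)
    {
            int i;

            for (i = 0; i < MAX_APIC; i++)
                    apicid_to_node[i] = NO_NODE;
            apicid_to_node[0] = 0;  /* invented: APIC 0 sits on node 0 */
            apicid_to_node[4] = 1;  /* invented: APIC 4 sits on node 1 */

            printf("APIC 3 -> node %d\n", nearby_node(3));  /* nearest: node 0 */
            printf("APIC 6 -> node %d\n", nearby_node(6));  /* nearest: node 1 */
            return 0;
    }
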
diff --git a/arch/x86_64/kernel/setup64.c b/arch/x86_64/kernel/setup64.c
index e3ffcacc8c9..bd33be24a38 100644
--- a/arch/x86_64/kernel/setup64.c
+++ b/arch/x86_64/kernel/setup64.c
@@ -94,7 +94,7 @@ void __init setup_per_cpu_areas(void)
size = PERCPU_ENOUGH_ROOM;
#endif
- for (i = 0; i < NR_CPUS; i++) {
+ for_each_cpu_mask (i, cpu_possible_map) {
char *ptr;
if (!NODE_DATA(cpu_to_node(i))) {
@@ -119,7 +119,6 @@ void pda_init(int cpu)
asm volatile("movl %0,%%fs ; movl %0,%%gs" :: "r" (0));
wrmsrl(MSR_GS_BASE, cpu_pda + cpu);
- pda->me = pda;
pda->cpunumber = cpu;
pda->irqcount = -1;
pda->kernelstack =
diff --git a/arch/x86_64/kernel/smp.c b/arch/x86_64/kernel/smp.c
index e5958220d6b..9db9dda161b 100644
--- a/arch/x86_64/kernel/smp.c
+++ b/arch/x86_64/kernel/smp.c
@@ -12,7 +12,6 @@
#include <linux/init.h>
#include <linux/mm.h>
-#include <linux/irq.h>
#include <linux/delay.h>
#include <linux/spinlock.h>
#include <linux/smp_lock.h>
@@ -29,6 +28,8 @@
#include <asm/proto.h>
#include <asm/apicdef.h>
+#define __cpuinit __init
+
/*
* Smarter SMP flushing macros.
* c/o Linus Torvalds.
@@ -37,19 +38,41 @@
 * writing to user space from interrupts. (It's not allowed anyway).
*
* Optimizations Manfred Spraul <manfred@colorfullife.com>
+ *
+ * More scalable flush, from Andi Kleen
+ *
+ * To avoid global state use 8 different call vectors.
+ * Each CPU uses a specific vector to trigger flushes on other
+ * CPUs. Depending on the received vector the target CPUs look into
+ * the right per cpu variable for the flush data.
+ *
+ * With more than 8 CPUs they are hashed to the 8 available
+ * vectors. The limited global vector space forces us to this right now.
+ * In future when interrupts are split into per CPU domains this could be
+ * fixed, at the cost of triggering multiple IPIs in some cases.
*/
-static cpumask_t flush_cpumask;
-static struct mm_struct * flush_mm;
-static unsigned long flush_va;
-static DEFINE_SPINLOCK(tlbstate_lock);
+union smp_flush_state {
+ struct {
+ cpumask_t flush_cpumask;
+ struct mm_struct *flush_mm;
+ unsigned long flush_va;
#define FLUSH_ALL -1ULL
+ spinlock_t tlbstate_lock;
+ };
+ char pad[SMP_CACHE_BYTES];
+} ____cacheline_aligned;
+
+/* State is put into the per CPU data section, but padded
+ to a full cache line because other CPUs can access it and we don't
+ want false sharing in the per cpu data segment. */
+static DEFINE_PER_CPU(union smp_flush_state, flush_state);
/*
* We cannot call mmdrop() because we are in interrupt context,
* instead update mm->cpu_vm_mask.
*/
-static inline void leave_mm (unsigned long cpu)
+static inline void leave_mm(int cpu)
{
if (read_pda(mmu_state) == TLBSTATE_OK)
BUG();
@@ -101,15 +124,25 @@ static inline void leave_mm (unsigned long cpu)
*
* 1) Flush the tlb entries if the cpu uses the mm that's being flushed.
* 2) Leave the mm if we are in the lazy tlb mode.
+ *
+ * Interrupts are disabled.
*/
-asmlinkage void smp_invalidate_interrupt (void)
+asmlinkage void smp_invalidate_interrupt(struct pt_regs *regs)
{
- unsigned long cpu;
+ int cpu;
+ int sender;
+ union smp_flush_state *f;
- cpu = get_cpu();
+ cpu = smp_processor_id();
+ /*
+ * orig_rax contains the interrupt vector - 256.
+ * Use that to determine where the sender put the data.
+ */
+ sender = regs->orig_rax + 256 - INVALIDATE_TLB_VECTOR_START;
+ f = &per_cpu(flush_state, sender);
- if (!cpu_isset(cpu, flush_cpumask))
+ if (!cpu_isset(cpu, f->flush_cpumask))
goto out;
/*
* This was a BUG() but until someone can quote me the
@@ -120,64 +153,63 @@ asmlinkage void smp_invalidate_interrupt (void)
* BUG();
*/
- if (flush_mm == read_pda(active_mm)) {
+ if (f->flush_mm == read_pda(active_mm)) {
if (read_pda(mmu_state) == TLBSTATE_OK) {
- if (flush_va == FLUSH_ALL)
+ if (f->flush_va == FLUSH_ALL)
local_flush_tlb();
else
- __flush_tlb_one(flush_va);
+ __flush_tlb_one(f->flush_va);
} else
leave_mm(cpu);
}
out:
ack_APIC_irq();
- cpu_clear(cpu, flush_cpumask);
- put_cpu_no_resched();
+ cpu_clear(cpu, f->flush_cpumask);
}
static void flush_tlb_others(cpumask_t cpumask, struct mm_struct *mm,
unsigned long va)
{
- cpumask_t tmp;
- /*
- * A couple of (to be removed) sanity checks:
- *
- * - we do not send IPIs to not-yet booted CPUs.
- * - current CPU must not be in mask
- * - mask must exist :)
- */
- BUG_ON(cpus_empty(cpumask));
- cpus_and(tmp, cpumask, cpu_online_map);
- BUG_ON(!cpus_equal(tmp, cpumask));
- BUG_ON(cpu_isset(smp_processor_id(), cpumask));
- if (!mm)
- BUG();
+ int sender;
+ union smp_flush_state *f;
- /*
- * I'm not happy about this global shared spinlock in the
- * MM hot path, but we'll see how contended it is.
- * Temporarily this turns IRQs off, so that lockups are
- * detected by the NMI watchdog.
- */
- spin_lock(&tlbstate_lock);
-
- flush_mm = mm;
- flush_va = va;
- cpus_or(flush_cpumask, cpumask, flush_cpumask);
+ /* Caller has disabled preemption */
+ sender = smp_processor_id() % NUM_INVALIDATE_TLB_VECTORS;
+ f = &per_cpu(flush_state, sender);
+
+ /* Could avoid this lock when
+ num_online_cpus() <= NUM_INVALIDATE_TLB_VECTORS, but it is
+ probably not worth checking this for a cache-hot lock. */
+ spin_lock(&f->tlbstate_lock);
+
+ f->flush_mm = mm;
+ f->flush_va = va;
+ cpus_or(f->flush_cpumask, cpumask, f->flush_cpumask);
/*
* We have to send the IPI only to
* CPUs affected.
*/
- send_IPI_mask(cpumask, INVALIDATE_TLB_VECTOR);
+ send_IPI_mask(cpumask, INVALIDATE_TLB_VECTOR_START + sender);
+
+ while (!cpus_empty(f->flush_cpumask))
+ cpu_relax();
- while (!cpus_empty(flush_cpumask))
- mb(); /* nothing. lockup detection does not belong here */;
+ f->flush_mm = NULL;
+ f->flush_va = 0;
+ spin_unlock(&f->tlbstate_lock);
+}
- flush_mm = NULL;
- flush_va = 0;
- spin_unlock(&tlbstate_lock);
+int __cpuinit init_smp_flush(void)
+{
+ int i;
+ for_each_cpu_mask(i, cpu_possible_map) {
+ spin_lock_init(&per_cpu(flush_state.tlbstate_lock, i));
+ }
+ return 0;
}
+
+core_initcall(init_smp_flush);
void flush_tlb_current_task(void)
{
@@ -295,8 +327,11 @@ void unlock_ipi_call_lock(void)
/*
* this function sends a 'generic call function' IPI to one other CPU
* in the system.
+ *
+ * cpu is a standard Linux logical CPU number.
*/
-static void __smp_call_function_single (int cpu, void (*func) (void *info), void *info,
+static void
+__smp_call_function_single(int cpu, void (*func) (void *info), void *info,
int nonatomic, int wait)
{
struct call_data_struct data;
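
The smp.c rewrite above spreads TLB-flush IPIs over eight vectors: the sender hashes its CPU number onto a vector, and the interrupt handler recovers the same index from orig_rax to find the right per-CPU flush_state slot. Below is a small userspace sketch of that index arithmetic only; the vector base value is an assumption here, the real one comes from the hw_irq.h headers.

    #include <stdio.h>

    #define NUM_INVALIDATE_TLB_VECTORS      8
    /* Assumed vector base for illustration; the real value is in hw_irq.h. */
    #define INVALIDATE_TLB_VECTOR_START     0xf0

    int main(void)
    {
            int cpu;

            for (cpu = 0; cpu < 10; cpu++) {
                    /* Sender: hash the CPU onto one of the eight flush vectors. */
                    int sender = cpu % NUM_INVALIDATE_TLB_VECTORS;
                    int vector = INVALIDATE_TLB_VECTOR_START + sender;

                    /* Receiver: orig_rax holds (vector - 256), so the handler
                     * recovers the same slot index the way the patch does. */
                    long orig_rax = vector - 256;
                    int slot = orig_rax + 256 - INVALIDATE_TLB_VECTOR_START;

                    printf("cpu %d -> vector 0x%x -> flush_state slot %d\n",
                           cpu, vector, slot);
            }
            return 0;
    }

With more CPUs than vectors, two senders can hash onto the same slot, which is why each slot in the patch keeps its own spinlock.
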
diff --git a/arch/x86_64/kernel/smpboot.c b/arch/x86_64/kernel/smpboot.c
index 4fb34b5cb1f..e12d7baeb33 100644
--- a/arch/x86_64/kernel/smpboot.c
+++ b/arch/x86_64/kernel/smpboot.c
@@ -44,7 +44,6 @@
#include <linux/mm.h>
#include <linux/kernel_stat.h>
#include <linux/smp_lock.h>
-#include <linux/irq.h>
#include <linux/bootmem.h>
#include <linux/thread_info.h>
#include <linux/module.h>
@@ -58,6 +57,8 @@
#include <asm/tlbflush.h>
#include <asm/proto.h>
#include <asm/nmi.h>
+#include <asm/irq.h>
+#include <asm/hw_irq.h>
/* Number of siblings per CPU package */
int smp_num_siblings = 1;
@@ -413,8 +414,13 @@ void __cpuinit smp_callin(void)
/*
* Get our bogomips.
+ *
+ * Need to enable IRQs because it can take longer and then
+ * the NMI watchdog might kill us.
*/
+ local_irq_enable();
calibrate_delay();
+ local_irq_disable();
Dprintk("Stack at about %p\n",&cpuid);
disable_APIC_timer();
@@ -540,8 +546,8 @@ static void inquire_remote_apic(int apicid)
*/
apic_wait_icr_idle();
- apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(apicid));
- apic_write_around(APIC_ICR, APIC_DM_REMRD | regs[i]);
+ apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(apicid));
+ apic_write(APIC_ICR, APIC_DM_REMRD | regs[i]);
timeout = 0;
do {
@@ -574,12 +580,12 @@ static int __cpuinit wakeup_secondary_via_INIT(int phys_apicid, unsigned int sta
/*
* Turn INIT on target chip
*/
- apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));
+ apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));
/*
* Send IPI
*/
- apic_write_around(APIC_ICR, APIC_INT_LEVELTRIG | APIC_INT_ASSERT
+ apic_write(APIC_ICR, APIC_INT_LEVELTRIG | APIC_INT_ASSERT
| APIC_DM_INIT);
Dprintk("Waiting for send to finish...\n");
@@ -595,10 +601,10 @@ static int __cpuinit wakeup_secondary_via_INIT(int phys_apicid, unsigned int sta
Dprintk("Deasserting INIT.\n");
/* Target chip */
- apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));
+ apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));
/* Send IPI */
- apic_write_around(APIC_ICR, APIC_INT_LEVELTRIG | APIC_DM_INIT);
+ apic_write(APIC_ICR, APIC_INT_LEVELTRIG | APIC_DM_INIT);
Dprintk("Waiting for send to finish...\n");
timeout = 0;
@@ -610,16 +616,7 @@ static int __cpuinit wakeup_secondary_via_INIT(int phys_apicid, unsigned int sta
atomic_set(&init_deasserted, 1);
- /*
- * Should we send STARTUP IPIs ?
- *
- * Determine this based on the APIC version.
- * If we don't have an integrated APIC, don't send the STARTUP IPIs.
- */
- if (APIC_INTEGRATED(apic_version[phys_apicid]))
- num_starts = 2;
- else
- num_starts = 0;
+ num_starts = 2;
/*
* Run STARTUP IPI loop.
@@ -640,12 +637,11 @@ static int __cpuinit wakeup_secondary_via_INIT(int phys_apicid, unsigned int sta
*/
/* Target chip */
- apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));
+ apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));
/* Boot on the stack */
/* Kick the second */
- apic_write_around(APIC_ICR, APIC_DM_STARTUP
- | (start_rip >> 12));
+ apic_write(APIC_ICR, APIC_DM_STARTUP | (start_rip >> 12));
/*
* Give the other CPU some time to accept the IPI.
@@ -864,17 +860,6 @@ static __cpuinit void smp_cleanup_boot(void)
* Reset trampoline flag
*/
*((volatile int *) phys_to_virt(0x467)) = 0;
-
-#ifndef CONFIG_HOTPLUG_CPU
- /*
- * Free pages reserved for SMP bootup.
- * When you add hotplug CPU support later remove this
- * Note there is more work to be done for later CPU bootup.
- */
-
- free_page((unsigned long) __va(PAGE_SIZE));
- free_page((unsigned long) __va(SMP_TRAMPOLINE_BASE));
-#endif
}
/*
diff --git a/arch/x86_64/kernel/suspend.c b/arch/x86_64/kernel/suspend.c
index 0612640d91b..ebb9abf3ce6 100644
--- a/arch/x86_64/kernel/suspend.c
+++ b/arch/x86_64/kernel/suspend.c
@@ -8,23 +8,8 @@
*/
#include <linux/config.h>
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/init.h>
-#include <linux/types.h>
-#include <linux/spinlock.h>
-#include <linux/poll.h>
-#include <linux/delay.h>
-#include <linux/sysrq.h>
-#include <linux/proc_fs.h>
-#include <linux/irq.h>
-#include <linux/pm.h>
-#include <linux/device.h>
+#include <linux/smp.h>
#include <linux/suspend.h>
-#include <asm/uaccess.h>
-#include <asm/acpi.h>
-#include <asm/tlbflush.h>
-#include <asm/io.h>
#include <asm/proto.h>
struct saved_context saved_context;
diff --git a/arch/x86_64/kernel/time.c b/arch/x86_64/kernel/time.c
index 7b6abe05825..2373cb8b862 100644
--- a/arch/x86_64/kernel/time.c
+++ b/arch/x86_64/kernel/time.c
@@ -18,7 +18,6 @@
#include <linux/interrupt.h>
#include <linux/init.h>
#include <linux/mc146818rtc.h>
-#include <linux/irq.h>
#include <linux/time.h>
#include <linux/ioport.h>
#include <linux/module.h>
@@ -937,7 +936,6 @@ void __init time_init(void)
vxtime.mode = VXTIME_TSC;
vxtime.quot = (1000000L << 32) / vxtime_hz;
vxtime.tsc_quot = (1000L << 32) / cpu_khz;
- vxtime.hz = vxtime_hz;
rdtscll_sync(&vxtime.last_tsc);
setup_irq(0, &irq0);
diff --git a/arch/x86_64/kernel/traps.c b/arch/x86_64/kernel/traps.c
index f238d6078a5..b5e09e6b553 100644
--- a/arch/x86_64/kernel/traps.c
+++ b/arch/x86_64/kernel/traps.c
@@ -47,9 +47,6 @@
#include <asm/proto.h>
#include <asm/nmi.h>
-#include <linux/irq.h>
-
-
extern struct gate_struct idt_table[256];
asmlinkage void divide_error(void);
@@ -324,13 +321,13 @@ void handle_BUG(struct pt_regs *regs)
if (__copy_from_user(&f, (struct bug_frame *) regs->rip,
sizeof(struct bug_frame)))
return;
- if ((unsigned long)f.filename < __PAGE_OFFSET ||
+ if (f.filename >= 0 ||
f.ud2[0] != 0x0f || f.ud2[1] != 0x0b)
return;
- if (__get_user(tmp, f.filename))
- f.filename = "unmapped filename";
+ if (__get_user(tmp, (char *)(long)f.filename))
+ f.filename = (int)(long)"unmapped filename";
printk("----------- [cut here ] --------- [please bite here ] ---------\n");
- printk(KERN_ALERT "Kernel BUG at %.50s:%d\n", f.filename, f.line);
+ printk(KERN_ALERT "Kernel BUG at %.50s:%d\n", (char *)(long)f.filename, f.line);
}
#ifdef CONFIG_BUG
@@ -343,30 +340,33 @@ void out_of_line_bug(void)
static DEFINE_SPINLOCK(die_lock);
static int die_owner = -1;
-void oops_begin(void)
+unsigned long oops_begin(void)
{
- int cpu = safe_smp_processor_id();
- /* racy, but better than risking deadlock. */
- local_irq_disable();
+ int cpu = safe_smp_processor_id();
+ unsigned long flags;
+
+ /* racy, but better than risking deadlock. */
+ local_irq_save(flags);
if (!spin_trylock(&die_lock)) {
if (cpu == die_owner)
/* nested oops. should stop eventually */;
else
- spin_lock(&die_lock);
+ spin_lock(&die_lock);
}
- die_owner = cpu;
+ die_owner = cpu;
console_verbose();
- bust_spinlocks(1);
+ bust_spinlocks(1);
+ return flags;
}
-void oops_end(void)
+void oops_end(unsigned long flags)
{
die_owner = -1;
- bust_spinlocks(0);
- spin_unlock(&die_lock);
+ bust_spinlocks(0);
+ spin_unlock_irqrestore(&die_lock, flags);
if (panic_on_oops)
- panic("Oops");
-}
+ panic("Oops");
+}
void __die(const char * str, struct pt_regs * regs, long err)
{
@@ -392,10 +392,11 @@ void __die(const char * str, struct pt_regs * regs, long err)
void die(const char * str, struct pt_regs * regs, long err)
{
- oops_begin();
+ unsigned long flags = oops_begin();
+
handle_BUG(regs);
__die(str, regs, err);
- oops_end();
+ oops_end(flags);
do_exit(SIGSEGV);
}
static inline void die_if_kernel(const char * str, struct pt_regs * regs, long err)
@@ -406,7 +407,8 @@ static inline void die_if_kernel(const char * str, struct pt_regs * regs, long e
void die_nmi(char *str, struct pt_regs *regs)
{
- oops_begin();
+ unsigned long flags = oops_begin();
+
/*
* We are in trouble anyway, lets at least try
* to get a message out.
@@ -416,7 +418,7 @@ void die_nmi(char *str, struct pt_regs *regs)
if (panic_on_timeout || panic_on_oops)
panic("nmi watchdog");
printk("console shuts up ...\n");
- oops_end();
+ oops_end(flags);
do_exit(SIGSEGV);
}
@@ -790,13 +792,16 @@ asmlinkage void do_coprocessor_error(struct pt_regs *regs)
*/
cwd = get_fpu_cwd(task);
swd = get_fpu_swd(task);
- switch (((~cwd) & swd & 0x3f) | (swd & 0x240)) {
+ switch (swd & ~cwd & 0x3f) {
case 0x000:
default:
break;
case 0x001: /* Invalid Op */
- case 0x041: /* Stack Fault */
- case 0x241: /* Stack Fault | Direction */
+ /*
+ * swd & 0x240 == 0x040: Stack Underflow
+ * swd & 0x240 == 0x240: Stack Overflow
+ * User must clear the SF bit (0x40) if set
+ */
info.si_code = FPE_FLTINV;
break;
case 0x002: /* Denormalize */
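
The do_coprocessor_error() hunk above narrows the switch to the unmasked exception bits (swd & ~cwd & 0x3f) and documents the 0x240 stack-fault encoding in the status word. A hedged userspace sketch of that decoding, with invented register values:

    #include <stdio.h>

    int main(void)
    {
            /* Invented x87 words for illustration: control word with the
             * exception mask bits cleared, status word with the invalid-op
             * flag, the stack-fault flag and C1 (overflow direction) set. */
            unsigned short cwd = 0x0340;
            unsigned short swd = 0x0241;

            switch (swd & ~cwd & 0x3f) {    /* unmasked pending exceptions */
            case 0x001:
                    /* swd & 0x240 == 0x240: stack overflow,
                     * swd & 0x240 == 0x040: stack underflow. */
                    printf("invalid op, %s\n",
                           (swd & 0x240) == 0x240 ? "stack overflow" :
                           (swd & 0x240) == 0x040 ? "stack underflow" :
                                                    "no stack fault");
                    break;
            default:
                    printf("other/no unmasked exception\n");
            }
            return 0;
    }
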
diff --git a/arch/x86_64/kernel/vsyscall.c b/arch/x86_64/kernel/vsyscall.c
index 2e573442594..70a0bd16085 100644
--- a/arch/x86_64/kernel/vsyscall.c
+++ b/arch/x86_64/kernel/vsyscall.c
@@ -107,7 +107,7 @@ static force_inline long time_syscall(long *t)
return secs;
}
-static int __vsyscall(0) vgettimeofday(struct timeval * tv, struct timezone * tz)
+int __vsyscall(0) vgettimeofday(struct timeval * tv, struct timezone * tz)
{
if (unlikely(!__sysctl_vsyscall))
return gettimeofday(tv,tz);
@@ -120,7 +120,7 @@ static int __vsyscall(0) vgettimeofday(struct timeval * tv, struct timezone * tz
/* This will break when the xtime seconds get inaccurate, but that is
* unlikely */
-static time_t __vsyscall(1) vtime(time_t *t)
+time_t __vsyscall(1) vtime(time_t *t)
{
if (unlikely(!__sysctl_vsyscall))
return time_syscall(t);
@@ -129,12 +129,12 @@ static time_t __vsyscall(1) vtime(time_t *t)
return __xtime.tv_sec;
}
-static long __vsyscall(2) venosys_0(void)
+long __vsyscall(2) venosys_0(void)
{
return -ENOSYS;
}
-static long __vsyscall(3) venosys_1(void)
+long __vsyscall(3) venosys_1(void)
{
return -ENOSYS;
}
diff --git a/arch/x86_64/mm/fault.c b/arch/x86_64/mm/fault.c
index 816732d8858..b75b872ec15 100644
--- a/arch/x86_64/mm/fault.c
+++ b/arch/x86_64/mm/fault.c
@@ -221,12 +221,13 @@ int unhandled_signal(struct task_struct *tsk, int sig)
static noinline void pgtable_bad(unsigned long address, struct pt_regs *regs,
unsigned long error_code)
{
- oops_begin();
+ unsigned long flags = oops_begin();
+
printk(KERN_ALERT "%s: Corrupted page table at address %lx\n",
current->comm, address);
dump_pagetable(address);
__die("Bad pagetable", regs, error_code);
- oops_end();
+ oops_end(flags);
do_exit(SIGKILL);
}
@@ -304,6 +305,7 @@ asmlinkage void __kprobes do_page_fault(struct pt_regs *regs,
unsigned long address;
const struct exception_table_entry *fixup;
int write;
+ unsigned long flags;
siginfo_t info;
#ifdef CONFIG_CHECKING
@@ -521,7 +523,7 @@ no_context:
* terminate things with extreme prejudice.
*/
- oops_begin();
+ flags = oops_begin();
if (address < PAGE_SIZE)
printk(KERN_ALERT "Unable to handle kernel NULL pointer dereference");
@@ -534,7 +536,7 @@ no_context:
__die("Oops", regs, error_code);
/* Executive summary in case the body of the oops scrolled away */
printk(KERN_EMERG "CR2: %016lx\n", address);
- oops_end();
+ oops_end(flags);
do_exit(SIGKILL);
/*
diff --git a/arch/x86_64/mm/init.c b/arch/x86_64/mm/init.c
index aa4a5189ece..e60a1a848de 100644
--- a/arch/x86_64/mm/init.c
+++ b/arch/x86_64/mm/init.c
@@ -57,31 +57,31 @@ DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
void show_mem(void)
{
- int i, total = 0, reserved = 0;
- int shared = 0, cached = 0;
+ long i, total = 0, reserved = 0;
+ long shared = 0, cached = 0;
pg_data_t *pgdat;
struct page *page;
- printk("Mem-info:\n");
+ printk(KERN_INFO "Mem-info:\n");
show_free_areas();
- printk("Free swap: %6ldkB\n", nr_swap_pages<<(PAGE_SHIFT-10));
+ printk(KERN_INFO "Free swap: %6ldkB\n", nr_swap_pages<<(PAGE_SHIFT-10));
for_each_pgdat(pgdat) {
for (i = 0; i < pgdat->node_spanned_pages; ++i) {
page = pfn_to_page(pgdat->node_start_pfn + i);
total++;
- if (PageReserved(page))
- reserved++;
- else if (PageSwapCache(page))
- cached++;
- else if (page_count(page))
- shared += page_count(page) - 1;
+ if (PageReserved(page))
+ reserved++;
+ else if (PageSwapCache(page))
+ cached++;
+ else if (page_count(page))
+ shared += page_count(page) - 1;
}
}
- printk("%d pages of RAM\n", total);
- printk("%d reserved pages\n",reserved);
- printk("%d pages shared\n",shared);
- printk("%d pages swap cached\n",cached);
+ printk(KERN_INFO "%lu pages of RAM\n", total);
+ printk(KERN_INFO "%lu reserved pages\n",reserved);
+ printk(KERN_INFO "%lu pages shared\n",shared);
+ printk(KERN_INFO "%lu pages swap cached\n",cached);
}
/* References to section boundaries */
@@ -381,41 +381,14 @@ void __init clear_kernel_mapping(unsigned long address, unsigned long size)
__flush_tlb_all();
}
-static inline int page_is_ram (unsigned long pagenr)
-{
- int i;
-
- for (i = 0; i < e820.nr_map; i++) {
- unsigned long addr, end;
-
- if (e820.map[i].type != E820_RAM) /* not usable memory */
- continue;
- /*
- * !!!FIXME!!! Some BIOSen report areas as RAM that
- * are not. Notably the 640->1Mb area. We need a sanity
- * check here.
- */
- addr = (e820.map[i].addr+PAGE_SIZE-1) >> PAGE_SHIFT;
- end = (e820.map[i].addr+e820.map[i].size) >> PAGE_SHIFT;
- if ((pagenr >= addr) && (pagenr < end))
- return 1;
- }
- return 0;
-}
-
-extern int swiotlb_force;
-
static struct kcore_list kcore_mem, kcore_vmalloc, kcore_kernel, kcore_modules,
kcore_vsyscall;
void __init mem_init(void)
{
- int codesize, reservedpages, datasize, initsize;
- int tmp;
+ long codesize, reservedpages, datasize, initsize;
#ifdef CONFIG_SWIOTLB
- if (swiotlb_force)
- swiotlb = 1;
if (!iommu_aperture &&
(end_pfn >= 0xffffffff>>PAGE_SHIFT || force_iommu))
swiotlb = 1;
@@ -436,25 +409,11 @@ void __init mem_init(void)
/* this will put all low memory onto the freelists */
#ifdef CONFIG_NUMA
- totalram_pages += numa_free_all_bootmem();
- tmp = 0;
- /* should count reserved pages here for all nodes */
+ totalram_pages = numa_free_all_bootmem();
#else
-
-#ifdef CONFIG_FLATMEM
- max_mapnr = end_pfn;
- if (!mem_map) BUG();
-#endif
-
- totalram_pages += free_all_bootmem();
-
- for (tmp = 0; tmp < end_pfn; tmp++)
- /*
- * Only count reserved RAM pages
- */
- if (page_is_ram(tmp) && PageReserved(pfn_to_page(tmp)))
- reservedpages++;
+ totalram_pages = free_all_bootmem();
#endif
+ reservedpages = end_pfn - totalram_pages - e820_hole_size(0, end_pfn);
after_bootmem = 1;
@@ -471,7 +430,7 @@ void __init mem_init(void)
kclist_add(&kcore_vsyscall, (void *)VSYSCALL_START,
VSYSCALL_END - VSYSCALL_START);
- printk("Memory: %luk/%luk available (%dk kernel code, %dk reserved, %dk data, %dk init)\n",
+ printk("Memory: %luk/%luk available (%ldk kernel code, %ldk reserved, %ldk data, %ldk init)\n",
(unsigned long) nr_free_pages() << (PAGE_SHIFT-10),
end_pfn << (PAGE_SHIFT-10),
codesize >> 10,
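
The mem_init() hunk above drops the per-page PageReserved() walk and derives the reserved count arithmetically: whatever lies below end_pfn that was neither handed to the page allocator nor an e820 hole must be reserved. A toy calculation with made-up figures:

    #include <stdio.h>

    int main(void)
    {
            /* All figures are invented for illustration. */
            long end_pfn        = 262144;   /* 1 GB of address space, 4 kB pages */
            long totalram_pages = 258000;   /* pages freed by the bootmem allocator */
            long hole_pages     = 2048;     /* e820 holes below end_pfn (8 MB) */

            long reservedpages = end_pfn - totalram_pages - hole_pages;

            /* 4 kB pages -> kB */
            printf("reserved: %ld pages (%ld kB)\n",
                   reservedpages, reservedpages << 2);
            return 0;
    }
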
diff --git a/arch/x86_64/mm/k8topology.c b/arch/x86_64/mm/k8topology.c
index ec35747aacd..65417b040c1 100644
--- a/arch/x86_64/mm/k8topology.c
+++ b/arch/x86_64/mm/k8topology.c
@@ -45,10 +45,12 @@ int __init k8_scan_nodes(unsigned long start, unsigned long end)
unsigned long prevbase;
struct node nodes[8];
int nodeid, i, nb;
+ unsigned char nodeids[8];
int found = 0;
u32 reg;
unsigned numnodes;
nodemask_t nodes_parsed;
+ unsigned dualcore = 0;
nodes_clear(nodes_parsed);
@@ -67,11 +69,15 @@ int __init k8_scan_nodes(unsigned long start, unsigned long end)
prevbase = 0;
for (i = 0; i < 8; i++) {
unsigned long base,limit;
-
+ u32 nodeid;
+
+ /* Undefined before E stepping, but hopefully 0 */
+ dualcore |= ((read_pci_config(0, nb, 3, 0xe8) >> 12) & 3) == 1;
base = read_pci_config(0, nb, 1, 0x40 + i*8);
limit = read_pci_config(0, nb, 1, 0x44 + i*8);
nodeid = limit & 7;
+ nodeids[i] = nodeid;
if ((base & 3) == 0) {
if (i < numnodes)
printk("Skipping disabled node %d\n", i);
@@ -157,8 +163,9 @@ int __init k8_scan_nodes(unsigned long start, unsigned long end)
for (i = 0; i < 8; i++) {
if (nodes[i].start != nodes[i].end) {
- /* assume 1:1 NODE:CPU */
- cpu_to_node[i] = i;
+ nodeid = nodeids[i];
+ apicid_to_node[nodeid << dualcore] = i;
+ apicid_to_node[(nodeid << dualcore) + dualcore] = i;
setup_node_bootmem(i, nodes[i].start, nodes[i].end);
}
}
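
In the k8topology.c change above, each HyperTransport node claims the local APIC IDs of both of its cores when the northbridge reports dual core, since the APIC ID is the HT node ID shifted left by one with the low bit selecting the core. An illustrative sketch of how that fills apicid_to_node, with all values invented:

    #include <stdio.h>

    #define MAX_APIC 16
    #define NO_NODE  (-1)

    int main(void)
    {
            int apicid_to_node[MAX_APIC];
            int dualcore = 1;               /* pretend dual core was detected */
            int nodeids[] = { 0, 1, 2, 3 }; /* invented HT node IDs */
            int i, n = 4;

            for (i = 0; i < MAX_APIC; i++)
                    apicid_to_node[i] = NO_NODE;

            /* Same idea as the hunk above: each node claims the APIC IDs of
             * both of its cores (nodeid << 1 and (nodeid << 1) + 1). */
            for (i = 0; i < n; i++) {
                    int nodeid = nodeids[i];
                    apicid_to_node[nodeid << dualcore] = i;
                    apicid_to_node[(nodeid << dualcore) + dualcore] = i;
            }

            for (i = 0; i < MAX_APIC; i++)
                    if (apicid_to_node[i] != NO_NODE)
                            printf("APIC %d -> node %d\n", i, apicid_to_node[i]);
            return 0;
    }

With dualcore == 0 both assignments collapse onto the same index, so single-core parts behave as before.
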
diff --git a/arch/x86_64/mm/numa.c b/arch/x86_64/mm/numa.c
index 04f7a33e144..80a49d9bd8a 100644
--- a/arch/x86_64/mm/numa.c
+++ b/arch/x86_64/mm/numa.c
@@ -28,8 +28,13 @@ bootmem_data_t plat_node_bdata[MAX_NUMNODES];
int memnode_shift;
u8 memnodemap[NODEMAPSIZE];
-unsigned char cpu_to_node[NR_CPUS] __read_mostly = { [0 ... NR_CPUS-1] = NUMA_NO_NODE };
-cpumask_t node_to_cpumask[MAX_NUMNODES] __read_mostly;
+unsigned char cpu_to_node[NR_CPUS] __read_mostly = {
+ [0 ... NR_CPUS-1] = NUMA_NO_NODE
+};
+unsigned char apicid_to_node[MAX_LOCAL_APIC] __cpuinitdata = {
+ [0 ... MAX_LOCAL_APIC-1] = NUMA_NO_NODE
+};
+cpumask_t node_to_cpumask[MAX_NUMNODES] __read_mostly;
int numa_off __initdata;
diff --git a/arch/x86_64/mm/srat.c b/arch/x86_64/mm/srat.c
index 8e3d097a9dd..4b2e844c15a 100644
--- a/arch/x86_64/mm/srat.c
+++ b/arch/x86_64/mm/srat.c
@@ -20,14 +20,20 @@
static struct acpi_table_slit *acpi_slit;
-/* Internal processor count */
-static unsigned int __initdata num_processors = 0;
-
static nodemask_t nodes_parsed __initdata;
static nodemask_t nodes_found __initdata;
static struct node nodes[MAX_NUMNODES] __initdata;
static __u8 pxm2node[256] = { [0 ... 255] = 0xff };
+static int node_to_pxm(int n);
+
+int pxm_to_node(int pxm)
+{
+ if ((unsigned)pxm >= 256)
+ return 0;
+ return pxm2node[pxm];
+}
+
static __init int setup_node(int pxm)
{
unsigned node = pxm2node[pxm];
@@ -44,14 +50,14 @@ static __init int setup_node(int pxm)
static __init int conflicting_nodes(unsigned long start, unsigned long end)
{
int i;
- for_each_online_node(i) {
+ for_each_node_mask(i, nodes_parsed) {
struct node *nd = &nodes[i];
if (nd->start == nd->end)
continue;
if (nd->end > start && nd->start < end)
- return 1;
+ return i;
if (nd->end == end && nd->start == start)
- return 1;
+ return i;
}
return -1;
}
@@ -75,8 +81,11 @@ static __init void cutoff_node(int i, unsigned long start, unsigned long end)
static __init void bad_srat(void)
{
+ int i;
printk(KERN_ERR "SRAT: SRAT not used.\n");
acpi_numa = -1;
+ for (i = 0; i < MAX_LOCAL_APIC; i++)
+ apicid_to_node[i] = NUMA_NO_NODE;
}
static __init inline int srat_disabled(void)
@@ -104,18 +113,10 @@ acpi_numa_processor_affinity_init(struct acpi_table_processor_affinity *pa)
bad_srat();
return;
}
- if (num_processors >= NR_CPUS) {
- printk(KERN_ERR "SRAT: Processor #%d (lapic %u) INVALID. (Max ID: %d).\n",
- num_processors, pa->apic_id, NR_CPUS);
- bad_srat();
- return;
- }
- cpu_to_node[num_processors] = node;
+ apicid_to_node[pa->apic_id] = node;
acpi_numa = 1;
- printk(KERN_INFO "SRAT: PXM %u -> APIC %u -> CPU %u -> Node %u\n",
- pxm, pa->apic_id, num_processors, node);
-
- num_processors++;
+ printk(KERN_INFO "SRAT: PXM %u -> APIC %u -> Node %u\n",
+ pxm, pa->apic_id, node);
}
/* Callback for parsing of the Proximity Domain <-> Memory Area mappings */
@@ -143,10 +144,15 @@ acpi_numa_memory_affinity_init(struct acpi_table_memory_affinity *ma)
printk(KERN_INFO "SRAT: hot plug zone found %lx - %lx \n",
start, end);
i = conflicting_nodes(start, end);
- if (i >= 0) {
+ if (i == node) {
+ printk(KERN_WARNING
+ "SRAT: Warning: PXM %d (%lx-%lx) overlaps with itself (%Lx-%Lx)\n",
+ pxm, start, end, nodes[i].start, nodes[i].end);
+ } else if (i >= 0) {
printk(KERN_ERR
- "SRAT: pxm %d overlap %lx-%lx with node %d(%Lx-%Lx)\n",
- pxm, start, end, i, nodes[i].start, nodes[i].end);
+ "SRAT: PXM %d (%lx-%lx) overlaps with PXM %d (%Lx-%Lx)\n",
+ pxm, start, end, node_to_pxm(i),
+ nodes[i].start, nodes[i].end);
bad_srat();
return;
}
@@ -174,6 +180,14 @@ int __init acpi_scan_nodes(unsigned long start, unsigned long end)
int i;
if (acpi_numa <= 0)
return -1;
+
+ /* First clean up the node list */
+ for_each_node_mask(i, nodes_parsed) {
+ cutoff_node(i, start, end);
+ if (nodes[i].start == nodes[i].end)
+ node_clear(i, nodes_parsed);
+ }
+
memnode_shift = compute_hash_shift(nodes, nodes_weight(nodes_parsed));
if (memnode_shift < 0) {
printk(KERN_ERR
@@ -181,16 +195,10 @@ int __init acpi_scan_nodes(unsigned long start, unsigned long end)
bad_srat();
return -1;
}
- for (i = 0; i < MAX_NUMNODES; i++) {
- if (!node_isset(i, nodes_parsed))
- continue;
- cutoff_node(i, start, end);
- if (nodes[i].start == nodes[i].end) {
- node_clear(i, nodes_parsed);
- continue;
- }
+
+ /* Finally register nodes */
+ for_each_node_mask(i, nodes_parsed)
setup_node_bootmem(i, nodes[i].start, nodes[i].end);
- }
for (i = 0; i < NR_CPUS; i++) {
if (cpu_to_node[i] == NUMA_NO_NODE)
continue;
@@ -201,7 +209,7 @@ int __init acpi_scan_nodes(unsigned long start, unsigned long end)
return 0;
}
-int node_to_pxm(int n)
+static int node_to_pxm(int n)
{
int i;
if (pxm2node[n] == n)
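
conflicting_nodes() in the srat.c change above now reports which node a new PXM range collides with, using the usual half-open interval test (nd->end > start && nd->start < end). A self-contained sketch of that overlap check on made-up ranges; the exact-match branch from the patch is left out:

    #include <stdio.h>

    struct range { unsigned long start, end; };

    /* Two half-open ranges [a.start, a.end) and [b.start, b.end) overlap
     * exactly when each one starts before the other ends. */
    static int overlaps(struct range a, struct range b)
    {
            return a.end > b.start && a.start < b.end;
    }

    int main(void)
    {
            /* Invented memory affinity ranges, in bytes. */
            struct range node0 = { 0x00000000, 0x40000000 }; /* 0    - 1 GB */
            struct range node1 = { 0x40000000, 0x80000000 }; /* 1 GB - 2 GB */
            struct range bad   = { 0x30000000, 0x50000000 }; /* straddles both */

            printf("node0/node1 overlap: %d\n", overlaps(node0, node1)); /* 0 */
            printf("bad/node0   overlap: %d\n", overlaps(bad, node0));   /* 1 */
            printf("bad/node1   overlap: %d\n", overlaps(bad, node1));   /* 1 */
            return 0;
    }
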
diff --git a/arch/x86_64/pci/k8-bus.c b/arch/x86_64/pci/k8-bus.c
index d80c323669e..3acf60ded2a 100644
--- a/arch/x86_64/pci/k8-bus.c
+++ b/arch/x86_64/pci/k8-bus.c
@@ -58,10 +58,16 @@ fill_mp_bus_to_cpumask(void)
for (j = SECONDARY_LDT_BUS_NUMBER(ldtbus);
j <= SUBORDINATE_LDT_BUS_NUMBER(ldtbus);
j++) {
- int node = NODE_ID(nid);
+ struct pci_bus *bus;
+ long node = NODE_ID(nid);
+ /* Algorithm a bit dumb, but
+ it shouldn't matter here */
+ bus = pci_find_bus(0, j);
+ if (!bus)
+ continue;
if (!node_online(node))
node = 0;
- pci_bus_to_node[j] = node;
+ bus->sysdata = (void *)node;
}
}
}
diff --git a/arch/x86_64/pci/mmconfig.c b/arch/x86_64/pci/mmconfig.c
index 657e88aa090..a0838c4a94e 100644
--- a/arch/x86_64/pci/mmconfig.c
+++ b/arch/x86_64/pci/mmconfig.c
@@ -111,13 +111,6 @@ static int __init pci_mmcfg_init(void)
(pci_mmcfg_config[0].base_address == 0))
return 0;
- /* Kludge for now. Don't use mmconfig on AMD systems because
- those have some busses where mmconfig doesn't work,
- and we don't parse ACPI MCFG well enough to handle that.
- Remove when proper handling is added. */
- if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD)
- return 0;
-
/* RED-PEN i386 doesn't do _nocache right now */
pci_mmcfg_virt = kmalloc(sizeof(*pci_mmcfg_virt) * pci_mmcfg_config_num, GFP_KERNEL);
if (pci_mmcfg_virt == NULL) {