From 93fa7636dfdc059b25df148f230c0991096afdef Mon Sep 17 00:00:00 2001 From: Markus Metzger Date: Tue, 8 Apr 2008 11:01:58 +0200 Subject: x86, ptrace: PEBS support Polish the ds.h interface and add support for PEBS. Ds.c is meant to be the resource allocator for per-thread and per-cpu BTS and PEBS recording. It is used by ptrace/utrace to provide execution tracing of debugged tasks. It will be used by profilers (e.g. perfmon2). It may be used by kernel debuggers to provide a kernel execution trace. Changes in detail: - guard DS and ptrace by CONFIG macros - separate DS and BTS more clearly - simplify field accesses - add functions to manage PEBS buffers - add simple protection/allocation mechanism - added support for Atom Opens: - buffer overflow handling Currently, only circular buffers are supported. This is all we need for debugging. Profilers would want an overflow notification. This is planned to be added when perfmon2 is made to use the ds.h interface. - utrace intermediate layer Signed-off-by: Markus Metzger Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/cpu/intel.c | 3 +- arch/x86/kernel/ds.c | 953 ++++++++++++++++++++++++++++++------------- arch/x86/kernel/process_32.c | 25 +- arch/x86/kernel/process_64.c | 25 +- arch/x86/kernel/ptrace.c | 444 ++++++++++++-------- arch/x86/kernel/setup_64.c | 3 +- 6 files changed, 991 insertions(+), 462 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index fe9224c51d3..cbffa2a25a1 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -222,10 +222,11 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c) set_cpu_cap(c, X86_FEATURE_BTS); if (!(l1 & (1<<12))) set_cpu_cap(c, X86_FEATURE_PEBS); + ds_init_intel(c); } if (cpu_has_bts) - ds_init_intel(c); + ptrace_bts_init_intel(c); } static unsigned int __cpuinit intel_size_cache(struct cpuinfo_x86 *c, unsigned int size) diff --git a/arch/x86/kernel/ds.c b/arch/x86/kernel/ds.c index 11c11b8ec48..5b32b6d062b 100644 --- a/arch/x86/kernel/ds.c +++ b/arch/x86/kernel/ds.c @@ -2,26 +2,48 @@ * Debug Store support * * This provides a low-level interface to the hardware's Debug Store - * feature that is used for last branch recording (LBR) and + * feature that is used for branch trace store (BTS) and * precise-event based sampling (PEBS). * - * Different architectures use a different DS layout/pointer size. - * The below functions therefore work on a void*. + * It manages: + * - per-thread and per-cpu allocation of BTS and PEBS + * - buffer memory allocation (optional) + * - buffer overflow handling + * - buffer access * + * It assumes: + * - get_task_struct on all parameter tasks + * - current is allowed to trace parameter tasks * - * Since there is no user for PEBS, yet, only LBR (or branch - * trace store, BTS) is supported. * - * - * Copyright (C) 2007 Intel Corporation. - * Markus Metzger , Dec 2007 + * Copyright (C) 2007-2008 Intel Corporation. + * Markus Metzger , 2007-2008 */ + +#ifdef CONFIG_X86_DS + #include #include #include #include +#include + + +/* + * The configuration for a particular DS hardware implementation. + */ +struct ds_configuration { + /* the size of the DS structure in bytes */ + unsigned char sizeof_ds; + /* the size of one pointer-typed field in the DS structure in bytes; + this covers the first 8 fields related to buffer management. */ + unsigned char sizeof_field; + /* the size of a BTS/PEBS record in bytes */ + unsigned char sizeof_rec[2]; +}; +static struct ds_configuration ds_cfg; /* @@ -44,378 +66,747 @@ * (interrupt occurs when write pointer passes interrupt pointer) * - value to which counter is reset following counter overflow * - * On later architectures, the last branch recording hardware uses - * 64bit pointers even in 32bit mode. - * - * - * Branch Trace Store (BTS) records store information about control - * flow changes. They at least provide the following information: - * - source linear address - * - destination linear address + * Later architectures use 64bit pointers throughout, whereas earlier + * architectures use 32bit pointers in 32bit mode. * - * Netburst supported a predicated bit that had been dropped in later - * architectures. We do not suppor it. * + * We compute the base address for the first 8 fields based on: + * - the field size stored in the DS configuration + * - the relative field position + * - an offset giving the start of the respective region * - * In order to abstract from the actual DS and BTS layout, we describe - * the access to the relevant fields. - * Thanks to Andi Kleen for proposing this design. + * This offset is further used to index various arrays holding + * information for BTS and PEBS at the respective index. * - * The implementation, however, is not as general as it might seem. In - * order to stay somewhat simple and efficient, we assume an - * underlying unsigned type (mostly a pointer type) and we expect the - * field to be at least as big as that type. + * On later 32bit processors, we only access the lower 32bit of the + * 64bit pointer fields. The upper halves will be zeroed out. */ -/* - * A special from_ip address to indicate that the BTS record is an - * info record that needs to be interpreted or skipped. - */ -#define BTS_ESCAPE_ADDRESS (-1) +enum ds_field { + ds_buffer_base = 0, + ds_index, + ds_absolute_maximum, + ds_interrupt_threshold, +}; -/* - * A field access descriptor - */ -struct access_desc { - unsigned char offset; - unsigned char size; +enum ds_qualifier { + ds_bts = 0, + ds_pebs }; +static inline unsigned long ds_get(const unsigned char *base, + enum ds_qualifier qual, enum ds_field field) +{ + base += (ds_cfg.sizeof_field * (field + (4 * qual))); + return *(unsigned long *)base; +} + +static inline void ds_set(unsigned char *base, enum ds_qualifier qual, + enum ds_field field, unsigned long value) +{ + base += (ds_cfg.sizeof_field * (field + (4 * qual))); + (*(unsigned long *)base) = value; +} + + /* - * The configuration for a particular DS/BTS hardware implementation. + * Locking is done only for allocating BTS or PEBS resources and for + * guarding context and buffer memory allocation. + * + * Most functions require the current task to own the ds context part + * they are going to access. All the locking is done when validating + * access to the context. */ -struct ds_configuration { - /* the DS configuration */ - unsigned char sizeof_ds; - struct access_desc bts_buffer_base; - struct access_desc bts_index; - struct access_desc bts_absolute_maximum; - struct access_desc bts_interrupt_threshold; - /* the BTS configuration */ - unsigned char sizeof_bts; - struct access_desc from_ip; - struct access_desc to_ip; - /* BTS variants used to store additional information like - timestamps */ - struct access_desc info_type; - struct access_desc info_data; - unsigned long debugctl_mask; -}; +static spinlock_t ds_lock = __SPIN_LOCK_UNLOCKED(ds_lock); /* - * The global configuration used by the below accessor functions + * Validate that the current task is allowed to access the BTS/PEBS + * buffer of the parameter task. + * + * Returns 0, if access is granted; -Eerrno, otherwise. */ -static struct ds_configuration ds_cfg; +static inline int ds_validate_access(struct ds_context *context, + enum ds_qualifier qual) +{ + if (!context) + return -EPERM; + + if (context->owner[qual] == current) + return 0; + + return -EPERM; +} + /* - * Accessor functions for some DS and BTS fields using the above - * global ptrace_bts_cfg. + * We either support (system-wide) per-cpu or per-thread allocation. + * We distinguish the two based on the task_struct pointer, where a + * NULL pointer indicates per-cpu allocation for the current cpu. + * + * Allocations are use-counted. As soon as resources are allocated, + * further allocations must be of the same type (per-cpu or + * per-thread). We model this by counting allocations (i.e. the number + * of tracers of a certain type) for one type negatively: + * =0 no tracers + * >0 number of per-thread tracers + * <0 number of per-cpu tracers + * + * The below functions to get and put tracers and to check the + * allocation type require the ds_lock to be held by the caller. + * + * Tracers essentially gives the number of ds contexts for a certain + * type of allocation. */ -static inline unsigned long get_bts_buffer_base(char *base) +static long tracers; + +static inline void get_tracer(struct task_struct *task) { - return *(unsigned long *)(base + ds_cfg.bts_buffer_base.offset); + tracers += (task ? 1 : -1); } -static inline void set_bts_buffer_base(char *base, unsigned long value) + +static inline void put_tracer(struct task_struct *task) { - (*(unsigned long *)(base + ds_cfg.bts_buffer_base.offset)) = value; + tracers -= (task ? 1 : -1); } -static inline unsigned long get_bts_index(char *base) + +static inline int check_tracer(struct task_struct *task) { - return *(unsigned long *)(base + ds_cfg.bts_index.offset); + return (task ? (tracers >= 0) : (tracers <= 0)); } -static inline void set_bts_index(char *base, unsigned long value) + + +/* + * The DS context is either attached to a thread or to a cpu: + * - in the former case, the thread_struct contains a pointer to the + * attached context. + * - in the latter case, we use a static array of per-cpu context + * pointers. + * + * Contexts are use-counted. They are allocated on first access and + * deallocated when the last user puts the context. + * + * We distinguish between an allocating and a non-allocating get of a + * context: + * - the allocating get is used for requesting BTS/PEBS resources. It + * requires the caller to hold the global ds_lock. + * - the non-allocating get is used for all other cases. A + * non-existing context indicates an error. It acquires and releases + * the ds_lock itself for obtaining the context. + * + * A context and its DS configuration are allocated and deallocated + * together. A context always has a DS configuration of the + * appropriate size. + */ +static DEFINE_PER_CPU(struct ds_context *, system_context); + +#define this_system_context per_cpu(system_context, smp_processor_id()) + +/* + * Returns the pointer to the parameter task's context or to the + * system-wide context, if task is NULL. + * + * Increases the use count of the returned context, if not NULL. + */ +static inline struct ds_context *ds_get_context(struct task_struct *task) { - (*(unsigned long *)(base + ds_cfg.bts_index.offset)) = value; + struct ds_context *context; + + spin_lock(&ds_lock); + + context = (task ? task->thread.ds_ctx : this_system_context); + if (context) + context->count++; + + spin_unlock(&ds_lock); + + return context; } -static inline unsigned long get_bts_absolute_maximum(char *base) + +/* + * Same as ds_get_context, but allocates the context and it's DS + * structure, if necessary; returns NULL; if out of memory. + * + * pre: requires ds_lock to be held + */ +static inline struct ds_context *ds_alloc_context(struct task_struct *task) { - return *(unsigned long *)(base + ds_cfg.bts_absolute_maximum.offset); + struct ds_context **p_context = + (task ? &task->thread.ds_ctx : &this_system_context); + struct ds_context *context = *p_context; + + if (!context) { + context = kzalloc(sizeof(*context), GFP_KERNEL); + + if (!context) + return 0; + + context->ds = kzalloc(ds_cfg.sizeof_ds, GFP_KERNEL); + if (!context->ds) { + kfree(context); + return 0; + } + + *p_context = context; + + context->this = p_context; + context->task = task; + + if (task) + set_tsk_thread_flag(task, TIF_DS_AREA_MSR); + + if (!task || (task == current)) + wrmsr(MSR_IA32_DS_AREA, (unsigned long)context->ds, 0); + + get_tracer(task); + } + + context->count++; + + return context; } -static inline void set_bts_absolute_maximum(char *base, unsigned long value) + +/* + * Decreases the use count of the parameter context, if not NULL. + * Deallocates the context, if the use count reaches zero. + */ +static inline void ds_put_context(struct ds_context *context) { - (*(unsigned long *)(base + ds_cfg.bts_absolute_maximum.offset)) = value; + if (!context) + return; + + spin_lock(&ds_lock); + + if (--context->count) + goto out; + + *(context->this) = 0; + + if (context->task) + clear_tsk_thread_flag(context->task, TIF_DS_AREA_MSR); + + if (!context->task || (context->task == current)) + wrmsrl(MSR_IA32_DS_AREA, 0); + + put_tracer(context->task); + + /* free any leftover buffers from tracers that did not + * deallocate them properly. */ + kfree(context->buffer[ds_bts]); + kfree(context->buffer[ds_pebs]); + kfree(context->ds); + kfree(context); + out: + spin_unlock(&ds_lock); } -static inline unsigned long get_bts_interrupt_threshold(char *base) + + +/* + * Handle a buffer overflow + * + * task: the task whose buffers are overflowing; + * NULL for a buffer overflow on the current cpu + * context: the ds context + * qual: the buffer type + */ +static void ds_overflow(struct task_struct *task, struct ds_context *context, + enum ds_qualifier qual) { - return *(unsigned long *)(base + ds_cfg.bts_interrupt_threshold.offset); + if (!context) + return; + + if (context->callback[qual]) + (*context->callback[qual])(task); + + /* todo: do some more overflow handling */ } -static inline void set_bts_interrupt_threshold(char *base, unsigned long value) + + +/* + * Allocate a non-pageable buffer of the parameter size. + * Checks the memory and the locked memory rlimit. + * + * Returns the buffer, if successful; + * NULL, if out of memory or rlimit exceeded. + * + * size: the requested buffer size in bytes + * pages (out): if not NULL, contains the number of pages reserved + */ +static inline void *ds_allocate_buffer(size_t size, unsigned int *pages) { - (*(unsigned long *)(base + ds_cfg.bts_interrupt_threshold.offset)) = value; + unsigned long rlim, vm, pgsz; + void *buffer; + + pgsz = PAGE_ALIGN(size) >> PAGE_SHIFT; + + rlim = current->signal->rlim[RLIMIT_AS].rlim_cur >> PAGE_SHIFT; + vm = current->mm->total_vm + pgsz; + if (rlim < vm) + return 0; + + rlim = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur >> PAGE_SHIFT; + vm = current->mm->locked_vm + pgsz; + if (rlim < vm) + return 0; + + buffer = kzalloc(size, GFP_KERNEL); + if (!buffer) + return 0; + + current->mm->total_vm += pgsz; + current->mm->locked_vm += pgsz; + + if (pages) + *pages = pgsz; + + return buffer; } -static inline unsigned long get_from_ip(char *base) + +static int ds_request(struct task_struct *task, void *base, size_t size, + ds_ovfl_callback_t ovfl, enum ds_qualifier qual) { - return *(unsigned long *)(base + ds_cfg.from_ip.offset); + struct ds_context *context; + unsigned long buffer, adj; + const unsigned long alignment = (1 << 3); + int error = 0; + + if (!ds_cfg.sizeof_ds) + return -EOPNOTSUPP; + + /* we require some space to do alignment adjustments below */ + if (size < (alignment + ds_cfg.sizeof_rec[qual])) + return -EINVAL; + + /* buffer overflow notification is not yet implemented */ + if (ovfl) + return -EOPNOTSUPP; + + + spin_lock(&ds_lock); + + if (!check_tracer(task)) + return -EPERM; + + error = -ENOMEM; + context = ds_alloc_context(task); + if (!context) + goto out_unlock; + + error = -EALREADY; + if (context->owner[qual] == current) + goto out_unlock; + error = -EPERM; + if (context->owner[qual] != 0) + goto out_unlock; + context->owner[qual] = current; + + spin_unlock(&ds_lock); + + + error = -ENOMEM; + if (!base) { + base = ds_allocate_buffer(size, &context->pages[qual]); + if (!base) + goto out_release; + + context->buffer[qual] = base; + } + error = 0; + + context->callback[qual] = ovfl; + + /* adjust the buffer address and size to meet alignment + * constraints: + * - buffer is double-word aligned + * - size is multiple of record size + * + * We checked the size at the very beginning; we have enough + * space to do the adjustment. + */ + buffer = (unsigned long)base; + + adj = ALIGN(buffer, alignment) - buffer; + buffer += adj; + size -= adj; + + size /= ds_cfg.sizeof_rec[qual]; + size *= ds_cfg.sizeof_rec[qual]; + + ds_set(context->ds, qual, ds_buffer_base, buffer); + ds_set(context->ds, qual, ds_index, buffer); + ds_set(context->ds, qual, ds_absolute_maximum, buffer + size); + + if (ovfl) { + /* todo: select a suitable interrupt threshold */ + } else + ds_set(context->ds, qual, + ds_interrupt_threshold, buffer + size + 1); + + /* we keep the context until ds_release */ + return error; + + out_release: + context->owner[qual] = 0; + ds_put_context(context); + return error; + + out_unlock: + spin_unlock(&ds_lock); + ds_put_context(context); + return error; } -static inline void set_from_ip(char *base, unsigned long value) + +int ds_request_bts(struct task_struct *task, void *base, size_t size, + ds_ovfl_callback_t ovfl) { - (*(unsigned long *)(base + ds_cfg.from_ip.offset)) = value; + return ds_request(task, base, size, ovfl, ds_bts); } -static inline unsigned long get_to_ip(char *base) + +int ds_request_pebs(struct task_struct *task, void *base, size_t size, + ds_ovfl_callback_t ovfl) { - return *(unsigned long *)(base + ds_cfg.to_ip.offset); + return ds_request(task, base, size, ovfl, ds_pebs); } -static inline void set_to_ip(char *base, unsigned long value) + +static int ds_release(struct task_struct *task, enum ds_qualifier qual) { - (*(unsigned long *)(base + ds_cfg.to_ip.offset)) = value; + struct ds_context *context; + int error; + + context = ds_get_context(task); + error = ds_validate_access(context, qual); + if (error < 0) + goto out; + + kfree(context->buffer[qual]); + context->buffer[qual] = 0; + + current->mm->total_vm -= context->pages[qual]; + current->mm->locked_vm -= context->pages[qual]; + context->pages[qual] = 0; + context->owner[qual] = 0; + + /* + * we put the context twice: + * once for the ds_get_context + * once for the corresponding ds_request + */ + ds_put_context(context); + out: + ds_put_context(context); + return error; } -static inline unsigned char get_info_type(char *base) + +int ds_release_bts(struct task_struct *task) { - return *(unsigned char *)(base + ds_cfg.info_type.offset); + return ds_release(task, ds_bts); } -static inline void set_info_type(char *base, unsigned char value) + +int ds_release_pebs(struct task_struct *task) { - (*(unsigned char *)(base + ds_cfg.info_type.offset)) = value; + return ds_release(task, ds_pebs); } -static inline unsigned long get_info_data(char *base) + +static int ds_get_index(struct task_struct *task, size_t *pos, + enum ds_qualifier qual) { - return *(unsigned long *)(base + ds_cfg.info_data.offset); + struct ds_context *context; + unsigned long base, index; + int error; + + context = ds_get_context(task); + error = ds_validate_access(context, qual); + if (error < 0) + goto out; + + base = ds_get(context->ds, qual, ds_buffer_base); + index = ds_get(context->ds, qual, ds_index); + + error = ((index - base) / ds_cfg.sizeof_rec[qual]); + if (pos) + *pos = error; + out: + ds_put_context(context); + return error; } -static inline void set_info_data(char *base, unsigned long value) + +int ds_get_bts_index(struct task_struct *task, size_t *pos) { - (*(unsigned long *)(base + ds_cfg.info_data.offset)) = value; + return ds_get_index(task, pos, ds_bts); } +int ds_get_pebs_index(struct task_struct *task, size_t *pos) +{ + return ds_get_index(task, pos, ds_pebs); +} -int ds_allocate(void **dsp, size_t bts_size_in_bytes) +static int ds_get_end(struct task_struct *task, size_t *pos, + enum ds_qualifier qual) { - size_t bts_size_in_records; - unsigned long bts; - void *ds; + struct ds_context *context; + unsigned long base, end; + int error; + + context = ds_get_context(task); + error = ds_validate_access(context, qual); + if (error < 0) + goto out; + + base = ds_get(context->ds, qual, ds_buffer_base); + end = ds_get(context->ds, qual, ds_absolute_maximum); + + error = ((end - base) / ds_cfg.sizeof_rec[qual]); + if (pos) + *pos = error; + out: + ds_put_context(context); + return error; +} - if (!ds_cfg.sizeof_ds || !ds_cfg.sizeof_bts) - return -EOPNOTSUPP; +int ds_get_bts_end(struct task_struct *task, size_t *pos) +{ + return ds_get_end(task, pos, ds_bts); +} - if (bts_size_in_bytes < 0) - return -EINVAL; +int ds_get_pebs_end(struct task_struct *task, size_t *pos) +{ + return ds_get_end(task, pos, ds_pebs); +} - bts_size_in_records = - bts_size_in_bytes / ds_cfg.sizeof_bts; - bts_size_in_bytes = - bts_size_in_records * ds_cfg.sizeof_bts; +static int ds_access(struct task_struct *task, size_t index, + const void **record, enum ds_qualifier qual) +{ + struct ds_context *context; + unsigned long base, idx; + int error; - if (bts_size_in_bytes <= 0) + if (!record) return -EINVAL; - bts = (unsigned long)kzalloc(bts_size_in_bytes, GFP_KERNEL); + context = ds_get_context(task); + error = ds_validate_access(context, qual); + if (error < 0) + goto out; - if (!bts) - return -ENOMEM; + base = ds_get(context->ds, qual, ds_buffer_base); + idx = base + (index * ds_cfg.sizeof_rec[qual]); - ds = kzalloc(ds_cfg.sizeof_ds, GFP_KERNEL); + error = -EINVAL; + if (idx > ds_get(context->ds, qual, ds_absolute_maximum)) + goto out; - if (!ds) { - kfree((void *)bts); - return -ENOMEM; - } - - set_bts_buffer_base(ds, bts); - set_bts_index(ds, bts); - set_bts_absolute_maximum(ds, bts + bts_size_in_bytes); - set_bts_interrupt_threshold(ds, bts + bts_size_in_bytes + 1); - - *dsp = ds; - return 0; + *record = (const void *)idx; + error = ds_cfg.sizeof_rec[qual]; + out: + ds_put_context(context); + return error; } -int ds_free(void **dsp) +int ds_access_bts(struct task_struct *task, size_t index, const void **record) { - if (*dsp) { - kfree((void *)get_bts_buffer_base(*dsp)); - kfree(*dsp); - *dsp = NULL; - } - return 0; + return ds_access(task, index, record, ds_bts); } -int ds_get_bts_size(void *ds) +int ds_access_pebs(struct task_struct *task, size_t index, const void **record) { - int size_in_bytes; - - if (!ds_cfg.sizeof_ds || !ds_cfg.sizeof_bts) - return -EOPNOTSUPP; - - if (!ds) - return 0; - - size_in_bytes = - get_bts_absolute_maximum(ds) - - get_bts_buffer_base(ds); - return size_in_bytes; + return ds_access(task, index, record, ds_pebs); } -int ds_get_bts_end(void *ds) +static int ds_write(struct task_struct *task, const void *record, size_t size, + enum ds_qualifier qual, int force) { - int size_in_bytes = ds_get_bts_size(ds); - - if (size_in_bytes <= 0) - return size_in_bytes; + struct ds_context *context; + int error; - return size_in_bytes / ds_cfg.sizeof_bts; -} + if (!record) + return -EINVAL; -int ds_get_bts_index(void *ds) -{ - int index_offset_in_bytes; + error = -EPERM; + context = ds_get_context(task); + if (!context) + goto out; - if (!ds_cfg.sizeof_ds || !ds_cfg.sizeof_bts) - return -EOPNOTSUPP; + if (!force) { + error = ds_validate_access(context, qual); + if (error < 0) + goto out; + } - index_offset_in_bytes = - get_bts_index(ds) - - get_bts_buffer_base(ds); + error = 0; + while (size) { + unsigned long base, index, end, write_end, int_th; + unsigned long write_size, adj_write_size; + + /* + * write as much as possible without producing an + * overflow interrupt. + * + * interrupt_threshold must either be + * - bigger than absolute_maximum or + * - point to a record between buffer_base and absolute_maximum + * + * index points to a valid record. + */ + base = ds_get(context->ds, qual, ds_buffer_base); + index = ds_get(context->ds, qual, ds_index); + end = ds_get(context->ds, qual, ds_absolute_maximum); + int_th = ds_get(context->ds, qual, ds_interrupt_threshold); + + write_end = min(end, int_th); + + /* if we are already beyond the interrupt threshold, + * we fill the entire buffer */ + if (write_end <= index) + write_end = end; + + if (write_end <= index) + goto out; + + write_size = min((unsigned long) size, write_end - index); + memcpy((void *)index, record, write_size); + + record = (const char *)record + write_size; + size -= write_size; + error += write_size; + + adj_write_size = write_size / ds_cfg.sizeof_rec[qual]; + adj_write_size *= ds_cfg.sizeof_rec[qual]; + + /* zero out trailing bytes */ + memset((char *)index + write_size, 0, + adj_write_size - write_size); + index += adj_write_size; + + if (index >= end) + index = base; + ds_set(context->ds, qual, ds_index, index); + + if (index >= int_th) + ds_overflow(task, context, qual); + } - return index_offset_in_bytes / ds_cfg.sizeof_bts; + out: + ds_put_context(context); + return error; } -int ds_set_overflow(void *ds, int method) +int ds_write_bts(struct task_struct *task, const void *record, size_t size) { - switch (method) { - case DS_O_SIGNAL: - return -EOPNOTSUPP; - case DS_O_WRAP: - return 0; - default: - return -EINVAL; - } + return ds_write(task, record, size, ds_bts, /* force = */ 0); } -int ds_get_overflow(void *ds) +int ds_write_pebs(struct task_struct *task, const void *record, size_t size) { - return DS_O_WRAP; + return ds_write(task, record, size, ds_pebs, /* force = */ 0); } -int ds_clear(void *ds) +int ds_unchecked_write_bts(struct task_struct *task, + const void *record, size_t size) { - int bts_size = ds_get_bts_size(ds); - unsigned long bts_base; - - if (bts_size <= 0) - return bts_size; - - bts_base = get_bts_buffer_base(ds); - memset((void *)bts_base, 0, bts_size); - - set_bts_index(ds, bts_base); - return 0; + return ds_write(task, record, size, ds_bts, /* force = */ 1); } -int ds_read_bts(void *ds, int index, struct bts_struct *out) +int ds_unchecked_write_pebs(struct task_struct *task, + const void *record, size_t size) { - void *bts; + return ds_write(task, record, size, ds_pebs, /* force = */ 1); +} - if (!ds_cfg.sizeof_ds || !ds_cfg.sizeof_bts) - return -EOPNOTSUPP; +static int ds_reset_or_clear(struct task_struct *task, + enum ds_qualifier qual, int clear) +{ + struct ds_context *context; + unsigned long base, end; + int error; - if (index < 0) - return -EINVAL; + context = ds_get_context(task); + error = ds_validate_access(context, qual); + if (error < 0) + goto out; - if (index >= ds_get_bts_size(ds)) - return -EINVAL; + base = ds_get(context->ds, qual, ds_buffer_base); + end = ds_get(context->ds, qual, ds_absolute_maximum); - bts = (void *)(get_bts_buffer_base(ds) + (index * ds_cfg.sizeof_bts)); + if (clear) + memset((void *)base, 0, end - base); - memset(out, 0, sizeof(*out)); - if (get_from_ip(bts) == BTS_ESCAPE_ADDRESS) { - out->qualifier = get_info_type(bts); - out->variant.jiffies = get_info_data(bts); - } else { - out->qualifier = BTS_BRANCH; - out->variant.lbr.from_ip = get_from_ip(bts); - out->variant.lbr.to_ip = get_to_ip(bts); - } + ds_set(context->ds, qual, ds_index, base); - return sizeof(*out);; + error = 0; + out: + ds_put_context(context); + return error; } -int ds_write_bts(void *ds, const struct bts_struct *in) +int ds_reset_bts(struct task_struct *task) { - unsigned long bts; - - if (!ds_cfg.sizeof_ds || !ds_cfg.sizeof_bts) - return -EOPNOTSUPP; - - if (ds_get_bts_size(ds) <= 0) - return -ENXIO; + return ds_reset_or_clear(task, ds_bts, /* clear = */ 0); +} - bts = get_bts_index(ds); +int ds_reset_pebs(struct task_struct *task) +{ + return ds_reset_or_clear(task, ds_pebs, /* clear = */ 0); +} - memset((void *)bts, 0, ds_cfg.sizeof_bts); - switch (in->qualifier) { - case BTS_INVALID: - break; +int ds_clear_bts(struct task_struct *task) +{ + return ds_reset_or_clear(task, ds_bts, /* clear = */ 1); +} - case BTS_BRANCH: - set_from_ip((void *)bts, in->variant.lbr.from_ip); - set_to_ip((void *)bts, in->variant.lbr.to_ip); - break; +int ds_clear_pebs(struct task_struct *task) +{ + return ds_reset_or_clear(task, ds_pebs, /* clear = */ 1); +} - case BTS_TASK_ARRIVES: - case BTS_TASK_DEPARTS: - set_from_ip((void *)bts, BTS_ESCAPE_ADDRESS); - set_info_type((void *)bts, in->qualifier); - set_info_data((void *)bts, in->variant.jiffies); - break; +int ds_get_pebs_reset(struct task_struct *task, u64 *value) +{ + struct ds_context *context; + int error; - default: + if (!value) return -EINVAL; - } - bts = bts + ds_cfg.sizeof_bts; - if (bts >= get_bts_absolute_maximum(ds)) - bts = get_bts_buffer_base(ds); - set_bts_index(ds, bts); + context = ds_get_context(task); + error = ds_validate_access(context, ds_pebs); + if (error < 0) + goto out; - return ds_cfg.sizeof_bts; + *value = *(u64 *)(context->ds + (ds_cfg.sizeof_field * 8)); + + error = 0; + out: + ds_put_context(context); + return error; } -unsigned long ds_debugctl_mask(void) +int ds_set_pebs_reset(struct task_struct *task, u64 value) { - return ds_cfg.debugctl_mask; -} + struct ds_context *context; + int error; -#ifdef __i386__ -static const struct ds_configuration ds_cfg_netburst = { - .sizeof_ds = 9 * 4, - .bts_buffer_base = { 0, 4 }, - .bts_index = { 4, 4 }, - .bts_absolute_maximum = { 8, 4 }, - .bts_interrupt_threshold = { 12, 4 }, - .sizeof_bts = 3 * 4, - .from_ip = { 0, 4 }, - .to_ip = { 4, 4 }, - .info_type = { 4, 1 }, - .info_data = { 8, 4 }, - .debugctl_mask = (1<<2)|(1<<3) -}; + context = ds_get_context(task); + error = ds_validate_access(context, ds_pebs); + if (error < 0) + goto out; -static const struct ds_configuration ds_cfg_pentium_m = { - .sizeof_ds = 9 * 4, - .bts_buffer_base = { 0, 4 }, - .bts_index = { 4, 4 }, - .bts_absolute_maximum = { 8, 4 }, - .bts_interrupt_threshold = { 12, 4 }, - .sizeof_bts = 3 * 4, - .from_ip = { 0, 4 }, - .to_ip = { 4, 4 }, - .info_type = { 4, 1 }, - .info_data = { 8, 4 }, - .debugctl_mask = (1<<6)|(1<<7) + *(u64 *)(context->ds + (ds_cfg.sizeof_field * 8)) = value; + + error = 0; + out: + ds_put_context(context); + return error; +} + +static const struct ds_configuration ds_cfg_var = { + .sizeof_ds = sizeof(long) * 12, + .sizeof_field = sizeof(long), + .sizeof_rec[ds_bts] = sizeof(long) * 3, + .sizeof_rec[ds_pebs] = sizeof(long) * 10 }; -#endif /* _i386_ */ - -static const struct ds_configuration ds_cfg_core2 = { - .sizeof_ds = 9 * 8, - .bts_buffer_base = { 0, 8 }, - .bts_index = { 8, 8 }, - .bts_absolute_maximum = { 16, 8 }, - .bts_interrupt_threshold = { 24, 8 }, - .sizeof_bts = 3 * 8, - .from_ip = { 0, 8 }, - .to_ip = { 8, 8 }, - .info_type = { 8, 1 }, - .info_data = { 16, 8 }, - .debugctl_mask = (1<<6)|(1<<7)|(1<<9) +static const struct ds_configuration ds_cfg_64 = { + .sizeof_ds = 8 * 12, + .sizeof_field = 8, + .sizeof_rec[ds_bts] = 8 * 3, + .sizeof_rec[ds_pebs] = 8 * 10 }; static inline void @@ -429,14 +820,13 @@ void __cpuinit ds_init_intel(struct cpuinfo_x86 *c) switch (c->x86) { case 0x6: switch (c->x86_model) { -#ifdef __i386__ case 0xD: case 0xE: /* Pentium M */ - ds_configure(&ds_cfg_pentium_m); + ds_configure(&ds_cfg_var); break; -#endif /* _i386_ */ case 0xF: /* Core2 */ - ds_configure(&ds_cfg_core2); + case 0x1C: /* Atom */ + ds_configure(&ds_cfg_64); break; default: /* sorry, don't know about them */ @@ -445,13 +835,11 @@ void __cpuinit ds_init_intel(struct cpuinfo_x86 *c) break; case 0xF: switch (c->x86_model) { -#ifdef __i386__ case 0x0: case 0x1: case 0x2: /* Netburst */ - ds_configure(&ds_cfg_netburst); + ds_configure(&ds_cfg_var); break; -#endif /* _i386_ */ default: /* sorry, don't know about them */ break; @@ -462,3 +850,14 @@ void __cpuinit ds_init_intel(struct cpuinfo_x86 *c) break; } } + +void ds_free(struct ds_context *context) +{ + /* This is called when the task owning the parameter context + * is dying. There should not be any user of that context left + * to disturb us, anymore. */ + unsigned long leftovers = context->count; + while (leftovers--) + ds_put_context(context); +} +#endif /* CONFIG_X86_DS */ diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index f8476dfbb60..5cec8a75a4e 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -316,6 +316,14 @@ void exit_thread(void) tss->x86_tss.io_bitmap_base = INVALID_IO_BITMAP_OFFSET; put_cpu(); } +#ifdef CONFIG_X86_DS + /* Free any DS contexts that have not been properly released. */ + if (unlikely(current->thread.ds_ctx)) { + /* we clear debugctl to make sure DS is not used. */ + update_debugctlmsr(0); + ds_free(current->thread.ds_ctx); + } +#endif /* CONFIG_X86_DS */ } void flush_thread(void) @@ -482,18 +490,27 @@ __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, { struct thread_struct *prev, *next; unsigned long debugctl; + unsigned long ds_prev = 0, ds_next = 0; prev = &prev_p->thread; next = &next_p->thread; debugctl = prev->debugctlmsr; - if (next->ds_area_msr != prev->ds_area_msr) { + +#ifdef CONFIG_X86_DS + if (prev->ds_ctx) + ds_prev = (unsigned long)prev->ds_ctx->ds; + if (next->ds_ctx) + ds_next = (unsigned long)next->ds_ctx->ds; + + if (ds_next != ds_prev) { /* we clear debugctl to make sure DS * is not in use when we change it */ debugctl = 0; update_debugctlmsr(0); - wrmsr(MSR_IA32_DS_AREA, next->ds_area_msr, 0); + wrmsr(MSR_IA32_DS_AREA, ds_next, 0); } +#endif /* CONFIG_X86_DS */ if (next->debugctlmsr != debugctl) update_debugctlmsr(next->debugctlmsr); @@ -517,13 +534,13 @@ __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, hard_enable_TSC(); } -#ifdef X86_BTS +#ifdef CONFIG_X86_PTRACE_BTS if (test_tsk_thread_flag(prev_p, TIF_BTS_TRACE_TS)) ptrace_bts_take_timestamp(prev_p, BTS_TASK_DEPARTS); if (test_tsk_thread_flag(next_p, TIF_BTS_TRACE_TS)) ptrace_bts_take_timestamp(next_p, BTS_TASK_ARRIVES); -#endif +#endif /* CONFIG_X86_PTRACE_BTS */ if (!test_tsk_thread_flag(next_p, TIF_IO_BITMAP)) { diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index e2319f39988..ad213494a22 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -267,6 +267,14 @@ void exit_thread(void) t->io_bitmap_max = 0; put_cpu(); } +#ifdef CONFIG_X86_DS + /* Free any DS contexts that have not been properly released. */ + if (unlikely(t->ds_ctx)) { + /* we clear debugctl to make sure DS is not used. */ + update_debugctlmsr(0); + ds_free(t->ds_ctx); + } +#endif /* CONFIG_X86_DS */ } void flush_thread(void) @@ -492,18 +500,27 @@ static inline void __switch_to_xtra(struct task_struct *prev_p, { struct thread_struct *prev, *next; unsigned long debugctl; + unsigned long ds_prev = 0, ds_next = 0; prev = &prev_p->thread, next = &next_p->thread; debugctl = prev->debugctlmsr; - if (next->ds_area_msr != prev->ds_area_msr) { + +#ifdef CONFIG_X86_DS + if (prev->ds_ctx) + ds_prev = (unsigned long)prev->ds_ctx->ds; + if (next->ds_ctx) + ds_next = (unsigned long)next->ds_ctx->ds; + + if (ds_next != ds_prev) { /* we clear debugctl to make sure DS * is not in use when we change it */ debugctl = 0; update_debugctlmsr(0); - wrmsrl(MSR_IA32_DS_AREA, next->ds_area_msr); + wrmsrl(MSR_IA32_DS_AREA, ds_next); } +#endif /* CONFIG_X86_DS */ if (next->debugctlmsr != debugctl) update_debugctlmsr(next->debugctlmsr); @@ -541,13 +558,13 @@ static inline void __switch_to_xtra(struct task_struct *prev_p, memset(tss->io_bitmap, 0xff, prev->io_bitmap_max); } -#ifdef X86_BTS +#ifdef CONFIG_X86_PTRACE_BTS if (test_tsk_thread_flag(prev_p, TIF_BTS_TRACE_TS)) ptrace_bts_take_timestamp(prev_p, BTS_TASK_DEPARTS); if (test_tsk_thread_flag(next_p, TIF_BTS_TRACE_TS)) ptrace_bts_take_timestamp(next_p, BTS_TASK_ARRIVES); -#endif +#endif /* CONFIG_X86_PTRACE_BTS */ } /* diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index fb03ef380f0..b7ff783dc5f 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -554,45 +554,115 @@ static int ptrace_set_debugreg(struct task_struct *child, return 0; } -#ifdef X86_BTS +#ifdef CONFIG_X86_PTRACE_BTS +/* + * The configuration for a particular BTS hardware implementation. + */ +struct bts_configuration { + /* the size of a BTS record in bytes; at most BTS_MAX_RECORD_SIZE */ + unsigned char sizeof_bts; + /* the size of a field in the BTS record in bytes */ + unsigned char sizeof_field; + /* a bitmask to enable/disable BTS in DEBUGCTL MSR */ + unsigned long debugctl_mask; +}; +static struct bts_configuration bts_cfg; + +#define BTS_MAX_RECORD_SIZE (8 * 3) + + +/* + * Branch Trace Store (BTS) uses the following format. Different + * architectures vary in the size of those fields. + * - source linear address + * - destination linear address + * - flags + * + * Later architectures use 64bit pointers throughout, whereas earlier + * architectures use 32bit pointers in 32bit mode. + * + * We compute the base address for the first 8 fields based on: + * - the field size stored in the DS configuration + * - the relative field position + * + * In order to store additional information in the BTS buffer, we use + * a special source address to indicate that the record requires + * special interpretation. + * + * Netburst indicated via a bit in the flags field whether the branch + * was predicted; this is ignored. + */ -static int ptrace_bts_get_size(struct task_struct *child) +enum bts_field { + bts_from = 0, + bts_to, + bts_flags, + + bts_escape = (unsigned long)-1, + bts_qual = bts_to, + bts_jiffies = bts_flags +}; + +static inline unsigned long bts_get(const char *base, enum bts_field field) { - if (!child->thread.ds_area_msr) - return -ENXIO; + base += (bts_cfg.sizeof_field * field); + return *(unsigned long *)base; +} - return ds_get_bts_index((void *)child->thread.ds_area_msr); +static inline void bts_set(char *base, enum bts_field field, unsigned long val) +{ + base += (bts_cfg.sizeof_field * field);; + (*(unsigned long *)base) = val; } -static int ptrace_bts_read_record(struct task_struct *child, - long index, +/* + * Translate a BTS record from the raw format into the bts_struct format + * + * out (out): bts_struct interpretation + * raw: raw BTS record + */ +static void ptrace_bts_translate_record(struct bts_struct *out, const void *raw) +{ + memset(out, 0, sizeof(*out)); + if (bts_get(raw, bts_from) == bts_escape) { + out->qualifier = bts_get(raw, bts_qual); + out->variant.jiffies = bts_get(raw, bts_jiffies); + } else { + out->qualifier = BTS_BRANCH; + out->variant.lbr.from_ip = bts_get(raw, bts_from); + out->variant.lbr.to_ip = bts_get(raw, bts_to); + } +} + +static int ptrace_bts_read_record(struct task_struct *child, size_t index, struct bts_struct __user *out) { struct bts_struct ret; - int retval; - int bts_end; - int bts_index; - - if (!child->thread.ds_area_msr) - return -ENXIO; + const void *bts_record; + size_t bts_index, bts_end; + int error; - if (index < 0) - return -EINVAL; + error = ds_get_bts_end(child, &bts_end); + if (error < 0) + return error; - bts_end = ds_get_bts_end((void *)child->thread.ds_area_msr); if (bts_end <= index) return -EINVAL; + error = ds_get_bts_index(child, &bts_index); + if (error < 0) + return error; + /* translate the ptrace bts index into the ds bts index */ - bts_index = ds_get_bts_index((void *)child->thread.ds_area_msr); - bts_index -= (index + 1); - if (bts_index < 0) - bts_index += bts_end; + bts_index += bts_end - (index + 1); + if (bts_end <= bts_index) + bts_index -= bts_end; - retval = ds_read_bts((void *)child->thread.ds_area_msr, - bts_index, &ret); - if (retval < 0) - return retval; + error = ds_access_bts(child, bts_index, &bts_record); + if (error < 0) + return error; + + ptrace_bts_translate_record(&ret, bts_record); if (copy_to_user(out, &ret, sizeof(ret))) return -EFAULT; @@ -600,101 +670,106 @@ static int ptrace_bts_read_record(struct task_struct *child, return sizeof(ret); } -static int ptrace_bts_clear(struct task_struct *child) -{ - if (!child->thread.ds_area_msr) - return -ENXIO; - - return ds_clear((void *)child->thread.ds_area_msr); -} - static int ptrace_bts_drain(struct task_struct *child, long size, struct bts_struct __user *out) { - int end, i; - void *ds = (void *)child->thread.ds_area_msr; - - if (!ds) - return -ENXIO; + struct bts_struct ret; + const unsigned char *raw; + size_t end, i; + int error; - end = ds_get_bts_index(ds); - if (end <= 0) - return end; + error = ds_get_bts_index(child, &end); + if (error < 0) + return error; if (size < (end * sizeof(struct bts_struct))) return -EIO; - for (i = 0; i < end; i++, out++) { - struct bts_struct ret; - int retval; + error = ds_access_bts(child, 0, (const void **)&raw); + if (error < 0) + return error; - retval = ds_read_bts(ds, i, &ret); - if (retval < 0) - return retval; + for (i = 0; i < end; i++, out++, raw += bts_cfg.sizeof_bts) { + ptrace_bts_translate_record(&ret, raw); if (copy_to_user(out, &ret, sizeof(ret))) return -EFAULT; } - ds_clear(ds); + error = ds_clear_bts(child); + if (error < 0) + return error; return end; } +static void ptrace_bts_ovfl(struct task_struct *child) +{ + send_sig(child->thread.bts_ovfl_signal, child, 0); +} + static int ptrace_bts_config(struct task_struct *child, long cfg_size, const struct ptrace_bts_config __user *ucfg) { struct ptrace_bts_config cfg; - int bts_size, ret = 0; - void *ds; + int error = 0; + + error = -EOPNOTSUPP; + if (!bts_cfg.sizeof_bts) + goto errout; + error = -EIO; if (cfg_size < sizeof(cfg)) - return -EIO; + goto errout; + error = -EFAULT; if (copy_from_user(&cfg, ucfg, sizeof(cfg))) - return -EFAULT; + goto errout; - if ((int)cfg.size < 0) - return -EINVAL; + error = -EINVAL; + if ((cfg.flags & PTRACE_BTS_O_SIGNAL) && + !(cfg.flags & PTRACE_BTS_O_ALLOC)) + goto errout; - bts_size = 0; - ds = (void *)child->thread.ds_area_msr; - if (ds) { - bts_size = ds_get_bts_size(ds); - if (bts_size < 0) - return bts_size; - } - cfg.size = PAGE_ALIGN(cfg.size); + if (cfg.flags & PTRACE_BTS_O_ALLOC) { + ds_ovfl_callback_t ovfl = 0; + unsigned int sig = 0; - if (bts_size != cfg.size) { - ret = ptrace_bts_realloc(child, cfg.size, - cfg.flags & PTRACE_BTS_O_CUT_SIZE); - if (ret < 0) + /* we ignore the error in case we were not tracing child */ + (void)ds_release_bts(child); + + if (cfg.flags & PTRACE_BTS_O_SIGNAL) { + if (!cfg.signal) + goto errout; + + sig = cfg.signal; + ovfl = ptrace_bts_ovfl; + } + + error = ds_request_bts(child, /* base = */ 0, cfg.size, ovfl); + if (error < 0) goto errout; - ds = (void *)child->thread.ds_area_msr; + child->thread.bts_ovfl_signal = sig; } - if (cfg.flags & PTRACE_BTS_O_SIGNAL) - ret = ds_set_overflow(ds, DS_O_SIGNAL); - else - ret = ds_set_overflow(ds, DS_O_WRAP); - if (ret < 0) + error = -EINVAL; + if (!child->thread.ds_ctx && cfg.flags) goto errout; if (cfg.flags & PTRACE_BTS_O_TRACE) - child->thread.debugctlmsr |= ds_debugctl_mask(); + child->thread.debugctlmsr |= bts_cfg.debugctl_mask; else - child->thread.debugctlmsr &= ~ds_debugctl_mask(); + child->thread.debugctlmsr &= ~bts_cfg.debugctl_mask; if (cfg.flags & PTRACE_BTS_O_SCHED) set_tsk_thread_flag(child, TIF_BTS_TRACE_TS); else clear_tsk_thread_flag(child, TIF_BTS_TRACE_TS); - ret = sizeof(cfg); + error = sizeof(cfg); out: if (child->thread.debugctlmsr) @@ -702,10 +777,10 @@ out: else clear_tsk_thread_flag(child, TIF_DEBUGCTLMSR); - return ret; + return error; errout: - child->thread.debugctlmsr &= ~ds_debugctl_mask(); + child->thread.debugctlmsr &= ~bts_cfg.debugctl_mask; clear_tsk_thread_flag(child, TIF_BTS_TRACE_TS); goto out; } @@ -714,29 +789,40 @@ static int ptrace_bts_status(struct task_struct *child, long cfg_size, struct ptrace_bts_config __user *ucfg) { - void *ds = (void *)child->thread.ds_area_msr; struct ptrace_bts_config cfg; + size_t end; + const void *base, *max; + int error; if (cfg_size < sizeof(cfg)) return -EIO; - memset(&cfg, 0, sizeof(cfg)); + error = ds_get_bts_end(child, &end); + if (error < 0) + return error; - if (ds) { - cfg.size = ds_get_bts_size(ds); + error = ds_access_bts(child, /* index = */ 0, &base); + if (error < 0) + return error; - if (ds_get_overflow(ds) == DS_O_SIGNAL) - cfg.flags |= PTRACE_BTS_O_SIGNAL; + error = ds_access_bts(child, /* index = */ end, &max); + if (error < 0) + return error; - if (test_tsk_thread_flag(child, TIF_DEBUGCTLMSR) && - child->thread.debugctlmsr & ds_debugctl_mask()) - cfg.flags |= PTRACE_BTS_O_TRACE; + memset(&cfg, 0, sizeof(cfg)); + cfg.size = (max - base); + cfg.signal = child->thread.bts_ovfl_signal; + cfg.bts_size = sizeof(struct bts_struct); - if (test_tsk_thread_flag(child, TIF_BTS_TRACE_TS)) - cfg.flags |= PTRACE_BTS_O_SCHED; - } + if (cfg.signal) + cfg.flags |= PTRACE_BTS_O_SIGNAL; - cfg.bts_size = sizeof(struct bts_struct); + if (test_tsk_thread_flag(child, TIF_DEBUGCTLMSR) && + child->thread.debugctlmsr & bts_cfg.debugctl_mask) + cfg.flags |= PTRACE_BTS_O_TRACE; + + if (test_tsk_thread_flag(child, TIF_BTS_TRACE_TS)) + cfg.flags |= PTRACE_BTS_O_SCHED; if (copy_to_user(ucfg, &cfg, sizeof(cfg))) return -EFAULT; @@ -744,89 +830,38 @@ static int ptrace_bts_status(struct task_struct *child, return sizeof(cfg); } - static int ptrace_bts_write_record(struct task_struct *child, const struct bts_struct *in) { - int retval; + unsigned char bts_record[BTS_MAX_RECORD_SIZE]; - if (!child->thread.ds_area_msr) - return -ENXIO; + BUG_ON(BTS_MAX_RECORD_SIZE < bts_cfg.sizeof_bts); - retval = ds_write_bts((void *)child->thread.ds_area_msr, in); - if (retval) - return retval; + memset(bts_record, 0, bts_cfg.sizeof_bts); + switch (in->qualifier) { + case BTS_INVALID: + break; - return sizeof(*in); -} + case BTS_BRANCH: + bts_set(bts_record, bts_from, in->variant.lbr.from_ip); + bts_set(bts_record, bts_to, in->variant.lbr.to_ip); + break; -static int ptrace_bts_realloc(struct task_struct *child, - int size, int reduce_size) -{ - unsigned long rlim, vm; - int ret, old_size; + case BTS_TASK_ARRIVES: + case BTS_TASK_DEPARTS: + bts_set(bts_record, bts_from, bts_escape); + bts_set(bts_record, bts_qual, in->qualifier); + bts_set(bts_record, bts_jiffies, in->variant.jiffies); + break; - if (size < 0) + default: return -EINVAL; - - old_size = ds_get_bts_size((void *)child->thread.ds_area_msr); - if (old_size < 0) - return old_size; - - ret = ds_free((void **)&child->thread.ds_area_msr); - if (ret < 0) - goto out; - - size >>= PAGE_SHIFT; - old_size >>= PAGE_SHIFT; - - current->mm->total_vm -= old_size; - current->mm->locked_vm -= old_size; - - if (size == 0) - goto out; - - rlim = current->signal->rlim[RLIMIT_AS].rlim_cur >> PAGE_SHIFT; - vm = current->mm->total_vm + size; - if (rlim < vm) { - ret = -ENOMEM; - - if (!reduce_size) - goto out; - - size = rlim - current->mm->total_vm; - if (size <= 0) - goto out; } - rlim = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur >> PAGE_SHIFT; - vm = current->mm->locked_vm + size; - if (rlim < vm) { - ret = -ENOMEM; - - if (!reduce_size) - goto out; - - size = rlim - current->mm->locked_vm; - if (size <= 0) - goto out; - } - - ret = ds_allocate((void **)&child->thread.ds_area_msr, - size << PAGE_SHIFT); - if (ret < 0) - goto out; - - current->mm->total_vm += size; - current->mm->locked_vm += size; - -out: - if (child->thread.ds_area_msr) - set_tsk_thread_flag(child, TIF_DS_AREA_MSR); - else - clear_tsk_thread_flag(child, TIF_DS_AREA_MSR); - - return ret; + /* The writing task will be the switched-to task on a context + * switch. It needs to write into the switched-from task's BTS + * buffer. */ + return ds_unchecked_write_bts(child, bts_record, bts_cfg.sizeof_bts); } void ptrace_bts_take_timestamp(struct task_struct *tsk, @@ -839,7 +874,66 @@ void ptrace_bts_take_timestamp(struct task_struct *tsk, ptrace_bts_write_record(tsk, &rec); } -#endif /* X86_BTS */ + +static const struct bts_configuration bts_cfg_netburst = { + .sizeof_bts = sizeof(long) * 3, + .sizeof_field = sizeof(long), + .debugctl_mask = (1<<2)|(1<<3)|(1<<5) +}; + +static const struct bts_configuration bts_cfg_pentium_m = { + .sizeof_bts = sizeof(long) * 3, + .sizeof_field = sizeof(long), + .debugctl_mask = (1<<6)|(1<<7) +}; + +static const struct bts_configuration bts_cfg_core2 = { + .sizeof_bts = 8 * 3, + .sizeof_field = 8, + .debugctl_mask = (1<<6)|(1<<7)|(1<<9) +}; + +static inline void bts_configure(const struct bts_configuration *cfg) +{ + bts_cfg = *cfg; +} + +void __cpuinit ptrace_bts_init_intel(struct cpuinfo_x86 *c) +{ + switch (c->x86) { + case 0x6: + switch (c->x86_model) { + case 0xD: + case 0xE: /* Pentium M */ + bts_configure(&bts_cfg_pentium_m); + break; + case 0xF: /* Core2 */ + case 0x1C: /* Atom */ + bts_configure(&bts_cfg_core2); + break; + default: + /* sorry, don't know about them */ + break; + } + break; + case 0xF: + switch (c->x86_model) { + case 0x0: + case 0x1: + case 0x2: /* Netburst */ + bts_configure(&bts_cfg_netburst); + break; + default: + /* sorry, don't know about them */ + break; + } + break; + default: + /* sorry, don't know about them */ + break; + } +} +#endif /* CONFIG_X86_PTRACE_BTS */ /* * Called by kernel/ptrace.c when detaching.. @@ -852,15 +946,15 @@ void ptrace_disable(struct task_struct *child) #ifdef TIF_SYSCALL_EMU clear_tsk_thread_flag(child, TIF_SYSCALL_EMU); #endif - if (child->thread.ds_area_msr) { -#ifdef X86_BTS - ptrace_bts_realloc(child, 0, 0); -#endif - child->thread.debugctlmsr &= ~ds_debugctl_mask(); - if (!child->thread.debugctlmsr) - clear_tsk_thread_flag(child, TIF_DEBUGCTLMSR); - clear_tsk_thread_flag(child, TIF_BTS_TRACE_TS); - } +#ifdef CONFIG_X86_PTRACE_BTS + (void)ds_release_bts(child); + + child->thread.debugctlmsr &= ~bts_cfg.debugctl_mask; + if (!child->thread.debugctlmsr) + clear_tsk_thread_flag(child, TIF_DEBUGCTLMSR); + + clear_tsk_thread_flag(child, TIF_BTS_TRACE_TS); +#endif /* CONFIG_X86_PTRACE_BTS */ } #if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION @@ -980,7 +1074,7 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data) /* * These bits need more cooking - not enabled yet: */ -#ifdef X86_BTS +#ifdef CONFIG_X86_PTRACE_BTS case PTRACE_BTS_CONFIG: ret = ptrace_bts_config (child, data, (struct ptrace_bts_config __user *)addr); @@ -992,7 +1086,7 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data) break; case PTRACE_BTS_SIZE: - ret = ptrace_bts_get_size(child); + ret = ds_get_bts_index(child, /* pos = */ 0); break; case PTRACE_BTS_GET: @@ -1001,14 +1095,14 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data) break; case PTRACE_BTS_CLEAR: - ret = ptrace_bts_clear(child); + ret = ds_clear_bts(child); break; case PTRACE_BTS_DRAIN: ret = ptrace_bts_drain (child, data, (struct bts_struct __user *) addr); break; -#endif +#endif /* CONFIG_X86_PTRACE_BTS */ default: ret = ptrace_request(child, request, addr, data); diff --git a/arch/x86/kernel/setup_64.c b/arch/x86/kernel/setup_64.c index f2fc8feb727..f7aebe13c99 100644 --- a/arch/x86/kernel/setup_64.c +++ b/arch/x86/kernel/setup_64.c @@ -920,11 +920,12 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c) set_cpu_cap(c, X86_FEATURE_BTS); if (!(l1 & (1<<12))) set_cpu_cap(c, X86_FEATURE_PEBS); + ds_init_intel(c); } if (cpu_has_bts) - ds_init_intel(c); + ptrace_bts_init_intel(c); n = c->extended_cpuid_level; if (n >= 0x80000008) { -- cgit v1.2.3 From 970e725098a6da5a9c1f8128102c812e31a0444c Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Wed, 16 Apr 2008 16:40:17 -0700 Subject: x86, ptrace: PEBS support, warning fix arch/x86/kernel/process_32.c:566: warning: unused variable 'ds_next' arch/x86/kernel/process_32.c:566: warning: unused variable 'ds_prev' Cc: Markus Metzger Cc: Andi Kleen Cc: "H. Peter Anvin" Cc: "Siddha, Suresh B" Cc: Roland McGrath Cc: Michael Kerrisk Cc: Cc: stephane eranian Cc: Jason Wessel Signed-off-by: Andrew Morton Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/process_32.c | 37 +++++++++++++++++++++++++------------ 1 file changed, 25 insertions(+), 12 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 5cec8a75a4e..496ea3110aa 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -484,20 +484,13 @@ int set_tsc_mode(unsigned int val) return 0; } -static noinline void -__switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, - struct tss_struct *tss) +#ifdef CONFIG_X86_DS +static int update_debugctl(struct thread_struct *prev, + struct thread_struct *next, unsigned long debugctl) { - struct thread_struct *prev, *next; - unsigned long debugctl; - unsigned long ds_prev = 0, ds_next = 0; + unsigned long ds_prev = 0; + unsigned long ds_next = 0; - prev = &prev_p->thread; - next = &next_p->thread; - - debugctl = prev->debugctlmsr; - -#ifdef CONFIG_X86_DS if (prev->ds_ctx) ds_prev = (unsigned long)prev->ds_ctx->ds; if (next->ds_ctx) @@ -510,8 +503,28 @@ __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, update_debugctlmsr(0); wrmsr(MSR_IA32_DS_AREA, ds_next, 0); } + return debugctl; +} +#else +static int update_debugctl(struct thread_struct *prev, + struct thread_struct *next, unsigned long debugctl) +{ + return debugctl; +} #endif /* CONFIG_X86_DS */ +static noinline void +__switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, + struct tss_struct *tss) +{ + struct thread_struct *prev, *next; + unsigned long debugctl; + + prev = &prev_p->thread; + next = &next_p->thread; + + debugctl = update_debugctl(prev, next, prev->debugctlmsr); + if (next->debugctlmsr != debugctl) update_debugctlmsr(next->debugctlmsr); -- cgit v1.2.3 From 573da4224e8c3800e613d715e909c3179a7e3cb2 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Mon, 28 Apr 2008 23:15:04 +0400 Subject: x86: DS cleanup - dont treat 0 as NULL Signed-off-by: Cyrill Gorcunov Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/ds.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/ds.c b/arch/x86/kernel/ds.c index 5b32b6d062b..24a323c9599 100644 --- a/arch/x86/kernel/ds.c +++ b/arch/x86/kernel/ds.c @@ -238,12 +238,12 @@ static inline struct ds_context *ds_alloc_context(struct task_struct *task) context = kzalloc(sizeof(*context), GFP_KERNEL); if (!context) - return 0; + return NULL; context->ds = kzalloc(ds_cfg.sizeof_ds, GFP_KERNEL); if (!context->ds) { kfree(context); - return 0; + return NULL; } *p_context = context; @@ -279,7 +279,7 @@ static inline void ds_put_context(struct ds_context *context) if (--context->count) goto out; - *(context->this) = 0; + *(context->this) = NULL; if (context->task) clear_tsk_thread_flag(context->task, TIF_DS_AREA_MSR); @@ -341,16 +341,16 @@ static inline void *ds_allocate_buffer(size_t size, unsigned int *pages) rlim = current->signal->rlim[RLIMIT_AS].rlim_cur >> PAGE_SHIFT; vm = current->mm->total_vm + pgsz; if (rlim < vm) - return 0; + return NULL; rlim = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur >> PAGE_SHIFT; vm = current->mm->locked_vm + pgsz; if (rlim < vm) - return 0; + return NULL; buffer = kzalloc(size, GFP_KERNEL); if (!buffer) - return 0; + return NULL; current->mm->total_vm += pgsz; current->mm->locked_vm += pgsz; @@ -395,7 +395,7 @@ static int ds_request(struct task_struct *task, void *base, size_t size, if (context->owner[qual] == current) goto out_unlock; error = -EPERM; - if (context->owner[qual] != 0) + if (context->owner[qual] != NULL) goto out_unlock; context->owner[qual] = current; @@ -445,7 +445,7 @@ static int ds_request(struct task_struct *task, void *base, size_t size, return error; out_release: - context->owner[qual] = 0; + context->owner[qual] = NULL; ds_put_context(context); return error; @@ -825,7 +825,7 @@ void __cpuinit ds_init_intel(struct cpuinfo_x86 *c) ds_configure(&ds_cfg_var); break; case 0xF: /* Core2 */ - case 0x1C: /* Atom */ + case 0x1C: /* Atom */ ds_configure(&ds_cfg_64); break; default: -- cgit v1.2.3 From 34b2cd5b688b012975fcfc3b3970fc3508fa82c4 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sat, 17 May 2008 08:30:07 +0200 Subject: x86: PEBS cleanup Signed-off-by: Ingo Molnar --- arch/x86/kernel/process_64.c | 29 +++++++++++++++++------------ 1 file changed, 17 insertions(+), 12 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index ad213494a22..4a93c98a60a 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -500,7 +500,6 @@ static inline void __switch_to_xtra(struct task_struct *prev_p, { struct thread_struct *prev, *next; unsigned long debugctl; - unsigned long ds_prev = 0, ds_next = 0; prev = &prev_p->thread, next = &next_p->thread; @@ -508,17 +507,23 @@ static inline void __switch_to_xtra(struct task_struct *prev_p, debugctl = prev->debugctlmsr; #ifdef CONFIG_X86_DS - if (prev->ds_ctx) - ds_prev = (unsigned long)prev->ds_ctx->ds; - if (next->ds_ctx) - ds_next = (unsigned long)next->ds_ctx->ds; - - if (ds_next != ds_prev) { - /* we clear debugctl to make sure DS - * is not in use when we change it */ - debugctl = 0; - update_debugctlmsr(0); - wrmsrl(MSR_IA32_DS_AREA, ds_next); + { + unsigned long ds_prev = 0, ds_next = 0; + + if (prev->ds_ctx) + ds_prev = (unsigned long)prev->ds_ctx->ds; + if (next->ds_ctx) + ds_next = (unsigned long)next->ds_ctx->ds; + + if (ds_next != ds_prev) { + /* + * We clear debugctl to make sure DS + * is not in use when we change it: + */ + debugctl = 0; + update_debugctlmsr(0); + wrmsrl(MSR_IA32_DS_AREA, ds_next); + } } #endif /* CONFIG_X86_DS */ -- cgit v1.2.3 From 63cc8c75156462d4b42cbdd76c293b7eee7ddbfe Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 12 May 2008 15:44:40 +0200 Subject: percpu: introduce DEFINE_PER_CPU_PAGE_ALIGNED() macro While examining holes in percpu section I found this : c05f5000 D per_cpu__current_task c05f5000 D __per_cpu_start c05f5004 D per_cpu__cpu_number c05f5008 D per_cpu__irq_regs c05f500c d per_cpu__cpu_devices c05f5040 D per_cpu__cyc2ns c05f6000 d per_cpu__cpuid4_info c05f6004 d per_cpu__cache_kobject c05f6008 d per_cpu__index_kobject c05f7000 D per_cpu__gdt_page This is because gdt_page is a percpu variable, defined with a page alignement, and linker is doing its job, two times because of .o nesting in the build process. I introduced a new macro DEFINE_PER_CPU_PAGE_ALIGNED() to avoid wasting this space. All page aligned variables (only one at this time) are put in a separate subsection .data.percpu.page_aligned, at the very begining of percpu zone. Before patch , on a x86_32 machine : .data.percpu 30232 3227471872 .data.percpu 22168 3227471872 Thats 8064 bytes saved for each CPU. Signed-off-by: Eric Dumazet Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/cpu/common.c | 2 +- arch/x86/kernel/vmlinux_32.lds.S | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index d0463a94624..b2f54fafb8b 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -21,7 +21,7 @@ #include "cpu.h" -DEFINE_PER_CPU(struct gdt_page, gdt_page) = { .gdt = { +DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = { [GDT_ENTRY_KERNEL_CS] = { { { 0x0000ffff, 0x00cf9a00 } } }, [GDT_ENTRY_KERNEL_DS] = { { { 0x0000ffff, 0x00cf9200 } } }, [GDT_ENTRY_DEFAULT_USER_CS] = { { { 0x0000ffff, 0x00cffa00 } } }, diff --git a/arch/x86/kernel/vmlinux_32.lds.S b/arch/x86/kernel/vmlinux_32.lds.S index ce5ed083a1e..0f7c29a8c4e 100644 --- a/arch/x86/kernel/vmlinux_32.lds.S +++ b/arch/x86/kernel/vmlinux_32.lds.S @@ -189,6 +189,7 @@ SECTIONS . = ALIGN(PAGE_SIZE); .data.percpu : AT(ADDR(.data.percpu) - LOAD_OFFSET) { __per_cpu_start = .; + *(.data.percpu.page_aligned) *(.data.percpu) *(.data.percpu.shared_aligned) __per_cpu_end = .; -- cgit v1.2.3 From 3243b4ed817fad04e56e7b7dc78ec28dc8322ff2 Mon Sep 17 00:00:00 2001 From: Krzysztof Oledzki Date: Wed, 4 Jun 2008 03:40:17 +0200 Subject: x86: add another PCI ID for ICH6 force-hpet Tested on Asus P5GDC-V $ lspci -n -n |grep ISA 00:1f.0 ISA bridge [0601]: Intel Corporation 82801FB/FR (ICH6/ICH6R) LPC Interface Bridge [8086:2640] (rev 03) Force enabled HPET at base address 0xfed00000 hpet clockevent registered hpet0: at MMIO 0xfed00000, IRQs 2, 8, 0 hpet0: 3 64-bit timers, 14318180 Hz Signed-off-by: Krzysztof Piotr Oledzki Signed-off-by: Ingo Molnar --- arch/x86/kernel/quirks.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/quirks.c b/arch/x86/kernel/quirks.c index d89a648fe71..06e1fd6be83 100644 --- a/arch/x86/kernel/quirks.c +++ b/arch/x86/kernel/quirks.c @@ -158,6 +158,8 @@ static void ich_force_enable_hpet(struct pci_dev *dev) DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ESB2_0, ich_force_enable_hpet); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICH6_0, + ich_force_enable_hpet); DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICH6_1, ich_force_enable_hpet); DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICH7_0, -- cgit v1.2.3 From 59ea746337c69f6a5f1bc4d5e8544b3cbf12f801 Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Thu, 12 Jun 2008 13:56:40 +0200 Subject: MM: virtual address debug Add some (configurable) expensive sanity checking to catch wrong address translations on x86. - create linux/mmdebug.h file to be able include this file in asm headers to not get unsolvable loops in header files - __phys_addr on x86_32 became a function in ioremap.c since PAGE_OFFSET, is_vmalloc_addr and VMALLOC_* non-constasts are undefined if declared in page_32.h - add __phys_addr_const for initializing doublefault_tss.__cr3 Tested on 386, 386pae, x86_64 and x86_64 numa=fake=2. Contains Andi's enable numa virtual address debug patch. Signed-off-by: Jiri Slaby Cc: Andi Kleen Signed-off-by: Ingo Molnar --- arch/x86/kernel/doublefault_32.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/doublefault_32.c b/arch/x86/kernel/doublefault_32.c index a47798b59f0..395acb12b0d 100644 --- a/arch/x86/kernel/doublefault_32.c +++ b/arch/x86/kernel/doublefault_32.c @@ -66,6 +66,6 @@ struct tss_struct doublefault_tss __cacheline_aligned = { .ds = __USER_DS, .fs = __KERNEL_PERCPU, - .__cr3 = __pa(swapper_pg_dir) + .__cr3 = __phys_addr_const((unsigned long)swapper_pg_dir) } }; -- cgit v1.2.3 From f3561810b163aca5388ad550abbbc82ae5323189 Mon Sep 17 00:00:00 2001 From: Joe Buehler Date: Mon, 9 Jun 2008 08:55:20 -0400 Subject: x86: add PCI ID for 6300ESB force hpet 00:1f.0 ISA bridge: Intel Corporation 6300ESB LPC Interface Controller (rev 02) 00:1f.0 Class 0601: 8086:25a1 (rev 02) kernel: pci 0000:00:1f.0: Force enabled HPET at 0xfed00000 kernel: hpet clockevent registered kernel: hpet0: at MMIO 0xfed00000, IRQs 2, 8, 0 kernel: hpet0: 3 64-bit timers, 14318180 Hz Signed-off-by: Ingo Molnar --- arch/x86/kernel/quirks.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/quirks.c b/arch/x86/kernel/quirks.c index 06e1fd6be83..f327abafe3e 100644 --- a/arch/x86/kernel/quirks.c +++ b/arch/x86/kernel/quirks.c @@ -257,6 +257,8 @@ static void old_ich_force_enable_hpet_user(struct pci_dev *dev) old_ich_force_enable_hpet(dev); } +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ESB_1, + old_ich_force_enable_hpet_user); DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801CA_0, old_ich_force_enable_hpet_user); DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801CA_12, -- cgit v1.2.3 From d94d93ca5cc36cd78c532def62772c98fe8ba5d7 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Thu, 10 Jul 2008 11:16:46 -0700 Subject: x64, x2apic/intr-remap: 8259 specific mask/unmask routines 8259 specific mask/unmask routines which be used later while enabling interrupt-remapping. Signed-off-by: Suresh Siddha Cc: akpm@linux-foundation.org Cc: arjan@linux.intel.com Cc: andi@firstfloor.org Cc: ebiederm@xmission.com Cc: jbarnes@virtuousgeek.org Cc: steiner@sgi.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/i8259.c | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/i8259.c b/arch/x86/kernel/i8259.c index dc92b49d920..4b8a53d841f 100644 --- a/arch/x86/kernel/i8259.c +++ b/arch/x86/kernel/i8259.c @@ -282,6 +282,30 @@ static int __init i8259A_init_sysfs(void) device_initcall(i8259A_init_sysfs); +void mask_8259A(void) +{ + unsigned long flags; + + spin_lock_irqsave(&i8259A_lock, flags); + + outb(0xff, PIC_MASTER_IMR); /* mask all of 8259A-1 */ + outb(0xff, PIC_SLAVE_IMR); /* mask all of 8259A-2 */ + + spin_unlock_irqrestore(&i8259A_lock, flags); +} + +void unmask_8259A(void) +{ + unsigned long flags; + + spin_lock_irqsave(&i8259A_lock, flags); + + outb(cached_master_mask, PIC_MASTER_IMR); /* restore master IRQ mask */ + outb(cached_slave_mask, PIC_SLAVE_IMR); /* restore slave IRQ mask */ + + spin_unlock_irqrestore(&i8259A_lock, flags); +} + void init_8259A(int auto_eoi) { unsigned long flags; -- cgit v1.2.3 From 4dc2f96cacd1e74c688f94348a3bfd0a980817d5 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Thu, 10 Jul 2008 11:16:47 -0700 Subject: x64, x2apic/intr-remap: ioapic routines which deal with initial io-apic RTE setup Generic ioapic specific routines which be used later during enabling interrupt-remapping. Signed-off-by: Suresh Siddha Cc: akpm@linux-foundation.org Cc: arjan@linux.intel.com Cc: andi@firstfloor.org Cc: ebiederm@xmission.com Cc: jbarnes@virtuousgeek.org Cc: steiner@sgi.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/io_apic_64.c | 66 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 66 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/io_apic_64.c b/arch/x86/kernel/io_apic_64.c index 9e645cba11c..84dd63c13d6 100644 --- a/arch/x86/kernel/io_apic_64.c +++ b/arch/x86/kernel/io_apic_64.c @@ -114,6 +114,9 @@ DEFINE_SPINLOCK(vector_lock); */ int nr_ioapic_registers[MAX_IO_APICS]; +/* I/O APIC RTE contents at the OS boot up */ +struct IO_APIC_route_entry *early_ioapic_entries[MAX_IO_APICS]; + /* I/O APIC entries */ struct mp_config_ioapic mp_ioapics[MAX_IO_APICS]; int nr_ioapics; @@ -446,6 +449,69 @@ static void clear_IO_APIC (void) clear_IO_APIC_pin(apic, pin); } +/* + * Saves and masks all the unmasked IO-APIC RTE's + */ +int save_mask_IO_APIC_setup(void) +{ + union IO_APIC_reg_01 reg_01; + unsigned long flags; + int apic, pin; + + /* + * The number of IO-APIC IRQ registers (== #pins): + */ + for (apic = 0; apic < nr_ioapics; apic++) { + spin_lock_irqsave(&ioapic_lock, flags); + reg_01.raw = io_apic_read(apic, 1); + spin_unlock_irqrestore(&ioapic_lock, flags); + nr_ioapic_registers[apic] = reg_01.bits.entries+1; + } + + for (apic = 0; apic < nr_ioapics; apic++) { + early_ioapic_entries[apic] = + kzalloc(sizeof(struct IO_APIC_route_entry) * + nr_ioapic_registers[apic], GFP_KERNEL); + if (!early_ioapic_entries[apic]) + return -ENOMEM; + } + + for (apic = 0; apic < nr_ioapics; apic++) + for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) { + struct IO_APIC_route_entry entry; + + entry = early_ioapic_entries[apic][pin] = + ioapic_read_entry(apic, pin); + if (!entry.mask) { + entry.mask = 1; + ioapic_write_entry(apic, pin, entry); + } + } + return 0; +} + +void restore_IO_APIC_setup(void) +{ + int apic, pin; + + for (apic = 0; apic < nr_ioapics; apic++) + for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) + ioapic_write_entry(apic, pin, + early_ioapic_entries[apic][pin]); +} + +void reinit_intr_remapped_IO_APIC(int intr_remapping) +{ + /* + * for now plain restore of previous settings. + * TBD: In the case of OS enabling interrupt-remapping, + * IO-APIC RTE's need to be setup to point to interrupt-remapping + * table entries. for now, do a plain restore, and wait for + * the setup_IO_APIC_irqs() to do proper initialization. + */ + restore_IO_APIC_setup(); +} + int skip_ioapic_setup; int ioapic_force; -- cgit v1.2.3 From 0c81c746f9bdbfaafe64322d540c8b7b59c27314 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Thu, 10 Jul 2008 11:16:48 -0700 Subject: x64, x2apic/intr-remap: introduce read_apic_id() to genapic routines Move the read_apic_id() to genapic routines. Signed-off-by: Suresh Siddha Cc: akpm@linux-foundation.org Cc: arjan@linux.intel.com Cc: andi@firstfloor.org Cc: ebiederm@xmission.com Cc: jbarnes@virtuousgeek.org Cc: steiner@sgi.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic_64.c | 5 +++++ arch/x86/kernel/genapic_64.c | 11 ----------- arch/x86/kernel/genapic_flat_64.c | 14 +++++++++++++- arch/x86/kernel/genx2apic_uv_x.c | 14 +++++++++++++- 4 files changed, 31 insertions(+), 13 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c index 1e3d32e27c1..9dd4ee4735f 100644 --- a/arch/x86/kernel/apic_64.c +++ b/arch/x86/kernel/apic_64.c @@ -1096,6 +1096,11 @@ void __cpuinit generic_processor_info(int apicid, int version) cpu_set(cpu, cpu_present_map); } +int hard_smp_processor_id(void) +{ + return read_apic_id(); +} + /* * Power management */ diff --git a/arch/x86/kernel/genapic_64.c b/arch/x86/kernel/genapic_64.c index 1fa8be5bd21..7414871751a 100644 --- a/arch/x86/kernel/genapic_64.c +++ b/arch/x86/kernel/genapic_64.c @@ -79,17 +79,6 @@ int __init acpi_madt_oem_check(char *oem_id, char *oem_table_id) return 0; } -unsigned int read_apic_id(void) -{ - unsigned int id; - - WARN_ON(preemptible() && num_online_cpus() > 1); - id = apic_read(APIC_ID); - if (uv_system_type >= UV_X2APIC) - id |= __get_cpu_var(x2apic_extra_bits); - return id; -} - enum uv_system_type get_uv_system_type(void) { return uv_system_type; diff --git a/arch/x86/kernel/genapic_flat_64.c b/arch/x86/kernel/genapic_flat_64.c index 1a9c68845ee..400ed8df8b4 100644 --- a/arch/x86/kernel/genapic_flat_64.c +++ b/arch/x86/kernel/genapic_flat_64.c @@ -15,9 +15,11 @@ #include #include #include +#include #include #include #include +#include static cpumask_t flat_target_cpus(void) { @@ -95,9 +97,17 @@ static void flat_send_IPI_all(int vector) __send_IPI_shortcut(APIC_DEST_ALLINC, vector, APIC_DEST_LOGICAL); } +static unsigned int read_xapic_id(void) +{ + unsigned int id; + + id = GET_XAPIC_ID(apic_read(APIC_ID)); + return id; +} + static int flat_apic_id_registered(void) { - return physid_isset(GET_APIC_ID(read_apic_id()), phys_cpu_present_map); + return physid_isset(read_xapic_id(), phys_cpu_present_map); } static unsigned int flat_cpu_mask_to_apicid(cpumask_t cpumask) @@ -123,6 +133,7 @@ struct genapic apic_flat = { .send_IPI_mask = flat_send_IPI_mask, .cpu_mask_to_apicid = flat_cpu_mask_to_apicid, .phys_pkg_id = phys_pkg_id, + .read_apic_id = read_xapic_id, }; /* @@ -187,4 +198,5 @@ struct genapic apic_physflat = { .send_IPI_mask = physflat_send_IPI_mask, .cpu_mask_to_apicid = physflat_cpu_mask_to_apicid, .phys_pkg_id = phys_pkg_id, + .read_apic_id = read_xapic_id, }; diff --git a/arch/x86/kernel/genx2apic_uv_x.c b/arch/x86/kernel/genx2apic_uv_x.c index 711f11c30b0..1ef99be1848 100644 --- a/arch/x86/kernel/genx2apic_uv_x.c +++ b/arch/x86/kernel/genx2apic_uv_x.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include @@ -134,9 +135,19 @@ static unsigned int uv_cpu_mask_to_apicid(cpumask_t cpumask) return BAD_APICID; } +static unsigned int uv_read_apic_id(void) +{ + unsigned int id; + + WARN_ON(preemptible() && num_online_cpus() > 1); + id = apic_read(APIC_ID) | __get_cpu_var(x2apic_extra_bits); + + return id; +} + static unsigned int phys_pkg_id(int index_msb) { - return GET_APIC_ID(read_apic_id()) >> index_msb; + return uv_read_apic_id() >> index_msb; } #ifdef ZZZ /* Needs x2apic patch */ @@ -159,6 +170,7 @@ struct genapic apic_x2apic_uv_x = { /* ZZZ.send_IPI_self = uv_send_IPI_self, */ .cpu_mask_to_apicid = uv_cpu_mask_to_apicid, .phys_pkg_id = phys_pkg_id, /* Fixme ZZZ */ + .read_apic_id = uv_read_apic_id, }; static __cpuinit void set_x2apic_extra_bits(int pnode) -- cgit v1.2.3 From 2d7a66d02e11af9ab8e16c76d22767e622b4e3d7 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Fri, 11 Jul 2008 14:24:19 -0700 Subject: x64, x2apic/intr-remap: Interrupt-remapping and x2apic support, fix Yinghai Lu wrote: > Setting APIC routing to physical flat > Kernel panic - not syncing: Boot APIC ID in local APIC unexpected (0 vs 4) > Pid: 1, comm: swapper Not tainted 2.6.26-rc9-tip-01763-g74f94b1-dirty #320 > > Call Trace: > [] ? set_cpu_sibling_map+0x38c/0x3bd > [] ? read_xapic_id+0x25/0x3e > [] ? verify_local_APIC+0x139/0x1b9 > [] ? read_xapic_id+0x25/0x3e > [] ? native_smp_prepare_cpus+0x224/0x2e9 > [] ? kernel_init+0x64/0x341 > [] ? child_rip+0xa/0x11 > [] ? kernel_init+0x0/0x341 > [] ? child_rip+0x0/0x11 > > > guess read_apic_id changing cuase some problem... genapic's read_apic_id() returns the actual apic id extracted from the APIC_ID register. And in some cases like UV, read_apic_id() returns completely different values from APIC ID register. Use the native apic register read, rather than genapic read_apic_id() in verify_local_APIC() And also, lapic_suspend() should also use native apic register read. Reported-by: Yinghai Lu Signed-off-by: Suresh Siddha Cc: "akpm@linux-foundation.org" Cc: "arjan@linux.intel.com" Cc: "andi@firstfloor.org" Cc: "ebiederm@xmission.com" Cc: "jbarnes@virtuousgeek.org" Cc: "steiner@sgi.com" Cc: "jeremy@goop.org" Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic_64.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c index 9dd4ee4735f..3963f590c3d 100644 --- a/arch/x86/kernel/apic_64.c +++ b/arch/x86/kernel/apic_64.c @@ -626,10 +626,10 @@ int __init verify_local_APIC(void) /* * The ID register is read/write in a real APIC. */ - reg0 = read_apic_id(); + reg0 = apic_read(APIC_ID); apic_printk(APIC_DEBUG, "Getting ID: %x\n", reg0); apic_write(APIC_ID, reg0 ^ APIC_ID_MASK); - reg1 = read_apic_id(); + reg1 = apic_read(APIC_ID); apic_printk(APIC_DEBUG, "Getting ID: %x\n", reg1); apic_write(APIC_ID, reg0); if (reg1 != (reg0 ^ APIC_ID_MASK)) @@ -1137,7 +1137,7 @@ static int lapic_suspend(struct sys_device *dev, pm_message_t state) maxlvt = lapic_get_maxlvt(); - apic_pm_state.apic_id = read_apic_id(); + apic_pm_state.apic_id = apic_read(APIC_ID); apic_pm_state.apic_taskpri = apic_read(APIC_TASKPRI); apic_pm_state.apic_ldr = apic_read(APIC_LDR); apic_pm_state.apic_dfr = apic_read(APIC_DFR); -- cgit v1.2.3 From 1b374e4d6f8b3eb2fcd034fcc24ea8ba1dfde7aa Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Thu, 10 Jul 2008 11:16:49 -0700 Subject: x64, x2apic/intr-remap: basic apic ops support Introduce basic apic operations which handle the apic programming. This will be used later to introduce another specific operations for x2apic. For the perfomance critial accesses like IPI's, EOI etc, we use the native operations as they are already referenced by different indirections like genapic, irq_chip etc. 64bit Paravirt ops can also define their apic operations accordingly. Signed-off-by: Suresh Siddha Cc: akpm@linux-foundation.org Cc: arjan@linux.intel.com Cc: andi@firstfloor.org Cc: ebiederm@xmission.com Cc: jbarnes@virtuousgeek.org Cc: steiner@sgi.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic_32.c | 6 ++++++ arch/x86/kernel/apic_64.c | 34 ++++++++++++++++++++++++++++++++-- arch/x86/kernel/io_apic_64.c | 8 ++++---- arch/x86/kernel/paravirt.c | 8 +++++--- arch/x86/kernel/smpboot.c | 28 +++++++++++----------------- 5 files changed, 58 insertions(+), 26 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c index 3e58b676d23..2a83c07bd88 100644 --- a/arch/x86/kernel/apic_32.c +++ b/arch/x86/kernel/apic_32.c @@ -145,6 +145,12 @@ static int modern_apic(void) return lapic_get_version() >= 0x14; } +void apic_icr_write(u32 low, u32 id) +{ + apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(id)); + apic_write_around(APIC_ICR, low); +} + void apic_wait_icr_idle(void) { while (apic_read(APIC_ICR) & APIC_ICR_BUSY) diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c index 3963f590c3d..9bb040689b3 100644 --- a/arch/x86/kernel/apic_64.c +++ b/arch/x86/kernel/apic_64.c @@ -119,13 +119,13 @@ static int modern_apic(void) return lapic_get_version() >= 0x14; } -void apic_wait_icr_idle(void) +void xapic_wait_icr_idle(void) { while (apic_read(APIC_ICR) & APIC_ICR_BUSY) cpu_relax(); } -u32 safe_apic_wait_icr_idle(void) +u32 safe_xapic_wait_icr_idle(void) { u32 send_status; int timeout; @@ -141,6 +141,36 @@ u32 safe_apic_wait_icr_idle(void) return send_status; } +void xapic_icr_write(u32 low, u32 id) +{ + apic_write(APIC_ICR2, id << 24); + apic_write(APIC_ICR, low); +} + +u64 xapic_icr_read(void) +{ + u32 icr1, icr2; + + icr2 = apic_read(APIC_ICR2); + icr1 = apic_read(APIC_ICR); + + return (icr1 | ((u64)icr2 << 32)); +} + +static struct apic_ops xapic_ops = { + .read = native_apic_mem_read, + .write = native_apic_mem_write, + .write_atomic = native_apic_mem_write_atomic, + .icr_read = xapic_icr_read, + .icr_write = xapic_icr_write, + .wait_icr_idle = xapic_wait_icr_idle, + .safe_wait_icr_idle = safe_xapic_wait_icr_idle, +}; + +struct apic_ops __read_mostly *apic_ops = &xapic_ops; + +EXPORT_SYMBOL_GPL(apic_ops); + /** * enable_NMI_through_LVT0 - enable NMI through local vector table 0 */ diff --git a/arch/x86/kernel/io_apic_64.c b/arch/x86/kernel/io_apic_64.c index 84dd63c13d6..b62d42ef928 100644 --- a/arch/x86/kernel/io_apic_64.c +++ b/arch/x86/kernel/io_apic_64.c @@ -1157,6 +1157,7 @@ static __apicdebuginit void print_APIC_bitfield (int base) void __apicdebuginit print_local_APIC(void * dummy) { unsigned int v, ver, maxlvt; + unsigned long icr; if (apic_verbosity == APIC_QUIET) return; @@ -1200,10 +1201,9 @@ void __apicdebuginit print_local_APIC(void * dummy) v = apic_read(APIC_ESR); printk(KERN_DEBUG "... APIC ESR: %08x\n", v); - v = apic_read(APIC_ICR); - printk(KERN_DEBUG "... APIC ICR: %08x\n", v); - v = apic_read(APIC_ICR2); - printk(KERN_DEBUG "... APIC ICR2: %08x\n", v); + icr = apic_icr_read(); + printk(KERN_DEBUG "... APIC ICR: %08x\n", icr); + printk(KERN_DEBUG "... APIC ICR2: %08x\n", icr >> 32); v = apic_read(APIC_LVTT); printk(KERN_DEBUG "... APIC LVTT: %08x\n", v); diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index e0f571d58c1..b80105a0f47 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c @@ -360,9 +360,11 @@ struct pv_cpu_ops pv_cpu_ops = { struct pv_apic_ops pv_apic_ops = { #ifdef CONFIG_X86_LOCAL_APIC - .apic_write = native_apic_write, - .apic_write_atomic = native_apic_write_atomic, - .apic_read = native_apic_read, +#ifnded CONFIG_X86_64 + .apic_write = native_apic_mem_write, + .apic_write_atomic = native_apic_mem_write_atomic, + .apic_read = native_apic_mem_read, +#endif .setup_boot_clock = setup_boot_APIC_clock, .setup_secondary_clock = setup_secondary_APIC_clock, .startup_ipi_hook = paravirt_nop, diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index f35c2d8016a..c55263b3df0 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -123,7 +123,6 @@ EXPORT_PER_CPU_SYMBOL(cpu_info); static atomic_t init_deasserted; -static int boot_cpu_logical_apicid; /* representing cpus for which sibling maps can be computed */ static cpumask_t cpu_sibling_setup_map; @@ -165,6 +164,8 @@ static void unmap_cpu_to_node(int cpu) #endif #ifdef CONFIG_X86_32 +static int boot_cpu_logical_apicid; + u8 cpu_2_logical_apicid[NR_CPUS] __read_mostly = { [0 ... NR_CPUS-1] = BAD_APICID }; @@ -546,8 +547,7 @@ static inline void __inquire_remote_apic(int apicid) printk(KERN_CONT "a previous APIC delivery may have failed\n"); - apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(apicid)); - apic_write_around(APIC_ICR, APIC_DM_REMRD | regs[i]); + apic_icr_write(APIC_DM_REMRD | regs[i], apicid); timeout = 0; do { @@ -579,11 +579,9 @@ wakeup_secondary_cpu(int logical_apicid, unsigned long start_eip) int maxlvt; /* Target chip */ - apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(logical_apicid)); - /* Boot on the stack */ /* Kick the second */ - apic_write_around(APIC_ICR, APIC_DM_NMI | APIC_DEST_LOGICAL); + apic_icr_write(APIC_DM_NMI | APIC_DEST_LOGICAL, logical_apicid); Dprintk("Waiting for send to finish...\n"); send_status = safe_apic_wait_icr_idle(); @@ -639,13 +637,11 @@ wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip) /* * Turn INIT on target chip */ - apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid)); - /* * Send IPI */ - apic_write_around(APIC_ICR, APIC_INT_LEVELTRIG | APIC_INT_ASSERT - | APIC_DM_INIT); + apic_icr_write(APIC_INT_LEVELTRIG | APIC_INT_ASSERT | APIC_DM_INIT, + phys_apicid); Dprintk("Waiting for send to finish...\n"); send_status = safe_apic_wait_icr_idle(); @@ -655,10 +651,8 @@ wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip) Dprintk("Deasserting INIT.\n"); /* Target chip */ - apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid)); - /* Send IPI */ - apic_write_around(APIC_ICR, APIC_INT_LEVELTRIG | APIC_DM_INIT); + apic_icr_write(APIC_INT_LEVELTRIG | APIC_DM_INIT, phys_apicid); Dprintk("Waiting for send to finish...\n"); send_status = safe_apic_wait_icr_idle(); @@ -703,12 +697,10 @@ wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip) */ /* Target chip */ - apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid)); - /* Boot on the stack */ /* Kick the second */ - apic_write_around(APIC_ICR, APIC_DM_STARTUP - | (start_eip >> 12)); + apic_icr_write(APIC_DM_STARTUP | (start_eip >> 12), + phys_apicid); /* * Give the other CPU some time to accept the IPI. @@ -1147,7 +1139,9 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus) * Setup boot CPU information */ smp_store_cpu_info(0); /* Final full version of the data */ +#ifdef CONFIG_X86_32 boot_cpu_logical_apicid = logical_smp_processor_id(); +#endif current_thread_info()->cpu = 0; /* needed? */ set_cpu_sibling_map(0); -- cgit v1.2.3 From 32e1d0a0651004f5fe47f85a2a5c725ad579a90c Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Thu, 10 Jul 2008 11:16:50 -0700 Subject: x64, x2apic/intr-remap: cpuid bits for x2apic feature cpuid feature for x2apic. Signed-off-by: Suresh Siddha Cc: akpm@linux-foundation.org Cc: arjan@linux.intel.com Cc: andi@firstfloor.org Cc: ebiederm@xmission.com Cc: jbarnes@virtuousgeek.org Cc: steiner@sgi.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/feature_names.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/feature_names.c b/arch/x86/kernel/cpu/feature_names.c index e43ad4ad4cb..0bf4d37a048 100644 --- a/arch/x86/kernel/cpu/feature_names.c +++ b/arch/x86/kernel/cpu/feature_names.c @@ -45,7 +45,7 @@ const char * const x86_cap_flags[NCAPINTS*32] = { /* Intel-defined (#2) */ "pni", NULL, NULL, "monitor", "ds_cpl", "vmx", "smx", "est", "tm2", "ssse3", "cid", NULL, NULL, "cx16", "xtpr", NULL, - NULL, NULL, "dca", "sse4_1", "sse4_2", NULL, NULL, "popcnt", + NULL, NULL, "dca", "sse4_1", "sse4_2", "x2apic", NULL, "popcnt", NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, /* VIA/Cyrix/Centaur-defined */ -- cgit v1.2.3 From 13c88fb58d0112d47f7839f24a755715c6218822 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Thu, 10 Jul 2008 11:16:52 -0700 Subject: x64, x2apic/intr-remap: x2apic ops for x2apic mode support x2apic ops for x2apic mode support. This uses MSR interface and differs slightly from the xapic register layout. Signed-off-by: Suresh Siddha Cc: akpm@linux-foundation.org Cc: arjan@linux.intel.com Cc: andi@firstfloor.org Cc: ebiederm@xmission.com Cc: jbarnes@virtuousgeek.org Cc: steiner@sgi.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic_64.c | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c index 9bb040689b3..a969ef78e12 100644 --- a/arch/x86/kernel/apic_64.c +++ b/arch/x86/kernel/apic_64.c @@ -171,6 +171,41 @@ struct apic_ops __read_mostly *apic_ops = &xapic_ops; EXPORT_SYMBOL_GPL(apic_ops); +static void x2apic_wait_icr_idle(void) +{ + /* no need to wait for icr idle in x2apic */ + return; +} + +static u32 safe_x2apic_wait_icr_idle(void) +{ + /* no need to wait for icr idle in x2apic */ + return 0; +} + +void x2apic_icr_write(u32 low, u32 id) +{ + wrmsrl(APIC_BASE_MSR + (APIC_ICR >> 4), ((__u64) id) << 32 | low); +} + +u64 x2apic_icr_read(void) +{ + unsigned long val; + + rdmsrl(APIC_BASE_MSR + (APIC_ICR >> 4), val); + return val; +} + +static struct apic_ops x2apic_ops = { + .read = native_apic_msr_read, + .write = native_apic_msr_write, + .write_atomic = native_apic_msr_write, + .icr_read = x2apic_icr_read, + .icr_write = x2apic_icr_write, + .wait_icr_idle = x2apic_wait_icr_idle, + .safe_wait_icr_idle = safe_x2apic_wait_icr_idle, +}; + /** * enable_NMI_through_LVT0 - enable NMI through local vector table 0 */ -- cgit v1.2.3 From cff73a6ffaed726780b001937d2a42efde553922 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Thu, 10 Jul 2008 11:16:53 -0700 Subject: x64, x2apic/intr-remap: introcude self IPI to genapic routines Introduce self IPI op for genapic. Signed-off-by: Suresh Siddha Cc: akpm@linux-foundation.org Cc: arjan@linux.intel.com Cc: andi@firstfloor.org Cc: ebiederm@xmission.com Cc: jbarnes@virtuousgeek.org Cc: steiner@sgi.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/genapic_64.c | 2 +- arch/x86/kernel/genapic_flat_64.c | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/genapic_64.c b/arch/x86/kernel/genapic_64.c index 7414871751a..6657de609dc 100644 --- a/arch/x86/kernel/genapic_64.c +++ b/arch/x86/kernel/genapic_64.c @@ -61,7 +61,7 @@ void __init setup_apic_routing(void) /* Same for both flat and physical. */ -void send_IPI_self(int vector) +void apic_send_IPI_self(int vector) { __send_IPI_shortcut(APIC_DEST_SELF, vector, APIC_DEST_PHYSICAL); } diff --git a/arch/x86/kernel/genapic_flat_64.c b/arch/x86/kernel/genapic_flat_64.c index 400ed8df8b4..73558682213 100644 --- a/arch/x86/kernel/genapic_flat_64.c +++ b/arch/x86/kernel/genapic_flat_64.c @@ -131,6 +131,7 @@ struct genapic apic_flat = { .send_IPI_all = flat_send_IPI_all, .send_IPI_allbutself = flat_send_IPI_allbutself, .send_IPI_mask = flat_send_IPI_mask, + .send_IPI_self = apic_send_IPI_self, .cpu_mask_to_apicid = flat_cpu_mask_to_apicid, .phys_pkg_id = phys_pkg_id, .read_apic_id = read_xapic_id, @@ -196,6 +197,7 @@ struct genapic apic_physflat = { .send_IPI_all = physflat_send_IPI_all, .send_IPI_allbutself = physflat_send_IPI_allbutself, .send_IPI_mask = physflat_send_IPI_mask, + .send_IPI_self = apic_send_IPI_self, .cpu_mask_to_apicid = physflat_cpu_mask_to_apicid, .phys_pkg_id = phys_pkg_id, .read_apic_id = read_xapic_id, -- cgit v1.2.3 From 12a67cf6851871ca8df42025c94f140c303d0f7f Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Thu, 10 Jul 2008 11:16:54 -0700 Subject: x64, x2apic/intr-remap: x2apic cluster mode support x2apic cluster mode support. Signed-off-by: Suresh Siddha Cc: akpm@linux-foundation.org Cc: arjan@linux.intel.com Cc: andi@firstfloor.org Cc: ebiederm@xmission.com Cc: jbarnes@virtuousgeek.org Cc: steiner@sgi.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/Makefile | 1 + arch/x86/kernel/genapic_64.c | 2 + arch/x86/kernel/genx2apic_cluster.c | 135 ++++++++++++++++++++++++++++++++++++ 3 files changed, 138 insertions(+) create mode 100644 arch/x86/kernel/genx2apic_cluster.c (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 55ff016e9f6..bde3e9b6fec 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -95,6 +95,7 @@ obj-$(CONFIG_OLPC) += olpc.o # 64 bit specific files ifeq ($(CONFIG_X86_64),y) obj-y += genapic_64.o genapic_flat_64.o genx2apic_uv_x.o tlb_uv.o + obj-y += genx2apic_cluster.o obj-$(CONFIG_X86_PM_TIMER) += pmtimer_64.o obj-$(CONFIG_AUDIT) += audit_64.o diff --git a/arch/x86/kernel/genapic_64.c b/arch/x86/kernel/genapic_64.c index 6657de609dc..1029e178cdf 100644 --- a/arch/x86/kernel/genapic_64.c +++ b/arch/x86/kernel/genapic_64.c @@ -38,6 +38,8 @@ void __init setup_apic_routing(void) { if (uv_system_type == UV_NON_UNIQUE_APIC) genapic = &apic_x2apic_uv_x; + else if (cpu_has_x2apic && intr_remapping_enabled) + genapic = &apic_x2apic_cluster; else #ifdef CONFIG_ACPI /* diff --git a/arch/x86/kernel/genx2apic_cluster.c b/arch/x86/kernel/genx2apic_cluster.c new file mode 100644 index 00000000000..ed0fdede800 --- /dev/null +++ b/arch/x86/kernel/genx2apic_cluster.c @@ -0,0 +1,135 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include + +DEFINE_PER_CPU(u32, x86_cpu_to_logical_apicid); + +/* Start with all IRQs pointing to boot CPU. IRQ balancing will shift them. */ + +static cpumask_t x2apic_target_cpus(void) +{ + return cpumask_of_cpu(0); +} + +/* + * for now each logical cpu is in its own vector allocation domain. + */ +static cpumask_t x2apic_vector_allocation_domain(int cpu) +{ + cpumask_t domain = CPU_MASK_NONE; + cpu_set(cpu, domain); + return domain; +} + +static void __x2apic_send_IPI_dest(unsigned int apicid, int vector, + unsigned int dest) +{ + unsigned long cfg; + + cfg = __prepare_ICR(0, vector, dest); + + /* + * send the IPI. + */ + x2apic_icr_write(cfg, apicid); +} + +/* + * for now, we send the IPI's one by one in the cpumask. + * TBD: Based on the cpu mask, we can send the IPI's to the cluster group + * at once. We have 16 cpu's in a cluster. This will minimize IPI register + * writes. + */ +static void x2apic_send_IPI_mask(cpumask_t mask, int vector) +{ + unsigned long flags; + unsigned long query_cpu; + + local_irq_save(flags); + for_each_cpu_mask(query_cpu, mask) { + __x2apic_send_IPI_dest(per_cpu(x86_cpu_to_logical_apicid, query_cpu), + vector, APIC_DEST_LOGICAL); + } + local_irq_restore(flags); +} + +static void x2apic_send_IPI_allbutself(int vector) +{ + cpumask_t mask = cpu_online_map; + + cpu_clear(smp_processor_id(), mask); + + if (!cpus_empty(mask)) + x2apic_send_IPI_mask(mask, vector); +} + +static void x2apic_send_IPI_all(int vector) +{ + x2apic_send_IPI_mask(cpu_online_map, vector); +} + +static int x2apic_apic_id_registered(void) +{ + return 1; +} + +static unsigned int x2apic_cpu_mask_to_apicid(cpumask_t cpumask) +{ + int cpu; + + /* + * We're using fixed IRQ delivery, can only return one phys APIC ID. + * May as well be the first. + */ + cpu = first_cpu(cpumask); + if ((unsigned)cpu < NR_CPUS) + return per_cpu(x86_cpu_to_logical_apicid, cpu); + else + return BAD_APICID; +} + +static unsigned int x2apic_read_id(void) +{ + return apic_read(APIC_ID); +} + +static unsigned int phys_pkg_id(int index_msb) +{ + return x2apic_read_id() >> index_msb; +} + +static void x2apic_send_IPI_self(int vector) +{ + apic_write(APIC_SELF_IPI, vector); +} + +static void init_x2apic_ldr(void) +{ + int cpu = smp_processor_id(); + + per_cpu(x86_cpu_to_logical_apicid, cpu) = apic_read(APIC_LDR); + return; +} + +struct genapic apic_x2apic_cluster = { + .name = "cluster x2apic", + .int_delivery_mode = dest_LowestPrio, + .int_dest_mode = (APIC_DEST_LOGICAL != 0), + .target_cpus = x2apic_target_cpus, + .vector_allocation_domain = x2apic_vector_allocation_domain, + .apic_id_registered = x2apic_apic_id_registered, + .init_apic_ldr = init_x2apic_ldr, + .send_IPI_all = x2apic_send_IPI_all, + .send_IPI_allbutself = x2apic_send_IPI_allbutself, + .send_IPI_mask = x2apic_send_IPI_mask, + .send_IPI_self = x2apic_send_IPI_self, + .cpu_mask_to_apicid = x2apic_cpu_mask_to_apicid, + .phys_pkg_id = phys_pkg_id, + .read_apic_id = x2apic_read_id, +}; -- cgit v1.2.3 From 5c520a6724e912a7e6153b7597192edad6752750 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Thu, 10 Jul 2008 11:16:55 -0700 Subject: x64, x2apic/intr-remap: setup init_apic_ldr for UV Signed-off-by: Suresh Siddha Signed-off-by: Jack Steiner Cc: akpm@linux-foundation.org Cc: arjan@linux.intel.com Cc: andi@firstfloor.org Cc: ebiederm@xmission.com Cc: jbarnes@virtuousgeek.org Cc: steiner@sgi.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/genx2apic_uv_x.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/genx2apic_uv_x.c b/arch/x86/kernel/genx2apic_uv_x.c index 1ef99be1848..dcd4fb219da 100644 --- a/arch/x86/kernel/genx2apic_uv_x.c +++ b/arch/x86/kernel/genx2apic_uv_x.c @@ -120,6 +120,10 @@ static int uv_apic_id_registered(void) return 1; } +static inline void uv_init_apic_ldr(void) +{ +} + static unsigned int uv_cpu_mask_to_apicid(cpumask_t cpumask) { int cpu; @@ -164,6 +168,7 @@ struct genapic apic_x2apic_uv_x = { .target_cpus = uv_target_cpus, .vector_allocation_domain = uv_vector_allocation_domain,/* Fixme ZZZ */ .apic_id_registered = uv_apic_id_registered, + .init_apic_ldr = uv_init_apic_ldr, .send_IPI_all = uv_send_IPI_all, .send_IPI_allbutself = uv_send_IPI_allbutself, .send_IPI_mask = uv_send_IPI_mask, -- cgit v1.2.3 From 89027d35aa5b8f45ce0f7fa0911db85b46563da0 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Thu, 10 Jul 2008 11:16:56 -0700 Subject: x64, x2apic/intr-remap: IO-APIC support for interrupt-remapping IO-APIC support in the presence of interrupt-remapping infrastructure. IO-APIC RTE will be programmed with interrupt-remapping table entry(IRTE) index and the IRTE will contain information about the vector, cpu destination, trigger mode etc, which traditionally was present in the IO-APIC RTE. Introduce a new irq_chip for cleaner irq migration (in the process context as opposed to the current irq migration in the context of an interrupt. interrupt-remapping infrastructure will help us achieve this cleanly). For edge triggered, irq migration is a simple atomic update(of vector and cpu destination) of IRTE and flush the hardware cache. For level triggered, we need to modify the io-apic RTE aswell with the update vector information, along with modifying IRTE with vector and cpu destination. So irq migration for level triggered is little bit more complex compared to edge triggered migration. But the good news is, we use the same algorithm for level triggered migration as we have today, only difference being, we now initiate the irq migration from process context instead of the interrupt context. In future, when we do a directed EOI (combined with cpu EOI broadcast suppression) to the IO-APIC, level triggered irq migration will also be as simple as edge triggered migration and we can do the irq migration with a simple atomic update to IO-APIC RTE. TBD: some tests/changes needed in the presence of fixup_irqs() for level triggered irq migration. Signed-off-by: Suresh Siddha Cc: akpm@linux-foundation.org Cc: arjan@linux.intel.com Cc: andi@firstfloor.org Cc: ebiederm@xmission.com Cc: jbarnes@virtuousgeek.org Cc: steiner@sgi.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic_64.c | 1 + arch/x86/kernel/io_apic_64.c | 300 +++++++++++++++++++++++++++++++++++++++---- 2 files changed, 279 insertions(+), 22 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c index a969ef78e12..d5c06917b5b 100644 --- a/arch/x86/kernel/apic_64.c +++ b/arch/x86/kernel/apic_64.c @@ -46,6 +46,7 @@ static int disable_apic_timer __cpuinitdata; static int apic_calibrate_pmtmr __initdata; int disable_apic; +int x2apic; /* Local APIC timer works in C2 */ int local_apic_timer_c2_ok; diff --git a/arch/x86/kernel/io_apic_64.c b/arch/x86/kernel/io_apic_64.c index b62d42ef928..9bd02ef049a 100644 --- a/arch/x86/kernel/io_apic_64.c +++ b/arch/x86/kernel/io_apic_64.c @@ -37,6 +37,7 @@ #include #endif #include +#include #include #include @@ -48,6 +49,7 @@ #include #include #include +#include #include #include @@ -312,7 +314,12 @@ static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, u8 vector) pin = entry->pin; if (pin == -1) break; - io_apic_write(apic, 0x11 + pin*2, dest); + /* + * With interrupt-remapping, destination information comes + * from interrupt-remapping table entry. + */ + if (!irq_remapped(irq)) + io_apic_write(apic, 0x11 + pin*2, dest); reg = io_apic_read(apic, 0x10 + pin*2); reg &= ~IO_APIC_REDIR_VECTOR_MASK; reg |= vector; @@ -906,18 +913,98 @@ void setup_vector_irq(int cpu) static struct irq_chip ioapic_chip; +#ifdef CONFIG_INTR_REMAP +static struct irq_chip ir_ioapic_chip; +#endif static void ioapic_register_intr(int irq, unsigned long trigger) { - if (trigger) { + if (trigger) irq_desc[irq].status |= IRQ_LEVEL; - set_irq_chip_and_handler_name(irq, &ioapic_chip, - handle_fasteoi_irq, "fasteoi"); - } else { + else irq_desc[irq].status &= ~IRQ_LEVEL; + +#ifdef CONFIG_INTR_REMAP + if (irq_remapped(irq)) { + irq_desc[irq].status |= IRQ_MOVE_PCNTXT; + if (trigger) + set_irq_chip_and_handler_name(irq, &ir_ioapic_chip, + handle_fasteoi_irq, + "fasteoi"); + else + set_irq_chip_and_handler_name(irq, &ir_ioapic_chip, + handle_edge_irq, "edge"); + return; + } +#endif + if (trigger) + set_irq_chip_and_handler_name(irq, &ioapic_chip, + handle_fasteoi_irq, + "fasteoi"); + else set_irq_chip_and_handler_name(irq, &ioapic_chip, handle_edge_irq, "edge"); +} + +static int setup_ioapic_entry(int apic, int irq, + struct IO_APIC_route_entry *entry, + unsigned int destination, int trigger, + int polarity, int vector) +{ + /* + * add it to the IO-APIC irq-routing table: + */ + memset(entry,0,sizeof(*entry)); + +#ifdef CONFIG_INTR_REMAP + if (intr_remapping_enabled) { + struct intel_iommu *iommu = map_ioapic_to_ir(apic); + struct irte irte; + struct IR_IO_APIC_route_entry *ir_entry = + (struct IR_IO_APIC_route_entry *) entry; + int index; + + if (!iommu) + panic("No mapping iommu for ioapic %d\n", apic); + + index = alloc_irte(iommu, irq, 1); + if (index < 0) + panic("Failed to allocate IRTE for ioapic %d\n", apic); + + memset(&irte, 0, sizeof(irte)); + + irte.present = 1; + irte.dst_mode = INT_DEST_MODE; + irte.trigger_mode = trigger; + irte.dlvry_mode = INT_DELIVERY_MODE; + irte.vector = vector; + irte.dest_id = IRTE_DEST(destination); + + modify_irte(irq, &irte); + + ir_entry->index2 = (index >> 15) & 0x1; + ir_entry->zero = 0; + ir_entry->format = 1; + ir_entry->index = (index & 0x7fff); + } else +#endif + { + entry->delivery_mode = INT_DELIVERY_MODE; + entry->dest_mode = INT_DEST_MODE; + entry->dest = destination; } + + entry->mask = 0; /* enable IRQ */ + entry->trigger = trigger; + entry->polarity = polarity; + entry->vector = vector; + + /* Mask level triggered irqs. + * Use IRQ_DELAYED_DISABLE for edge triggered irqs. + */ + if (trigger) + entry->mask = 1; + return 0; } static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq, @@ -942,24 +1029,15 @@ static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq, apic, mp_ioapics[apic].mp_apicid, pin, cfg->vector, irq, trigger, polarity); - /* - * add it to the IO-APIC irq-routing table: - */ - memset(&entry,0,sizeof(entry)); - - entry.delivery_mode = INT_DELIVERY_MODE; - entry.dest_mode = INT_DEST_MODE; - entry.dest = cpu_mask_to_apicid(mask); - entry.mask = 0; /* enable IRQ */ - entry.trigger = trigger; - entry.polarity = polarity; - entry.vector = cfg->vector; - /* Mask level triggered irqs. - * Use IRQ_DELAYED_DISABLE for edge triggered irqs. - */ - if (trigger) - entry.mask = 1; + if (setup_ioapic_entry(mp_ioapics[apic].mp_apicid, irq, &entry, + cpu_mask_to_apicid(mask), trigger, polarity, + cfg->vector)) { + printk("Failed to setup ioapic entry for ioapic %d, pin %d\n", + mp_ioapics[apic].mp_apicid, pin); + __clear_irq_vector(irq); + return; + } ioapic_register_intr(irq, trigger); if (irq < 16) @@ -1011,6 +1089,9 @@ static void __init setup_timer_IRQ0_pin(unsigned int apic, unsigned int pin, { struct IO_APIC_route_entry entry; + if (intr_remapping_enabled) + return; + memset(&entry, 0, sizeof(entry)); /* @@ -1466,6 +1547,147 @@ static int ioapic_retrigger_irq(unsigned int irq) */ #ifdef CONFIG_SMP + +#ifdef CONFIG_INTR_REMAP +static void ir_irq_migration(struct work_struct *work); + +static DECLARE_DELAYED_WORK(ir_migration_work, ir_irq_migration); + +/* + * Migrate the IO-APIC irq in the presence of intr-remapping. + * + * For edge triggered, irq migration is a simple atomic update(of vector + * and cpu destination) of IRTE and flush the hardware cache. + * + * For level triggered, we need to modify the io-apic RTE aswell with the update + * vector information, along with modifying IRTE with vector and destination. + * So irq migration for level triggered is little bit more complex compared to + * edge triggered migration. But the good news is, we use the same algorithm + * for level triggered migration as we have today, only difference being, + * we now initiate the irq migration from process context instead of the + * interrupt context. + * + * In future, when we do a directed EOI (combined with cpu EOI broadcast + * suppression) to the IO-APIC, level triggered irq migration will also be + * as simple as edge triggered migration and we can do the irq migration + * with a simple atomic update to IO-APIC RTE. + */ +static void migrate_ioapic_irq(int irq, cpumask_t mask) +{ + struct irq_cfg *cfg = irq_cfg + irq; + struct irq_desc *desc = irq_desc + irq; + cpumask_t tmp, cleanup_mask; + struct irte irte; + int modify_ioapic_rte = desc->status & IRQ_LEVEL; + unsigned int dest; + unsigned long flags; + + cpus_and(tmp, mask, cpu_online_map); + if (cpus_empty(tmp)) + return; + + if (get_irte(irq, &irte)) + return; + + if (assign_irq_vector(irq, mask)) + return; + + cpus_and(tmp, cfg->domain, mask); + dest = cpu_mask_to_apicid(tmp); + + if (modify_ioapic_rte) { + spin_lock_irqsave(&ioapic_lock, flags); + __target_IO_APIC_irq(irq, dest, cfg->vector); + spin_unlock_irqrestore(&ioapic_lock, flags); + } + + irte.vector = cfg->vector; + irte.dest_id = IRTE_DEST(dest); + + /* + * Modified the IRTE and flushes the Interrupt entry cache. + */ + modify_irte(irq, &irte); + + if (cfg->move_in_progress) { + cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map); + cfg->move_cleanup_count = cpus_weight(cleanup_mask); + send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR); + cfg->move_in_progress = 0; + } + + irq_desc[irq].affinity = mask; +} + +static int migrate_irq_remapped_level(int irq) +{ + int ret = -1; + + mask_IO_APIC_irq(irq); + + if (io_apic_level_ack_pending(irq)) { + /* + * Interrupt in progress. Migrating irq now will change the + * vector information in the IO-APIC RTE and that will confuse + * the EOI broadcast performed by cpu. + * So, delay the irq migration to the next instance. + */ + schedule_delayed_work(&ir_migration_work, 1); + goto unmask; + } + + /* everthing is clear. we have right of way */ + migrate_ioapic_irq(irq, irq_desc[irq].pending_mask); + + ret = 0; + irq_desc[irq].status &= ~IRQ_MOVE_PENDING; + cpus_clear(irq_desc[irq].pending_mask); + +unmask: + unmask_IO_APIC_irq(irq); + return ret; +} + +static void ir_irq_migration(struct work_struct *work) +{ + int irq; + + for (irq = 0; irq < NR_IRQS; irq++) { + struct irq_desc *desc = irq_desc + irq; + if (desc->status & IRQ_MOVE_PENDING) { + unsigned long flags; + + spin_lock_irqsave(&desc->lock, flags); + if (!desc->chip->set_affinity || + !(desc->status & IRQ_MOVE_PENDING)) { + desc->status &= ~IRQ_MOVE_PENDING; + spin_unlock_irqrestore(&desc->lock, flags); + continue; + } + + desc->chip->set_affinity(irq, + irq_desc[irq].pending_mask); + spin_unlock_irqrestore(&desc->lock, flags); + } + } +} + +/* + * Migrates the IRQ destination in the process context. + */ +static void set_ir_ioapic_affinity_irq(unsigned int irq, cpumask_t mask) +{ + if (irq_desc[irq].status & IRQ_LEVEL) { + irq_desc[irq].status |= IRQ_MOVE_PENDING; + irq_desc[irq].pending_mask = mask; + migrate_irq_remapped_level(irq); + return; + } + + migrate_ioapic_irq(irq, mask); +} +#endif + asmlinkage void smp_irq_move_cleanup_interrupt(void) { unsigned vector, me; @@ -1522,6 +1744,17 @@ static void irq_complete_move(unsigned int irq) #else static inline void irq_complete_move(unsigned int irq) {} #endif +#ifdef CONFIG_INTR_REMAP +static void ack_x2apic_level(unsigned int irq) +{ + ack_x2APIC_irq(); +} + +static void ack_x2apic_edge(unsigned int irq) +{ + ack_x2APIC_irq(); +} +#endif static void ack_apic_edge(unsigned int irq) { @@ -1596,6 +1829,21 @@ static struct irq_chip ioapic_chip __read_mostly = { .retrigger = ioapic_retrigger_irq, }; +#ifdef CONFIG_INTR_REMAP +static struct irq_chip ir_ioapic_chip __read_mostly = { + .name = "IR-IO-APIC", + .startup = startup_ioapic_irq, + .mask = mask_IO_APIC_irq, + .unmask = unmask_IO_APIC_irq, + .ack = ack_x2apic_edge, + .eoi = ack_x2apic_level, +#ifdef CONFIG_SMP + .set_affinity = set_ir_ioapic_affinity_irq, +#endif + .retrigger = ioapic_retrigger_irq, +}; +#endif + static inline void init_IO_APIC_traps(void) { int irq; @@ -1783,6 +2031,8 @@ static inline void __init check_timer(void) * 8259A. */ if (pin1 == -1) { + if (intr_remapping_enabled) + panic("BIOS bug: timer not connected to IO-APIC"); pin1 = pin2; apic1 = apic2; no_pin1 = 1; @@ -1809,6 +2059,8 @@ static inline void __init check_timer(void) clear_IO_APIC_pin(0, pin1); goto out; } + if (intr_remapping_enabled) + panic("timer doesn't work through Interrupt-remapped IO-APIC"); clear_IO_APIC_pin(apic1, pin1); if (!no_pin1) apic_printk(APIC_QUIET,KERN_ERR "..MP-BIOS bug: " @@ -2401,6 +2653,10 @@ void __init setup_ioapic_dest(void) setup_IO_APIC_irq(ioapic, pin, irq, irq_trigger(irq_entry), irq_polarity(irq_entry)); +#ifdef CONFIG_INTR_REMAP + else if (intr_remapping_enabled) + set_ir_ioapic_affinity_irq(irq, TARGET_CPUS); +#endif else set_ioapic_affinity_irq(irq, TARGET_CPUS); } -- cgit v1.2.3 From 75c46fa61bc5b4ccd20a168ff325c58771248fcd Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Thu, 10 Jul 2008 11:16:57 -0700 Subject: x64, x2apic/intr-remap: MSI and MSI-X support for interrupt remapping infrastructure MSI and MSI-X support for interrupt remapping infrastructure. MSI address register will be programmed with interrupt-remapping table entry(IRTE) index and the IRTE will contain information about the vector, cpu destination, etc. For MSI-X, all the IRTE's will be consecutively allocated in the table, and the address registers will contain the starting index to the block and the data register will contain the subindex with in that block. This also introduces a new irq_chip for cleaner irq migration (in the process context as opposed to the current irq migration in the context of an interrupt. interrupt-remapping infrastructure will help us achieve this). As MSI is edge triggered, irq migration is a simple atomic update(of vector and cpu destination) of IRTE and flushing the hardware cache. Signed-off-by: Suresh Siddha Cc: akpm@linux-foundation.org Cc: arjan@linux.intel.com Cc: andi@firstfloor.org Cc: ebiederm@xmission.com Cc: jbarnes@virtuousgeek.org Cc: steiner@sgi.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/io_apic_64.c | 230 +++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 222 insertions(+), 8 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/io_apic_64.c b/arch/x86/kernel/io_apic_64.c index 9bd02ef049a..877aa2e9d7e 100644 --- a/arch/x86/kernel/io_apic_64.c +++ b/arch/x86/kernel/io_apic_64.c @@ -2297,6 +2297,9 @@ void destroy_irq(unsigned int irq) dynamic_irq_cleanup(irq); +#ifdef CONFIG_INTR_REMAP + free_irte(irq); +#endif spin_lock_irqsave(&vector_lock, flags); __clear_irq_vector(irq); spin_unlock_irqrestore(&vector_lock, flags); @@ -2315,10 +2318,41 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_ms tmp = TARGET_CPUS; err = assign_irq_vector(irq, tmp); - if (!err) { - cpus_and(tmp, cfg->domain, tmp); - dest = cpu_mask_to_apicid(tmp); + if (err) + return err; + + cpus_and(tmp, cfg->domain, tmp); + dest = cpu_mask_to_apicid(tmp); + +#ifdef CONFIG_INTR_REMAP + if (irq_remapped(irq)) { + struct irte irte; + int ir_index; + u16 sub_handle; + + ir_index = map_irq_to_irte_handle(irq, &sub_handle); + BUG_ON(ir_index == -1); + + memset (&irte, 0, sizeof(irte)); + + irte.present = 1; + irte.dst_mode = INT_DEST_MODE; + irte.trigger_mode = 0; /* edge */ + irte.dlvry_mode = INT_DELIVERY_MODE; + irte.vector = cfg->vector; + irte.dest_id = IRTE_DEST(dest); + + modify_irte(irq, &irte); + msg->address_hi = MSI_ADDR_BASE_HI; + msg->data = sub_handle; + msg->address_lo = MSI_ADDR_BASE_LO | MSI_ADDR_IR_EXT_INT | + MSI_ADDR_IR_SHV | + MSI_ADDR_IR_INDEX1(ir_index) | + MSI_ADDR_IR_INDEX2(ir_index); + } else +#endif + { msg->address_hi = MSI_ADDR_BASE_HI; msg->address_lo = MSI_ADDR_BASE_LO | @@ -2369,6 +2403,55 @@ static void set_msi_irq_affinity(unsigned int irq, cpumask_t mask) write_msi_msg(irq, &msg); irq_desc[irq].affinity = mask; } + +#ifdef CONFIG_INTR_REMAP +/* + * Migrate the MSI irq to another cpumask. This migration is + * done in the process context using interrupt-remapping hardware. + */ +static void ir_set_msi_irq_affinity(unsigned int irq, cpumask_t mask) +{ + struct irq_cfg *cfg = irq_cfg + irq; + unsigned int dest; + cpumask_t tmp, cleanup_mask; + struct irte irte; + + cpus_and(tmp, mask, cpu_online_map); + if (cpus_empty(tmp)) + return; + + if (get_irte(irq, &irte)) + return; + + if (assign_irq_vector(irq, mask)) + return; + + cpus_and(tmp, cfg->domain, mask); + dest = cpu_mask_to_apicid(tmp); + + irte.vector = cfg->vector; + irte.dest_id = IRTE_DEST(dest); + + /* + * atomically update the IRTE with the new destination and vector. + */ + modify_irte(irq, &irte); + + /* + * After this point, all the interrupts will start arriving + * at the new destination. So, time to cleanup the previous + * vector allocation. + */ + if (cfg->move_in_progress) { + cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map); + cfg->move_cleanup_count = cpus_weight(cleanup_mask); + send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR); + cfg->move_in_progress = 0; + } + + irq_desc[irq].affinity = mask; +} +#endif #endif /* CONFIG_SMP */ /* @@ -2386,26 +2469,157 @@ static struct irq_chip msi_chip = { .retrigger = ioapic_retrigger_irq, }; -int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc) +#ifdef CONFIG_INTR_REMAP +static struct irq_chip msi_ir_chip = { + .name = "IR-PCI-MSI", + .unmask = unmask_msi_irq, + .mask = mask_msi_irq, + .ack = ack_x2apic_edge, +#ifdef CONFIG_SMP + .set_affinity = ir_set_msi_irq_affinity, +#endif + .retrigger = ioapic_retrigger_irq, +}; + +/* + * Map the PCI dev to the corresponding remapping hardware unit + * and allocate 'nvec' consecutive interrupt-remapping table entries + * in it. + */ +static int msi_alloc_irte(struct pci_dev *dev, int irq, int nvec) +{ + struct intel_iommu *iommu; + int index; + + iommu = map_dev_to_ir(dev); + if (!iommu) { + printk(KERN_ERR + "Unable to map PCI %s to iommu\n", pci_name(dev)); + return -ENOENT; + } + + index = alloc_irte(iommu, irq, nvec); + if (index < 0) { + printk(KERN_ERR + "Unable to allocate %d IRTE for PCI %s\n", nvec, + pci_name(dev)); + return -ENOSPC; + } + return index; +} +#endif + +static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc, int irq) { + int ret; struct msi_msg msg; + + ret = msi_compose_msg(dev, irq, &msg); + if (ret < 0) + return ret; + + set_irq_msi(irq, desc); + write_msi_msg(irq, &msg); + +#ifdef CONFIG_INTR_REMAP + if (irq_remapped(irq)) { + struct irq_desc *desc = irq_desc + irq; + /* + * irq migration in process context + */ + desc->status |= IRQ_MOVE_PCNTXT; + set_irq_chip_and_handler_name(irq, &msi_ir_chip, handle_edge_irq, "edge"); + } else +#endif + set_irq_chip_and_handler_name(irq, &msi_chip, handle_edge_irq, "edge"); + + return 0; +} + +int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc) +{ int irq, ret; + irq = create_irq(); if (irq < 0) return irq; - ret = msi_compose_msg(dev, irq, &msg); +#ifdef CONFIG_INTR_REMAP + if (!intr_remapping_enabled) + goto no_ir; + + ret = msi_alloc_irte(dev, irq, 1); + if (ret < 0) + goto error; +no_ir: +#endif + ret = setup_msi_irq(dev, desc, irq); if (ret < 0) { destroy_irq(irq); return ret; } + return 0; - set_irq_msi(irq, desc); - write_msi_msg(irq, &msg); +#ifdef CONFIG_INTR_REMAP +error: + destroy_irq(irq); + return ret; +#endif +} - set_irq_chip_and_handler_name(irq, &msi_chip, handle_edge_irq, "edge"); +int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) +{ + int irq, ret, sub_handle; + struct msi_desc *desc; +#ifdef CONFIG_INTR_REMAP + struct intel_iommu *iommu = 0; + int index = 0; +#endif + sub_handle = 0; + list_for_each_entry(desc, &dev->msi_list, list) { + irq = create_irq(); + if (irq < 0) + return irq; +#ifdef CONFIG_INTR_REMAP + if (!intr_remapping_enabled) + goto no_ir; + + if (!sub_handle) { + /* + * allocate the consecutive block of IRTE's + * for 'nvec' + */ + index = msi_alloc_irte(dev, irq, nvec); + if (index < 0) { + ret = index; + goto error; + } + } else { + iommu = map_dev_to_ir(dev); + if (!iommu) { + ret = -ENOENT; + goto error; + } + /* + * setup the mapping between the irq and the IRTE + * base index, the sub_handle pointing to the + * appropriate interrupt remap table entry. + */ + set_irte_irq(irq, iommu, index, sub_handle); + } +no_ir: +#endif + ret = setup_msi_irq(dev, desc, irq); + if (ret < 0) + goto error; + sub_handle++; + } return 0; + +error: + destroy_irq(irq); + return ret; } void arch_teardown_msi_irq(unsigned int irq) -- cgit v1.2.3 From 6e1cb38a2aef7680975e71f23de187859ee8b158 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Thu, 10 Jul 2008 11:16:58 -0700 Subject: x64, x2apic/intr-remap: add x2apic support, including enabling interrupt-remapping x2apic support. Interrupt-remapping must be enabled before enabling x2apic, this is needed to ensure that IO interrupts continue to work properly after the cpu mode is changed to x2apic(which uses 32bit extended physical/cluster apic id). On systems where apicid's are > 255, BIOS can handover the control to OS in x2apic mode. Or if the OS handover was in legacy xapic mode, check if it is capable of x2apic mode. And if we succeed in enabling Interrupt-remapping, then we can enable x2apic mode in the CPU. Signed-off-by: Suresh Siddha Cc: akpm@linux-foundation.org Cc: arjan@linux.intel.com Cc: andi@firstfloor.org Cc: ebiederm@xmission.com Cc: jbarnes@virtuousgeek.org Cc: steiner@sgi.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/acpi/boot.c | 2 + arch/x86/kernel/apic_64.c | 154 ++++++++++++++++++++++++++++++++++++++-- arch/x86/kernel/cpu/common_64.c | 2 + arch/x86/kernel/genapic_64.c | 1 + arch/x86/kernel/mpparse.c | 2 + arch/x86/kernel/setup.c | 2 + arch/x86/kernel/smpboot.c | 5 ++ 7 files changed, 164 insertions(+), 4 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index 785700a08e9..8705262ddcd 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -1337,7 +1337,9 @@ static void __init acpi_process_madt(void) acpi_ioapic = 1; smp_found_config = 1; +#ifdef CONFIG_X86_32 setup_apic_routing(); +#endif } } if (error == -EINVAL) { diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c index d5c06917b5b..dd0501039f0 100644 --- a/arch/x86/kernel/apic_64.c +++ b/arch/x86/kernel/apic_64.c @@ -27,6 +27,7 @@ #include #include #include +#include #include #include @@ -39,6 +40,7 @@ #include #include #include +#include #include #include @@ -46,8 +48,12 @@ static int disable_apic_timer __cpuinitdata; static int apic_calibrate_pmtmr __initdata; int disable_apic; +int disable_x2apic; int x2apic; +/* x2apic enabled before OS handover */ +int x2apic_preenabled; + /* Local APIC timer works in C2 */ int local_apic_timer_c2_ok; EXPORT_SYMBOL_GPL(local_apic_timer_c2_ok); @@ -896,6 +902,125 @@ void __cpuinit end_local_APIC_setup(void) apic_pm_activate(); } +void check_x2apic(void) +{ + int msr, msr2; + + rdmsr(MSR_IA32_APICBASE, msr, msr2); + + if (msr & X2APIC_ENABLE) { + printk("x2apic enabled by BIOS, switching to x2apic ops\n"); + x2apic_preenabled = x2apic = 1; + apic_ops = &x2apic_ops; + } +} + +void enable_x2apic(void) +{ + int msr, msr2; + + rdmsr(MSR_IA32_APICBASE, msr, msr2); + if (!(msr & X2APIC_ENABLE)) { + printk("Enabling x2apic\n"); + wrmsr(MSR_IA32_APICBASE, msr | X2APIC_ENABLE, 0); + } +} + +void enable_IR_x2apic(void) +{ +#ifdef CONFIG_INTR_REMAP + int ret; + unsigned long flags; + + if (!cpu_has_x2apic) + return; + + if (!x2apic_preenabled && disable_x2apic) { + printk(KERN_INFO + "Skipped enabling x2apic and Interrupt-remapping " + "because of nox2apic\n"); + return; + } + + if (x2apic_preenabled && disable_x2apic) + panic("Bios already enabled x2apic, can't enforce nox2apic"); + + if (!x2apic_preenabled && skip_ioapic_setup) { + printk(KERN_INFO + "Skipped enabling x2apic and Interrupt-remapping " + "because of skipping io-apic setup\n"); + return; + } + + ret = dmar_table_init(); + if (ret) { + printk(KERN_INFO + "dmar_table_init() failed with %d:\n", ret); + + if (x2apic_preenabled) + panic("x2apic enabled by bios. But IR enabling failed"); + else + printk(KERN_INFO + "Not enabling x2apic,Intr-remapping\n"); + return; + } + + local_irq_save(flags); + mask_8259A(); + save_mask_IO_APIC_setup(); + + ret = enable_intr_remapping(1); + + if (ret && x2apic_preenabled) { + local_irq_restore(flags); + panic("x2apic enabled by bios. But IR enabling failed"); + } + + if (ret) + goto end; + + if (!x2apic) { + x2apic = 1; + apic_ops = &x2apic_ops; + enable_x2apic(); + } +end: + if (ret) + /* + * IR enabling failed + */ + restore_IO_APIC_setup(); + else + reinit_intr_remapped_IO_APIC(x2apic_preenabled); + + unmask_8259A(); + local_irq_restore(flags); + + if (!ret) { + if (!x2apic_preenabled) + printk(KERN_INFO + "Enabled x2apic and interrupt-remapping\n"); + else + printk(KERN_INFO + "Enabled Interrupt-remapping\n"); + } else + printk(KERN_ERR + "Failed to enable Interrupt-remapping and x2apic\n"); +#else + if (!cpu_has_x2apic) + return; + + if (x2apic_preenabled) + panic("x2apic enabled prior OS handover," + " enable CONFIG_INTR_REMAP"); + + printk(KERN_INFO "Enable CONFIG_INTR_REMAP for enabling intr-remapping " + " and x2apic\n"); +#endif + + return; +} + /* * Detect and enable local APICs on non-SMP boards. * Original code written by Keir Fraser. @@ -943,6 +1068,11 @@ void __init early_init_lapic_mapping(void) */ void __init init_apic_mappings(void) { + if (x2apic) { + boot_cpu_physical_apicid = GET_APIC_ID(read_apic_id()); + return; + } + /* * If no local APIC can be found then set up a fake all * zeroes page to simulate the local APIC and another @@ -981,6 +1111,9 @@ int __init APIC_init_uniprocessor(void) return -1; } + enable_IR_x2apic(); + setup_apic_routing(); + verify_local_APIC(); connect_bsp_APIC(); @@ -1238,10 +1371,14 @@ static int lapic_resume(struct sys_device *dev) maxlvt = lapic_get_maxlvt(); local_irq_save(flags); - rdmsr(MSR_IA32_APICBASE, l, h); - l &= ~MSR_IA32_APICBASE_BASE; - l |= MSR_IA32_APICBASE_ENABLE | mp_lapic_addr; - wrmsr(MSR_IA32_APICBASE, l, h); + if (!x2apic) { + rdmsr(MSR_IA32_APICBASE, l, h); + l &= ~MSR_IA32_APICBASE_BASE; + l |= MSR_IA32_APICBASE_ENABLE | mp_lapic_addr; + wrmsr(MSR_IA32_APICBASE, l, h); + } else + enable_x2apic(); + apic_write(APIC_LVTERR, ERROR_APIC_VECTOR | APIC_LVT_MASKED); apic_write(APIC_ID, apic_pm_state.apic_id); apic_write(APIC_DFR, apic_pm_state.apic_dfr); @@ -1381,6 +1518,15 @@ __cpuinit int apic_is_clustered_box(void) return (clusters > 2); } +static __init int setup_nox2apic(char *str) +{ + disable_x2apic = 1; + clear_cpu_cap(&boot_cpu_data, X86_FEATURE_X2APIC); + return 0; +} +early_param("nox2apic", setup_nox2apic); + + /* * APIC command line parameters */ diff --git a/arch/x86/kernel/cpu/common_64.c b/arch/x86/kernel/cpu/common_64.c index 36537ab9e56..e7bf3c2dc5f 100644 --- a/arch/x86/kernel/cpu/common_64.c +++ b/arch/x86/kernel/cpu/common_64.c @@ -606,6 +606,8 @@ void __cpuinit cpu_init(void) barrier(); check_efer(); + if (cpu != 0 && x2apic) + enable_x2apic(); /* * set up and load the per-CPU TSS diff --git a/arch/x86/kernel/genapic_64.c b/arch/x86/kernel/genapic_64.c index 1029e178cdf..792e21ba1a8 100644 --- a/arch/x86/kernel/genapic_64.c +++ b/arch/x86/kernel/genapic_64.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c index 3b25e49380c..70e1f3e287f 100644 --- a/arch/x86/kernel/mpparse.c +++ b/arch/x86/kernel/mpparse.c @@ -545,7 +545,9 @@ static int __init smp_read_mpc(struct mp_config_table *mpc, unsigned early) generic_bigsmp_probe(); #endif +#ifdef CONFIG_X86_32 setup_apic_routing(); +#endif if (!num_processors) printk(KERN_ERR "MPTABLE: no processors registered!\n"); return num_processors; diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 987b6fde3a9..2e78a143dec 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -730,6 +730,8 @@ void __init setup_arch(char **cmdline_p) num_physpages = max_pfn; check_efer(); + if (cpu_has_x2apic) + check_x2apic(); /* How many end-of-memory variables you have, grandma! */ /* need this before calling reserve_initrd */ diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index c55263b3df0..0c43e1f2e7d 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -1145,6 +1145,11 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus) current_thread_info()->cpu = 0; /* needed? */ set_cpu_sibling_map(0); +#ifdef CONFIG_X86_64 + enable_IR_x2apic(); + setup_apic_routing(); +#endif + if (smp_sanity_check(max_cpus) < 0) { printk(KERN_INFO "SMP disabled\n"); disable_smp(); -- cgit v1.2.3 From 2d9579a124d746a3e0e0ba45e57d80800ee80807 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Thu, 10 Jul 2008 11:16:59 -0700 Subject: x64, x2apic/intr-remap: support for x2apic physical mode support x2apic Physical mode support. By default we will use x2apic cluster mode. x2apic physical mode can be selected using "x2apic_phys" boot parameter. Signed-off-by: Suresh Siddha Cc: akpm@linux-foundation.org Cc: arjan@linux.intel.com Cc: andi@firstfloor.org Cc: ebiederm@xmission.com Cc: jbarnes@virtuousgeek.org Cc: steiner@sgi.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/Makefile | 1 + arch/x86/kernel/genapic_64.c | 18 +++++- arch/x86/kernel/genx2apic_phys.c | 122 +++++++++++++++++++++++++++++++++++++++ 3 files changed, 138 insertions(+), 3 deletions(-) create mode 100644 arch/x86/kernel/genx2apic_phys.c (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index bde3e9b6fec..81280e93e79 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -96,6 +96,7 @@ obj-$(CONFIG_OLPC) += olpc.o ifeq ($(CONFIG_X86_64),y) obj-y += genapic_64.o genapic_flat_64.o genx2apic_uv_x.o tlb_uv.o obj-y += genx2apic_cluster.o + obj-y += genx2apic_phys.o obj-$(CONFIG_X86_PM_TIMER) += pmtimer_64.o obj-$(CONFIG_AUDIT) += audit_64.o diff --git a/arch/x86/kernel/genapic_64.c b/arch/x86/kernel/genapic_64.c index 792e21ba1a8..3940d8161f8 100644 --- a/arch/x86/kernel/genapic_64.c +++ b/arch/x86/kernel/genapic_64.c @@ -30,6 +30,15 @@ DEFINE_PER_CPU(int, x2apic_extra_bits); struct genapic __read_mostly *genapic = &apic_flat; +static int x2apic_phys = 0; + +static int set_x2apic_phys_mode(char *arg) +{ + x2apic_phys = 1; + return 0; +} +early_param("x2apic_phys", set_x2apic_phys_mode); + static enum uv_system_type uv_system_type; /* @@ -39,9 +48,12 @@ void __init setup_apic_routing(void) { if (uv_system_type == UV_NON_UNIQUE_APIC) genapic = &apic_x2apic_uv_x; - else if (cpu_has_x2apic && intr_remapping_enabled) - genapic = &apic_x2apic_cluster; - else + else if (cpu_has_x2apic && intr_remapping_enabled) { + if (x2apic_phys) + genapic = &apic_x2apic_phys; + else + genapic = &apic_x2apic_cluster; + } else #ifdef CONFIG_ACPI /* * Quirk: some x86_64 machines can only use physical APIC mode diff --git a/arch/x86/kernel/genx2apic_phys.c b/arch/x86/kernel/genx2apic_phys.c new file mode 100644 index 00000000000..3c70b9d692b --- /dev/null +++ b/arch/x86/kernel/genx2apic_phys.c @@ -0,0 +1,122 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +/* Start with all IRQs pointing to boot CPU. IRQ balancing will shift them. */ + +static cpumask_t x2apic_target_cpus(void) +{ + return cpumask_of_cpu(0); +} + +static cpumask_t x2apic_vector_allocation_domain(int cpu) +{ + cpumask_t domain = CPU_MASK_NONE; + cpu_set(cpu, domain); + return domain; +} + +static void __x2apic_send_IPI_dest(unsigned int apicid, int vector, + unsigned int dest) +{ + unsigned long cfg; + + cfg = __prepare_ICR(0, vector, dest); + + /* + * send the IPI. + */ + x2apic_icr_write(cfg, apicid); +} + +static void x2apic_send_IPI_mask(cpumask_t mask, int vector) +{ + unsigned long flags; + unsigned long query_cpu; + + local_irq_save(flags); + for_each_cpu_mask(query_cpu, mask) { + __x2apic_send_IPI_dest(per_cpu(x86_cpu_to_apicid, query_cpu), + vector, APIC_DEST_PHYSICAL); + } + local_irq_restore(flags); +} + +static void x2apic_send_IPI_allbutself(int vector) +{ + cpumask_t mask = cpu_online_map; + + cpu_clear(smp_processor_id(), mask); + + if (!cpus_empty(mask)) + x2apic_send_IPI_mask(mask, vector); +} + +static void x2apic_send_IPI_all(int vector) +{ + x2apic_send_IPI_mask(cpu_online_map, vector); +} + +static int x2apic_apic_id_registered(void) +{ + return 1; +} + +static unsigned int x2apic_cpu_mask_to_apicid(cpumask_t cpumask) +{ + int cpu; + + /* + * We're using fixed IRQ delivery, can only return one phys APIC ID. + * May as well be the first. + */ + cpu = first_cpu(cpumask); + if ((unsigned)cpu < NR_CPUS) + return per_cpu(x86_cpu_to_apicid, cpu); + else + return BAD_APICID; +} + +static unsigned int x2apic_read_id(void) +{ + return apic_read(APIC_ID); +} + +static unsigned int phys_pkg_id(int index_msb) +{ + return x2apic_read_id() >> index_msb; +} + +void x2apic_send_IPI_self(int vector) +{ + apic_write(APIC_SELF_IPI, vector); +} + +void init_x2apic_ldr(void) +{ + return; +} + +struct genapic apic_x2apic_phys = { + .name = "physical x2apic", + .int_delivery_mode = dest_Fixed, + .int_dest_mode = (APIC_DEST_PHYSICAL != 0), + .target_cpus = x2apic_target_cpus, + .vector_allocation_domain = x2apic_vector_allocation_domain, + .apic_id_registered = x2apic_apic_id_registered, + .init_apic_ldr = init_x2apic_ldr, + .send_IPI_all = x2apic_send_IPI_all, + .send_IPI_allbutself = x2apic_send_IPI_allbutself, + .send_IPI_mask = x2apic_send_IPI_mask, + .send_IPI_self = x2apic_send_IPI_self, + .cpu_mask_to_apicid = x2apic_cpu_mask_to_apicid, + .phys_pkg_id = phys_pkg_id, + .read_apic_id = x2apic_read_id, +}; -- cgit v1.2.3 From 372e92d8b3e433888bf76c36f1c7e1405eae1584 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Thu, 10 Jul 2008 14:56:18 -0700 Subject: x64, x2apic/intr-remap: Interrupt-remapping and x2apic support On Thu, Jul 10, 2008 at 12:53:20PM -0700, Ingo Molnar wrote: > > Btw., i threw it at the -tip test-cluster and got back a quick build > bugreport: > > arch/x86/xen/enlighten.c: In function 'xen_patch': > arch/x86/xen/enlighten.c:1084: warning: label 'patch_site' defined but not used > arch/x86/xen/enlighten.c: At top level: > arch/x86/xen/enlighten.c:1272: error: expected identifier before '(' token > arch/x86/xen/enlighten.c:1273: error: expected '}' before '.' token > arch/x86/kernel/paravirt.c:376:2: error: invalid preprocessing directive > #ifndedarch/x86/kernel/paravirt.c:384:2: error: #endif without #if > > with this config: > > http://redhat.com/~mingo/misc/config-Thu_Jul_10_21_43_28_CEST_2008.bad fix the typo. Signed-off-by: Suresh Siddha Cc: "Siddha Cc: Suresh B" Cc: "akpm@linux-foundation.org" Cc: "arjan@linux.intel.com" Cc: "andi@firstfloor.org" Cc: "ebiederm@xmission.com" Cc: "jbarnes@virtuousgeek.org" Cc: "steiner@sgi.com" Cc: jeremy@goop.org Signed-off-by: Ingo Molnar --- arch/x86/kernel/paravirt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index b80105a0f47..4f29ff847eb 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c @@ -360,7 +360,7 @@ struct pv_cpu_ops pv_cpu_ops = { struct pv_apic_ops pv_apic_ops = { #ifdef CONFIG_X86_LOCAL_APIC -#ifnded CONFIG_X86_64 +#ifndef CONFIG_X86_64 .apic_write = native_apic_mem_write, .apic_write_atomic = native_apic_mem_write_atomic, .apic_read = native_apic_mem_read, -- cgit v1.2.3 From 277d1f5846d84e16760131a93b7a67ebfa8eded4 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Fri, 11 Jul 2008 13:11:55 -0700 Subject: x2apic: uninline uv_init_apic_ldr() Andrew says: > There's no point in declaring it inline if it's always called indirectly. And point taken! Signed-off-by: Suresh Siddha Cc: akpm@linux-foundation.org Signed-off-by: Ingo Molnar --- arch/x86/kernel/genx2apic_uv_x.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/genx2apic_uv_x.c b/arch/x86/kernel/genx2apic_uv_x.c index dcd4fb219da..c915f750241 100644 --- a/arch/x86/kernel/genx2apic_uv_x.c +++ b/arch/x86/kernel/genx2apic_uv_x.c @@ -120,7 +120,7 @@ static int uv_apic_id_registered(void) return 1; } -static inline void uv_init_apic_ldr(void) +static void uv_init_apic_ldr(void) { } -- cgit v1.2.3 From c535b6a1a685eb23f96e2c221777d6c1e05080d5 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Fri, 11 Jul 2008 18:41:54 -0700 Subject: x86: let 32bit use apic_ops too Signed-off-by: Yinghai Lu Cc: Suresh Siddha Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic_32.c | 39 +++++++++++++++++++++++++++++++-------- 1 file changed, 31 insertions(+), 8 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c index 2a83c07bd88..7413354c9ff 100644 --- a/arch/x86/kernel/apic_32.c +++ b/arch/x86/kernel/apic_32.c @@ -145,19 +145,13 @@ static int modern_apic(void) return lapic_get_version() >= 0x14; } -void apic_icr_write(u32 low, u32 id) -{ - apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(id)); - apic_write_around(APIC_ICR, low); -} - -void apic_wait_icr_idle(void) +void xapic_wait_icr_idle(void) { while (apic_read(APIC_ICR) & APIC_ICR_BUSY) cpu_relax(); } -u32 safe_apic_wait_icr_idle(void) +u32 safe_xapic_wait_icr_idle(void) { u32 send_status; int timeout; @@ -173,6 +167,35 @@ u32 safe_apic_wait_icr_idle(void) return send_status; } +void xapic_icr_write(u32 low, u32 id) +{ + apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(id)); + apic_write_around(APIC_ICR, low); +} + +u64 xapic_icr_read(void) +{ + u32 icr1, icr2; + + icr2 = apic_read(APIC_ICR2); + icr1 = apic_read(APIC_ICR); + + return icr1 | ((u64)icr2 << 32); +} + +static struct apic_ops xapic_ops = { + .read = native_apic_mem_read, + .write = native_apic_mem_write, + .write_atomic = native_apic_mem_write_atomic, + .icr_read = xapic_icr_read, + .icr_write = xapic_icr_write, + .wait_icr_idle = xapic_wait_icr_idle, + .safe_wait_icr_idle = safe_xapic_wait_icr_idle, +}; + +struct apic_ops __read_mostly *apic_ops = &xapic_ops; +EXPORT_SYMBOL_GPL(apic_ops); + /** * enable_NMI_through_LVT0 - enable NMI through local vector table 0 */ -- cgit v1.2.3 From 4c9961d56ec20c27ec5d02e49fd7427748312741 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Fri, 11 Jul 2008 18:44:16 -0700 Subject: x86: make read_apic_id return final apicid also remove GET_APIC_ID when read_apic_id is used. need to apply after [PATCH] x86: mach_apicdef.h need to include before smp.h Signed-off-by: Yinghai Lu Cc: Suresh Siddha Signed-off-by: Ingo Molnar --- arch/x86/kernel/acpi/boot.c | 2 +- arch/x86/kernel/apic_32.c | 4 ++-- arch/x86/kernel/apic_64.c | 6 +++--- arch/x86/kernel/genapic_flat_64.c | 2 +- arch/x86/kernel/io_apic_32.c | 5 ++--- arch/x86/kernel/io_apic_64.c | 4 ++-- arch/x86/kernel/smpboot.c | 6 +++--- 7 files changed, 14 insertions(+), 15 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index 8705262ddcd..b314bcd0840 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -761,7 +761,7 @@ static void __init acpi_register_lapic_address(unsigned long address) set_fixmap_nocache(FIX_APIC_BASE, address); if (boot_cpu_physical_apicid == -1U) { - boot_cpu_physical_apicid = GET_APIC_ID(read_apic_id()); + boot_cpu_physical_apicid = read_apic_id(); #ifdef CONFIG_X86_32 apic_version[boot_cpu_physical_apicid] = GET_APIC_VERSION(apic_read(APIC_LVR)); diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c index 7413354c9ff..47ff978aecf 100644 --- a/arch/x86/kernel/apic_32.c +++ b/arch/x86/kernel/apic_32.c @@ -1230,7 +1230,7 @@ void __init init_apic_mappings(void) * default configuration (or the MP table is broken). */ if (boot_cpu_physical_apicid == -1U) - boot_cpu_physical_apicid = GET_APIC_ID(read_apic_id()); + boot_cpu_physical_apicid = read_apic_id(); } @@ -1270,7 +1270,7 @@ int __init APIC_init_uniprocessor(void) * might be zero if read from MP tables. Get it from LAPIC. */ #ifdef CONFIG_CRASH_DUMP - boot_cpu_physical_apicid = GET_APIC_ID(read_apic_id()); + boot_cpu_physical_apicid = read_apic_id(); #endif physid_set_mask_of_physid(boot_cpu_physical_apicid, &phys_cpu_present_map); diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c index dd0501039f0..c75f58a66d8 100644 --- a/arch/x86/kernel/apic_64.c +++ b/arch/x86/kernel/apic_64.c @@ -1060,7 +1060,7 @@ void __init early_init_lapic_mapping(void) * Fetch the APIC ID of the BSP in case we have a * default configuration (or the MP table is broken). */ - boot_cpu_physical_apicid = GET_APIC_ID(read_apic_id()); + boot_cpu_physical_apicid = read_apic_id(); } /** @@ -1069,7 +1069,7 @@ void __init early_init_lapic_mapping(void) void __init init_apic_mappings(void) { if (x2apic) { - boot_cpu_physical_apicid = GET_APIC_ID(read_apic_id()); + boot_cpu_physical_apicid = read_apic_id(); return; } @@ -1092,7 +1092,7 @@ void __init init_apic_mappings(void) * Fetch the APIC ID of the BSP in case we have a * default configuration (or the MP table is broken). */ - boot_cpu_physical_apicid = GET_APIC_ID(read_apic_id()); + boot_cpu_physical_apicid = read_apic_id(); } /* diff --git a/arch/x86/kernel/genapic_flat_64.c b/arch/x86/kernel/genapic_flat_64.c index 73558682213..7dac2f275fa 100644 --- a/arch/x86/kernel/genapic_flat_64.c +++ b/arch/x86/kernel/genapic_flat_64.c @@ -101,7 +101,7 @@ static unsigned int read_xapic_id(void) { unsigned int id; - id = GET_XAPIC_ID(apic_read(APIC_ID)); + id = GET_APIC_ID(apic_read(APIC_ID)); return id; } diff --git a/arch/x86/kernel/io_apic_32.c b/arch/x86/kernel/io_apic_32.c index c50adb84ea6..382208d11f8 100644 --- a/arch/x86/kernel/io_apic_32.c +++ b/arch/x86/kernel/io_apic_32.c @@ -1501,7 +1501,7 @@ void /*__init*/ print_local_APIC(void *dummy) smp_processor_id(), hard_smp_processor_id()); v = apic_read(APIC_ID); printk(KERN_INFO "... APIC ID: %08x (%01x)\n", v, - GET_APIC_ID(read_apic_id())); + GET_APIC_ID(v)); v = apic_read(APIC_LVR); printk(KERN_INFO "... APIC VERSION: %08x\n", v); ver = GET_APIC_VERSION(v); @@ -1709,8 +1709,7 @@ void disable_IO_APIC(void) entry.dest_mode = 0; /* Physical */ entry.delivery_mode = dest_ExtINT; /* ExtInt */ entry.vector = 0; - entry.dest.physical.physical_dest = - GET_APIC_ID(read_apic_id()); + entry.dest.physical.physical_dest = read_apic_id(); /* * Add it to the IO-APIC irq-routing table: diff --git a/arch/x86/kernel/io_apic_64.c b/arch/x86/kernel/io_apic_64.c index 877aa2e9d7e..2db0f98e2af 100644 --- a/arch/x86/kernel/io_apic_64.c +++ b/arch/x86/kernel/io_apic_64.c @@ -1246,7 +1246,7 @@ void __apicdebuginit print_local_APIC(void * dummy) printk("\n" KERN_DEBUG "printing local APIC contents on CPU#%d/%d:\n", smp_processor_id(), hard_smp_processor_id()); v = apic_read(APIC_ID); - printk(KERN_INFO "... APIC ID: %08x (%01x)\n", v, GET_APIC_ID(read_apic_id())); + printk(KERN_INFO "... APIC ID: %08x (%01x)\n", v, read_apic_id()); v = apic_read(APIC_LVR); printk(KERN_INFO "... APIC VERSION: %08x\n", v); ver = GET_APIC_VERSION(v); @@ -1439,7 +1439,7 @@ void disable_IO_APIC(void) entry.dest_mode = 0; /* Physical */ entry.delivery_mode = dest_ExtINT; /* ExtInt */ entry.vector = 0; - entry.dest = GET_APIC_ID(read_apic_id()); + entry.dest = read_apic_id(); /* * Add it to the IO-APIC irq-routing table: diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 0c43e1f2e7d..6cd002f3e20 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -211,7 +211,7 @@ static void __cpuinit smp_callin(void) /* * (This works even if the APIC is not enabled.) */ - phys_id = GET_APIC_ID(read_apic_id()); + phys_id = read_apic_id(); cpuid = smp_processor_id(); if (cpu_isset(cpuid, cpu_callin_map)) { panic("%s: phys CPU#%d, CPU#%d already present??\n", __func__, @@ -1157,9 +1157,9 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus) } preempt_disable(); - if (GET_APIC_ID(read_apic_id()) != boot_cpu_physical_apicid) { + if (read_apic_id() != boot_cpu_physical_apicid) { panic("Boot APIC ID in local APIC unexpected (%d vs %d)", - GET_APIC_ID(read_apic_id()), boot_cpu_physical_apicid); + read_apic_id(), boot_cpu_physical_apicid); /* Or can we switch back to PIC here? */ } preempt_enable(); -- cgit v1.2.3 From f910a9dc7c865896815e2a95fe33363e9522f277 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Sat, 12 Jul 2008 01:01:20 -0700 Subject: x86: make 64bit have get_apic_id generalize the x2apic code some more. let read_apic_id become a macro (later on a function/inline) GET_APIC_ID(apic_read(APIC_ID)) +#define read_apic_id() (GET_APIC_ID(apic_read(APIC_ID))) instead of this weird construct: -#define read_apic_id (genapic->read_apic_id) Signed-off-by: Yinghai Lu Cc: Suresh Siddha Signed-off-by: Ingo Molnar --- arch/x86/kernel/genapic_flat_64.c | 26 +++++++++++++++++++++++--- arch/x86/kernel/genx2apic_cluster.c | 20 +++++++++++++++++++- arch/x86/kernel/genx2apic_phys.c | 20 +++++++++++++++++++- arch/x86/kernel/genx2apic_uv_x.c | 23 ++++++++++++++++++++--- 4 files changed, 81 insertions(+), 8 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/genapic_flat_64.c b/arch/x86/kernel/genapic_flat_64.c index 7dac2f275fa..2c973cbf054 100644 --- a/arch/x86/kernel/genapic_flat_64.c +++ b/arch/x86/kernel/genapic_flat_64.c @@ -97,11 +97,27 @@ static void flat_send_IPI_all(int vector) __send_IPI_shortcut(APIC_DEST_ALLINC, vector, APIC_DEST_LOGICAL); } +static unsigned int get_apic_id(unsigned long x) +{ + unsigned int id; + + id = (((x)>>24) & 0xFFu); + return id; +} + +static unsigned long set_apic_id(unsigned int id) +{ + unsigned long x; + + x = ((id & 0xFFu)<<24); + return x; +} + static unsigned int read_xapic_id(void) { unsigned int id; - id = GET_APIC_ID(apic_read(APIC_ID)); + id = get_apic_id(apic_read(APIC_ID)); return id; } @@ -134,7 +150,9 @@ struct genapic apic_flat = { .send_IPI_self = apic_send_IPI_self, .cpu_mask_to_apicid = flat_cpu_mask_to_apicid, .phys_pkg_id = phys_pkg_id, - .read_apic_id = read_xapic_id, + .get_apic_id = get_apic_id, + .set_apic_id = set_apic_id, + .apic_id_mask = (0xFFu<<24), }; /* @@ -200,5 +218,7 @@ struct genapic apic_physflat = { .send_IPI_self = apic_send_IPI_self, .cpu_mask_to_apicid = physflat_cpu_mask_to_apicid, .phys_pkg_id = phys_pkg_id, - .read_apic_id = read_xapic_id, + .get_apic_id = get_apic_id, + .set_apic_id = set_apic_id, + .apic_id_mask = (0xFFu<<24), }; diff --git a/arch/x86/kernel/genx2apic_cluster.c b/arch/x86/kernel/genx2apic_cluster.c index ed0fdede800..40bc0140d89 100644 --- a/arch/x86/kernel/genx2apic_cluster.c +++ b/arch/x86/kernel/genx2apic_cluster.c @@ -94,6 +94,22 @@ static unsigned int x2apic_cpu_mask_to_apicid(cpumask_t cpumask) return BAD_APICID; } +static unsigned int get_apic_id(unsigned long x) +{ + unsigned int id; + + id = x; + return id; +} + +static unsigned long set_apic_id(unsigned int id) +{ + unsigned long x; + + x = id; + return x; +} + static unsigned int x2apic_read_id(void) { return apic_read(APIC_ID); @@ -131,5 +147,7 @@ struct genapic apic_x2apic_cluster = { .send_IPI_self = x2apic_send_IPI_self, .cpu_mask_to_apicid = x2apic_cpu_mask_to_apicid, .phys_pkg_id = phys_pkg_id, - .read_apic_id = x2apic_read_id, + .get_apic_id = get_apic_id, + .set_apic_id = set_apic_id, + .apic_id_mask = (0xFFFFFFFFu), }; diff --git a/arch/x86/kernel/genx2apic_phys.c b/arch/x86/kernel/genx2apic_phys.c index 3c70b9d692b..2f3c6ca19de 100644 --- a/arch/x86/kernel/genx2apic_phys.c +++ b/arch/x86/kernel/genx2apic_phys.c @@ -84,6 +84,22 @@ static unsigned int x2apic_cpu_mask_to_apicid(cpumask_t cpumask) return BAD_APICID; } +static unsigned int get_apic_id(unsigned long x) +{ + unsigned int id; + + id = x; + return id; +} + +static unsigned long set_apic_id(unsigned int id) +{ + unsigned long x; + + x = id; + return x; +} + static unsigned int x2apic_read_id(void) { return apic_read(APIC_ID); @@ -118,5 +134,7 @@ struct genapic apic_x2apic_phys = { .send_IPI_self = x2apic_send_IPI_self, .cpu_mask_to_apicid = x2apic_cpu_mask_to_apicid, .phys_pkg_id = phys_pkg_id, - .read_apic_id = x2apic_read_id, + .get_apic_id = get_apic_id, + .set_apic_id = set_apic_id, + .apic_id_mask = (0xFFFFFFFFu), }; diff --git a/arch/x86/kernel/genx2apic_uv_x.c b/arch/x86/kernel/genx2apic_uv_x.c index c915f750241..3ca29cd8c23 100644 --- a/arch/x86/kernel/genx2apic_uv_x.c +++ b/arch/x86/kernel/genx2apic_uv_x.c @@ -139,16 +139,31 @@ static unsigned int uv_cpu_mask_to_apicid(cpumask_t cpumask) return BAD_APICID; } -static unsigned int uv_read_apic_id(void) +static unsigned int get_apic_id(unsigned long x) { unsigned int id; WARN_ON(preemptible() && num_online_cpus() > 1); - id = apic_read(APIC_ID) | __get_cpu_var(x2apic_extra_bits); + id = x | __get_cpu_var(x2apic_extra_bits); return id; } +static long set_apic_id(unsigned int id) +{ + unsigned long x; + + /* maskout x2apic_extra_bits ? */ + x = id; + return x; +} + +static unsigned int uv_read_apic_id(void) +{ + + return get_apic_id(apic_read(APIC_ID)); +} + static unsigned int phys_pkg_id(int index_msb) { return uv_read_apic_id() >> index_msb; @@ -175,7 +190,9 @@ struct genapic apic_x2apic_uv_x = { /* ZZZ.send_IPI_self = uv_send_IPI_self, */ .cpu_mask_to_apicid = uv_cpu_mask_to_apicid, .phys_pkg_id = phys_pkg_id, /* Fixme ZZZ */ - .read_apic_id = uv_read_apic_id, + .get_apic_id = get_apic_id, + .set_apic_id = set_apic_id, + .apic_id_mask = (0xFFFFFFFFu), }; static __cpuinit void set_x2apic_extra_bits(int pnode) -- cgit v1.2.3 From 94a8c3c2437c8946f1b6c8e0b2c560a7db8ed3c6 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Sun, 13 Jul 2008 22:19:35 -0700 Subject: x86: let 32bit use apic_ops too - fix fix for pv - clean up the namespace there too. Signed-off-by: Yinghai Lu Cc: Suresh Siddha Signed-off-by: Ingo Molnar --- arch/x86/kernel/paravirt.c | 5 ----- arch/x86/kernel/vmi_32.c | 51 +++++++++++++++++++++++++++++++++++++++++++--- 2 files changed, 48 insertions(+), 8 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index 4f29ff847eb..e0f139106c7 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c @@ -360,11 +360,6 @@ struct pv_cpu_ops pv_cpu_ops = { struct pv_apic_ops pv_apic_ops = { #ifdef CONFIG_X86_LOCAL_APIC -#ifndef CONFIG_X86_64 - .apic_write = native_apic_mem_write, - .apic_write_atomic = native_apic_mem_write_atomic, - .apic_read = native_apic_mem_read, -#endif .setup_boot_clock = setup_boot_APIC_clock, .setup_secondary_clock = setup_secondary_APIC_clock, .startup_ipi_hook = paravirt_nop, diff --git a/arch/x86/kernel/vmi_32.c b/arch/x86/kernel/vmi_32.c index b15346092b7..cf307435455 100644 --- a/arch/x86/kernel/vmi_32.c +++ b/arch/x86/kernel/vmi_32.c @@ -676,6 +676,50 @@ static inline int __init probe_vmi_rom(void) return 0; } +#ifdef CONFIG_X86_LOCAL_APIC +static u32 vmi_apic_read(u32 reg) +{ + return 0; +} + +static void vmi_apic_write(u32 reg, u32 val) +{ + /* Warn to see if there's any stray references */ + WARN_ON(1); +} + +static u64 vmi_apic_icr_read(void) +{ + return 0; +} + +static void vmi_apic_icr_write(u32 low, u32 id) +{ + /* Warn to see if there's any stray references */ + WARN_ON(1); +} + +static void vmi_apic_wait_icr_idle(void) +{ + return; +} + +static u32 vmi_safe_apic_wait_icr_idle(void) +{ + return 0; +} + +static struct apic_ops vmi_basic_apic_ops = { + .read = vmi_apic_read, + .write = vmi_apic_write, + .write_atomic = vmi_apic_write, + .icr_read = vmi_apic_icr_read, + .icr_write = vmi_apic_icr_write, + .wait_icr_idle = vmi_apic_wait_icr_idle, + .safe_wait_icr_idle = vmi_safe_apic_wait_icr_idle, +}; +#endif + /* * VMI setup common to all processors */ @@ -904,9 +948,10 @@ static inline int __init activate_vmi(void) #endif #ifdef CONFIG_X86_LOCAL_APIC - para_fill(pv_apic_ops.apic_read, APICRead); - para_fill(pv_apic_ops.apic_write, APICWrite); - para_fill(pv_apic_ops.apic_write_atomic, APICWrite); + para_fill(vmi_basic_apic_ops.read, APICRead); + para_fill(vmi_basic_apic_ops.write, APICWrite); + para_fill(vmi_basic_apic_ops.write_atomic, APICWrite); + apic_ops = &vmi_basic_apic_ops; #endif /* -- cgit v1.2.3 From 9a8f0e6b5dfe3b4f330fc82b16a4000f5688fce8 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Fri, 18 Jul 2008 09:59:40 -0700 Subject: x86: let 32bit use apic_ops too - fix Fix VMI apic_ops. Signed-off-by: Suresh Siddha Acked-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic_32.c | 5 +++++ arch/x86/kernel/vmi_32.c | 51 +++-------------------------------------------- 2 files changed, 8 insertions(+), 48 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c index 47ff978aecf..cb54d9e20f9 100644 --- a/arch/x86/kernel/apic_32.c +++ b/arch/x86/kernel/apic_32.c @@ -145,6 +145,11 @@ static int modern_apic(void) return lapic_get_version() >= 0x14; } +/* + * Paravirt kernels also might be using these below ops. So we still + * use generic apic_read()/apic_write(), which might be pointing to different + * ops in PARAVIRT case. + */ void xapic_wait_icr_idle(void) { while (apic_read(APIC_ICR) & APIC_ICR_BUSY) diff --git a/arch/x86/kernel/vmi_32.c b/arch/x86/kernel/vmi_32.c index cf307435455..237082833c1 100644 --- a/arch/x86/kernel/vmi_32.c +++ b/arch/x86/kernel/vmi_32.c @@ -676,50 +676,6 @@ static inline int __init probe_vmi_rom(void) return 0; } -#ifdef CONFIG_X86_LOCAL_APIC -static u32 vmi_apic_read(u32 reg) -{ - return 0; -} - -static void vmi_apic_write(u32 reg, u32 val) -{ - /* Warn to see if there's any stray references */ - WARN_ON(1); -} - -static u64 vmi_apic_icr_read(void) -{ - return 0; -} - -static void vmi_apic_icr_write(u32 low, u32 id) -{ - /* Warn to see if there's any stray references */ - WARN_ON(1); -} - -static void vmi_apic_wait_icr_idle(void) -{ - return; -} - -static u32 vmi_safe_apic_wait_icr_idle(void) -{ - return 0; -} - -static struct apic_ops vmi_basic_apic_ops = { - .read = vmi_apic_read, - .write = vmi_apic_write, - .write_atomic = vmi_apic_write, - .icr_read = vmi_apic_icr_read, - .icr_write = vmi_apic_icr_write, - .wait_icr_idle = vmi_apic_wait_icr_idle, - .safe_wait_icr_idle = vmi_safe_apic_wait_icr_idle, -}; -#endif - /* * VMI setup common to all processors */ @@ -948,10 +904,9 @@ static inline int __init activate_vmi(void) #endif #ifdef CONFIG_X86_LOCAL_APIC - para_fill(vmi_basic_apic_ops.read, APICRead); - para_fill(vmi_basic_apic_ops.write, APICWrite); - para_fill(vmi_basic_apic_ops.write_atomic, APICWrite); - apic_ops = &vmi_basic_apic_ops; + para_fill(apic_ops->read, APICRead); + para_fill(apic_ops->write, APICWrite); + para_fill(apic_ops->write_atomic, APICWrite); #endif /* -- cgit v1.2.3 From f586bf7df9acc26b68b0feefc0dd32fa63516d3a Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Fri, 18 Jul 2008 15:58:35 -0700 Subject: x86: APIC: Remove apic_write_around(); use alternatives, merge fix On Fri, Jul 18, 2008 at 02:03:48PM -0700, Ingo Molnar wrote: > > * Yinghai Lu wrote: > > > > git merge tip/x86/x2apic > > CONFLICT (content): Merge conflict in arch/x86/kernel/Makefile > > CONFLICT (content): Merge conflict in arch/x86/kernel/paravirt.c > > CONFLICT (content): Merge conflict in arch/x86/kernel/smpboot.c > > CONFLICT (content): Merge conflict in arch/x86/kernel/vmi_32.c > > CONFLICT (content): Merge conflict in arch/x86/xen/enlighten.c > > CONFLICT (content): Merge conflict in include/asm-x86/apic.h > > CONFLICT (content): Merge conflict in include/asm-x86/paravirt.h > > that's due to the changes in tip/x86/apic and in tip/x86/uv. > > ok, i've just merged x86/apic into x86/x2apic and x86/uv as well, and > pushed out the result. > > Note: it's a first raw merge and completely untested. It will now merge > cleanly into tip/master. There are probably a few details missing. Ingo, thanks for doing this. While I was testing my merge changes, you posted yours... anyhow we need this piece, which is missing from your merge. Signed-off-by: Suresh Siddha Cc: Yinghai Lu Cc: "Maciej W. Rozycki" Cc: Jeremy Fitzhardinge Cc: Zachary Amsden Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic_32.c | 5 ++--- arch/x86/kernel/apic_64.c | 2 -- 2 files changed, 2 insertions(+), 5 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c index dcb897f22aa..8728f54a93d 100644 --- a/arch/x86/kernel/apic_32.c +++ b/arch/x86/kernel/apic_32.c @@ -174,8 +174,8 @@ u32 safe_xapic_wait_icr_idle(void) void xapic_icr_write(u32 low, u32 id) { - apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(id)); - apic_write_around(APIC_ICR, low); + apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(id)); + apic_write(APIC_ICR, low); } u64 xapic_icr_read(void) @@ -191,7 +191,6 @@ u64 xapic_icr_read(void) static struct apic_ops xapic_ops = { .read = native_apic_mem_read, .write = native_apic_mem_write, - .write_atomic = native_apic_mem_write_atomic, .icr_read = xapic_icr_read, .icr_write = xapic_icr_write, .wait_icr_idle = xapic_wait_icr_idle, diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c index 46e612408ac..a850bc63fb1 100644 --- a/arch/x86/kernel/apic_64.c +++ b/arch/x86/kernel/apic_64.c @@ -167,7 +167,6 @@ u64 xapic_icr_read(void) static struct apic_ops xapic_ops = { .read = native_apic_mem_read, .write = native_apic_mem_write, - .write_atomic = native_apic_mem_write_atomic, .icr_read = xapic_icr_read, .icr_write = xapic_icr_write, .wait_icr_idle = xapic_wait_icr_idle, @@ -206,7 +205,6 @@ u64 x2apic_icr_read(void) static struct apic_ops x2apic_ops = { .read = native_apic_msr_read, .write = native_apic_msr_write, - .write_atomic = native_apic_msr_write, .icr_read = x2apic_icr_read, .icr_write = x2apic_icr_write, .wait_icr_idle = x2apic_wait_icr_idle, -- cgit v1.2.3 From 1b9b89e7f163336ad84200b66a17284dbf26aced Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Mon, 21 Jul 2008 22:08:21 -0700 Subject: x86: add apic probe for genapic 64bit, v2 introducing an APIC handling probing abstraction: static struct genapic *apic_probe[] __initdata = { &apic_x2apic_uv_x, &apic_x2apic_phys, &apic_x2apic_cluster, &apic_physflat, NULL, }; This way we can remove UV, x2apic specific code from genapic_64.c and move them to their specific genapic files. [ v2: fix compiling when CONFIG_ACPI is not set ] Signed-off-by: Yinghai Lu Cc: Jack Steiner Cc: Suresh Siddha Signed-off-by: Ingo Molnar --- arch/x86/kernel/genapic_64.c | 85 ++++++++++++------------------------- arch/x86/kernel/genapic_flat_64.c | 26 ++++++++++++ arch/x86/kernel/genx2apic_cluster.c | 11 +++++ arch/x86/kernel/genx2apic_phys.c | 21 +++++++++ arch/x86/kernel/genx2apic_uv_x.c | 32 +++++++++++++- 5 files changed, 115 insertions(+), 60 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/genapic_64.c b/arch/x86/kernel/genapic_64.c index 3940d8161f8..b3ba969c50d 100644 --- a/arch/x86/kernel/genapic_64.c +++ b/arch/x86/kernel/genapic_64.c @@ -16,62 +16,37 @@ #include #include #include -#include #include #include #include -#ifdef CONFIG_ACPI -#include -#endif - -DEFINE_PER_CPU(int, x2apic_extra_bits); +extern struct genapic apic_flat; +extern struct genapic apic_physflat; +extern struct genapic apic_x2xpic_uv_x; +extern struct genapic apic_x2apic_phys; +extern struct genapic apic_x2apic_cluster; struct genapic __read_mostly *genapic = &apic_flat; -static int x2apic_phys = 0; - -static int set_x2apic_phys_mode(char *arg) -{ - x2apic_phys = 1; - return 0; -} -early_param("x2apic_phys", set_x2apic_phys_mode); - -static enum uv_system_type uv_system_type; +static struct genapic *apic_probe[] __initdata = { + &apic_x2apic_uv_x, + &apic_x2apic_phys, + &apic_x2apic_cluster, + &apic_physflat, + NULL, +}; /* * Check the APIC IDs in bios_cpu_apicid and choose the APIC mode. */ void __init setup_apic_routing(void) { - if (uv_system_type == UV_NON_UNIQUE_APIC) - genapic = &apic_x2apic_uv_x; - else if (cpu_has_x2apic && intr_remapping_enabled) { - if (x2apic_phys) - genapic = &apic_x2apic_phys; - else - genapic = &apic_x2apic_cluster; - } else -#ifdef CONFIG_ACPI - /* - * Quirk: some x86_64 machines can only use physical APIC mode - * regardless of how many processors are present (x86_64 ES7000 - * is an example). - */ - if (acpi_gbl_FADT.header.revision > FADT2_REVISION_ID && - (acpi_gbl_FADT.flags & ACPI_FADT_APIC_PHYSICAL)) - genapic = &apic_physflat; - else -#endif - - if (max_physical_apicid < 8) - genapic = &apic_flat; - else - genapic = &apic_physflat; - - printk(KERN_INFO "Setting APIC routing to %s\n", genapic->name); + if (genapic == &apic_flat) { + if (max_physical_apicid >= 8) + genapic = &apic_physflat; + printk(KERN_INFO "Setting APIC routing to %s\n", genapic->name); + } } /* Same for both flat and physical. */ @@ -83,23 +58,15 @@ void apic_send_IPI_self(int vector) int __init acpi_madt_oem_check(char *oem_id, char *oem_table_id) { - if (!strcmp(oem_id, "SGI")) { - if (!strcmp(oem_table_id, "UVL")) - uv_system_type = UV_LEGACY_APIC; - else if (!strcmp(oem_table_id, "UVX")) - uv_system_type = UV_X2APIC; - else if (!strcmp(oem_table_id, "UVH")) - uv_system_type = UV_NON_UNIQUE_APIC; + int i; + + for (i = 0; apic_probe[i]; ++i) { + if (apic_probe[i]->acpi_madt_oem_check(oem_id, oem_table_id)) { + genapic = apic_probe[i]; + printk(KERN_INFO "Setting APIC routing to %s.\n", + genapic->name); + return 1; + } } return 0; } - -enum uv_system_type get_uv_system_type(void) -{ - return uv_system_type; -} - -int is_uv_system(void) -{ - return uv_system_type != UV_NONE; -} diff --git a/arch/x86/kernel/genapic_flat_64.c b/arch/x86/kernel/genapic_flat_64.c index 2c973cbf054..1740b83329f 100644 --- a/arch/x86/kernel/genapic_flat_64.c +++ b/arch/x86/kernel/genapic_flat_64.c @@ -21,6 +21,15 @@ #include #include +#ifdef CONFIG_ACPI +#include +#endif + +static int __init flat_acpi_madt_oem_check(char *oem_id, char *oem_table_id) +{ + return 1; +} + static cpumask_t flat_target_cpus(void) { return cpu_online_map; @@ -138,6 +147,7 @@ static unsigned int phys_pkg_id(int index_msb) struct genapic apic_flat = { .name = "flat", + .acpi_madt_oem_check = flat_acpi_madt_oem_check, .int_delivery_mode = dest_LowestPrio, .int_dest_mode = (APIC_DEST_LOGICAL != 0), .target_cpus = flat_target_cpus, @@ -160,6 +170,21 @@ struct genapic apic_flat = { * We cannot use logical delivery in this case because the mask * overflows, so use physical mode. */ +static int __init physflat_acpi_madt_oem_check(char *oem_id, char *oem_table_id) +{ +#ifdef CONFIG_ACPI + /* + * Quirk: some x86_64 machines can only use physical APIC mode + * regardless of how many processors are present (x86_64 ES7000 + * is an example). + */ + if (acpi_gbl_FADT.header.revision > FADT2_REVISION_ID && + (acpi_gbl_FADT.flags & ACPI_FADT_APIC_PHYSICAL)) + return 1; +#endif + + return 0; +} static cpumask_t physflat_target_cpus(void) { @@ -206,6 +231,7 @@ static unsigned int physflat_cpu_mask_to_apicid(cpumask_t cpumask) struct genapic apic_physflat = { .name = "physical flat", + .acpi_madt_oem_check = physflat_acpi_madt_oem_check, .int_delivery_mode = dest_Fixed, .int_dest_mode = (APIC_DEST_PHYSICAL != 0), .target_cpus = physflat_target_cpus, diff --git a/arch/x86/kernel/genx2apic_cluster.c b/arch/x86/kernel/genx2apic_cluster.c index 40bc0140d89..ef3f3182d50 100644 --- a/arch/x86/kernel/genx2apic_cluster.c +++ b/arch/x86/kernel/genx2apic_cluster.c @@ -4,12 +4,22 @@ #include #include #include +#include + #include #include #include DEFINE_PER_CPU(u32, x86_cpu_to_logical_apicid); +static int __init x2apic_acpi_madt_oem_check(char *oem_id, char *oem_table_id) +{ + if (cpu_has_x2apic && intr_remapping_enabled) + return 1; + + return 0; +} + /* Start with all IRQs pointing to boot CPU. IRQ balancing will shift them. */ static cpumask_t x2apic_target_cpus(void) @@ -135,6 +145,7 @@ static void init_x2apic_ldr(void) struct genapic apic_x2apic_cluster = { .name = "cluster x2apic", + .acpi_madt_oem_check = x2apic_acpi_madt_oem_check, .int_delivery_mode = dest_LowestPrio, .int_dest_mode = (APIC_DEST_LOGICAL != 0), .target_cpus = x2apic_target_cpus, diff --git a/arch/x86/kernel/genx2apic_phys.c b/arch/x86/kernel/genx2apic_phys.c index 2f3c6ca19de..f35a3bf3a9e 100644 --- a/arch/x86/kernel/genx2apic_phys.c +++ b/arch/x86/kernel/genx2apic_phys.c @@ -4,10 +4,30 @@ #include #include #include +#include + #include #include #include +DEFINE_PER_CPU(int, x2apic_extra_bits); + +static int x2apic_phys; + +static int set_x2apic_phys_mode(char *arg) +{ + x2apic_phys = 1; + return 0; +} +early_param("x2apic_phys", set_x2apic_phys_mode); + +static int __init x2apic_acpi_madt_oem_check(char *oem_id, char *oem_table_id) +{ + if (cpu_has_x2apic && intr_remapping_enabled && x2apic_phys) + return 1; + + return 0; +} /* Start with all IRQs pointing to boot CPU. IRQ balancing will shift them. */ @@ -122,6 +142,7 @@ void init_x2apic_ldr(void) struct genapic apic_x2apic_phys = { .name = "physical x2apic", + .acpi_madt_oem_check = x2apic_acpi_madt_oem_check, .int_delivery_mode = dest_Fixed, .int_dest_mode = (APIC_DEST_PHYSICAL != 0), .target_cpus = x2apic_target_cpus, diff --git a/arch/x86/kernel/genx2apic_uv_x.c b/arch/x86/kernel/genx2apic_uv_x.c index a8e5cb4c4d4..a882bc3a2ce 100644 --- a/arch/x86/kernel/genx2apic_uv_x.c +++ b/arch/x86/kernel/genx2apic_uv_x.c @@ -27,6 +27,33 @@ #include #include +static enum uv_system_type uv_system_type; + +static int __init uv_acpi_madt_oem_check(char *oem_id, char *oem_table_id) +{ + if (!strcmp(oem_id, "SGI")) { + if (!strcmp(oem_table_id, "UVL")) + uv_system_type = UV_LEGACY_APIC; + else if (!strcmp(oem_table_id, "UVX")) + uv_system_type = UV_X2APIC; + else if (!strcmp(oem_table_id, "UVH")) { + uv_system_type = UV_NON_UNIQUE_APIC; + return 1; + } + } + return 0; +} + +enum uv_system_type get_uv_system_type(void) +{ + return uv_system_type; +} + +int is_uv_system(void) +{ + return uv_system_type != UV_NONE; +} + DEFINE_PER_CPU(struct uv_hub_info_s, __uv_hub_info); EXPORT_PER_CPU_SYMBOL_GPL(__uv_hub_info); @@ -153,7 +180,7 @@ static unsigned int get_apic_id(unsigned long x) return id; } -static long set_apic_id(unsigned int id) +static unsigned long set_apic_id(unsigned int id) { unsigned long x; @@ -182,6 +209,7 @@ static void uv_send_IPI_self(int vector) struct genapic apic_x2apic_uv_x = { .name = "UV large system", + .acpi_madt_oem_check = uv_acpi_madt_oem_check, .int_delivery_mode = dest_Fixed, .int_dest_mode = (APIC_DEST_PHYSICAL != 0), .target_cpus = uv_target_cpus, @@ -433,3 +461,5 @@ void __cpuinit uv_cpu_init(void) if (get_uv_system_type() == UV_NON_UNIQUE_APIC) set_x2apic_extra_bits(uv_hub_info->pnode); } + + -- cgit v1.2.3 From 026e2c05ef58ef413e2d52696f125d5ea1aa8bce Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 22 Jul 2008 11:58:14 +0200 Subject: x86, cyrix: debug Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/cyrix.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/cyrix.c b/arch/x86/kernel/cpu/cyrix.c index 3fd7a67bb06..db5868cd244 100644 --- a/arch/x86/kernel/cpu/cyrix.c +++ b/arch/x86/kernel/cpu/cyrix.c @@ -116,7 +116,7 @@ static void __cpuinit set_cx86_reorder(void) setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); /* enable MAPEN */ /* Load/Store Serialize to mem access disable (=reorder it) */ - setCx86(CX86_PCR0, getCx86(CX86_PCR0) & ~0x80); + setCx86_old(CX86_PCR0, getCx86_old(CX86_PCR0) & ~0x80); /* set load/store serialize from 1GB to 4GB */ ccr3 |= 0xe0; setCx86(CX86_CCR3, ccr3); @@ -127,11 +127,11 @@ static void __cpuinit set_cx86_memwb(void) printk(KERN_INFO "Enable Memory-Write-back mode on Cyrix/NSC processor.\n"); /* CCR2 bit 2: unlock NW bit */ - setCx86(CX86_CCR2, getCx86(CX86_CCR2) & ~0x04); + setCx86_old(CX86_CCR2, getCx86_old(CX86_CCR2) & ~0x04); /* set 'Not Write-through' */ write_cr0(read_cr0() | X86_CR0_NW); /* CCR2 bit 2: lock NW bit and set WT1 */ - setCx86(CX86_CCR2, getCx86(CX86_CCR2) | 0x14); + setCx86_old(CX86_CCR2, getCx86_old(CX86_CCR2) | 0x14); } static void __cpuinit set_cx86_inc(void) @@ -144,10 +144,10 @@ static void __cpuinit set_cx86_inc(void) setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); /* enable MAPEN */ /* PCR1 -- Performance Control */ /* Incrementor on, whatever that is */ - setCx86(CX86_PCR1, getCx86(CX86_PCR1) | 0x02); + setCx86_old(CX86_PCR1, getCx86_old(CX86_PCR1) | 0x02); /* PCR0 -- Performance Control */ /* Incrementor Margin 10 */ - setCx86(CX86_PCR0, getCx86(CX86_PCR0) | 0x04); + setCx86_old(CX86_PCR0, getCx86_old(CX86_PCR0) | 0x04); setCx86(CX86_CCR3, ccr3); /* disable MAPEN */ } @@ -162,14 +162,14 @@ static void __cpuinit geode_configure(void) local_irq_save(flags); /* Suspend on halt power saving and enable #SUSP pin */ - setCx86(CX86_CCR2, getCx86(CX86_CCR2) | 0x88); + setCx86_old(CX86_CCR2, getCx86_old(CX86_CCR2) | 0x88); ccr3 = getCx86(CX86_CCR3); setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); /* enable MAPEN */ /* FPU fast, DTE cache, Mem bypass */ - setCx86(CX86_CCR4, getCx86(CX86_CCR4) | 0x38); + setCx86_old(CX86_CCR4, getCx86_old(CX86_CCR4) | 0x38); setCx86(CX86_CCR3, ccr3); /* disable MAPEN */ set_cx86_memwb(); @@ -286,7 +286,7 @@ static void __cpuinit init_cyrix(struct cpuinfo_x86 *c) /* GXm supports extended cpuid levels 'ala' AMD */ if (c->cpuid_level == 2) { /* Enable cxMMX extensions (GX1 Datasheet 54) */ - setCx86(CX86_CCR7, getCx86(CX86_CCR7) | 1); + setCx86_old(CX86_CCR7, getCx86_old(CX86_CCR7) | 1); /* * GXm : 0x30 ... 0x5f GXm datasheet 51 @@ -309,7 +309,7 @@ static void __cpuinit init_cyrix(struct cpuinfo_x86 *c) if (dir1 > 7) { dir0_msn++; /* M II */ /* Enable MMX extensions (App note 108) */ - setCx86(CX86_CCR7, getCx86(CX86_CCR7)|1); + setCx86_old(CX86_CCR7, getCx86_old(CX86_CCR7)|1); } else { c->coma_bug = 1; /* 6x86MX, it has the bug. */ } @@ -424,7 +424,7 @@ static void __cpuinit cyrix_identify(struct cpuinfo_x86 *c) local_irq_save(flags); ccr3 = getCx86(CX86_CCR3); setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); /* enable MAPEN */ - setCx86(CX86_CCR4, getCx86(CX86_CCR4) | 0x80); /* enable cpuid */ + setCx86_old(CX86_CCR4, getCx86_old(CX86_CCR4) | 0x80); /* enable cpuid */ setCx86(CX86_CCR3, ccr3); /* disable MAPEN */ local_irq_restore(flags); } -- cgit v1.2.3 From bbc1f698a508927d21324b57500e863f9bd562b9 Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Date: Mon, 21 Jul 2008 21:34:13 +0530 Subject: x86: Introducing asm/syscalls.h Declaring arch-dependent syscalls for x86 architecture Signed-off-by: Jaswinder Singh --- arch/x86/kernel/ioport.c | 1 + arch/x86/kernel/ldt.c | 1 + arch/x86/kernel/process_32.c | 1 + arch/x86/kernel/process_64.c | 1 + arch/x86/kernel/signal_32.c | 1 + arch/x86/kernel/signal_64.c | 1 + arch/x86/kernel/sys_i386_32.c | 2 ++ arch/x86/kernel/sys_x86_64.c | 1 + arch/x86/kernel/tls.c | 1 + arch/x86/kernel/vm86_32.c | 1 + 10 files changed, 11 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/ioport.c b/arch/x86/kernel/ioport.c index 50e5e4a31c8..19191430274 100644 --- a/arch/x86/kernel/ioport.c +++ b/arch/x86/kernel/ioport.c @@ -14,6 +14,7 @@ #include #include #include +#include /* Set EXTENT bits starting at BASE in BITMAP to value TURN_ON. */ static void set_bitmap(unsigned long *bitmap, unsigned int base, diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c index a8449571858..c49ff2dc8f8 100644 --- a/arch/x86/kernel/ldt.c +++ b/arch/x86/kernel/ldt.c @@ -18,6 +18,7 @@ #include #include #include +#include #ifdef CONFIG_SMP static void flush_ldt(void *current_mm) diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 0c3927accb0..6490e498126 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -55,6 +55,7 @@ #include #include #include +#include asmlinkage void ret_from_fork(void) __asm__("ret_from_fork"); diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index e8a8e1b9981..c78090dd0c5 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -51,6 +51,7 @@ #include #include #include +#include asmlinkage extern void ret_from_fork(void); diff --git a/arch/x86/kernel/signal_32.c b/arch/x86/kernel/signal_32.c index 07faaa5109c..5cede1045ce 100644 --- a/arch/x86/kernel/signal_32.c +++ b/arch/x86/kernel/signal_32.c @@ -26,6 +26,7 @@ #include #include #include +#include #include "sigframe.h" diff --git a/arch/x86/kernel/signal_64.c b/arch/x86/kernel/signal_64.c index bf87684474f..b95a0a60905 100644 --- a/arch/x86/kernel/signal_64.c +++ b/arch/x86/kernel/signal_64.c @@ -26,6 +26,7 @@ #include #include #include +#include #include "sigframe.h" #define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP))) diff --git a/arch/x86/kernel/sys_i386_32.c b/arch/x86/kernel/sys_i386_32.c index 7066cb855a6..1884a8d12bf 100644 --- a/arch/x86/kernel/sys_i386_32.c +++ b/arch/x86/kernel/sys_i386_32.c @@ -22,6 +22,8 @@ #include #include +#include + asmlinkage long sys_mmap2(unsigned long addr, unsigned long len, unsigned long prot, unsigned long flags, unsigned long fd, unsigned long pgoff) diff --git a/arch/x86/kernel/sys_x86_64.c b/arch/x86/kernel/sys_x86_64.c index 3b360ef3381..c9288c883e2 100644 --- a/arch/x86/kernel/sys_x86_64.c +++ b/arch/x86/kernel/sys_x86_64.c @@ -16,6 +16,7 @@ #include #include +#include asmlinkage long sys_mmap(unsigned long addr, unsigned long len, unsigned long prot, unsigned long flags, unsigned long fd, unsigned long off) diff --git a/arch/x86/kernel/tls.c b/arch/x86/kernel/tls.c index ab6bf375a30..6bb7b8579e7 100644 --- a/arch/x86/kernel/tls.c +++ b/arch/x86/kernel/tls.c @@ -10,6 +10,7 @@ #include #include #include +#include #include "tls.h" diff --git a/arch/x86/kernel/vm86_32.c b/arch/x86/kernel/vm86_32.c index 38f566fa27d..4eeb5cf9720 100644 --- a/arch/x86/kernel/vm86_32.c +++ b/arch/x86/kernel/vm86_32.c @@ -46,6 +46,7 @@ #include #include #include +#include /* * Known problems: -- cgit v1.2.3 From fb26132b441e75d6ba9996efc29b42081aee0abd Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Date: Mon, 21 Jul 2008 21:36:40 +0530 Subject: x86: process_32.c declare cpu_number before they get used Moved DECLARE_PER_CPU(int, cpu_number) from CONFIG_X86_32_SMP to CONFIG_X86_32 because cpu_number is required for both. And include asm/smp.h in process_32.c Signed-off-by: Jaswinder Singh --- arch/x86/kernel/process_32.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 6490e498126..7218bccd176 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -56,6 +56,7 @@ #include #include #include +#include asmlinkage void ret_from_fork(void) __asm__("ret_from_fork"); -- cgit v1.2.3 From cc0384917bf69079088701a0725c5fc6b554bf35 Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Date: Mon, 21 Jul 2008 21:52:51 +0530 Subject: x86: time_XX.c declare functions before they get used Declare time_init() in asm-x86/time.h Also did cleanup in asm-x86/timer.h : timer_ack is only required for X86_32 int recalibrate_cpu_khz(void) is for X86_32 Signed-off-by: Jaswinder Singh --- arch/x86/kernel/time_32.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/time_32.c b/arch/x86/kernel/time_32.c index ffe3c664afc..bbecf8b6bf9 100644 --- a/arch/x86/kernel/time_32.c +++ b/arch/x86/kernel/time_32.c @@ -36,6 +36,7 @@ #include #include #include +#include #include "do_timer.h" -- cgit v1.2.3 From 1e84911c6c37fd1080ef07039e19c346628b31db Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Date: Mon, 21 Jul 2008 22:58:29 +0530 Subject: x86: mtrr/main.c declare range_state as static Signed-off-by: Jaswinder Singh --- arch/x86/kernel/cpu/mtrr/main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c index 6f23969c8fa..b5ade28ca8f 100644 --- a/arch/x86/kernel/cpu/mtrr/main.c +++ b/arch/x86/kernel/cpu/mtrr/main.c @@ -729,7 +729,7 @@ struct var_mtrr_range_state { mtrr_type type; }; -struct var_mtrr_range_state __initdata range_state[RANGE_NUM]; +static struct var_mtrr_range_state __initdata range_state[RANGE_NUM]; static int __initdata debug_print; static int __init -- cgit v1.2.3 From a31863168660c6b6f6c7ffe05bb6a38e97803326 Mon Sep 17 00:00:00 2001 From: Vegard Nossum Date: Tue, 22 Jul 2008 21:53:53 +0200 Subject: x86: consolidate header guards This patch consolidates the header guard names which are also used externally, i.e. in .c files. Signed-off-by: Vegard Nossum --- arch/x86/kernel/asm-offsets_64.c | 2 +- arch/x86/kernel/syscall_64.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c index aa89387006f..505543a75a5 100644 --- a/arch/x86/kernel/asm-offsets_64.c +++ b/arch/x86/kernel/asm-offsets_64.c @@ -22,7 +22,7 @@ #define __NO_STUBS 1 #undef __SYSCALL -#undef _ASM_X86_64_UNISTD_H_ +#undef ASM_X86__UNISTD_64_H #define __SYSCALL(nr, sym) [nr] = 1, static char syscalls[] = { #include diff --git a/arch/x86/kernel/syscall_64.c b/arch/x86/kernel/syscall_64.c index 170d43c1748..3d1be4f0fac 100644 --- a/arch/x86/kernel/syscall_64.c +++ b/arch/x86/kernel/syscall_64.c @@ -8,12 +8,12 @@ #define __NO_STUBS #define __SYSCALL(nr, sym) extern asmlinkage void sym(void) ; -#undef _ASM_X86_64_UNISTD_H_ +#undef ASM_X86__UNISTD_64_H #include #undef __SYSCALL #define __SYSCALL(nr, sym) [nr] = sym, -#undef _ASM_X86_64_UNISTD_H_ +#undef ASM_X86__UNISTD_64_H typedef void (*sys_call_ptr_t)(void); -- cgit v1.2.3 From 05d3ed0a1fe3ea05ab9f3b8d32576a0bc2e19660 Mon Sep 17 00:00:00 2001 From: Prarit Bhargava Date: Mon, 21 Jul 2008 10:15:22 -0400 Subject: x86, pci: iommu fix potential overflow in alloc_iommu() It is possible that alloc_iommu()'s boundary_size overflows as dma_get_seg_boundary can return 0xffffffff. In that case, further usage of boundary_size triggers a BUG_ON() in the iommu code. Signed-off-by: Prarit Bhargava Signed-off-by: Ingo Molnar --- arch/x86/kernel/pci-gart_64.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c index df5f142657d..1062dc1e639 100644 --- a/arch/x86/kernel/pci-gart_64.c +++ b/arch/x86/kernel/pci-gart_64.c @@ -93,7 +93,7 @@ static unsigned long alloc_iommu(struct device *dev, int size) base_index = ALIGN(iommu_bus_base & dma_get_seg_boundary(dev), PAGE_SIZE) >> PAGE_SHIFT; - boundary_size = ALIGN(dma_get_seg_boundary(dev) + 1, + boundary_size = ALIGN((unsigned long long)dma_get_seg_boundary(dev) + 1, PAGE_SIZE) >> PAGE_SHIFT; spin_lock_irqsave(&iommu_bitmap_lock, flags); -- cgit v1.2.3 From fd60b39f845aa7462453af8aed4f059b162f181f Mon Sep 17 00:00:00 2001 From: Huang Weiyi Date: Wed, 23 Jul 2008 22:45:01 +0800 Subject: arch/x86/kernel/genx2apic_uv_x.c: Removed duplicated include Removed duplicated include file in arch/x86/kernel/genx2apic_uv_x.c. Signed-off-by: Huang Weiyi Signed-off-by: Ingo Molnar --- arch/x86/kernel/genx2apic_uv_x.c | 1 - 1 file changed, 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/genx2apic_uv_x.c b/arch/x86/kernel/genx2apic_uv_x.c index a882bc3a2ce..14a9772778e 100644 --- a/arch/x86/kernel/genx2apic_uv_x.c +++ b/arch/x86/kernel/genx2apic_uv_x.c @@ -12,7 +12,6 @@ #include #include #include -#include #include #include #include -- cgit v1.2.3 From 38ffbe66d59051fd9cfcfc8545f164700e2fa3bc Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Wed, 23 Jul 2008 14:21:18 -0700 Subject: x86/paravirt/xen: properly fill out the ldt ops LTP testing showed that Xen does not properly implement sys_modify_ldt(). This patch does the final little bits needed to make the ldt work properly. Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Ingo Molnar --- arch/x86/kernel/ldt.c | 9 ++++++++- arch/x86/kernel/paravirt.c | 4 ++++ 2 files changed, 12 insertions(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c index 3fee2aa50f3..4895e0634d2 100644 --- a/arch/x86/kernel/ldt.c +++ b/arch/x86/kernel/ldt.c @@ -51,6 +51,8 @@ static int alloc_ldt(mm_context_t *pc, int mincount, int reload) memset(newldt + oldsize * LDT_ENTRY_SIZE, 0, (mincount - oldsize) * LDT_ENTRY_SIZE); + paravirt_alloc_ldt(newldt, mincount); + #ifdef CONFIG_X86_64 /* CHECKME: Do we really need this ? */ wmb(); @@ -75,6 +77,7 @@ static int alloc_ldt(mm_context_t *pc, int mincount, int reload) #endif } if (oldsize) { + paravirt_free_ldt(oldldt, oldsize); if (oldsize * LDT_ENTRY_SIZE > PAGE_SIZE) vfree(oldldt); else @@ -86,10 +89,13 @@ static int alloc_ldt(mm_context_t *pc, int mincount, int reload) static inline int copy_ldt(mm_context_t *new, mm_context_t *old) { int err = alloc_ldt(new, old->size, 0); + int i; if (err < 0) return err; - memcpy(new->ldt, old->ldt, old->size * LDT_ENTRY_SIZE); + + for(i = 0; i < old->size; i++) + write_ldt_entry(new->ldt, i, old->ldt + i * LDT_ENTRY_SIZE); return 0; } @@ -126,6 +132,7 @@ void destroy_context(struct mm_struct *mm) if (mm == current->active_mm) clear_LDT(); #endif + paravirt_free_ldt(mm->context.ldt, mm->context.size); if (mm->context.size * LDT_ENTRY_SIZE > PAGE_SIZE) vfree(mm->context.ldt); else diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index 94da4d52d79..d8f2277be5a 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c @@ -348,6 +348,10 @@ struct pv_cpu_ops pv_cpu_ops = { .write_ldt_entry = native_write_ldt_entry, .write_gdt_entry = native_write_gdt_entry, .write_idt_entry = native_write_idt_entry, + + .alloc_ldt = paravirt_nop, + .free_ldt = paravirt_nop, + .load_sp0 = native_load_sp0, #if defined(CONFIG_X86_32) || defined(CONFIG_IA32_EMULATION) -- cgit v1.2.3 From d5de8841355a48f7f634a04507185eaf1f9755e3 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Wed, 23 Jul 2008 13:28:58 -0700 Subject: x86: split spinlock implementations out into their own files ftrace requires certain low-level code, like spinlocks and timestamps, to be compiled without -pg in order to avoid infinite recursion. This patch splits out the core paravirt spinlocks and the Xen spinlocks into separate files which can be compiled without -pg. Also do xen/time.c while we're about it. As a result, we can now use ftrace within a Xen domain. Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Ingo Molnar --- arch/x86/kernel/Makefile | 4 ++-- arch/x86/kernel/paravirt-spinlocks.c | 31 +++++++++++++++++++++++++++++++ arch/x86/kernel/paravirt.c | 23 ----------------------- 3 files changed, 33 insertions(+), 25 deletions(-) create mode 100644 arch/x86/kernel/paravirt-spinlocks.c (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 3db651fc8ec..d679cb2c79b 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -10,7 +10,7 @@ ifdef CONFIG_FTRACE # Do not profile debug and lowlevel utilities CFLAGS_REMOVE_tsc.o = -pg CFLAGS_REMOVE_rtc.o = -pg -CFLAGS_REMOVE_paravirt.o = -pg +CFLAGS_REMOVE_paravirt-spinlocks.o = -pg endif # @@ -89,7 +89,7 @@ obj-$(CONFIG_DEBUG_NX_TEST) += test_nx.o obj-$(CONFIG_VMI) += vmi_32.o vmiclock_32.o obj-$(CONFIG_KVM_GUEST) += kvm.o obj-$(CONFIG_KVM_CLOCK) += kvmclock.o -obj-$(CONFIG_PARAVIRT) += paravirt.o paravirt_patch_$(BITS).o +obj-$(CONFIG_PARAVIRT) += paravirt.o paravirt_patch_$(BITS).o paravirt-spinlocks.o obj-$(CONFIG_PARAVIRT_CLOCK) += pvclock.o obj-$(CONFIG_PCSPKR_PLATFORM) += pcspeaker.o diff --git a/arch/x86/kernel/paravirt-spinlocks.c b/arch/x86/kernel/paravirt-spinlocks.c new file mode 100644 index 00000000000..38d7f7f1dbc --- /dev/null +++ b/arch/x86/kernel/paravirt-spinlocks.c @@ -0,0 +1,31 @@ +/* + * Split spinlock implementation out into its own file, so it can be + * compiled in a FTRACE-compatible way. + */ +#include +#include + +#include + +struct pv_lock_ops pv_lock_ops = { +#ifdef CONFIG_SMP + .spin_is_locked = __ticket_spin_is_locked, + .spin_is_contended = __ticket_spin_is_contended, + + .spin_lock = __ticket_spin_lock, + .spin_trylock = __ticket_spin_trylock, + .spin_unlock = __ticket_spin_unlock, +#endif +}; +EXPORT_SYMBOL_GPL(pv_lock_ops); + +void __init paravirt_use_bytelocks(void) +{ +#ifdef CONFIG_SMP + pv_lock_ops.spin_is_locked = __byte_spin_is_locked; + pv_lock_ops.spin_is_contended = __byte_spin_is_contended; + pv_lock_ops.spin_lock = __byte_spin_lock; + pv_lock_ops.spin_trylock = __byte_spin_trylock; + pv_lock_ops.spin_unlock = __byte_spin_unlock; +#endif +} diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index 94da4d52d79..0d71de9ff56 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c @@ -268,17 +268,6 @@ enum paravirt_lazy_mode paravirt_get_lazy_mode(void) return __get_cpu_var(paravirt_lazy_mode); } -void __init paravirt_use_bytelocks(void) -{ -#ifdef CONFIG_SMP - pv_lock_ops.spin_is_locked = __byte_spin_is_locked; - pv_lock_ops.spin_is_contended = __byte_spin_is_contended; - pv_lock_ops.spin_lock = __byte_spin_lock; - pv_lock_ops.spin_trylock = __byte_spin_trylock; - pv_lock_ops.spin_unlock = __byte_spin_unlock; -#endif -} - struct pv_info pv_info = { .name = "bare hardware", .paravirt_enabled = 0, @@ -461,18 +450,6 @@ struct pv_mmu_ops pv_mmu_ops = { .set_fixmap = native_set_fixmap, }; -struct pv_lock_ops pv_lock_ops = { -#ifdef CONFIG_SMP - .spin_is_locked = __ticket_spin_is_locked, - .spin_is_contended = __ticket_spin_is_contended, - - .spin_lock = __ticket_spin_lock, - .spin_trylock = __ticket_spin_trylock, - .spin_unlock = __ticket_spin_unlock, -#endif -}; -EXPORT_SYMBOL_GPL(pv_lock_ops); - EXPORT_SYMBOL_GPL(pv_time_ops); EXPORT_SYMBOL (pv_cpu_ops); EXPORT_SYMBOL (pv_mmu_ops); -- cgit v1.2.3 From e0a5a5d9b006fd441e61685a051fa85d52fb172c Mon Sep 17 00:00:00 2001 From: Alexander van Heukelum Date: Tue, 22 Jul 2008 18:14:16 +0200 Subject: x86, 64-bit, dwarf2: push pushes 8 bytes and popf pops 8 The CFI_ADJUST_CFA_OFFSET dwarf2 annotation of a push/popf pair in ret_from_fork wrongly used a value of 4. It should have been 8. Fix that. Signed-off-by: Alexander van Heukelum Cc: Andi Kleen Cc: heukelum@fastmail.fm Signed-off-by: Ingo Molnar --- arch/x86/kernel/entry_64.S | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index 89434d43960..cf3a0b2d005 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -275,9 +275,9 @@ ENTRY(native_usergs_sysret64) ENTRY(ret_from_fork) CFI_DEFAULT_STACK push kernel_eflags(%rip) - CFI_ADJUST_CFA_OFFSET 4 + CFI_ADJUST_CFA_OFFSET 8 popf # reset kernel eflags - CFI_ADJUST_CFA_OFFSET -4 + CFI_ADJUST_CFA_OFFSET -8 call schedule_tail GET_THREAD_INFO(%rcx) testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT),TI_flags(%rcx) -- cgit v1.2.3 From 32f71aff77b6470d272f80ac28f43f9601c4d140 Mon Sep 17 00:00:00 2001 From: "Maciej W. Rozycki" Date: Mon, 21 Jul 2008 00:52:49 +0100 Subject: x86: PIC, L-APIC and I/O APIC debug information Dump all the PIC, local APIC and I/O APIC information at the fs_initcall() level, which is after ACPI (if used) has initialised PCI information, making the point of invocation consistent across MP-table and ACPI platforms. Remove explicit calls to print_IO_APIC() from elsewhere. Make the interface of all the functions involved consistent between 32-bit and 64-bit versions and make them all static by default by the means of a New-and-Improved(TM) __apicdebuginit() macro. Note that like print_IO_APIC() all these only output anything if "apic=debug" has been passed to the kernel through the command line. Signed-off-by: Maciej W. Rozycki Cc: Chuck Ebbert Signed-off-by: Ingo Molnar --- arch/x86/kernel/io_apic_32.c | 29 +++++++++++++++++++---------- arch/x86/kernel/io_apic_64.c | 31 +++++++++++++++++++------------ 2 files changed, 38 insertions(+), 22 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/io_apic_32.c b/arch/x86/kernel/io_apic_32.c index de9aa0e3a9c..d54455ec985 100644 --- a/arch/x86/kernel/io_apic_32.c +++ b/arch/x86/kernel/io_apic_32.c @@ -50,6 +50,8 @@ #include #include +#define __apicdebuginit(type) static type __init + int (*ioapic_renumber_irq)(int ioapic, int irq); atomic_t irq_mis_count; @@ -1345,7 +1347,8 @@ static void __init setup_timer_IRQ0_pin(unsigned int apic, unsigned int pin, ioapic_write_entry(apic, pin, entry); } -void __init print_IO_APIC(void) + +__apicdebuginit(void) print_IO_APIC(void) { int apic, i; union IO_APIC_reg_00 reg_00; @@ -1460,9 +1463,7 @@ void __init print_IO_APIC(void) return; } -#if 0 - -static void print_APIC_bitfield(int base) +__apicdebuginit(void) print_APIC_bitfield(int base) { unsigned int v; int i, j; @@ -1483,7 +1484,7 @@ static void print_APIC_bitfield(int base) } } -void /*__init*/ print_local_APIC(void *dummy) +__apicdebuginit(void) print_local_APIC(void *dummy) { unsigned int v, ver, maxlvt; @@ -1567,12 +1568,12 @@ void /*__init*/ print_local_APIC(void *dummy) printk("\n"); } -void print_all_local_APICs(void) +__apicdebuginit(void) print_all_local_APICs(void) { on_each_cpu(print_local_APIC, NULL, 1); } -void /*__init*/ print_PIC(void) +__apicdebuginit(void) print_PIC(void) { unsigned int v; unsigned long flags; @@ -1604,7 +1605,17 @@ void /*__init*/ print_PIC(void) printk(KERN_DEBUG "... PIC ELCR: %04x\n", v); } -#endif /* 0 */ +__apicdebuginit(int) print_all_ICs(void) +{ + print_PIC(); + print_all_local_APICs(); + print_IO_APIC(); + + return 0; +} + +fs_initcall(print_all_ICs); + static void __init enable_IO_APIC(void) { @@ -2333,8 +2344,6 @@ void __init setup_IO_APIC(void) setup_IO_APIC_irqs(); init_IO_APIC_traps(); check_timer(); - if (!acpi_ioapic) - print_IO_APIC(); } /* diff --git a/arch/x86/kernel/io_apic_64.c b/arch/x86/kernel/io_apic_64.c index 8269434d170..8cdcc4f287c 100644 --- a/arch/x86/kernel/io_apic_64.c +++ b/arch/x86/kernel/io_apic_64.c @@ -53,6 +53,8 @@ #include #include +#define __apicdebuginit(type) static type __init + struct irq_cfg { cpumask_t domain; cpumask_t old_domain; @@ -87,8 +89,6 @@ int first_system_vector = 0xfe; char system_vectors[NR_VECTORS] = { [0 ... NR_VECTORS-1] = SYS_VECTOR_FREE}; -#define __apicdebuginit __init - int sis_apic_bug; /* not actually supported, dummy for compile */ static int no_timer_check; @@ -965,7 +965,8 @@ static void __init setup_timer_IRQ0_pin(unsigned int apic, unsigned int pin, ioapic_write_entry(apic, pin, entry); } -void __apicdebuginit print_IO_APIC(void) + +__apicdebuginit(void) print_IO_APIC(void) { int apic, i; union IO_APIC_reg_00 reg_00; @@ -1059,9 +1060,7 @@ void __apicdebuginit print_IO_APIC(void) return; } -#if 0 - -static __apicdebuginit void print_APIC_bitfield (int base) +__apicdebuginit(void) print_APIC_bitfield(int base) { unsigned int v; int i, j; @@ -1082,7 +1081,7 @@ static __apicdebuginit void print_APIC_bitfield (int base) } } -void __apicdebuginit print_local_APIC(void * dummy) +__apicdebuginit(void) print_local_APIC(void *dummy) { unsigned int v, ver, maxlvt; @@ -1159,12 +1158,12 @@ void __apicdebuginit print_local_APIC(void * dummy) printk("\n"); } -void print_all_local_APICs (void) +__apicdebuginit(void) print_all_local_APICs(void) { on_each_cpu(print_local_APIC, NULL, 1); } -void __apicdebuginit print_PIC(void) +__apicdebuginit(void) print_PIC(void) { unsigned int v; unsigned long flags; @@ -1196,7 +1195,17 @@ void __apicdebuginit print_PIC(void) printk(KERN_DEBUG "... PIC ELCR: %04x\n", v); } -#endif /* 0 */ +__apicdebuginit(int) print_all_ICs(void) +{ + print_PIC(); + print_all_local_APICs(); + print_IO_APIC(); + + return 0; +} + +fs_initcall(print_all_ICs); + void __init enable_IO_APIC(void) { @@ -1849,8 +1858,6 @@ void __init setup_IO_APIC(void) setup_IO_APIC_irqs(); init_IO_APIC_traps(); check_timer(); - if (!acpi_ioapic) - print_IO_APIC(); } struct sysfs_ioapic_data { -- cgit v1.2.3 From 510b37258dfd61693ca6c039865c78bd996e3718 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Tue, 22 Jul 2008 13:20:22 -0700 Subject: x86: move declaring x2apic_extra_bits it is for uv only Signed-off-by: Yinghai Lu Cc: Jack Steiner Cc: Suresh Siddha Signed-off-by: Ingo Molnar --- arch/x86/kernel/genx2apic_phys.c | 2 -- arch/x86/kernel/genx2apic_uv_x.c | 2 ++ 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/genx2apic_phys.c b/arch/x86/kernel/genx2apic_phys.c index f35a3bf3a9e..3229c68aedd 100644 --- a/arch/x86/kernel/genx2apic_phys.c +++ b/arch/x86/kernel/genx2apic_phys.c @@ -10,8 +10,6 @@ #include #include -DEFINE_PER_CPU(int, x2apic_extra_bits); - static int x2apic_phys; static int set_x2apic_phys_mode(char *arg) diff --git a/arch/x86/kernel/genx2apic_uv_x.c b/arch/x86/kernel/genx2apic_uv_x.c index 14a9772778e..169388c4cce 100644 --- a/arch/x86/kernel/genx2apic_uv_x.c +++ b/arch/x86/kernel/genx2apic_uv_x.c @@ -26,6 +26,8 @@ #include #include +DEFINE_PER_CPU(int, x2apic_extra_bits); + static enum uv_system_type uv_system_type; static int __init uv_acpi_madt_oem_check(char *oem_id, char *oem_table_id) -- cgit v1.2.3 From 36a028de785c6e6f15ac84f8b3b087b89137ea26 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Thu, 24 Jul 2008 13:52:28 +0200 Subject: x86: apic unification - merge down lapic_get_maxlvt No code change on binary level. Signed-off-by: Cyrill Gorcunov Cc: macro@linux-mips.org Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic_32.c | 8 ++++++-- arch/x86/kernel/apic_64.c | 9 ++++++--- 2 files changed, 12 insertions(+), 5 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c index d6c89835837..447dd8c5c0e 100644 --- a/arch/x86/kernel/apic_32.c +++ b/arch/x86/kernel/apic_32.c @@ -193,9 +193,13 @@ int get_physical_broadcast(void) */ int lapic_get_maxlvt(void) { - unsigned int v = apic_read(APIC_LVR); + unsigned int v; - /* 82489DXs do not report # of LVT entries. */ + v = apic_read(APIC_LVR); + /* + * - we always have APIC integrated on 64bit mode + * - 82489DXs do not report # of LVT entries + */ return APIC_INTEGRATED(GET_APIC_VERSION(v)) ? GET_APIC_MAXLVT(v) : 2; } diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c index 7f1f030da7e..4fa2a8620c2 100644 --- a/arch/x86/kernel/apic_64.c +++ b/arch/x86/kernel/apic_64.c @@ -158,11 +158,14 @@ void __cpuinit enable_NMI_through_LVT0(void) */ int lapic_get_maxlvt(void) { - unsigned int v, maxlvt; + unsigned int v; v = apic_read(APIC_LVR); - maxlvt = GET_APIC_MAXLVT(v); - return maxlvt; + /* + * - we always have APIC integrated on 64bit mode + * - 82489DXs do not report # of LVT entries + */ + return APIC_INTEGRATED(GET_APIC_VERSION(v)) ? GET_APIC_MAXLVT(v) : 2; } /* -- cgit v1.2.3 From d4c63ec060f3315653c0ae5bc3a7fe2419a2282f Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Thu, 24 Jul 2008 13:52:29 +0200 Subject: x86: apic unification - merge down enable_NMI_through_LVT0 No code change on binary level. Signed-off-by: Cyrill Gorcunov Cc: macro@linux-mips.org Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic_32.c | 8 ++++++-- arch/x86/kernel/apic_64.c | 5 +++++ 2 files changed, 11 insertions(+), 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c index 447dd8c5c0e..01708f128ee 100644 --- a/arch/x86/kernel/apic_32.c +++ b/arch/x86/kernel/apic_32.c @@ -172,11 +172,15 @@ u32 safe_apic_wait_icr_idle(void) */ void __cpuinit enable_NMI_through_LVT0(void) { - unsigned int v = APIC_DM_NMI; + unsigned int v; + + /* unmask and set to NMI */ + v = APIC_DM_NMI; - /* Level triggered for 82489DX */ + /* Level triggered for 82489DX (32bit mode) */ if (!lapic_is_integrated()) v |= APIC_LVT_LEVEL_TRIGGER; + apic_write(APIC_LVT0, v); } diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c index 4fa2a8620c2..7615b4b9c3f 100644 --- a/arch/x86/kernel/apic_64.c +++ b/arch/x86/kernel/apic_64.c @@ -150,6 +150,11 @@ void __cpuinit enable_NMI_through_LVT0(void) /* unmask and set to NMI */ v = APIC_DM_NMI; + + /* Level triggered for 82489DX (32bit mode) */ + if (!lapic_is_integrated()) + v |= APIC_LVT_LEVEL_TRIGGER; + apic_write(APIC_LVT0, v); } -- cgit v1.2.3 From 4fe702c7e401f912c0edd294af6e37c02f451bbb Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Date: Fri, 25 Jul 2008 10:19:27 +0530 Subject: X86_32: declare pt_regs_access as unsigned long Fixed pt_regs_access to unsigned long as per X86_64 Signed-off-by: Jaswinder Singh --- arch/x86/kernel/ptrace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index e37dccce85d..fc3e8dcd9da 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -69,7 +69,7 @@ static inline bool invalid_selector(u16 value) #define FLAG_MASK FLAG_MASK_32 -static long *pt_regs_access(struct pt_regs *regs, unsigned long regno) +static unsigned long *pt_regs_access(struct pt_regs *regs, unsigned long regno) { BUILD_BUG_ON(offsetof(struct pt_regs, bx) != 0); regno >>= 2; -- cgit v1.2.3 From f86c99853b22576ee8dc4fa27ff6f3c0c7ce0ef8 Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Date: Fri, 25 Jul 2008 10:52:53 +0530 Subject: X86_SMP: smpboot.c declare idle_thread_array and smp_b_stepping as static Signed-off-by: Jaswinder Singh --- arch/x86/kernel/smpboot.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 4b53a647bc0..a9331a42f76 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -88,7 +88,7 @@ static DEFINE_PER_CPU(struct task_struct *, idle_thread_array); #define get_idle_for_cpu(x) (per_cpu(idle_thread_array, x)) #define set_idle_for_cpu(x, p) (per_cpu(idle_thread_array, x) = (p)) #else -struct task_struct *idle_thread_array[NR_CPUS] __cpuinitdata ; +static struct task_struct *idle_thread_array[NR_CPUS] __cpuinitdata ; #define get_idle_for_cpu(x) (idle_thread_array[(x)]) #define set_idle_for_cpu(x, p) (idle_thread_array[(x)] = (p)) #endif @@ -129,7 +129,7 @@ static int boot_cpu_logical_apicid; static cpumask_t cpu_sibling_setup_map; /* Set if we find a B stepping CPU */ -int __cpuinitdata smp_b_stepping; +static int __cpuinitdata smp_b_stepping; #if defined(CONFIG_NUMA) && defined(CONFIG_X86_32) -- cgit v1.2.3 From 2907829cd0ffdef69083985ba28cf1cf3857c681 Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Date: Fri, 25 Jul 2008 11:05:56 +0530 Subject: X86_SMP: ipi.c declare functions before they get used move upwards for __send_IPI_shortcut and send_IPI_mask_bitmask Signed-off-by: Jaswinder Singh --- arch/x86/kernel/ipi.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/ipi.c b/arch/x86/kernel/ipi.c index 3f7537b669d..f1c688e46f3 100644 --- a/arch/x86/kernel/ipi.c +++ b/arch/x86/kernel/ipi.c @@ -20,6 +20,8 @@ #ifdef CONFIG_X86_32 #include +#include + /* * the following functions deal with sending IPIs between CPUs. * @@ -147,7 +149,6 @@ void send_IPI_mask_sequence(cpumask_t mask, int vector) } /* must come after the send_IPI functions above for inlining */ -#include static int convert_apicid_to_cpu(int apic_id) { int i; -- cgit v1.2.3 From 3c9339049df5cc3a468c11de6c4101a1ea8c3d83 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 25 Jul 2008 11:32:36 +0200 Subject: x86: fix ds.c build error MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit fix: arch/x86/kernel/ds.c: In function ‘ds_allocate_buffer': arch/x86/kernel/ds.c:339: error: implicit declaration of function ‘PAGE_ALIGN' Signed-off-by: Ingo Molnar --- arch/x86/kernel/ds.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/ds.c b/arch/x86/kernel/ds.c index 24a323c9599..ab21c270bfa 100644 --- a/arch/x86/kernel/ds.c +++ b/arch/x86/kernel/ds.c @@ -29,6 +29,7 @@ #include #include #include +#include /* -- cgit v1.2.3 From 286f571837ba9d67625afd015366d79345abb414 Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Tue, 22 Jul 2008 21:08:46 +0200 Subject: x86: apic_*.c: add description to AMD's extended LVT functions Signed-off-by: Robert Richter Cc: oprofile-list Cc: Barry Kasindorf Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic_32.c | 3 +++ arch/x86/kernel/apic_64.c | 3 +++ 2 files changed, 6 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c index d6c89835837..fad94b0decc 100644 --- a/arch/x86/kernel/apic_32.c +++ b/arch/x86/kernel/apic_32.c @@ -646,6 +646,9 @@ int setup_profiling_timer(unsigned int multiplier) * * Vector mappings are hard coded. On K8 only offset 0 (APIC500) and * MCE interrupts are supported. Thus MCE offset must be set to 0. + * + * If mask=1, the LVT entry does not generate interrupts while mask=0 + * enables the vector. See also the BKDGs. */ #define APIC_EILVT_LVTOFF_MCE 0 diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c index 7f1f030da7e..42bf69f8bc8 100644 --- a/arch/x86/kernel/apic_64.c +++ b/arch/x86/kernel/apic_64.c @@ -205,6 +205,9 @@ static void __setup_APIC_LVTT(unsigned int clocks, int oneshot, int irqen) * * Vector mappings are hard coded. On K8 only offset 0 (APIC500) and * MCE interrupts are supported. Thus MCE offset must be set to 0. + * + * If mask=1, the LVT entry does not generate interrupts while mask=0 + * enables the vector. See also the BKDGs. */ #define APIC_EILVT_LVTOFF_MCE 0 -- cgit v1.2.3 From 09691616850b3614dfb44790e1e1419b6a7f5d13 Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Tue, 22 Jul 2008 21:09:05 +0200 Subject: x86: apic: export symbols for extended interrupt LVT functions This patch adds EXPORT_SYMBOLs to allow OProfile to be built as module. Cc: Arjan van de Ven Signed-off-by: Robert Richter Cc: oprofile-list Cc: Arjan van de Ven Cc: Barry Kasindorf Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic_32.c | 1 + arch/x86/kernel/apic_64.c | 1 + 2 files changed, 2 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c index fad94b0decc..ed3a6d7e9de 100644 --- a/arch/x86/kernel/apic_32.c +++ b/arch/x86/kernel/apic_32.c @@ -672,6 +672,7 @@ u8 setup_APIC_eilvt_ibs(u8 vector, u8 msg_type, u8 mask) setup_APIC_eilvt(APIC_EILVT_LVTOFF_IBS, vector, msg_type, mask); return APIC_EILVT_LVTOFF_IBS; } +EXPORT_SYMBOL(setup_APIC_eilvt_ibs); /* * Local APIC start and shutdown diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c index 42bf69f8bc8..45ec90e37b9 100644 --- a/arch/x86/kernel/apic_64.c +++ b/arch/x86/kernel/apic_64.c @@ -232,6 +232,7 @@ u8 setup_APIC_eilvt_ibs(u8 vector, u8 msg_type, u8 mask) setup_APIC_eilvt(APIC_EILVT_LVTOFF_IBS, vector, msg_type, mask); return APIC_EILVT_LVTOFF_IBS; } +EXPORT_SYMBOL(setup_APIC_eilvt_ibs); /* * Program the next event, relative to now -- cgit v1.2.3 From 6aa360e6c16c145edf1837690e0f7aaea6b86ef3 Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Wed, 23 Jul 2008 15:28:14 +0200 Subject: x86: apic: changing export symbols to *_GPL This fits better here. Signed-off-by: Robert Richter Cc: Arjan van de Ven Cc: Barry Kasindorf Cc: oprofile-list Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic_32.c | 2 +- arch/x86/kernel/apic_64.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c index ed3a6d7e9de..0059e7a8a9e 100644 --- a/arch/x86/kernel/apic_32.c +++ b/arch/x86/kernel/apic_32.c @@ -672,7 +672,7 @@ u8 setup_APIC_eilvt_ibs(u8 vector, u8 msg_type, u8 mask) setup_APIC_eilvt(APIC_EILVT_LVTOFF_IBS, vector, msg_type, mask); return APIC_EILVT_LVTOFF_IBS; } -EXPORT_SYMBOL(setup_APIC_eilvt_ibs); +EXPORT_SYMBOL_GPL(setup_APIC_eilvt_ibs); /* * Local APIC start and shutdown diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c index 45ec90e37b9..e571351f2a9 100644 --- a/arch/x86/kernel/apic_64.c +++ b/arch/x86/kernel/apic_64.c @@ -232,7 +232,7 @@ u8 setup_APIC_eilvt_ibs(u8 vector, u8 msg_type, u8 mask) setup_APIC_eilvt(APIC_EILVT_LVTOFF_IBS, vector, msg_type, mask); return APIC_EILVT_LVTOFF_IBS; } -EXPORT_SYMBOL(setup_APIC_eilvt_ibs); +EXPORT_SYMBOL_GPL(setup_APIC_eilvt_ibs); /* * Program the next event, relative to now -- cgit v1.2.3 From 1ddb5518052e4e28ab489237443f7443b3fd69ca Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Fri, 25 Jul 2008 16:48:55 +0200 Subject: x86: convert pci-dma.c from round_up to roundup Signed-off-by: Joerg Roedel Signed-off-by: Ingo Molnar --- arch/x86/kernel/pci-dma.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index cbecb05551b..88ddd04cfa9 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c @@ -82,7 +82,7 @@ void __init dma32_reserve_bootmem(void) * using 512M as goal */ align = 64ULL<<20; - size = round_up(dma32_bootmem_size, align); + size = roundup(dma32_bootmem_size, align); dma32_bootmem_ptr = __alloc_bootmem_nopanic(size, align, 512ULL<<20); if (dma32_bootmem_ptr) -- cgit v1.2.3 From 5c05917e7fe313a187ad6ebb94c1c6cf42862a0b Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 24 Jul 2008 17:29:40 -0700 Subject: x86: usb debug port early console, v4 based on work from Eric, and add some timeout so don't dead loop when debug device is not installed v2: fix checkpatch warning v3: move ehci struct def to linux/usrb/ehci_def.h from host/ehci.h also add CONFIG_EARLY_PRINTK_DBGP to disable it by default v4: address comments from Ingo, seperate ehci reg def moving to another patch also add auto detect port that connect to debug device for Nvidia southbridge Signed-off-by: Yinghai Lu Cc: Andrew Morton Cc: Andi Kleen Cc: "Arjan van de Ven" Cc: "Eric W. Biederman" Cc: "Greg KH" Signed-off-by: Ingo Molnar --- arch/x86/kernel/early_printk.c | 762 ++++++++++++++++++++++++++++++++++++++++- 1 file changed, 759 insertions(+), 3 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/early_printk.c b/arch/x86/kernel/early_printk.c index ff9e7350da5..28155acf706 100644 --- a/arch/x86/kernel/early_printk.c +++ b/arch/x86/kernel/early_printk.c @@ -3,11 +3,19 @@ #include #include #include +#include +#include +#include +#include #include #include #include #include #include +#include +#include +#include +#include /* Simple VGA output */ #define VGABASE (__ISA_IO_base + 0xb8000) @@ -78,6 +86,7 @@ static int early_serial_base = 0x3f8; /* ttyS0 */ static int early_serial_putc(unsigned char ch) { unsigned timeout = 0xffff; + while ((inb(early_serial_base + LSR) & XMTRDY) == 0 && --timeout) cpu_relax(); outb(ch, early_serial_base + TXR); @@ -151,6 +160,721 @@ static struct console early_serial_console = { .index = -1, }; +#ifdef CONFIG_EARLY_PRINTK_DBGP + +static struct ehci_caps __iomem *ehci_caps; +static struct ehci_regs __iomem *ehci_regs; +static struct ehci_dbg_port __iomem *ehci_debug; +static unsigned int dbgp_endpoint_out; + +struct ehci_dev { + u32 bus; + u32 slot; + u32 func; +}; + +static struct ehci_dev ehci_dev; + +#define USB_DEBUG_DEVNUM 127 + +#define DBGP_DATA_TOGGLE 0x8800 + +static inline u32 dbgp_pid_update(u32 x, u32 tok) +{ + return ((x ^ DBGP_DATA_TOGGLE) & 0xffff00) | (tok & 0xff); +} + +static inline u32 dbgp_len_update(u32 x, u32 len) +{ + return (x & ~0x0f) | (len & 0x0f); +} + +/* + * USB Packet IDs (PIDs) + */ + +/* token */ +#define USB_PID_OUT 0xe1 +#define USB_PID_IN 0x69 +#define USB_PID_SOF 0xa5 +#define USB_PID_SETUP 0x2d +/* handshake */ +#define USB_PID_ACK 0xd2 +#define USB_PID_NAK 0x5a +#define USB_PID_STALL 0x1e +#define USB_PID_NYET 0x96 +/* data */ +#define USB_PID_DATA0 0xc3 +#define USB_PID_DATA1 0x4b +#define USB_PID_DATA2 0x87 +#define USB_PID_MDATA 0x0f +/* Special */ +#define USB_PID_PREAMBLE 0x3c +#define USB_PID_ERR 0x3c +#define USB_PID_SPLIT 0x78 +#define USB_PID_PING 0xb4 +#define USB_PID_UNDEF_0 0xf0 + +#define USB_PID_DATA_TOGGLE 0x88 +#define DBGP_CLAIM (DBGP_OWNER | DBGP_ENABLED | DBGP_INUSE) + +#define PCI_CAP_ID_EHCI_DEBUG 0xa + +#define HUB_ROOT_RESET_TIME 50 /* times are in msec */ +#define HUB_SHORT_RESET_TIME 10 +#define HUB_LONG_RESET_TIME 200 +#define HUB_RESET_TIMEOUT 500 + +#define DBGP_MAX_PACKET 8 + +static int dbgp_wait_until_complete(void) +{ + u32 ctrl; + int loop = 0x100000; + + do { + ctrl = readl(&ehci_debug->control); + /* Stop when the transaction is finished */ + if (ctrl & DBGP_DONE) + break; + } while (--loop > 0); + + if (!loop) + return -1; + + /* + * Now that we have observed the completed transaction, + * clear the done bit. + */ + writel(ctrl | DBGP_DONE, &ehci_debug->control); + return (ctrl & DBGP_ERROR) ? -DBGP_ERRCODE(ctrl) : DBGP_LEN(ctrl); +} + +static void dbgp_mdelay(int ms) +{ + int i; + + while (ms--) { + for (i = 0; i < 1000; i++) + outb(0x1, 0x80); + } +} + +static void dbgp_breath(void) +{ + /* Sleep to give the debug port a chance to breathe */ +} + +static int dbgp_wait_until_done(unsigned ctrl) +{ + u32 pids, lpid; + int ret; + int loop = 3; + +retry: + writel(ctrl | DBGP_GO, &ehci_debug->control); + ret = dbgp_wait_until_complete(); + pids = readl(&ehci_debug->pids); + lpid = DBGP_PID_GET(pids); + + if (ret < 0) + return ret; + + /* + * If the port is getting full or it has dropped data + * start pacing ourselves, not necessary but it's friendly. + */ + if ((lpid == USB_PID_NAK) || (lpid == USB_PID_NYET)) + dbgp_breath(); + + /* If I get a NACK reissue the transmission */ + if (lpid == USB_PID_NAK) { + if (--loop > 0) + goto retry; + } + + return ret; +} + +static void dbgp_set_data(const void *buf, int size) +{ + const unsigned char *bytes = buf; + u32 lo, hi; + int i; + + lo = hi = 0; + for (i = 0; i < 4 && i < size; i++) + lo |= bytes[i] << (8*i); + for (; i < 8 && i < size; i++) + hi |= bytes[i] << (8*(i - 4)); + writel(lo, &ehci_debug->data03); + writel(hi, &ehci_debug->data47); +} + +static void dbgp_get_data(void *buf, int size) +{ + unsigned char *bytes = buf; + u32 lo, hi; + int i; + + lo = readl(&ehci_debug->data03); + hi = readl(&ehci_debug->data47); + for (i = 0; i < 4 && i < size; i++) + bytes[i] = (lo >> (8*i)) & 0xff; + for (; i < 8 && i < size; i++) + bytes[i] = (hi >> (8*(i - 4))) & 0xff; +} + +static int dbgp_bulk_write(unsigned devnum, unsigned endpoint, + const char *bytes, int size) +{ + u32 pids, addr, ctrl; + int ret; + + if (size > DBGP_MAX_PACKET) + return -1; + + addr = DBGP_EPADDR(devnum, endpoint); + + pids = readl(&ehci_debug->pids); + pids = dbgp_pid_update(pids, USB_PID_OUT); + + ctrl = readl(&ehci_debug->control); + ctrl = dbgp_len_update(ctrl, size); + ctrl |= DBGP_OUT; + ctrl |= DBGP_GO; + + dbgp_set_data(bytes, size); + writel(addr, &ehci_debug->address); + writel(pids, &ehci_debug->pids); + + ret = dbgp_wait_until_done(ctrl); + if (ret < 0) + return ret; + + return ret; +} + +static int dbgp_bulk_read(unsigned devnum, unsigned endpoint, void *data, + int size) +{ + u32 pids, addr, ctrl; + int ret; + + if (size > DBGP_MAX_PACKET) + return -1; + + addr = DBGP_EPADDR(devnum, endpoint); + + pids = readl(&ehci_debug->pids); + pids = dbgp_pid_update(pids, USB_PID_IN); + + ctrl = readl(&ehci_debug->control); + ctrl = dbgp_len_update(ctrl, size); + ctrl &= ~DBGP_OUT; + ctrl |= DBGP_GO; + + writel(addr, &ehci_debug->address); + writel(pids, &ehci_debug->pids); + ret = dbgp_wait_until_done(ctrl); + if (ret < 0) + return ret; + + if (size > ret) + size = ret; + dbgp_get_data(data, size); + return ret; +} + +static int dbgp_control_msg(unsigned devnum, int requesttype, int request, + int value, int index, void *data, int size) +{ + u32 pids, addr, ctrl; + struct usb_ctrlrequest req; + int read; + int ret; + + read = (requesttype & USB_DIR_IN) != 0; + if (size > (read ? DBGP_MAX_PACKET:0)) + return -1; + + /* Compute the control message */ + req.bRequestType = requesttype; + req.bRequest = request; + req.wValue = value; + req.wIndex = index; + req.wLength = size; + + pids = DBGP_PID_SET(USB_PID_DATA0, USB_PID_SETUP); + addr = DBGP_EPADDR(devnum, 0); + + ctrl = readl(&ehci_debug->control); + ctrl = dbgp_len_update(ctrl, sizeof(req)); + ctrl |= DBGP_OUT; + ctrl |= DBGP_GO; + + /* Send the setup message */ + dbgp_set_data(&req, sizeof(req)); + writel(addr, &ehci_debug->address); + writel(pids, &ehci_debug->pids); + ret = dbgp_wait_until_done(ctrl); + if (ret < 0) + return ret; + + /* Read the result */ + return dbgp_bulk_read(devnum, 0, data, size); +} + + +/* Find a PCI capability */ +static u32 __init find_cap(u32 num, u32 slot, u32 func, int cap) +{ + u8 pos; + int bytes; + + if (!(read_pci_config_16(num, slot, func, PCI_STATUS) & + PCI_STATUS_CAP_LIST)) + return 0; + + pos = read_pci_config_byte(num, slot, func, PCI_CAPABILITY_LIST); + for (bytes = 0; bytes < 48 && pos >= 0x40; bytes++) { + u8 id; + + pos &= ~3; + id = read_pci_config_byte(num, slot, func, pos+PCI_CAP_LIST_ID); + if (id == 0xff) + break; + if (id == cap) + return pos; + + pos = read_pci_config_byte(num, slot, func, + pos+PCI_CAP_LIST_NEXT); + } + return 0; +} + +static u32 __init __find_dbgp(u32 bus, u32 slot, u32 func) +{ + u32 class; + + class = read_pci_config(bus, slot, func, PCI_CLASS_REVISION); + if ((class >> 8) != PCI_CLASS_SERIAL_USB_EHCI) + return 0; + + return find_cap(bus, slot, func, PCI_CAP_ID_EHCI_DEBUG); +} + +static u32 __init find_dbgp(int ehci_num, u32 *rbus, u32 *rslot, u32 *rfunc) +{ + u32 bus, slot, func; + + for (bus = 0; bus < 256; bus++) { + for (slot = 0; slot < 32; slot++) { + for (func = 0; func < 8; func++) { + unsigned cap; + + cap = __find_dbgp(bus, slot, func); + + if (!cap) + continue; + if (ehci_num-- != 0) + continue; + *rbus = bus; + *rslot = slot; + *rfunc = func; + return cap; + } + } + } + return 0; +} + +static int ehci_reset_port(int port) +{ + u32 portsc; + u32 delay_time, delay; + int loop; + + /* Reset the usb debug port */ + portsc = readl(&ehci_regs->port_status[port - 1]); + portsc &= ~PORT_PE; + portsc |= PORT_RESET; + writel(portsc, &ehci_regs->port_status[port - 1]); + + delay = HUB_ROOT_RESET_TIME; + for (delay_time = 0; delay_time < HUB_RESET_TIMEOUT; + delay_time += delay) { + dbgp_mdelay(delay); + + portsc = readl(&ehci_regs->port_status[port - 1]); + if (portsc & PORT_RESET) { + /* force reset to complete */ + loop = 2; + writel(portsc & ~(PORT_RWC_BITS | PORT_RESET), + &ehci_regs->port_status[port - 1]); + do { + portsc = readl(&ehci_regs->port_status[port-1]); + } while ((portsc & PORT_RESET) && (--loop > 0)); + } + + /* Device went away? */ + if (!(portsc & PORT_CONNECT)) + return -ENOTCONN; + + /* bomb out completely if something weird happend */ + if ((portsc & PORT_CSC)) + return -EINVAL; + + /* If we've finished resetting, then break out of the loop */ + if (!(portsc & PORT_RESET) && (portsc & PORT_PE)) + return 0; + } + return -EBUSY; +} + +static int ehci_wait_for_port(int port) +{ + u32 status; + int ret, reps; + + for (reps = 0; reps < 3; reps++) { + dbgp_mdelay(100); + status = readl(&ehci_regs->status); + if (status & STS_PCD) { + ret = ehci_reset_port(port); + if (ret == 0) + return 0; + } + } + return -ENOTCONN; +} + +#ifdef DBGP_DEBUG +# define dbgp_printk early_printk +#else +static inline void dbgp_printk(const char *fmt, ...) { } +#endif + +typedef void (*set_debug_port_t)(int port); + +static void default_set_debug_port(int port) +{ +} + +static set_debug_port_t set_debug_port = default_set_debug_port; + +static void nvidia_set_debug_port(int port) +{ + u32 dword; + dword = read_pci_config(ehci_dev.bus, ehci_dev.slot, ehci_dev.func, + 0x74); + dword &= ~(0x0f<<12); + dword |= ((port & 0x0f)<<12); + write_pci_config(ehci_dev.bus, ehci_dev.slot, ehci_dev.func, 0x74, + dword); + dbgp_printk("set debug port to %d\n", port); +} + +static void __init detect_set_debug_port(void) +{ + u32 vendorid; + + vendorid = read_pci_config(ehci_dev.bus, ehci_dev.slot, ehci_dev.func, + 0x00); + + if ((vendorid & 0xffff) == 0x10de) { + dbgp_printk("using nvidia set_debug_port\n"); + set_debug_port = nvidia_set_debug_port; + } +} + +static int __init ehci_setup(void) +{ + struct usb_debug_descriptor dbgp_desc; + u32 cmd, ctrl, status, portsc, hcs_params; + u32 debug_port, new_debug_port = 0, n_ports; + u32 devnum; + int ret, i; + int loop; + int port_map_tried; + int playtimes = 3; + +try_next_time: + port_map_tried = 0; + +try_next_port: + + hcs_params = readl(&ehci_caps->hcs_params); + debug_port = HCS_DEBUG_PORT(hcs_params); + n_ports = HCS_N_PORTS(hcs_params); + + dbgp_printk("debug_port: %d\n", debug_port); + dbgp_printk("n_ports: %d\n", n_ports); + + for (i = 1; i <= n_ports; i++) { + portsc = readl(&ehci_regs->port_status[i-1]); + dbgp_printk("portstatus%d: %08x\n", i, portsc); + } + + if (port_map_tried && (new_debug_port != debug_port)) { + if (--playtimes) { + set_debug_port(new_debug_port); + goto try_next_time; + } + return -1; + } + + loop = 10; + /* Reset the EHCI controller */ + cmd = readl(&ehci_regs->command); + cmd |= CMD_RESET; + writel(cmd, &ehci_regs->command); + do { + cmd = readl(&ehci_regs->command); + } while ((cmd & CMD_RESET) && (--loop > 0)); + + if (!loop) { + dbgp_printk("can not reset ehci\n"); + return -1; + } + dbgp_printk("ehci reset done\n"); + + /* Claim ownership, but do not enable yet */ + ctrl = readl(&ehci_debug->control); + ctrl |= DBGP_OWNER; + ctrl &= ~(DBGP_ENABLED | DBGP_INUSE); + writel(ctrl, &ehci_debug->control); + + /* Start the ehci running */ + cmd = readl(&ehci_regs->command); + cmd &= ~(CMD_LRESET | CMD_IAAD | CMD_PSE | CMD_ASE | CMD_RESET); + cmd |= CMD_RUN; + writel(cmd, &ehci_regs->command); + + /* Ensure everything is routed to the EHCI */ + writel(FLAG_CF, &ehci_regs->configured_flag); + + /* Wait until the controller is no longer halted */ + loop = 10; + do { + status = readl(&ehci_regs->status); + } while ((status & STS_HALT) && (--loop > 0)); + + if (!loop) { + dbgp_printk("ehci can be started\n"); + return -1; + } + dbgp_printk("ehci started\n"); + + /* Wait for a device to show up in the debug port */ + ret = ehci_wait_for_port(debug_port); + if (ret < 0) { + dbgp_printk("No device found in debug port\n"); + goto next_debug_port; + } + dbgp_printk("ehci wait for port done\n"); + + /* Enable the debug port */ + ctrl = readl(&ehci_debug->control); + ctrl |= DBGP_CLAIM; + writel(ctrl, &ehci_debug->control); + ctrl = readl(&ehci_debug->control); + if ((ctrl & DBGP_CLAIM) != DBGP_CLAIM) { + dbgp_printk("No device in debug port\n"); + writel(ctrl & ~DBGP_CLAIM, &ehci_debug->control); + goto err; + } + dbgp_printk("debug ported enabled\n"); + + /* Completely transfer the debug device to the debug controller */ + portsc = readl(&ehci_regs->port_status[debug_port - 1]); + portsc &= ~PORT_PE; + writel(portsc, &ehci_regs->port_status[debug_port - 1]); + + dbgp_mdelay(100); + + /* Find the debug device and make it device number 127 */ + for (devnum = 0; devnum <= 127; devnum++) { + ret = dbgp_control_msg(devnum, + USB_DIR_IN | USB_TYPE_STANDARD | USB_RECIP_DEVICE, + USB_REQ_GET_DESCRIPTOR, (USB_DT_DEBUG << 8), 0, + &dbgp_desc, sizeof(dbgp_desc)); + if (ret > 0) + break; + } + if (devnum > 127) { + dbgp_printk("Could not find attached debug device\n"); + goto err; + } + if (ret < 0) { + dbgp_printk("Attached device is not a debug device\n"); + goto err; + } + dbgp_endpoint_out = dbgp_desc.bDebugOutEndpoint; + + /* Move the device to 127 if it isn't already there */ + if (devnum != USB_DEBUG_DEVNUM) { + ret = dbgp_control_msg(devnum, + USB_DIR_OUT | USB_TYPE_STANDARD | USB_RECIP_DEVICE, + USB_REQ_SET_ADDRESS, USB_DEBUG_DEVNUM, 0, NULL, 0); + if (ret < 0) { + dbgp_printk("Could not move attached device to %d\n", + USB_DEBUG_DEVNUM); + goto err; + } + devnum = USB_DEBUG_DEVNUM; + dbgp_printk("debug device renamed to 127\n"); + } + + /* Enable the debug interface */ + ret = dbgp_control_msg(USB_DEBUG_DEVNUM, + USB_DIR_OUT | USB_TYPE_STANDARD | USB_RECIP_DEVICE, + USB_REQ_SET_FEATURE, USB_DEVICE_DEBUG_MODE, 0, NULL, 0); + if (ret < 0) { + dbgp_printk(" Could not enable the debug device\n"); + goto err; + } + dbgp_printk("debug interface enabled\n"); + + /* Perform a small write to get the even/odd data state in sync + */ + ret = dbgp_bulk_write(USB_DEBUG_DEVNUM, dbgp_endpoint_out, " ", 1); + if (ret < 0) { + dbgp_printk("dbgp_bulk_write failed: %d\n", ret); + goto err; + } + dbgp_printk("small write doned\n"); + + return 0; +err: + /* Things didn't work so remove my claim */ + ctrl = readl(&ehci_debug->control); + ctrl &= ~(DBGP_CLAIM | DBGP_OUT); + writel(ctrl, &ehci_debug->control); + return -1; + +next_debug_port: + port_map_tried |= (1<<(debug_port - 1)); + new_debug_port = ((debug_port-1+1)%n_ports) + 1; + if (port_map_tried != ((1<> 29) & 0x7; + bar = (bar * 4) + 0xc; + offset = (debug_port >> 16) & 0xfff; + dbgp_printk("bar: %02x offset: %03x\n", bar, offset); + if (bar != PCI_BASE_ADDRESS_0) { + dbgp_printk("only debug ports on bar 1 handled.\n"); + + return -1; + } + + bar_val = read_pci_config(bus, slot, func, PCI_BASE_ADDRESS_0); + dbgp_printk("bar_val: %02x offset: %03x\n", bar_val, offset); + if (bar_val & ~PCI_BASE_ADDRESS_MEM_MASK) { + dbgp_printk("only simple 32bit mmio bars supported\n"); + + return -1; + } + + /* double check if the mem space is enabled */ + byte = read_pci_config_byte(bus, slot, func, 0x04); + if (!(byte & 0x2)) { + byte |= 0x02; + write_pci_config_byte(bus, slot, func, 0x04, byte); + dbgp_printk("mmio for ehci enabled\n"); + } + + /* + * FIXME I don't have the bar size so just guess PAGE_SIZE is more + * than enough. 1K is the biggest I have seen. + */ + set_fixmap_nocache(FIX_DBGP_BASE, bar_val & PAGE_MASK); + ehci_bar = (void __iomem *)__fix_to_virt(FIX_DBGP_BASE); + ehci_bar += bar_val & ~PAGE_MASK; + dbgp_printk("ehci_bar: %p\n", ehci_bar); + + ehci_caps = ehci_bar; + ehci_regs = ehci_bar + HC_LENGTH(readl(&ehci_caps->hc_capbase)); + ehci_debug = ehci_bar + offset; + ehci_dev.bus = bus; + ehci_dev.slot = slot; + ehci_dev.func = func; + + detect_set_debug_port(); + + ret = ehci_setup(); + if (ret < 0) { + dbgp_printk("ehci_setup failed\n"); + ehci_debug = 0; + + return -1; + } + + return 0; +} + +static void early_dbgp_write(struct console *con, const char *str, u32 n) +{ + int chunk, ret; + + if (!ehci_debug) + return; + while (n > 0) { + chunk = n; + if (chunk > DBGP_MAX_PACKET) + chunk = DBGP_MAX_PACKET; + ret = dbgp_bulk_write(USB_DEBUG_DEVNUM, + dbgp_endpoint_out, str, chunk); + str += chunk; + n -= chunk; + } +} + +static struct console early_dbgp_console = { + .name = "earlydbg", + .write = early_dbgp_write, + .flags = CON_PRINTBUFFER, + .index = -1, +}; +#endif + /* Console interface to a host file on AMD's SimNow! */ static int simnow_fd; @@ -165,6 +889,7 @@ enum { static noinline long simnow(long cmd, long a, long b, long c) { long ret; + asm volatile("cpuid" : "=a" (ret) : "b" (a), "c" (b), "d" (c), "0" (MAGIC1), "D" (cmd + MAGIC2)); @@ -174,6 +899,7 @@ static noinline long simnow(long cmd, long a, long b, long c) static void __init simnow_init(char *str) { char *fn = "klog"; + if (*str == '=') fn = ++str; /* error ignored */ @@ -208,10 +934,11 @@ asmlinkage void early_printk(const char *fmt, ...) va_end(ap); } -static int __initdata keep_early; static int __init setup_early_printk(char *buf) { + int keep_early; + if (!buf) return 0; @@ -219,8 +946,7 @@ static int __init setup_early_printk(char *buf) return 0; early_console_initialized = 1; - if (strstr(buf, "keep")) - keep_early = 1; + keep_early = (strstr(buf, "keep") != NULL); if (!strncmp(buf, "serial", 6)) { early_serial_init(buf + 6); @@ -238,6 +964,17 @@ static int __init setup_early_printk(char *buf) simnow_init(buf + 6); early_console = &simnow_console; keep_early = 1; +#ifdef CONFIG_EARLY_PRINTK_DBGP + } else if (!strncmp(buf, "dbgp", 4)) { + if (early_dbgp_init(buf+4) < 0) + return 0; + early_console = &early_dbgp_console; + /* + * usb subsys will reset ehci controller, so don't keep + * that early console + */ + keep_early = 0; +#endif #ifdef CONFIG_HVC_XEN } else if (!strncmp(buf, "xen", 3)) { early_console = &xenboot_console; @@ -251,4 +988,23 @@ static int __init setup_early_printk(char *buf) register_console(early_console); return 0; } + +void __init enable_debug_console(char *buf) +{ +#ifdef DBGP_DEBUG + struct console *old_early_console = NULL; + + if (early_console_initialized && early_console) { + old_early_console = early_console; + unregister_console(early_console); + early_console_initialized = 0; + } + + setup_early_printk(buf); + + if (early_console == old_early_console && old_early_console) + register_console(old_early_console); +#endif +} + early_param("earlyprintk", setup_early_printk); -- cgit v1.2.3 From a4dbc34d181e87a0d724dee365921e9251f831d4 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Fri, 25 Jul 2008 02:14:28 -0700 Subject: x86: add setup_ioapic_ids for numaq in x86_quirks Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/io_apic_32.c | 5 ++--- arch/x86/kernel/numaq_32.c | 7 +++++++ 2 files changed, 9 insertions(+), 3 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/io_apic_32.c b/arch/x86/kernel/io_apic_32.c index 98e4db5373f..72ba06314c7 100644 --- a/arch/x86/kernel/io_apic_32.c +++ b/arch/x86/kernel/io_apic_32.c @@ -46,6 +46,7 @@ #include #include #include +#include #include #include @@ -1728,10 +1729,8 @@ static void __init setup_ioapic_ids_from_mpc(void) unsigned char old_id; unsigned long flags; -#ifdef CONFIG_X86_NUMAQ - if (found_numaq) + if (x86_quirks->setup_ioapic_ids && x86_quirks->setup_ioapic_ids()) return; -#endif /* * Don't check I/O APIC IDs for xAPIC systems. They have diff --git a/arch/x86/kernel/numaq_32.c b/arch/x86/kernel/numaq_32.c index b8c45610b20..2434467ddf7 100644 --- a/arch/x86/kernel/numaq_32.c +++ b/arch/x86/kernel/numaq_32.c @@ -229,6 +229,12 @@ static void __init smp_read_mpc_oem(struct mp_config_oemtable *oemtable, } } +static int __init numaq_setup_ioapic_ids(void) +{ + /* so can skip it */ + return 1; +} + static struct x86_quirks numaq_x86_quirks __initdata = { .arch_pre_time_init = numaq_pre_time_init, .arch_time_init = NULL, @@ -243,6 +249,7 @@ static struct x86_quirks numaq_x86_quirks __initdata = { .mpc_oem_bus_info = mpc_oem_bus_info, .mpc_oem_pci_bus = mpc_oem_pci_bus, .smp_read_mpc_oem = smp_read_mpc_oem, + .setup_ioapic_ids = numaq_setup_ioapic_ids, }; void numaq_mps_oem_check(struct mp_config_table *mpc, char *oem, -- cgit v1.2.3 From e8c48efdb971cfb1b043076cf68be535d461a20b Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Fri, 25 Jul 2008 02:17:44 -0700 Subject: x86: mach_summit to summit Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/summit_32.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/summit_32.c b/arch/x86/kernel/summit_32.c index d67ce5f044b..7b987852e87 100644 --- a/arch/x86/kernel/summit_32.c +++ b/arch/x86/kernel/summit_32.c @@ -30,7 +30,7 @@ #include #include #include -#include +#include static struct rio_table_hdr *rio_table_hdr __initdata; static struct scal_detail *scal_devs[MAX_NUMNODES] __initdata; -- cgit v1.2.3 From 39eacc20f93614f7bab63eb1d45060503afc46d0 Mon Sep 17 00:00:00 2001 From: Huang Weiyi Date: Fri, 25 Jul 2008 23:30:13 +0800 Subject: arch/x86/kernel/visws_quirks.c: Removed duplicated #include Removed duplicated #include in arch/x86/kernel/visws_quirks.c. asm/apic.h asm/arch_hooks.h asm/io.h asm/visws/cobalt.h asm/visws/lithium.h asm/visws/piix4.h linux/init.h linux/interrupt.h linux/smp.h Signed-off-by: Huang Weiyi Signed-off-by: Ingo Molnar --- arch/x86/kernel/visws_quirks.c | 15 --------------- 1 file changed, 15 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/visws_quirks.c b/arch/x86/kernel/visws_quirks.c index 41e01b145c4..0c75691bcf5 100644 --- a/arch/x86/kernel/visws_quirks.c +++ b/arch/x86/kernel/visws_quirks.c @@ -29,41 +29,26 @@ #include #include #include -#include #include #include #include "mach_apic.h" -#include -#include - #include -#include -#include -#include -#include #include #include -#include #include -#include #include #include -#include #include #include extern int no_broadcast; -#include #include -#include -#include -#include char visws_board_type = -1; char visws_board_rev = -1; -- cgit v1.2.3 From 3964cd3a6721f18ef1dd67b9a0a89dc5b36683b9 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sat, 26 Jul 2008 19:35:20 +0200 Subject: x86: visws_quirks, fix build error MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit fix: arch/x86/kernel/visws_quirks.c: In function ‘visws_early_detect’: arch/x86/kernel/visws_quirks.c:290: error: ‘skip_ioapic_setup’ undeclared (first use in this function) arch/x86/kernel/visws_quirks.c:290: error: (Each undeclared identifier is reported only once arch/x86/kernel/visws_quirks.c:290: error: for each function it appears in.) Signed-off-by: Ingo Molnar --- arch/x86/kernel/visws_quirks.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/visws_quirks.c b/arch/x86/kernel/visws_quirks.c index 0c75691bcf5..3059eb45a91 100644 --- a/arch/x86/kernel/visws_quirks.c +++ b/arch/x86/kernel/visws_quirks.c @@ -25,6 +25,7 @@ #include #include #include +#include #include #include #include -- cgit v1.2.3 From 36a033082b5243d45d508c5ccd47a754edbc6821 Mon Sep 17 00:00:00 2001 From: Roland McGrath Date: Fri, 14 Mar 2008 17:46:38 -0700 Subject: x86: tracehook_signal_handler This makes the x86 signal handling code use tracehook_signal_handler() in place of calling into ptrace guts. The call is moved after the sa_mask processing, but there is no other change. This cleanup doesn't matter to existing debuggers, but is the sensible thing: have all facets of the handler setup complete before the debugger inspects the task again. Signed-off-by: Roland McGrath --- arch/x86/kernel/signal_32.c | 6 ++++-- arch/x86/kernel/signal_64.c | 6 ++++-- 2 files changed, 8 insertions(+), 4 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/signal_32.c b/arch/x86/kernel/signal_32.c index 6fb5bcdd893..22aae1683c1 100644 --- a/arch/x86/kernel/signal_32.c +++ b/arch/x86/kernel/signal_32.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include @@ -558,8 +559,6 @@ handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka, * handler too. */ regs->flags &= ~X86_EFLAGS_TF; - if (test_thread_flag(TIF_SINGLESTEP)) - ptrace_notify(SIGTRAP); spin_lock_irq(¤t->sighand->siglock); sigorsets(¤t->blocked, ¤t->blocked, &ka->sa.sa_mask); @@ -568,6 +567,9 @@ handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka, recalc_sigpending(); spin_unlock_irq(¤t->sighand->siglock); + tracehook_signal_handler(sig, info, ka, regs, + test_thread_flag(TIF_SINGLESTEP)); + return 0; } diff --git a/arch/x86/kernel/signal_64.c b/arch/x86/kernel/signal_64.c index b45ef8ddd65..3beb2db88c5 100644 --- a/arch/x86/kernel/signal_64.c +++ b/arch/x86/kernel/signal_64.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include @@ -444,8 +445,6 @@ handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka, * handler too. */ regs->flags &= ~X86_EFLAGS_TF; - if (test_thread_flag(TIF_SINGLESTEP)) - ptrace_notify(SIGTRAP); spin_lock_irq(¤t->sighand->siglock); sigorsets(¤t->blocked,¤t->blocked,&ka->sa.sa_mask); @@ -453,6 +452,9 @@ handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka, sigaddset(¤t->blocked,sig); recalc_sigpending(); spin_unlock_irq(¤t->sighand->siglock); + + tracehook_signal_handler(sig, info, ka, regs, + test_thread_flag(TIF_SINGLESTEP)); } return ret; -- cgit v1.2.3 From eeea3c3ff8af7f6960a0515d46dff6479bdb91f9 Mon Sep 17 00:00:00 2001 From: Roland McGrath Date: Sun, 16 Mar 2008 23:36:28 -0700 Subject: x86: tracehook syscall This changes x86 syscall tracing to use the new tracehook.h entry points. There is no change, only cleanup. Signed-off-by: Roland McGrath --- arch/x86/kernel/ptrace.c | 34 ++++++---------------------------- 1 file changed, 6 insertions(+), 28 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index e37dccce85d..19a7d2c4056 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include @@ -1375,30 +1376,6 @@ void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs, int error_code) force_sig_info(SIGTRAP, &info, tsk); } -static void syscall_trace(struct pt_regs *regs) -{ - if (!(current->ptrace & PT_PTRACED)) - return; - -#if 0 - printk("trace %s ip %lx sp %lx ax %d origrax %d caller %lx tiflags %x ptrace %x\n", - current->comm, - regs->ip, regs->sp, regs->ax, regs->orig_ax, __builtin_return_address(0), - current_thread_info()->flags, current->ptrace); -#endif - - ptrace_notify(SIGTRAP | ((current->ptrace & PT_TRACESYSGOOD) - ? 0x80 : 0)); - /* - * this isn't the same as continuing with a signal, but it will do - * for normal use. strace only continues with a signal if the - * stopping signal is not SIGTRAP. -brl - */ - if (current->exit_code) { - send_sig(current->exit_code, current, 1); - current->exit_code = 0; - } -} #ifdef CONFIG_X86_32 # define IS_IA32 1 @@ -1432,8 +1409,9 @@ asmregparm long syscall_trace_enter(struct pt_regs *regs) if (unlikely(test_thread_flag(TIF_SYSCALL_EMU))) ret = -1L; - if (ret || test_thread_flag(TIF_SYSCALL_TRACE)) - syscall_trace(regs); + if ((ret || test_thread_flag(TIF_SYSCALL_TRACE)) && + tracehook_report_syscall_entry(regs)) + ret = -1L; if (unlikely(current->audit_context)) { if (IS_IA32) @@ -1459,7 +1437,7 @@ asmregparm void syscall_trace_leave(struct pt_regs *regs) audit_syscall_exit(AUDITSC_RESULT(regs->ax), regs->ax); if (test_thread_flag(TIF_SYSCALL_TRACE)) - syscall_trace(regs); + tracehook_report_syscall_exit(regs, 0); /* * If TIF_SYSCALL_EMU is set, we only get here because of @@ -1475,6 +1453,6 @@ asmregparm void syscall_trace_leave(struct pt_regs *regs) * system call instruction. */ if (test_thread_flag(TIF_SINGLESTEP) && - (current->ptrace & PT_PTRACED)) + tracehook_consider_fatal_signal(current, SIGTRAP, SIG_DFL)) send_sigtrap(current, regs, 0); } -- cgit v1.2.3 From 4dfcbb997aa9f3a6a3ed8c192f0dac28b027e08f Mon Sep 17 00:00:00 2001 From: Roland McGrath Date: Sat, 19 Apr 2008 15:37:09 -0700 Subject: x86 signals: use asm/syscall.h Replace local inlines with the asm/syscall.h interfaces that do the same things. Signed-off-by: Roland McGrath --- arch/x86/kernel/signal_64.c | 38 +++++--------------------------------- 1 file changed, 5 insertions(+), 33 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/signal_64.c b/arch/x86/kernel/signal_64.c index 3beb2db88c5..cb7cf0216ab 100644 --- a/arch/x86/kernel/signal_64.c +++ b/arch/x86/kernel/signal_64.c @@ -27,6 +27,7 @@ #include #include #include +#include #include "sigframe.h" #define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP))) @@ -346,35 +347,6 @@ give_sigsegv: return -EFAULT; } -/* - * Return -1L or the syscall number that @regs is executing. - */ -static long current_syscall(struct pt_regs *regs) -{ - /* - * We always sign-extend a -1 value being set here, - * so this is always either -1L or a syscall number. - */ - return regs->orig_ax; -} - -/* - * Return a value that is -EFOO if the system call in @regs->orig_ax - * returned an error. This only works for @regs from @current. - */ -static long current_syscall_ret(struct pt_regs *regs) -{ -#ifdef CONFIG_IA32_EMULATION - if (test_thread_flag(TIF_IA32)) - /* - * Sign-extend the value so (int)-EFOO becomes (long)-EFOO - * and will match correctly in comparisons. - */ - return (int) regs->ax; -#endif - return regs->ax; -} - /* * OK, we're invoking a handler */ @@ -386,9 +358,9 @@ handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka, int ret; /* Are we from a system call? */ - if (current_syscall(regs) >= 0) { + if (syscall_get_nr(current, regs) >= 0) { /* If so, check system call restarting.. */ - switch (current_syscall_ret(regs)) { + switch (syscall_get_error(current, regs)) { case -ERESTART_RESTARTBLOCK: case -ERESTARTNOHAND: regs->ax = -EINTR; @@ -511,9 +483,9 @@ static void do_signal(struct pt_regs *regs) } /* Did we come from a system call? */ - if (current_syscall(regs) >= 0) { + if (syscall_get_nr(current, regs) >= 0) { /* Restart the system call - no handlers present */ - switch (current_syscall_ret(regs)) { + switch (syscall_get_error(current, regs)) { case -ERESTARTNOHAND: case -ERESTARTSYS: case -ERESTARTNOINTR: -- cgit v1.2.3 From 59e52130f04537d2c80ea44bb007cadd1ad29543 Mon Sep 17 00:00:00 2001 From: Roland McGrath Date: Sat, 19 Apr 2008 19:10:57 -0700 Subject: x86: tracehook: TIF_NOTIFY_RESUME This adds TIF_NOTIFY_RESUME support for x86, both 64-bit and 32-bit. When set, we call tracehook_notify_resume() on the way to user mode. Signed-off-by: Roland McGrath --- arch/x86/kernel/signal_32.c | 5 +++++ arch/x86/kernel/signal_64.c | 5 +++++ 2 files changed, 10 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/signal_32.c b/arch/x86/kernel/signal_32.c index 22aae1683c1..4445d26efd4 100644 --- a/arch/x86/kernel/signal_32.c +++ b/arch/x86/kernel/signal_32.c @@ -663,5 +663,10 @@ do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags) if (thread_info_flags & _TIF_SIGPENDING) do_signal(regs); + if (thread_info_flags & _TIF_NOTIFY_RESUME) { + clear_thread_flag(TIF_NOTIFY_RESUME); + tracehook_notify_resume(regs); + } + clear_thread_flag(TIF_IRET); } diff --git a/arch/x86/kernel/signal_64.c b/arch/x86/kernel/signal_64.c index cb7cf0216ab..d01e3f6ef26 100644 --- a/arch/x86/kernel/signal_64.c +++ b/arch/x86/kernel/signal_64.c @@ -523,6 +523,11 @@ void do_notify_resume(struct pt_regs *regs, void *unused, /* deal with pending signal delivery */ if (thread_info_flags & _TIF_SIGPENDING) do_signal(regs); + + if (thread_info_flags & _TIF_NOTIFY_RESUME) { + clear_thread_flag(TIF_NOTIFY_RESUME); + tracehook_notify_resume(regs); + } } void signal_fault(struct pt_regs *regs, void __user *frame, char *where) -- cgit v1.2.3 From d25ae38b7e005af03843833bbd811ffe8c5f8cb4 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Fri, 25 Jul 2008 19:39:03 -0700 Subject: x86: add apic probe for genapic 64bit - fix intr_remapping_enabled get assigned later, so need to check that in setup_apic_routing Signed-off-by: Yinghai Lu Cc: Jack Steiner Cc: Suresh Siddha Signed-off-by: Ingo Molnar --- arch/x86/kernel/genapic_64.c | 6 ++++++ arch/x86/kernel/genx2apic_cluster.c | 2 +- arch/x86/kernel/genx2apic_phys.c | 2 +- 3 files changed, 8 insertions(+), 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/genapic_64.c b/arch/x86/kernel/genapic_64.c index b3ba969c50d..6c9bfc9e1e9 100644 --- a/arch/x86/kernel/genapic_64.c +++ b/arch/x86/kernel/genapic_64.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include @@ -42,6 +43,11 @@ static struct genapic *apic_probe[] __initdata = { */ void __init setup_apic_routing(void) { + if (genapic == &apic_x2apic_phys || genapic == &apic_x2apic_cluster) { + if (!intr_remapping_enabled) + genapic = &apic_flat; + } + if (genapic == &apic_flat) { if (max_physical_apicid >= 8) genapic = &apic_physflat; diff --git a/arch/x86/kernel/genx2apic_cluster.c b/arch/x86/kernel/genx2apic_cluster.c index ef3f3182d50..fed9f68efd6 100644 --- a/arch/x86/kernel/genx2apic_cluster.c +++ b/arch/x86/kernel/genx2apic_cluster.c @@ -14,7 +14,7 @@ DEFINE_PER_CPU(u32, x86_cpu_to_logical_apicid); static int __init x2apic_acpi_madt_oem_check(char *oem_id, char *oem_table_id) { - if (cpu_has_x2apic && intr_remapping_enabled) + if (cpu_has_x2apic) return 1; return 0; diff --git a/arch/x86/kernel/genx2apic_phys.c b/arch/x86/kernel/genx2apic_phys.c index 3229c68aedd..958d537b4cc 100644 --- a/arch/x86/kernel/genx2apic_phys.c +++ b/arch/x86/kernel/genx2apic_phys.c @@ -21,7 +21,7 @@ early_param("x2apic_phys", set_x2apic_phys_mode); static int __init x2apic_acpi_madt_oem_check(char *oem_id, char *oem_table_id) { - if (cpu_has_x2apic && intr_remapping_enabled && x2apic_phys) + if (cpu_has_x2apic && x2apic_phys) return 1; return 0; -- cgit v1.2.3 From 8cb22bcb1f3ef70d4d48092e9b057175ad9ec78d Mon Sep 17 00:00:00 2001 From: Mark Langsdorf Date: Fri, 18 Jul 2008 16:03:52 -0500 Subject: x86: L3 cache index disable for 2.6.26 New versions of AMD processors have support to disable parts of their L3 caches if too many MCEs are generated by the L3 cache. This patch provides a /sysfs interface under the cache hierarchy to display which caches indices are disabled (if any) and to monitoring applications to disable a cache index. This patch does not set an automatic policy to disable the L3 cache. Policy decisions would need to be made by a RAS handler. This patch merely makes it easier to see what indices are currently disabled. Signed-off-by: Mark Langsdorf Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/intel_cacheinfo.c | 87 ++++++++++++++++++++++++++++++++--- 1 file changed, 81 insertions(+), 6 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c index ff517f0b8cc..d6ea50e270e 100644 --- a/arch/x86/kernel/cpu/intel_cacheinfo.c +++ b/arch/x86/kernel/cpu/intel_cacheinfo.c @@ -16,6 +16,7 @@ #include #include +#include #define LVL_1_INST 1 #define LVL_1_DATA 2 @@ -130,6 +131,7 @@ struct _cpuid4_info { union _cpuid4_leaf_ebx ebx; union _cpuid4_leaf_ecx ecx; unsigned long size; + unsigned long can_disable; cpumask_t shared_cpu_map; /* future?: only cpus/node is needed */ }; @@ -251,6 +253,13 @@ static void __cpuinit amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax, (ebx->split.ways_of_associativity + 1) - 1; } +static void __cpuinit amd_check_l3_disable(int index, struct _cpuid4_info *this_leaf) +{ + if (index < 3) + return; + this_leaf->can_disable = 1; +} + static int __cpuinit cpuid4_cache_lookup(int index, struct _cpuid4_info *this_leaf) { union _cpuid4_leaf_eax eax; @@ -258,9 +267,12 @@ static int __cpuinit cpuid4_cache_lookup(int index, struct _cpuid4_info *this_le union _cpuid4_leaf_ecx ecx; unsigned edx; - if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) + if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) { amd_cpuid4(index, &eax, &ebx, &ecx); - else + if (boot_cpu_data.x86 >= 0x10) + amd_check_l3_disable(index, this_leaf); + + } else cpuid_count(4, index, &eax.full, &ebx.full, &ecx.full, &edx); if (eax.split.type == CACHE_TYPE_NULL) return -EIO; /* better error ? */ @@ -637,6 +649,61 @@ static ssize_t show_type(struct _cpuid4_info *this_leaf, char *buf) { } } +#define to_object(k) container_of(k, struct _index_kobject, kobj) +#define to_attr(a) container_of(a, struct _cache_attr, attr) + +static ssize_t show_cache_disable(struct _cpuid4_info *this_leaf, char *buf) +{ + struct pci_dev *dev; + if (this_leaf->can_disable) { + int i; + ssize_t ret = 0; + int node = cpu_to_node(first_cpu(this_leaf->shared_cpu_map)); + dev = k8_northbridges[node]; + + for (i = 0; i < 2; i++) { + unsigned int reg; + pci_read_config_dword(dev, 0x1BC + i * 4, ®); + ret += sprintf(buf, "%sEntry: %d\n", buf, i); + ret += sprintf(buf, "%sReads: %s\tNew Entries: %s\n", + buf, + reg & 0x80000000 ? "Disabled" : "Allowed", + reg & 0x40000000 ? "Disabled" : "Allowed"); + ret += sprintf(buf, "%sSubCache: %x\tIndex: %x\n", buf, + (reg & 0x30000) >> 16, reg & 0xfff); + + } + return ret; + } + return sprintf(buf, "Feature not enabled\n"); +} + +static ssize_t store_cache_disable(struct _cpuid4_info *this_leaf, const char *buf, size_t count) +{ + struct pci_dev *dev; + if (this_leaf->can_disable) { + /* write the MSR value */ + unsigned int ret; + unsigned int index, val; + int node = cpu_to_node(first_cpu(this_leaf->shared_cpu_map)); + dev = k8_northbridges[node]; + + if (strlen(buf) > 15) + return -EINVAL; + ret = sscanf(buf, "%x %x", &index, &val); + if (ret != 2) + return -EINVAL; + if (index > 1) + return -EINVAL; + val |= 0xc0000000; + pci_write_config_dword(dev, 0x1BC + index * 4, val & ~0x40000000); + wbinvd(); + pci_write_config_dword(dev, 0x1BC + index * 4, val); + return 1; + } + return 0; +} + struct _cache_attr { struct attribute attr; ssize_t (*show)(struct _cpuid4_info *, char *); @@ -657,6 +724,8 @@ define_one_ro(size); define_one_ro(shared_cpu_map); define_one_ro(shared_cpu_list); +static struct _cache_attr cache_disable = __ATTR(cache_disable, 0644, show_cache_disable, store_cache_disable); + static struct attribute * default_attrs[] = { &type.attr, &level.attr, @@ -667,12 +736,10 @@ static struct attribute * default_attrs[] = { &size.attr, &shared_cpu_map.attr, &shared_cpu_list.attr, + &cache_disable.attr, NULL }; -#define to_object(k) container_of(k, struct _index_kobject, kobj) -#define to_attr(a) container_of(a, struct _cache_attr, attr) - static ssize_t show(struct kobject * kobj, struct attribute * attr, char * buf) { struct _cache_attr *fattr = to_attr(attr); @@ -689,7 +756,15 @@ static ssize_t show(struct kobject * kobj, struct attribute * attr, char * buf) static ssize_t store(struct kobject * kobj, struct attribute * attr, const char * buf, size_t count) { - return 0; + struct _cache_attr *fattr = to_attr(attr); + struct _index_kobject *this_leaf = to_object(kobj); + ssize_t ret; + + ret = fattr->store ? + fattr->store(CPUID4_INFO_IDX(this_leaf->cpu, this_leaf->index), + buf, count) : + 0; + return ret; } static struct sysfs_ops sysfs_ops = { -- cgit v1.2.3 From 7a4983bb5f94f6521aa3236fe5c035cf9bef543f Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 21 Jul 2008 13:34:21 +0200 Subject: x86: L3 cache index disable for 2.6.26, cleanups No change in functionality. Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/intel_cacheinfo.c | 115 ++++++++++++++++++---------------- 1 file changed, 61 insertions(+), 54 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c index d6ea50e270e..a61c9e399ba 100644 --- a/arch/x86/kernel/cpu/intel_cacheinfo.c +++ b/arch/x86/kernel/cpu/intel_cacheinfo.c @@ -253,14 +253,16 @@ static void __cpuinit amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax, (ebx->split.ways_of_associativity + 1) - 1; } -static void __cpuinit amd_check_l3_disable(int index, struct _cpuid4_info *this_leaf) +static void __cpuinit +amd_check_l3_disable(int index, struct _cpuid4_info *this_leaf) { if (index < 3) return; this_leaf->can_disable = 1; } -static int __cpuinit cpuid4_cache_lookup(int index, struct _cpuid4_info *this_leaf) +static int +__cpuinit cpuid4_cache_lookup(int index, struct _cpuid4_info *this_leaf) { union _cpuid4_leaf_eax eax; union _cpuid4_leaf_ebx ebx; @@ -271,19 +273,20 @@ static int __cpuinit cpuid4_cache_lookup(int index, struct _cpuid4_info *this_le amd_cpuid4(index, &eax, &ebx, &ecx); if (boot_cpu_data.x86 >= 0x10) amd_check_l3_disable(index, this_leaf); - - } else - cpuid_count(4, index, &eax.full, &ebx.full, &ecx.full, &edx); + } else { + cpuid_count(4, index, &eax.full, &ebx.full, &ecx.full, &edx); + } + if (eax.split.type == CACHE_TYPE_NULL) return -EIO; /* better error ? */ this_leaf->eax = eax; this_leaf->ebx = ebx; this_leaf->ecx = ecx; - this_leaf->size = (ecx.split.number_of_sets + 1) * - (ebx.split.coherency_line_size + 1) * - (ebx.split.physical_line_partition + 1) * - (ebx.split.ways_of_associativity + 1); + this_leaf->size = (ecx.split.number_of_sets + 1) * + (ebx.split.coherency_line_size + 1) * + (ebx.split.physical_line_partition + 1) * + (ebx.split.ways_of_associativity + 1); return 0; } @@ -649,59 +652,63 @@ static ssize_t show_type(struct _cpuid4_info *this_leaf, char *buf) { } } -#define to_object(k) container_of(k, struct _index_kobject, kobj) -#define to_attr(a) container_of(a, struct _cache_attr, attr) +#define to_object(k) container_of(k, struct _index_kobject, kobj) +#define to_attr(a) container_of(a, struct _cache_attr, attr) static ssize_t show_cache_disable(struct _cpuid4_info *this_leaf, char *buf) { - struct pci_dev *dev; - if (this_leaf->can_disable) { - int i; - ssize_t ret = 0; - int node = cpu_to_node(first_cpu(this_leaf->shared_cpu_map)); - dev = k8_northbridges[node]; - - for (i = 0; i < 2; i++) { - unsigned int reg; - pci_read_config_dword(dev, 0x1BC + i * 4, ®); - ret += sprintf(buf, "%sEntry: %d\n", buf, i); - ret += sprintf(buf, "%sReads: %s\tNew Entries: %s\n", - buf, - reg & 0x80000000 ? "Disabled" : "Allowed", - reg & 0x40000000 ? "Disabled" : "Allowed"); - ret += sprintf(buf, "%sSubCache: %x\tIndex: %x\n", buf, - (reg & 0x30000) >> 16, reg & 0xfff); + int node = cpu_to_node(first_cpu(this_leaf->shared_cpu_map)); + struct pci_dev *dev = k8_northbridges[node]; + ssize_t ret = 0; + int i; - } - return ret; + if (!this_leaf->can_disable) + return sprintf(buf, "Feature not enabled\n"); + + for (i = 0; i < 2; i++) { + unsigned int reg; + + pci_read_config_dword(dev, 0x1BC + i * 4, ®); + + ret += sprintf(buf, "%sEntry: %d\n", buf, i); + ret += sprintf(buf, "%sReads: %s\tNew Entries: %s\n", + buf, + reg & 0x80000000 ? "Disabled" : "Allowed", + reg & 0x40000000 ? "Disabled" : "Allowed"); + ret += sprintf(buf, "%sSubCache: %x\tIndex: %x\n", + buf, (reg & 0x30000) >> 16, reg & 0xfff); } - return sprintf(buf, "Feature not enabled\n"); + return ret; } -static ssize_t store_cache_disable(struct _cpuid4_info *this_leaf, const char *buf, size_t count) +static ssize_t +store_cache_disable(struct _cpuid4_info *this_leaf, const char *buf, + size_t count) { - struct pci_dev *dev; - if (this_leaf->can_disable) { - /* write the MSR value */ - unsigned int ret; - unsigned int index, val; - int node = cpu_to_node(first_cpu(this_leaf->shared_cpu_map)); - dev = k8_northbridges[node]; - - if (strlen(buf) > 15) - return -EINVAL; - ret = sscanf(buf, "%x %x", &index, &val); - if (ret != 2) - return -EINVAL; - if (index > 1) - return -EINVAL; - val |= 0xc0000000; - pci_write_config_dword(dev, 0x1BC + index * 4, val & ~0x40000000); - wbinvd(); - pci_write_config_dword(dev, 0x1BC + index * 4, val); - return 1; - } - return 0; + int node = cpu_to_node(first_cpu(this_leaf->shared_cpu_map)); + struct pci_dev *dev = k8_northbridges[node]; + unsigned int ret, index, val; + + if (!this_leaf->can_disable) + return 0; + + /* write the MSR value */ + + if (strlen(buf) > 15) + return -EINVAL; + + ret = sscanf(buf, "%x %x", &index, &val); + if (ret != 2) + return -EINVAL; + if (index > 1) + return -EINVAL; + + val |= 0xc0000000; + pci_write_config_dword(dev, 0x1BC + index * 4, val & ~0x40000000); + wbinvd(); + pci_write_config_dword(dev, 0x1BC + index * 4, val); + + return 1; } struct _cache_attr { -- cgit v1.2.3 From a24e8d36f5fc047dac9af6200322ed393f2e3175 Mon Sep 17 00:00:00 2001 From: Mark Langsdorf Date: Tue, 22 Jul 2008 13:06:02 -0500 Subject: x86: L3 cache index disable for 2.6.26 On Monday 21 July 2008, Ingo Molnar wrote: > > applied to tip/x86/cpu, thanks Mark. > > > > I've done some coding style fixes for the new functions you've > > introduced, see that commit below. > > -tip testing found the following build failure: > > arch/x86/kernel/built-in.o: In function `show_cache_disable': > intel_cacheinfo.c:(.text+0xbbf2): undefined reference to `k8_northbridges' > arch/x86/kernel/built-in.o: In function `store_cache_disable': > intel_cacheinfo.c:(.text+0xbd91): undefined reference to `k8_northbridges' > > please send a delta fix patch against the tip/x86/cpu branch: > > http://people.redhat.com/mingo/tip.git/README > > which has your patch plus the cleanup applied. delta fix patch follows. It removes the dependency on k8_northbridges. -Mark Langsdorf Operating System Research Center AMD Signed-off-by: Mark Langsdorf Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/intel_cacheinfo.c | 43 +++++++++++++++++++++++++++++++---- 1 file changed, 38 insertions(+), 5 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c index a61c9e399ba..a0c6c6ffed4 100644 --- a/arch/x86/kernel/cpu/intel_cacheinfo.c +++ b/arch/x86/kernel/cpu/intel_cacheinfo.c @@ -13,10 +13,10 @@ #include #include #include +#include #include #include -#include #define LVL_1_INST 1 #define LVL_1_DATA 2 @@ -135,6 +135,12 @@ struct _cpuid4_info { cpumask_t shared_cpu_map; /* future?: only cpus/node is needed */ }; +static struct pci_device_id k8_nb_id[] = { + { PCI_DEVICE(PCI_VENDOR_ID_AMD, 0x1103) }, + { PCI_DEVICE(PCI_VENDOR_ID_AMD, 0x1203) }, + {} +}; + unsigned short num_cache_leaves; /* AMD doesn't have CPUID4. Emulate it here to report the same @@ -655,16 +661,39 @@ static ssize_t show_type(struct _cpuid4_info *this_leaf, char *buf) { #define to_object(k) container_of(k, struct _index_kobject, kobj) #define to_attr(a) container_of(a, struct _cache_attr, attr) +static struct pci_dev *get_k8_northbridge(int node) +{ + struct pci_dev *dev = NULL; + int i; + + for (i = 0; i <= node; i++) { + do { + dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev); + if (!dev) + break; + } while (!pci_match_id(&k8_nb_id[0], dev)); + if (!dev) + break; + } + return dev; +} + static ssize_t show_cache_disable(struct _cpuid4_info *this_leaf, char *buf) { int node = cpu_to_node(first_cpu(this_leaf->shared_cpu_map)); - struct pci_dev *dev = k8_northbridges[node]; + struct pci_dev *dev = NULL; ssize_t ret = 0; int i; if (!this_leaf->can_disable) return sprintf(buf, "Feature not enabled\n"); + dev = get_k8_northbridge(node); + if (!dev) { + printk(KERN_ERR "Attempting AMD northbridge operation on a system with no northbridge\n"); + return -EINVAL; + } + for (i = 0; i < 2; i++) { unsigned int reg; @@ -686,14 +715,12 @@ store_cache_disable(struct _cpuid4_info *this_leaf, const char *buf, size_t count) { int node = cpu_to_node(first_cpu(this_leaf->shared_cpu_map)); - struct pci_dev *dev = k8_northbridges[node]; + struct pci_dev *dev = NULL; unsigned int ret, index, val; if (!this_leaf->can_disable) return 0; - /* write the MSR value */ - if (strlen(buf) > 15) return -EINVAL; @@ -704,6 +731,12 @@ store_cache_disable(struct _cpuid4_info *this_leaf, const char *buf, return -EINVAL; val |= 0xc0000000; + dev = get_k8_northbridge(node); + if (!dev) { + printk(KERN_ERR "Attempting AMD northbridge operation on a system with no northbridge\n"); + return -EINVAL; + } + pci_write_config_dword(dev, 0x1BC + index * 4, val & ~0x40000000); wbinvd(); pci_write_config_dword(dev, 0x1BC + index * 4, val); -- cgit v1.2.3 From cdcf772ed163651cacac8098b4974aba7f9e1c73 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 28 Jul 2008 16:20:08 +0200 Subject: x86 l3 cache index disable for 2 6 26 fix Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/intel_cacheinfo.c | 39 ++++++++++++++++++----------------- 1 file changed, 20 insertions(+), 19 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c index a0c6c6ffed4..535d662716d 100644 --- a/arch/x86/kernel/cpu/intel_cacheinfo.c +++ b/arch/x86/kernel/cpu/intel_cacheinfo.c @@ -1,8 +1,8 @@ /* - * Routines to indentify caches on Intel CPU. + * Routines to indentify caches on Intel CPU. * - * Changes: - * Venkatesh Pallipadi : Adding cache identification through cpuid(4) + * Changes: + * Venkatesh Pallipadi : Adding cache identification through cpuid(4) * Ashok Raj : Work with CPU hotplug infrastructure. * Andi Kleen / Andreas Herrmann : CPUID4 emulation on AMD. */ @@ -136,9 +136,9 @@ struct _cpuid4_info { }; static struct pci_device_id k8_nb_id[] = { - { PCI_DEVICE(PCI_VENDOR_ID_AMD, 0x1103) }, - { PCI_DEVICE(PCI_VENDOR_ID_AMD, 0x1203) }, - {} + { PCI_DEVICE(PCI_VENDOR_ID_AMD, 0x1103) }, + { PCI_DEVICE(PCI_VENDOR_ID_AMD, 0x1203) }, + {} }; unsigned short num_cache_leaves; @@ -190,9 +190,10 @@ static unsigned short assocs[] __cpuinitdata = { static unsigned char levels[] __cpuinitdata = { 1, 1, 2, 3 }; static unsigned char types[] __cpuinitdata = { 1, 2, 3, 3 }; -static void __cpuinit amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax, - union _cpuid4_leaf_ebx *ebx, - union _cpuid4_leaf_ecx *ecx) +static void __cpuinit +amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax, + union _cpuid4_leaf_ebx *ebx, + union _cpuid4_leaf_ecx *ecx) { unsigned dummy; unsigned line_size, lines_per_tag, assoc, size_in_kb; @@ -264,7 +265,7 @@ amd_check_l3_disable(int index, struct _cpuid4_info *this_leaf) { if (index < 3) return; - this_leaf->can_disable = 1; + this_leaf->can_disable = 1; } static int @@ -474,7 +475,7 @@ unsigned int __cpuinit init_intel_cacheinfo(struct cpuinfo_x86 *c) /* pointer to _cpuid4_info array (for each cache leaf) */ static DEFINE_PER_CPU(struct _cpuid4_info *, cpuid4_info); -#define CPUID4_INFO_IDX(x, y) (&((per_cpu(cpuid4_info, x))[y])) +#define CPUID4_INFO_IDX(x, y) (&((per_cpu(cpuid4_info, x))[y])) #ifdef CONFIG_SMP static void __cpuinit cache_shared_cpu_map_setup(unsigned int cpu, int index) @@ -511,7 +512,7 @@ static void __cpuinit cache_remove_shared_cpu_map(unsigned int cpu, int index) this_leaf = CPUID4_INFO_IDX(cpu, index); for_each_cpu_mask(sibling, this_leaf->shared_cpu_map) { - sibling_leaf = CPUID4_INFO_IDX(sibling, index); + sibling_leaf = CPUID4_INFO_IDX(sibling, index); cpu_clear(cpu, sibling_leaf->shared_cpu_map); } } @@ -593,7 +594,7 @@ struct _index_kobject { /* pointer to array of kobjects for cpuX/cache/indexY */ static DEFINE_PER_CPU(struct _index_kobject *, index_kobject); -#define INDEX_KOBJECT_PTR(x, y) (&((per_cpu(index_kobject, x))[y])) +#define INDEX_KOBJECT_PTR(x, y) (&((per_cpu(index_kobject, x))[y])) #define show_one_plus(file_name, object, val) \ static ssize_t show_##file_name \ @@ -675,7 +676,7 @@ static struct pci_dev *get_k8_northbridge(int node) if (!dev) break; } - return dev; + return dev; } static ssize_t show_cache_disable(struct _cpuid4_info *this_leaf, char *buf) @@ -736,7 +737,7 @@ store_cache_disable(struct _cpuid4_info *this_leaf, const char *buf, printk(KERN_ERR "Attempting AMD northbridge operation on a system with no northbridge\n"); return -EINVAL; } - + pci_write_config_dword(dev, 0x1BC + index * 4, val & ~0x40000000); wbinvd(); pci_write_config_dword(dev, 0x1BC + index * 4, val); @@ -789,7 +790,7 @@ static ssize_t show(struct kobject * kobj, struct attribute * attr, char * buf) ret = fattr->show ? fattr->show(CPUID4_INFO_IDX(this_leaf->cpu, this_leaf->index), buf) : - 0; + 0; return ret; } @@ -800,9 +801,9 @@ static ssize_t store(struct kobject * kobj, struct attribute * attr, struct _index_kobject *this_leaf = to_object(kobj); ssize_t ret; - ret = fattr->store ? - fattr->store(CPUID4_INFO_IDX(this_leaf->cpu, this_leaf->index), - buf, count) : + ret = fattr->store ? + fattr->store(CPUID4_INFO_IDX(this_leaf->cpu, this_leaf->index), + buf, count) : 0; return ret; } -- cgit v1.2.3 From 239bd83104ec6bcba90221d8b0973d2565142ef8 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 28 Jul 2008 16:45:49 +0200 Subject: x86: L3 cache index disable for 2.6.26, fix #2 fix !PCI build failure: arch/x86/kernel/cpu/intel_cacheinfo.c: In function 'get_k8_northbridge': arch/x86/kernel/cpu/intel_cacheinfo.c:675: error: implicit declaration of function 'pci_match_id' Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/intel_cacheinfo.c | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c index d763d24187c..1677b55371a 100644 --- a/arch/x86/kernel/cpu/intel_cacheinfo.c +++ b/arch/x86/kernel/cpu/intel_cacheinfo.c @@ -135,11 +135,13 @@ struct _cpuid4_info { cpumask_t shared_cpu_map; /* future?: only cpus/node is needed */ }; +#ifdef CONFIG_PCI static struct pci_device_id k8_nb_id[] = { { PCI_DEVICE(PCI_VENDOR_ID_AMD, 0x1103) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, 0x1203) }, {} }; +#endif unsigned short num_cache_leaves; @@ -663,6 +665,7 @@ static ssize_t show_type(struct _cpuid4_info *this_leaf, char *buf) { #define to_object(k) container_of(k, struct _index_kobject, kobj) #define to_attr(a) container_of(a, struct _cache_attr, attr) +#ifdef CONFIG_PCI static struct pci_dev *get_k8_northbridge(int node) { struct pci_dev *dev = NULL; @@ -679,6 +682,12 @@ static struct pci_dev *get_k8_northbridge(int node) } return dev; } +#else +static struct pci_dev *get_k8_northbridge(int node) +{ + return NULL; +} +#endif static ssize_t show_cache_disable(struct _cpuid4_info *this_leaf, char *buf) { -- cgit v1.2.3 From 9a56a0f80b52cb41c5e0add47c7ce0bb2ef25eb0 Mon Sep 17 00:00:00 2001 From: Peter Oruba Date: Mon, 28 Jul 2008 18:44:13 +0200 Subject: x86: moved Intel microcode patch loader declarations to seperate header file Intel specific microcode declarations have been moved to a seperate header file. There are no code changes to the code itself and no side effects to other parts. Signed-off-by: Peter Oruba Cc: Tigran Aivazian Signed-off-by: Ingo Molnar --- arch/x86/kernel/microcode.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/microcode.c b/arch/x86/kernel/microcode.c index 6994c751590..0d654bd3292 100644 --- a/arch/x86/kernel/microcode.c +++ b/arch/x86/kernel/microcode.c @@ -93,6 +93,7 @@ #include #include #include +#include MODULE_DESCRIPTION("Intel CPU (IA-32) Microcode Update Driver"); MODULE_AUTHOR("Tigran Aivazian "); -- cgit v1.2.3 From 8e61028dfdc6b8ca996abfe8f9baef6792ea2904 Mon Sep 17 00:00:00 2001 From: Peter Oruba Date: Mon, 28 Jul 2008 18:44:14 +0200 Subject: x86: typedef removal Removed typedefs. No functional changes to the code. Signed-off-by: Peter Oruba Cc: Tigran Aivazian Signed-off-by: Ingo Molnar --- arch/x86/kernel/microcode.c | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/microcode.c b/arch/x86/kernel/microcode.c index 0d654bd3292..74e6a77bf19 100644 --- a/arch/x86/kernel/microcode.c +++ b/arch/x86/kernel/microcode.c @@ -102,17 +102,17 @@ MODULE_LICENSE("GPL"); #define MICROCODE_VERSION "1.14a" #define DEFAULT_UCODE_DATASIZE (2000) /* 2000 bytes */ -#define MC_HEADER_SIZE (sizeof (microcode_header_t)) /* 48 bytes */ +#define MC_HEADER_SIZE (sizeof (struct microcode_header)) /* 48 bytes */ #define DEFAULT_UCODE_TOTALSIZE (DEFAULT_UCODE_DATASIZE + MC_HEADER_SIZE) /* 2048 bytes */ #define EXT_HEADER_SIZE (sizeof (struct extended_sigtable)) /* 20 bytes */ #define EXT_SIGNATURE_SIZE (sizeof (struct extended_signature)) /* 12 bytes */ #define DWSIZE (sizeof (u32)) #define get_totalsize(mc) \ - (((microcode_t *)mc)->hdr.totalsize ? \ - ((microcode_t *)mc)->hdr.totalsize : DEFAULT_UCODE_TOTALSIZE) + (((struct microcode *)mc)->hdr.totalsize ? \ + ((struct microcode *)mc)->hdr.totalsize : DEFAULT_UCODE_TOTALSIZE) #define get_datasize(mc) \ - (((microcode_t *)mc)->hdr.datasize ? \ - ((microcode_t *)mc)->hdr.datasize : DEFAULT_UCODE_DATASIZE) + (((struct microcode *)mc)->hdr.datasize ? \ + ((struct microcode *)mc)->hdr.datasize : DEFAULT_UCODE_DATASIZE) #define sigmatch(s1, s2, p1, p2) \ (((s1) == (s2)) && (((p1) & (p2)) || (((p1) == 0) && ((p2) == 0)))) @@ -130,7 +130,7 @@ static struct ucode_cpu_info { unsigned int sig; unsigned int pf; unsigned int rev; - microcode_t *mc; + struct microcode *mc; } ucode_cpu_info[NR_CPUS]; static void collect_cpu_info(int cpu_num) @@ -171,7 +171,7 @@ static void collect_cpu_info(int cpu_num) } static inline int microcode_update_match(int cpu_num, - microcode_header_t *mc_header, int sig, int pf) + struct microcode_header *mc_header, int sig, int pf) { struct ucode_cpu_info *uci = ucode_cpu_info + cpu_num; @@ -183,7 +183,7 @@ static inline int microcode_update_match(int cpu_num, static int microcode_sanity_check(void *mc) { - microcode_header_t *mc_header = mc; + struct microcode_header *mc_header = mc; struct extended_sigtable *ext_header = NULL; struct extended_signature *ext_sig; unsigned long total_size, data_size, ext_table_size; @@ -268,7 +268,7 @@ static int microcode_sanity_check(void *mc) static int get_maching_microcode(void *mc, int cpu) { struct ucode_cpu_info *uci = ucode_cpu_info + cpu; - microcode_header_t *mc_header = mc; + struct microcode_header *mc_header = mc; struct extended_sigtable *ext_header; unsigned long total_size = get_totalsize(mc_header); int ext_sigcount, i; @@ -355,7 +355,7 @@ static unsigned int user_buffer_size; /* it's size */ static long get_next_ucode(void **mc, long offset) { - microcode_header_t mc_header; + struct microcode_header mc_header; unsigned long total_size; /* No more data */ @@ -497,13 +497,13 @@ MODULE_ALIAS_MISCDEV(MICROCODE_MINOR); static long get_next_ucode_from_buffer(void **mc, const u8 *buf, unsigned long size, long offset) { - microcode_header_t *mc_header; + struct microcode_header *mc_header; unsigned long total_size; /* No more data */ if (offset >= size) return 0; - mc_header = (microcode_header_t *)(buf + offset); + mc_header = (struct microcode_header *)(buf + offset); total_size = get_totalsize(mc_header); if (offset + total_size > size) { -- cgit v1.2.3 From c3b71bcec0380836caac9b524fa1585b469b7456 Mon Sep 17 00:00:00 2001 From: Peter Oruba Date: Mon, 28 Jul 2008 18:44:15 +0200 Subject: x86: move per CPU microcode structure declaration to header file This structure will be later used by other modules as well and needs therfore to be moved out to a header file. Signed-off-by: Peter Oruba Cc: Tigran Aivazian Signed-off-by: Ingo Molnar --- arch/x86/kernel/microcode.c | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/microcode.c b/arch/x86/kernel/microcode.c index 74e6a77bf19..4e7b2f65fed 100644 --- a/arch/x86/kernel/microcode.c +++ b/arch/x86/kernel/microcode.c @@ -125,13 +125,7 @@ static DEFINE_SPINLOCK(microcode_update_lock); /* no concurrent ->write()s are allowed on /dev/cpu/microcode */ static DEFINE_MUTEX(microcode_mutex); -static struct ucode_cpu_info { - int valid; - unsigned int sig; - unsigned int pf; - unsigned int rev; - struct microcode *mc; -} ucode_cpu_info[NR_CPUS]; +static struct ucode_cpu_info ucode_cpu_info[NR_CPUS]; static void collect_cpu_info(int cpu_num) { -- cgit v1.2.3 From 1abae31096007cc993f67ae2576fe8f812270ad6 Mon Sep 17 00:00:00 2001 From: Peter Oruba Date: Mon, 28 Jul 2008 18:44:16 +0200 Subject: x86: move microcode.c to microcode_intel.c Signed-off-by: Peter Oruba Cc: Tigran Aivazian Signed-off-by: Ingo Molnar --- arch/x86/kernel/Makefile | 2 +- arch/x86/kernel/microcode.c | 855 -------------------------------------- arch/x86/kernel/microcode_intel.c | 855 ++++++++++++++++++++++++++++++++++++++ 3 files changed, 856 insertions(+), 856 deletions(-) delete mode 100644 arch/x86/kernel/microcode.c create mode 100644 arch/x86/kernel/microcode_intel.c (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 3db651fc8ec..a9be2a0dde8 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -51,7 +51,7 @@ obj-$(CONFIG_X86_BIOS_REBOOT) += reboot.o obj-$(CONFIG_MCA) += mca_32.o obj-$(CONFIG_X86_MSR) += msr.o obj-$(CONFIG_X86_CPUID) += cpuid.o -obj-$(CONFIG_MICROCODE) += microcode.o +obj-$(CONFIG_MICROCODE) += microcode_intel.o obj-$(CONFIG_PCI) += early-quirks.o apm-y := apm_32.o obj-$(CONFIG_APM) += apm.o diff --git a/arch/x86/kernel/microcode.c b/arch/x86/kernel/microcode.c deleted file mode 100644 index 4e7b2f65fed..00000000000 --- a/arch/x86/kernel/microcode.c +++ /dev/null @@ -1,855 +0,0 @@ -/* - * Intel CPU Microcode Update Driver for Linux - * - * Copyright (C) 2000-2006 Tigran Aivazian - * 2006 Shaohua Li - * - * This driver allows to upgrade microcode on Intel processors - * belonging to IA-32 family - PentiumPro, Pentium II, - * Pentium III, Xeon, Pentium 4, etc. - * - * Reference: Section 8.11 of Volume 3a, IA-32 Intel? Architecture - * Software Developer's Manual - * Order Number 253668 or free download from: - * - * http://developer.intel.com/design/pentium4/manuals/253668.htm - * - * For more information, go to http://www.urbanmyth.org/microcode - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - * - * 1.0 16 Feb 2000, Tigran Aivazian - * Initial release. - * 1.01 18 Feb 2000, Tigran Aivazian - * Added read() support + cleanups. - * 1.02 21 Feb 2000, Tigran Aivazian - * Added 'device trimming' support. open(O_WRONLY) zeroes - * and frees the saved copy of applied microcode. - * 1.03 29 Feb 2000, Tigran Aivazian - * Made to use devfs (/dev/cpu/microcode) + cleanups. - * 1.04 06 Jun 2000, Simon Trimmer - * Added misc device support (now uses both devfs and misc). - * Added MICROCODE_IOCFREE ioctl to clear memory. - * 1.05 09 Jun 2000, Simon Trimmer - * Messages for error cases (non Intel & no suitable microcode). - * 1.06 03 Aug 2000, Tigran Aivazian - * Removed ->release(). Removed exclusive open and status bitmap. - * Added microcode_rwsem to serialize read()/write()/ioctl(). - * Removed global kernel lock usage. - * 1.07 07 Sep 2000, Tigran Aivazian - * Write 0 to 0x8B msr and then cpuid before reading revision, - * so that it works even if there were no update done by the - * BIOS. Otherwise, reading from 0x8B gives junk (which happened - * to be 0 on my machine which is why it worked even when I - * disabled update by the BIOS) - * Thanks to Eric W. Biederman for the fix. - * 1.08 11 Dec 2000, Richard Schaal and - * Tigran Aivazian - * Intel Pentium 4 processor support and bugfixes. - * 1.09 30 Oct 2001, Tigran Aivazian - * Bugfix for HT (Hyper-Threading) enabled processors - * whereby processor resources are shared by all logical processors - * in a single CPU package. - * 1.10 28 Feb 2002 Asit K Mallick and - * Tigran Aivazian , - * Serialize updates as required on HT processors due to speculative - * nature of implementation. - * 1.11 22 Mar 2002 Tigran Aivazian - * Fix the panic when writing zero-length microcode chunk. - * 1.12 29 Sep 2003 Nitin Kamble , - * Jun Nakajima - * Support for the microcode updates in the new format. - * 1.13 10 Oct 2003 Tigran Aivazian - * Removed ->read() method and obsoleted MICROCODE_IOCFREE ioctl - * because we no longer hold a copy of applied microcode - * in kernel memory. - * 1.14 25 Jun 2004 Tigran Aivazian - * Fix sigmatch() macro to handle old CPUs with pf == 0. - * Thanks to Stuart Swales for pointing out this bug. - */ - -//#define DEBUG /* pr_debug */ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include - -MODULE_DESCRIPTION("Intel CPU (IA-32) Microcode Update Driver"); -MODULE_AUTHOR("Tigran Aivazian "); -MODULE_LICENSE("GPL"); - -#define MICROCODE_VERSION "1.14a" - -#define DEFAULT_UCODE_DATASIZE (2000) /* 2000 bytes */ -#define MC_HEADER_SIZE (sizeof (struct microcode_header)) /* 48 bytes */ -#define DEFAULT_UCODE_TOTALSIZE (DEFAULT_UCODE_DATASIZE + MC_HEADER_SIZE) /* 2048 bytes */ -#define EXT_HEADER_SIZE (sizeof (struct extended_sigtable)) /* 20 bytes */ -#define EXT_SIGNATURE_SIZE (sizeof (struct extended_signature)) /* 12 bytes */ -#define DWSIZE (sizeof (u32)) -#define get_totalsize(mc) \ - (((struct microcode *)mc)->hdr.totalsize ? \ - ((struct microcode *)mc)->hdr.totalsize : DEFAULT_UCODE_TOTALSIZE) -#define get_datasize(mc) \ - (((struct microcode *)mc)->hdr.datasize ? \ - ((struct microcode *)mc)->hdr.datasize : DEFAULT_UCODE_DATASIZE) - -#define sigmatch(s1, s2, p1, p2) \ - (((s1) == (s2)) && (((p1) & (p2)) || (((p1) == 0) && ((p2) == 0)))) - -#define exttable_size(et) ((et)->count * EXT_SIGNATURE_SIZE + EXT_HEADER_SIZE) - -/* serialize access to the physical write to MSR 0x79 */ -static DEFINE_SPINLOCK(microcode_update_lock); - -/* no concurrent ->write()s are allowed on /dev/cpu/microcode */ -static DEFINE_MUTEX(microcode_mutex); - -static struct ucode_cpu_info ucode_cpu_info[NR_CPUS]; - -static void collect_cpu_info(int cpu_num) -{ - struct cpuinfo_x86 *c = &cpu_data(cpu_num); - struct ucode_cpu_info *uci = ucode_cpu_info + cpu_num; - unsigned int val[2]; - - /* We should bind the task to the CPU */ - BUG_ON(raw_smp_processor_id() != cpu_num); - uci->pf = uci->rev = 0; - uci->mc = NULL; - uci->valid = 1; - - if (c->x86_vendor != X86_VENDOR_INTEL || c->x86 < 6 || - cpu_has(c, X86_FEATURE_IA64)) { - printk(KERN_ERR "microcode: CPU%d not a capable Intel " - "processor\n", cpu_num); - uci->valid = 0; - return; - } - - uci->sig = cpuid_eax(0x00000001); - - if ((c->x86_model >= 5) || (c->x86 > 6)) { - /* get processor flags from MSR 0x17 */ - rdmsr(MSR_IA32_PLATFORM_ID, val[0], val[1]); - uci->pf = 1 << ((val[1] >> 18) & 7); - } - - wrmsr(MSR_IA32_UCODE_REV, 0, 0); - /* see notes above for revision 1.07. Apparent chip bug */ - sync_core(); - /* get the current revision from MSR 0x8B */ - rdmsr(MSR_IA32_UCODE_REV, val[0], uci->rev); - pr_debug("microcode: collect_cpu_info : sig=0x%x, pf=0x%x, rev=0x%x\n", - uci->sig, uci->pf, uci->rev); -} - -static inline int microcode_update_match(int cpu_num, - struct microcode_header *mc_header, int sig, int pf) -{ - struct ucode_cpu_info *uci = ucode_cpu_info + cpu_num; - - if (!sigmatch(sig, uci->sig, pf, uci->pf) - || mc_header->rev <= uci->rev) - return 0; - return 1; -} - -static int microcode_sanity_check(void *mc) -{ - struct microcode_header *mc_header = mc; - struct extended_sigtable *ext_header = NULL; - struct extended_signature *ext_sig; - unsigned long total_size, data_size, ext_table_size; - int sum, orig_sum, ext_sigcount = 0, i; - - total_size = get_totalsize(mc_header); - data_size = get_datasize(mc_header); - if (data_size + MC_HEADER_SIZE > total_size) { - printk(KERN_ERR "microcode: error! " - "Bad data size in microcode data file\n"); - return -EINVAL; - } - - if (mc_header->ldrver != 1 || mc_header->hdrver != 1) { - printk(KERN_ERR "microcode: error! " - "Unknown microcode update format\n"); - return -EINVAL; - } - ext_table_size = total_size - (MC_HEADER_SIZE + data_size); - if (ext_table_size) { - if ((ext_table_size < EXT_HEADER_SIZE) - || ((ext_table_size - EXT_HEADER_SIZE) % EXT_SIGNATURE_SIZE)) { - printk(KERN_ERR "microcode: error! " - "Small exttable size in microcode data file\n"); - return -EINVAL; - } - ext_header = mc + MC_HEADER_SIZE + data_size; - if (ext_table_size != exttable_size(ext_header)) { - printk(KERN_ERR "microcode: error! " - "Bad exttable size in microcode data file\n"); - return -EFAULT; - } - ext_sigcount = ext_header->count; - } - - /* check extended table checksum */ - if (ext_table_size) { - int ext_table_sum = 0; - int *ext_tablep = (int *)ext_header; - - i = ext_table_size / DWSIZE; - while (i--) - ext_table_sum += ext_tablep[i]; - if (ext_table_sum) { - printk(KERN_WARNING "microcode: aborting, " - "bad extended signature table checksum\n"); - return -EINVAL; - } - } - - /* calculate the checksum */ - orig_sum = 0; - i = (MC_HEADER_SIZE + data_size) / DWSIZE; - while (i--) - orig_sum += ((int *)mc)[i]; - if (orig_sum) { - printk(KERN_ERR "microcode: aborting, bad checksum\n"); - return -EINVAL; - } - if (!ext_table_size) - return 0; - /* check extended signature checksum */ - for (i = 0; i < ext_sigcount; i++) { - ext_sig = (void *)ext_header + EXT_HEADER_SIZE + - EXT_SIGNATURE_SIZE * i; - sum = orig_sum - - (mc_header->sig + mc_header->pf + mc_header->cksum) - + (ext_sig->sig + ext_sig->pf + ext_sig->cksum); - if (sum) { - printk(KERN_ERR "microcode: aborting, bad checksum\n"); - return -EINVAL; - } - } - return 0; -} - -/* - * return 0 - no update found - * return 1 - found update - * return < 0 - error - */ -static int get_maching_microcode(void *mc, int cpu) -{ - struct ucode_cpu_info *uci = ucode_cpu_info + cpu; - struct microcode_header *mc_header = mc; - struct extended_sigtable *ext_header; - unsigned long total_size = get_totalsize(mc_header); - int ext_sigcount, i; - struct extended_signature *ext_sig; - void *new_mc; - - if (microcode_update_match(cpu, mc_header, - mc_header->sig, mc_header->pf)) - goto find; - - if (total_size <= get_datasize(mc_header) + MC_HEADER_SIZE) - return 0; - - ext_header = mc + get_datasize(mc_header) + MC_HEADER_SIZE; - ext_sigcount = ext_header->count; - ext_sig = (void *)ext_header + EXT_HEADER_SIZE; - for (i = 0; i < ext_sigcount; i++) { - if (microcode_update_match(cpu, mc_header, - ext_sig->sig, ext_sig->pf)) - goto find; - ext_sig++; - } - return 0; -find: - pr_debug("microcode: CPU%d found a matching microcode update with" - " version 0x%x (current=0x%x)\n", cpu, mc_header->rev,uci->rev); - new_mc = vmalloc(total_size); - if (!new_mc) { - printk(KERN_ERR "microcode: error! Can not allocate memory\n"); - return -ENOMEM; - } - - /* free previous update file */ - vfree(uci->mc); - - memcpy(new_mc, mc, total_size); - uci->mc = new_mc; - return 1; -} - -static void apply_microcode(int cpu) -{ - unsigned long flags; - unsigned int val[2]; - int cpu_num = raw_smp_processor_id(); - struct ucode_cpu_info *uci = ucode_cpu_info + cpu_num; - - /* We should bind the task to the CPU */ - BUG_ON(cpu_num != cpu); - - if (uci->mc == NULL) - return; - - /* serialize access to the physical write to MSR 0x79 */ - spin_lock_irqsave(µcode_update_lock, flags); - - /* write microcode via MSR 0x79 */ - wrmsr(MSR_IA32_UCODE_WRITE, - (unsigned long) uci->mc->bits, - (unsigned long) uci->mc->bits >> 16 >> 16); - wrmsr(MSR_IA32_UCODE_REV, 0, 0); - - /* see notes above for revision 1.07. Apparent chip bug */ - sync_core(); - - /* get the current revision from MSR 0x8B */ - rdmsr(MSR_IA32_UCODE_REV, val[0], val[1]); - - spin_unlock_irqrestore(µcode_update_lock, flags); - if (val[1] != uci->mc->hdr.rev) { - printk(KERN_ERR "microcode: CPU%d update from revision " - "0x%x to 0x%x failed\n", cpu_num, uci->rev, val[1]); - return; - } - printk(KERN_INFO "microcode: CPU%d updated from revision " - "0x%x to 0x%x, date = %08x \n", - cpu_num, uci->rev, val[1], uci->mc->hdr.date); - uci->rev = val[1]; -} - -#ifdef CONFIG_MICROCODE_OLD_INTERFACE -static void __user *user_buffer; /* user area microcode data buffer */ -static unsigned int user_buffer_size; /* it's size */ - -static long get_next_ucode(void **mc, long offset) -{ - struct microcode_header mc_header; - unsigned long total_size; - - /* No more data */ - if (offset >= user_buffer_size) - return 0; - if (copy_from_user(&mc_header, user_buffer + offset, MC_HEADER_SIZE)) { - printk(KERN_ERR "microcode: error! Can not read user data\n"); - return -EFAULT; - } - total_size = get_totalsize(&mc_header); - if (offset + total_size > user_buffer_size) { - printk(KERN_ERR "microcode: error! Bad total size in microcode " - "data file\n"); - return -EINVAL; - } - *mc = vmalloc(total_size); - if (!*mc) - return -ENOMEM; - if (copy_from_user(*mc, user_buffer + offset, total_size)) { - printk(KERN_ERR "microcode: error! Can not read user data\n"); - vfree(*mc); - return -EFAULT; - } - return offset + total_size; -} - -static int do_microcode_update (void) -{ - long cursor = 0; - int error = 0; - void *new_mc = NULL; - int cpu; - cpumask_t old; - cpumask_of_cpu_ptr_declare(newmask); - - old = current->cpus_allowed; - - while ((cursor = get_next_ucode(&new_mc, cursor)) > 0) { - error = microcode_sanity_check(new_mc); - if (error) - goto out; - /* - * It's possible the data file has multiple matching ucode, - * lets keep searching till the latest version - */ - for_each_online_cpu(cpu) { - struct ucode_cpu_info *uci = ucode_cpu_info + cpu; - - if (!uci->valid) - continue; - cpumask_of_cpu_ptr_next(newmask, cpu); - set_cpus_allowed_ptr(current, newmask); - error = get_maching_microcode(new_mc, cpu); - if (error < 0) - goto out; - if (error == 1) - apply_microcode(cpu); - } - vfree(new_mc); - } -out: - if (cursor > 0) - vfree(new_mc); - if (cursor < 0) - error = cursor; - set_cpus_allowed_ptr(current, &old); - return error; -} - -static int microcode_open (struct inode *unused1, struct file *unused2) -{ - cycle_kernel_lock(); - return capable(CAP_SYS_RAWIO) ? 0 : -EPERM; -} - -static ssize_t microcode_write (struct file *file, const char __user *buf, size_t len, loff_t *ppos) -{ - ssize_t ret; - - if ((len >> PAGE_SHIFT) > num_physpages) { - printk(KERN_ERR "microcode: too much data (max %ld pages)\n", num_physpages); - return -EINVAL; - } - - get_online_cpus(); - mutex_lock(µcode_mutex); - - user_buffer = (void __user *) buf; - user_buffer_size = (int) len; - - ret = do_microcode_update(); - if (!ret) - ret = (ssize_t)len; - - mutex_unlock(µcode_mutex); - put_online_cpus(); - - return ret; -} - -static const struct file_operations microcode_fops = { - .owner = THIS_MODULE, - .write = microcode_write, - .open = microcode_open, -}; - -static struct miscdevice microcode_dev = { - .minor = MICROCODE_MINOR, - .name = "microcode", - .fops = µcode_fops, -}; - -static int __init microcode_dev_init (void) -{ - int error; - - error = misc_register(µcode_dev); - if (error) { - printk(KERN_ERR - "microcode: can't misc_register on minor=%d\n", - MICROCODE_MINOR); - return error; - } - - return 0; -} - -static void microcode_dev_exit (void) -{ - misc_deregister(µcode_dev); -} - -MODULE_ALIAS_MISCDEV(MICROCODE_MINOR); -#else -#define microcode_dev_init() 0 -#define microcode_dev_exit() do { } while(0) -#endif - -static long get_next_ucode_from_buffer(void **mc, const u8 *buf, - unsigned long size, long offset) -{ - struct microcode_header *mc_header; - unsigned long total_size; - - /* No more data */ - if (offset >= size) - return 0; - mc_header = (struct microcode_header *)(buf + offset); - total_size = get_totalsize(mc_header); - - if (offset + total_size > size) { - printk(KERN_ERR "microcode: error! Bad data in microcode data file\n"); - return -EINVAL; - } - - *mc = vmalloc(total_size); - if (!*mc) { - printk(KERN_ERR "microcode: error! Can not allocate memory\n"); - return -ENOMEM; - } - memcpy(*mc, buf + offset, total_size); - return offset + total_size; -} - -/* fake device for request_firmware */ -static struct platform_device *microcode_pdev; - -static int cpu_request_microcode(int cpu) -{ - char name[30]; - struct cpuinfo_x86 *c = &cpu_data(cpu); - const struct firmware *firmware; - const u8 *buf; - unsigned long size; - long offset = 0; - int error; - void *mc; - - /* We should bind the task to the CPU */ - BUG_ON(cpu != raw_smp_processor_id()); - sprintf(name,"intel-ucode/%02x-%02x-%02x", - c->x86, c->x86_model, c->x86_mask); - error = request_firmware(&firmware, name, µcode_pdev->dev); - if (error) { - pr_debug("microcode: data file %s load failed\n", name); - return error; - } - buf = firmware->data; - size = firmware->size; - while ((offset = get_next_ucode_from_buffer(&mc, buf, size, offset)) - > 0) { - error = microcode_sanity_check(mc); - if (error) - break; - error = get_maching_microcode(mc, cpu); - if (error < 0) - break; - /* - * It's possible the data file has multiple matching ucode, - * lets keep searching till the latest version - */ - if (error == 1) { - apply_microcode(cpu); - error = 0; - } - vfree(mc); - } - if (offset > 0) - vfree(mc); - if (offset < 0) - error = offset; - release_firmware(firmware); - - return error; -} - -static int apply_microcode_check_cpu(int cpu) -{ - struct cpuinfo_x86 *c = &cpu_data(cpu); - struct ucode_cpu_info *uci = ucode_cpu_info + cpu; - cpumask_t old; - cpumask_of_cpu_ptr(newmask, cpu); - unsigned int val[2]; - int err = 0; - - /* Check if the microcode is available */ - if (!uci->mc) - return 0; - - old = current->cpus_allowed; - set_cpus_allowed_ptr(current, newmask); - - /* Check if the microcode we have in memory matches the CPU */ - if (c->x86_vendor != X86_VENDOR_INTEL || c->x86 < 6 || - cpu_has(c, X86_FEATURE_IA64) || uci->sig != cpuid_eax(0x00000001)) - err = -EINVAL; - - if (!err && ((c->x86_model >= 5) || (c->x86 > 6))) { - /* get processor flags from MSR 0x17 */ - rdmsr(MSR_IA32_PLATFORM_ID, val[0], val[1]); - if (uci->pf != (1 << ((val[1] >> 18) & 7))) - err = -EINVAL; - } - - if (!err) { - wrmsr(MSR_IA32_UCODE_REV, 0, 0); - /* see notes above for revision 1.07. Apparent chip bug */ - sync_core(); - /* get the current revision from MSR 0x8B */ - rdmsr(MSR_IA32_UCODE_REV, val[0], val[1]); - if (uci->rev != val[1]) - err = -EINVAL; - } - - if (!err) - apply_microcode(cpu); - else - printk(KERN_ERR "microcode: Could not apply microcode to CPU%d:" - " sig=0x%x, pf=0x%x, rev=0x%x\n", - cpu, uci->sig, uci->pf, uci->rev); - - set_cpus_allowed_ptr(current, &old); - return err; -} - -static void microcode_init_cpu(int cpu, int resume) -{ - cpumask_t old; - cpumask_of_cpu_ptr(newmask, cpu); - struct ucode_cpu_info *uci = ucode_cpu_info + cpu; - - old = current->cpus_allowed; - - set_cpus_allowed_ptr(current, newmask); - mutex_lock(µcode_mutex); - collect_cpu_info(cpu); - if (uci->valid && system_state == SYSTEM_RUNNING && !resume) - cpu_request_microcode(cpu); - mutex_unlock(µcode_mutex); - set_cpus_allowed_ptr(current, &old); -} - -static void microcode_fini_cpu(int cpu) -{ - struct ucode_cpu_info *uci = ucode_cpu_info + cpu; - - mutex_lock(µcode_mutex); - uci->valid = 0; - vfree(uci->mc); - uci->mc = NULL; - mutex_unlock(µcode_mutex); -} - -static ssize_t reload_store(struct sys_device *dev, - struct sysdev_attribute *attr, - const char *buf, size_t sz) -{ - struct ucode_cpu_info *uci = ucode_cpu_info + dev->id; - char *end; - unsigned long val = simple_strtoul(buf, &end, 0); - int err = 0; - int cpu = dev->id; - - if (end == buf) - return -EINVAL; - if (val == 1) { - cpumask_t old; - cpumask_of_cpu_ptr(newmask, cpu); - - old = current->cpus_allowed; - - get_online_cpus(); - set_cpus_allowed_ptr(current, newmask); - - mutex_lock(µcode_mutex); - if (uci->valid) - err = cpu_request_microcode(cpu); - mutex_unlock(µcode_mutex); - put_online_cpus(); - set_cpus_allowed_ptr(current, &old); - } - if (err) - return err; - return sz; -} - -static ssize_t version_show(struct sys_device *dev, - struct sysdev_attribute *attr, char *buf) -{ - struct ucode_cpu_info *uci = ucode_cpu_info + dev->id; - - return sprintf(buf, "0x%x\n", uci->rev); -} - -static ssize_t pf_show(struct sys_device *dev, - struct sysdev_attribute *attr, char *buf) -{ - struct ucode_cpu_info *uci = ucode_cpu_info + dev->id; - - return sprintf(buf, "0x%x\n", uci->pf); -} - -static SYSDEV_ATTR(reload, 0200, NULL, reload_store); -static SYSDEV_ATTR(version, 0400, version_show, NULL); -static SYSDEV_ATTR(processor_flags, 0400, pf_show, NULL); - -static struct attribute *mc_default_attrs[] = { - &attr_reload.attr, - &attr_version.attr, - &attr_processor_flags.attr, - NULL -}; - -static struct attribute_group mc_attr_group = { - .attrs = mc_default_attrs, - .name = "microcode", -}; - -static int __mc_sysdev_add(struct sys_device *sys_dev, int resume) -{ - int err, cpu = sys_dev->id; - struct ucode_cpu_info *uci = ucode_cpu_info + cpu; - - if (!cpu_online(cpu)) - return 0; - - pr_debug("microcode: CPU%d added\n", cpu); - memset(uci, 0, sizeof(*uci)); - - err = sysfs_create_group(&sys_dev->kobj, &mc_attr_group); - if (err) - return err; - - microcode_init_cpu(cpu, resume); - - return 0; -} - -static int mc_sysdev_add(struct sys_device *sys_dev) -{ - return __mc_sysdev_add(sys_dev, 0); -} - -static int mc_sysdev_remove(struct sys_device *sys_dev) -{ - int cpu = sys_dev->id; - - if (!cpu_online(cpu)) - return 0; - - pr_debug("microcode: CPU%d removed\n", cpu); - microcode_fini_cpu(cpu); - sysfs_remove_group(&sys_dev->kobj, &mc_attr_group); - return 0; -} - -static int mc_sysdev_resume(struct sys_device *dev) -{ - int cpu = dev->id; - - if (!cpu_online(cpu)) - return 0; - pr_debug("microcode: CPU%d resumed\n", cpu); - /* only CPU 0 will apply ucode here */ - apply_microcode(0); - return 0; -} - -static struct sysdev_driver mc_sysdev_driver = { - .add = mc_sysdev_add, - .remove = mc_sysdev_remove, - .resume = mc_sysdev_resume, -}; - -static __cpuinit int -mc_cpu_callback(struct notifier_block *nb, unsigned long action, void *hcpu) -{ - unsigned int cpu = (unsigned long)hcpu; - struct sys_device *sys_dev; - - sys_dev = get_cpu_sysdev(cpu); - switch (action) { - case CPU_UP_CANCELED_FROZEN: - /* The CPU refused to come up during a system resume */ - microcode_fini_cpu(cpu); - break; - case CPU_ONLINE: - case CPU_DOWN_FAILED: - mc_sysdev_add(sys_dev); - break; - case CPU_ONLINE_FROZEN: - /* System-wide resume is in progress, try to apply microcode */ - if (apply_microcode_check_cpu(cpu)) { - /* The application of microcode failed */ - microcode_fini_cpu(cpu); - __mc_sysdev_add(sys_dev, 1); - break; - } - case CPU_DOWN_FAILED_FROZEN: - if (sysfs_create_group(&sys_dev->kobj, &mc_attr_group)) - printk(KERN_ERR "microcode: Failed to create the sysfs " - "group for CPU%d\n", cpu); - break; - case CPU_DOWN_PREPARE: - mc_sysdev_remove(sys_dev); - break; - case CPU_DOWN_PREPARE_FROZEN: - /* Suspend is in progress, only remove the interface */ - sysfs_remove_group(&sys_dev->kobj, &mc_attr_group); - break; - } - return NOTIFY_OK; -} - -static struct notifier_block __refdata mc_cpu_notifier = { - .notifier_call = mc_cpu_callback, -}; - -static int __init microcode_init (void) -{ - int error; - - printk(KERN_INFO - "IA-32 Microcode Update Driver: v" MICROCODE_VERSION " \n"); - - error = microcode_dev_init(); - if (error) - return error; - microcode_pdev = platform_device_register_simple("microcode", -1, - NULL, 0); - if (IS_ERR(microcode_pdev)) { - microcode_dev_exit(); - return PTR_ERR(microcode_pdev); - } - - get_online_cpus(); - error = sysdev_driver_register(&cpu_sysdev_class, &mc_sysdev_driver); - put_online_cpus(); - if (error) { - microcode_dev_exit(); - platform_device_unregister(microcode_pdev); - return error; - } - - register_hotcpu_notifier(&mc_cpu_notifier); - return 0; -} - -static void __exit microcode_exit (void) -{ - microcode_dev_exit(); - - unregister_hotcpu_notifier(&mc_cpu_notifier); - - get_online_cpus(); - sysdev_driver_unregister(&cpu_sysdev_class, &mc_sysdev_driver); - put_online_cpus(); - - platform_device_unregister(microcode_pdev); -} - -module_init(microcode_init) -module_exit(microcode_exit) diff --git a/arch/x86/kernel/microcode_intel.c b/arch/x86/kernel/microcode_intel.c new file mode 100644 index 00000000000..4e7b2f65fed --- /dev/null +++ b/arch/x86/kernel/microcode_intel.c @@ -0,0 +1,855 @@ +/* + * Intel CPU Microcode Update Driver for Linux + * + * Copyright (C) 2000-2006 Tigran Aivazian + * 2006 Shaohua Li + * + * This driver allows to upgrade microcode on Intel processors + * belonging to IA-32 family - PentiumPro, Pentium II, + * Pentium III, Xeon, Pentium 4, etc. + * + * Reference: Section 8.11 of Volume 3a, IA-32 Intel? Architecture + * Software Developer's Manual + * Order Number 253668 or free download from: + * + * http://developer.intel.com/design/pentium4/manuals/253668.htm + * + * For more information, go to http://www.urbanmyth.org/microcode + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * 1.0 16 Feb 2000, Tigran Aivazian + * Initial release. + * 1.01 18 Feb 2000, Tigran Aivazian + * Added read() support + cleanups. + * 1.02 21 Feb 2000, Tigran Aivazian + * Added 'device trimming' support. open(O_WRONLY) zeroes + * and frees the saved copy of applied microcode. + * 1.03 29 Feb 2000, Tigran Aivazian + * Made to use devfs (/dev/cpu/microcode) + cleanups. + * 1.04 06 Jun 2000, Simon Trimmer + * Added misc device support (now uses both devfs and misc). + * Added MICROCODE_IOCFREE ioctl to clear memory. + * 1.05 09 Jun 2000, Simon Trimmer + * Messages for error cases (non Intel & no suitable microcode). + * 1.06 03 Aug 2000, Tigran Aivazian + * Removed ->release(). Removed exclusive open and status bitmap. + * Added microcode_rwsem to serialize read()/write()/ioctl(). + * Removed global kernel lock usage. + * 1.07 07 Sep 2000, Tigran Aivazian + * Write 0 to 0x8B msr and then cpuid before reading revision, + * so that it works even if there were no update done by the + * BIOS. Otherwise, reading from 0x8B gives junk (which happened + * to be 0 on my machine which is why it worked even when I + * disabled update by the BIOS) + * Thanks to Eric W. Biederman for the fix. + * 1.08 11 Dec 2000, Richard Schaal and + * Tigran Aivazian + * Intel Pentium 4 processor support and bugfixes. + * 1.09 30 Oct 2001, Tigran Aivazian + * Bugfix for HT (Hyper-Threading) enabled processors + * whereby processor resources are shared by all logical processors + * in a single CPU package. + * 1.10 28 Feb 2002 Asit K Mallick and + * Tigran Aivazian , + * Serialize updates as required on HT processors due to speculative + * nature of implementation. + * 1.11 22 Mar 2002 Tigran Aivazian + * Fix the panic when writing zero-length microcode chunk. + * 1.12 29 Sep 2003 Nitin Kamble , + * Jun Nakajima + * Support for the microcode updates in the new format. + * 1.13 10 Oct 2003 Tigran Aivazian + * Removed ->read() method and obsoleted MICROCODE_IOCFREE ioctl + * because we no longer hold a copy of applied microcode + * in kernel memory. + * 1.14 25 Jun 2004 Tigran Aivazian + * Fix sigmatch() macro to handle old CPUs with pf == 0. + * Thanks to Stuart Swales for pointing out this bug. + */ + +//#define DEBUG /* pr_debug */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +MODULE_DESCRIPTION("Intel CPU (IA-32) Microcode Update Driver"); +MODULE_AUTHOR("Tigran Aivazian "); +MODULE_LICENSE("GPL"); + +#define MICROCODE_VERSION "1.14a" + +#define DEFAULT_UCODE_DATASIZE (2000) /* 2000 bytes */ +#define MC_HEADER_SIZE (sizeof (struct microcode_header)) /* 48 bytes */ +#define DEFAULT_UCODE_TOTALSIZE (DEFAULT_UCODE_DATASIZE + MC_HEADER_SIZE) /* 2048 bytes */ +#define EXT_HEADER_SIZE (sizeof (struct extended_sigtable)) /* 20 bytes */ +#define EXT_SIGNATURE_SIZE (sizeof (struct extended_signature)) /* 12 bytes */ +#define DWSIZE (sizeof (u32)) +#define get_totalsize(mc) \ + (((struct microcode *)mc)->hdr.totalsize ? \ + ((struct microcode *)mc)->hdr.totalsize : DEFAULT_UCODE_TOTALSIZE) +#define get_datasize(mc) \ + (((struct microcode *)mc)->hdr.datasize ? \ + ((struct microcode *)mc)->hdr.datasize : DEFAULT_UCODE_DATASIZE) + +#define sigmatch(s1, s2, p1, p2) \ + (((s1) == (s2)) && (((p1) & (p2)) || (((p1) == 0) && ((p2) == 0)))) + +#define exttable_size(et) ((et)->count * EXT_SIGNATURE_SIZE + EXT_HEADER_SIZE) + +/* serialize access to the physical write to MSR 0x79 */ +static DEFINE_SPINLOCK(microcode_update_lock); + +/* no concurrent ->write()s are allowed on /dev/cpu/microcode */ +static DEFINE_MUTEX(microcode_mutex); + +static struct ucode_cpu_info ucode_cpu_info[NR_CPUS]; + +static void collect_cpu_info(int cpu_num) +{ + struct cpuinfo_x86 *c = &cpu_data(cpu_num); + struct ucode_cpu_info *uci = ucode_cpu_info + cpu_num; + unsigned int val[2]; + + /* We should bind the task to the CPU */ + BUG_ON(raw_smp_processor_id() != cpu_num); + uci->pf = uci->rev = 0; + uci->mc = NULL; + uci->valid = 1; + + if (c->x86_vendor != X86_VENDOR_INTEL || c->x86 < 6 || + cpu_has(c, X86_FEATURE_IA64)) { + printk(KERN_ERR "microcode: CPU%d not a capable Intel " + "processor\n", cpu_num); + uci->valid = 0; + return; + } + + uci->sig = cpuid_eax(0x00000001); + + if ((c->x86_model >= 5) || (c->x86 > 6)) { + /* get processor flags from MSR 0x17 */ + rdmsr(MSR_IA32_PLATFORM_ID, val[0], val[1]); + uci->pf = 1 << ((val[1] >> 18) & 7); + } + + wrmsr(MSR_IA32_UCODE_REV, 0, 0); + /* see notes above for revision 1.07. Apparent chip bug */ + sync_core(); + /* get the current revision from MSR 0x8B */ + rdmsr(MSR_IA32_UCODE_REV, val[0], uci->rev); + pr_debug("microcode: collect_cpu_info : sig=0x%x, pf=0x%x, rev=0x%x\n", + uci->sig, uci->pf, uci->rev); +} + +static inline int microcode_update_match(int cpu_num, + struct microcode_header *mc_header, int sig, int pf) +{ + struct ucode_cpu_info *uci = ucode_cpu_info + cpu_num; + + if (!sigmatch(sig, uci->sig, pf, uci->pf) + || mc_header->rev <= uci->rev) + return 0; + return 1; +} + +static int microcode_sanity_check(void *mc) +{ + struct microcode_header *mc_header = mc; + struct extended_sigtable *ext_header = NULL; + struct extended_signature *ext_sig; + unsigned long total_size, data_size, ext_table_size; + int sum, orig_sum, ext_sigcount = 0, i; + + total_size = get_totalsize(mc_header); + data_size = get_datasize(mc_header); + if (data_size + MC_HEADER_SIZE > total_size) { + printk(KERN_ERR "microcode: error! " + "Bad data size in microcode data file\n"); + return -EINVAL; + } + + if (mc_header->ldrver != 1 || mc_header->hdrver != 1) { + printk(KERN_ERR "microcode: error! " + "Unknown microcode update format\n"); + return -EINVAL; + } + ext_table_size = total_size - (MC_HEADER_SIZE + data_size); + if (ext_table_size) { + if ((ext_table_size < EXT_HEADER_SIZE) + || ((ext_table_size - EXT_HEADER_SIZE) % EXT_SIGNATURE_SIZE)) { + printk(KERN_ERR "microcode: error! " + "Small exttable size in microcode data file\n"); + return -EINVAL; + } + ext_header = mc + MC_HEADER_SIZE + data_size; + if (ext_table_size != exttable_size(ext_header)) { + printk(KERN_ERR "microcode: error! " + "Bad exttable size in microcode data file\n"); + return -EFAULT; + } + ext_sigcount = ext_header->count; + } + + /* check extended table checksum */ + if (ext_table_size) { + int ext_table_sum = 0; + int *ext_tablep = (int *)ext_header; + + i = ext_table_size / DWSIZE; + while (i--) + ext_table_sum += ext_tablep[i]; + if (ext_table_sum) { + printk(KERN_WARNING "microcode: aborting, " + "bad extended signature table checksum\n"); + return -EINVAL; + } + } + + /* calculate the checksum */ + orig_sum = 0; + i = (MC_HEADER_SIZE + data_size) / DWSIZE; + while (i--) + orig_sum += ((int *)mc)[i]; + if (orig_sum) { + printk(KERN_ERR "microcode: aborting, bad checksum\n"); + return -EINVAL; + } + if (!ext_table_size) + return 0; + /* check extended signature checksum */ + for (i = 0; i < ext_sigcount; i++) { + ext_sig = (void *)ext_header + EXT_HEADER_SIZE + + EXT_SIGNATURE_SIZE * i; + sum = orig_sum + - (mc_header->sig + mc_header->pf + mc_header->cksum) + + (ext_sig->sig + ext_sig->pf + ext_sig->cksum); + if (sum) { + printk(KERN_ERR "microcode: aborting, bad checksum\n"); + return -EINVAL; + } + } + return 0; +} + +/* + * return 0 - no update found + * return 1 - found update + * return < 0 - error + */ +static int get_maching_microcode(void *mc, int cpu) +{ + struct ucode_cpu_info *uci = ucode_cpu_info + cpu; + struct microcode_header *mc_header = mc; + struct extended_sigtable *ext_header; + unsigned long total_size = get_totalsize(mc_header); + int ext_sigcount, i; + struct extended_signature *ext_sig; + void *new_mc; + + if (microcode_update_match(cpu, mc_header, + mc_header->sig, mc_header->pf)) + goto find; + + if (total_size <= get_datasize(mc_header) + MC_HEADER_SIZE) + return 0; + + ext_header = mc + get_datasize(mc_header) + MC_HEADER_SIZE; + ext_sigcount = ext_header->count; + ext_sig = (void *)ext_header + EXT_HEADER_SIZE; + for (i = 0; i < ext_sigcount; i++) { + if (microcode_update_match(cpu, mc_header, + ext_sig->sig, ext_sig->pf)) + goto find; + ext_sig++; + } + return 0; +find: + pr_debug("microcode: CPU%d found a matching microcode update with" + " version 0x%x (current=0x%x)\n", cpu, mc_header->rev,uci->rev); + new_mc = vmalloc(total_size); + if (!new_mc) { + printk(KERN_ERR "microcode: error! Can not allocate memory\n"); + return -ENOMEM; + } + + /* free previous update file */ + vfree(uci->mc); + + memcpy(new_mc, mc, total_size); + uci->mc = new_mc; + return 1; +} + +static void apply_microcode(int cpu) +{ + unsigned long flags; + unsigned int val[2]; + int cpu_num = raw_smp_processor_id(); + struct ucode_cpu_info *uci = ucode_cpu_info + cpu_num; + + /* We should bind the task to the CPU */ + BUG_ON(cpu_num != cpu); + + if (uci->mc == NULL) + return; + + /* serialize access to the physical write to MSR 0x79 */ + spin_lock_irqsave(µcode_update_lock, flags); + + /* write microcode via MSR 0x79 */ + wrmsr(MSR_IA32_UCODE_WRITE, + (unsigned long) uci->mc->bits, + (unsigned long) uci->mc->bits >> 16 >> 16); + wrmsr(MSR_IA32_UCODE_REV, 0, 0); + + /* see notes above for revision 1.07. Apparent chip bug */ + sync_core(); + + /* get the current revision from MSR 0x8B */ + rdmsr(MSR_IA32_UCODE_REV, val[0], val[1]); + + spin_unlock_irqrestore(µcode_update_lock, flags); + if (val[1] != uci->mc->hdr.rev) { + printk(KERN_ERR "microcode: CPU%d update from revision " + "0x%x to 0x%x failed\n", cpu_num, uci->rev, val[1]); + return; + } + printk(KERN_INFO "microcode: CPU%d updated from revision " + "0x%x to 0x%x, date = %08x \n", + cpu_num, uci->rev, val[1], uci->mc->hdr.date); + uci->rev = val[1]; +} + +#ifdef CONFIG_MICROCODE_OLD_INTERFACE +static void __user *user_buffer; /* user area microcode data buffer */ +static unsigned int user_buffer_size; /* it's size */ + +static long get_next_ucode(void **mc, long offset) +{ + struct microcode_header mc_header; + unsigned long total_size; + + /* No more data */ + if (offset >= user_buffer_size) + return 0; + if (copy_from_user(&mc_header, user_buffer + offset, MC_HEADER_SIZE)) { + printk(KERN_ERR "microcode: error! Can not read user data\n"); + return -EFAULT; + } + total_size = get_totalsize(&mc_header); + if (offset + total_size > user_buffer_size) { + printk(KERN_ERR "microcode: error! Bad total size in microcode " + "data file\n"); + return -EINVAL; + } + *mc = vmalloc(total_size); + if (!*mc) + return -ENOMEM; + if (copy_from_user(*mc, user_buffer + offset, total_size)) { + printk(KERN_ERR "microcode: error! Can not read user data\n"); + vfree(*mc); + return -EFAULT; + } + return offset + total_size; +} + +static int do_microcode_update (void) +{ + long cursor = 0; + int error = 0; + void *new_mc = NULL; + int cpu; + cpumask_t old; + cpumask_of_cpu_ptr_declare(newmask); + + old = current->cpus_allowed; + + while ((cursor = get_next_ucode(&new_mc, cursor)) > 0) { + error = microcode_sanity_check(new_mc); + if (error) + goto out; + /* + * It's possible the data file has multiple matching ucode, + * lets keep searching till the latest version + */ + for_each_online_cpu(cpu) { + struct ucode_cpu_info *uci = ucode_cpu_info + cpu; + + if (!uci->valid) + continue; + cpumask_of_cpu_ptr_next(newmask, cpu); + set_cpus_allowed_ptr(current, newmask); + error = get_maching_microcode(new_mc, cpu); + if (error < 0) + goto out; + if (error == 1) + apply_microcode(cpu); + } + vfree(new_mc); + } +out: + if (cursor > 0) + vfree(new_mc); + if (cursor < 0) + error = cursor; + set_cpus_allowed_ptr(current, &old); + return error; +} + +static int microcode_open (struct inode *unused1, struct file *unused2) +{ + cycle_kernel_lock(); + return capable(CAP_SYS_RAWIO) ? 0 : -EPERM; +} + +static ssize_t microcode_write (struct file *file, const char __user *buf, size_t len, loff_t *ppos) +{ + ssize_t ret; + + if ((len >> PAGE_SHIFT) > num_physpages) { + printk(KERN_ERR "microcode: too much data (max %ld pages)\n", num_physpages); + return -EINVAL; + } + + get_online_cpus(); + mutex_lock(µcode_mutex); + + user_buffer = (void __user *) buf; + user_buffer_size = (int) len; + + ret = do_microcode_update(); + if (!ret) + ret = (ssize_t)len; + + mutex_unlock(µcode_mutex); + put_online_cpus(); + + return ret; +} + +static const struct file_operations microcode_fops = { + .owner = THIS_MODULE, + .write = microcode_write, + .open = microcode_open, +}; + +static struct miscdevice microcode_dev = { + .minor = MICROCODE_MINOR, + .name = "microcode", + .fops = µcode_fops, +}; + +static int __init microcode_dev_init (void) +{ + int error; + + error = misc_register(µcode_dev); + if (error) { + printk(KERN_ERR + "microcode: can't misc_register on minor=%d\n", + MICROCODE_MINOR); + return error; + } + + return 0; +} + +static void microcode_dev_exit (void) +{ + misc_deregister(µcode_dev); +} + +MODULE_ALIAS_MISCDEV(MICROCODE_MINOR); +#else +#define microcode_dev_init() 0 +#define microcode_dev_exit() do { } while(0) +#endif + +static long get_next_ucode_from_buffer(void **mc, const u8 *buf, + unsigned long size, long offset) +{ + struct microcode_header *mc_header; + unsigned long total_size; + + /* No more data */ + if (offset >= size) + return 0; + mc_header = (struct microcode_header *)(buf + offset); + total_size = get_totalsize(mc_header); + + if (offset + total_size > size) { + printk(KERN_ERR "microcode: error! Bad data in microcode data file\n"); + return -EINVAL; + } + + *mc = vmalloc(total_size); + if (!*mc) { + printk(KERN_ERR "microcode: error! Can not allocate memory\n"); + return -ENOMEM; + } + memcpy(*mc, buf + offset, total_size); + return offset + total_size; +} + +/* fake device for request_firmware */ +static struct platform_device *microcode_pdev; + +static int cpu_request_microcode(int cpu) +{ + char name[30]; + struct cpuinfo_x86 *c = &cpu_data(cpu); + const struct firmware *firmware; + const u8 *buf; + unsigned long size; + long offset = 0; + int error; + void *mc; + + /* We should bind the task to the CPU */ + BUG_ON(cpu != raw_smp_processor_id()); + sprintf(name,"intel-ucode/%02x-%02x-%02x", + c->x86, c->x86_model, c->x86_mask); + error = request_firmware(&firmware, name, µcode_pdev->dev); + if (error) { + pr_debug("microcode: data file %s load failed\n", name); + return error; + } + buf = firmware->data; + size = firmware->size; + while ((offset = get_next_ucode_from_buffer(&mc, buf, size, offset)) + > 0) { + error = microcode_sanity_check(mc); + if (error) + break; + error = get_maching_microcode(mc, cpu); + if (error < 0) + break; + /* + * It's possible the data file has multiple matching ucode, + * lets keep searching till the latest version + */ + if (error == 1) { + apply_microcode(cpu); + error = 0; + } + vfree(mc); + } + if (offset > 0) + vfree(mc); + if (offset < 0) + error = offset; + release_firmware(firmware); + + return error; +} + +static int apply_microcode_check_cpu(int cpu) +{ + struct cpuinfo_x86 *c = &cpu_data(cpu); + struct ucode_cpu_info *uci = ucode_cpu_info + cpu; + cpumask_t old; + cpumask_of_cpu_ptr(newmask, cpu); + unsigned int val[2]; + int err = 0; + + /* Check if the microcode is available */ + if (!uci->mc) + return 0; + + old = current->cpus_allowed; + set_cpus_allowed_ptr(current, newmask); + + /* Check if the microcode we have in memory matches the CPU */ + if (c->x86_vendor != X86_VENDOR_INTEL || c->x86 < 6 || + cpu_has(c, X86_FEATURE_IA64) || uci->sig != cpuid_eax(0x00000001)) + err = -EINVAL; + + if (!err && ((c->x86_model >= 5) || (c->x86 > 6))) { + /* get processor flags from MSR 0x17 */ + rdmsr(MSR_IA32_PLATFORM_ID, val[0], val[1]); + if (uci->pf != (1 << ((val[1] >> 18) & 7))) + err = -EINVAL; + } + + if (!err) { + wrmsr(MSR_IA32_UCODE_REV, 0, 0); + /* see notes above for revision 1.07. Apparent chip bug */ + sync_core(); + /* get the current revision from MSR 0x8B */ + rdmsr(MSR_IA32_UCODE_REV, val[0], val[1]); + if (uci->rev != val[1]) + err = -EINVAL; + } + + if (!err) + apply_microcode(cpu); + else + printk(KERN_ERR "microcode: Could not apply microcode to CPU%d:" + " sig=0x%x, pf=0x%x, rev=0x%x\n", + cpu, uci->sig, uci->pf, uci->rev); + + set_cpus_allowed_ptr(current, &old); + return err; +} + +static void microcode_init_cpu(int cpu, int resume) +{ + cpumask_t old; + cpumask_of_cpu_ptr(newmask, cpu); + struct ucode_cpu_info *uci = ucode_cpu_info + cpu; + + old = current->cpus_allowed; + + set_cpus_allowed_ptr(current, newmask); + mutex_lock(µcode_mutex); + collect_cpu_info(cpu); + if (uci->valid && system_state == SYSTEM_RUNNING && !resume) + cpu_request_microcode(cpu); + mutex_unlock(µcode_mutex); + set_cpus_allowed_ptr(current, &old); +} + +static void microcode_fini_cpu(int cpu) +{ + struct ucode_cpu_info *uci = ucode_cpu_info + cpu; + + mutex_lock(µcode_mutex); + uci->valid = 0; + vfree(uci->mc); + uci->mc = NULL; + mutex_unlock(µcode_mutex); +} + +static ssize_t reload_store(struct sys_device *dev, + struct sysdev_attribute *attr, + const char *buf, size_t sz) +{ + struct ucode_cpu_info *uci = ucode_cpu_info + dev->id; + char *end; + unsigned long val = simple_strtoul(buf, &end, 0); + int err = 0; + int cpu = dev->id; + + if (end == buf) + return -EINVAL; + if (val == 1) { + cpumask_t old; + cpumask_of_cpu_ptr(newmask, cpu); + + old = current->cpus_allowed; + + get_online_cpus(); + set_cpus_allowed_ptr(current, newmask); + + mutex_lock(µcode_mutex); + if (uci->valid) + err = cpu_request_microcode(cpu); + mutex_unlock(µcode_mutex); + put_online_cpus(); + set_cpus_allowed_ptr(current, &old); + } + if (err) + return err; + return sz; +} + +static ssize_t version_show(struct sys_device *dev, + struct sysdev_attribute *attr, char *buf) +{ + struct ucode_cpu_info *uci = ucode_cpu_info + dev->id; + + return sprintf(buf, "0x%x\n", uci->rev); +} + +static ssize_t pf_show(struct sys_device *dev, + struct sysdev_attribute *attr, char *buf) +{ + struct ucode_cpu_info *uci = ucode_cpu_info + dev->id; + + return sprintf(buf, "0x%x\n", uci->pf); +} + +static SYSDEV_ATTR(reload, 0200, NULL, reload_store); +static SYSDEV_ATTR(version, 0400, version_show, NULL); +static SYSDEV_ATTR(processor_flags, 0400, pf_show, NULL); + +static struct attribute *mc_default_attrs[] = { + &attr_reload.attr, + &attr_version.attr, + &attr_processor_flags.attr, + NULL +}; + +static struct attribute_group mc_attr_group = { + .attrs = mc_default_attrs, + .name = "microcode", +}; + +static int __mc_sysdev_add(struct sys_device *sys_dev, int resume) +{ + int err, cpu = sys_dev->id; + struct ucode_cpu_info *uci = ucode_cpu_info + cpu; + + if (!cpu_online(cpu)) + return 0; + + pr_debug("microcode: CPU%d added\n", cpu); + memset(uci, 0, sizeof(*uci)); + + err = sysfs_create_group(&sys_dev->kobj, &mc_attr_group); + if (err) + return err; + + microcode_init_cpu(cpu, resume); + + return 0; +} + +static int mc_sysdev_add(struct sys_device *sys_dev) +{ + return __mc_sysdev_add(sys_dev, 0); +} + +static int mc_sysdev_remove(struct sys_device *sys_dev) +{ + int cpu = sys_dev->id; + + if (!cpu_online(cpu)) + return 0; + + pr_debug("microcode: CPU%d removed\n", cpu); + microcode_fini_cpu(cpu); + sysfs_remove_group(&sys_dev->kobj, &mc_attr_group); + return 0; +} + +static int mc_sysdev_resume(struct sys_device *dev) +{ + int cpu = dev->id; + + if (!cpu_online(cpu)) + return 0; + pr_debug("microcode: CPU%d resumed\n", cpu); + /* only CPU 0 will apply ucode here */ + apply_microcode(0); + return 0; +} + +static struct sysdev_driver mc_sysdev_driver = { + .add = mc_sysdev_add, + .remove = mc_sysdev_remove, + .resume = mc_sysdev_resume, +}; + +static __cpuinit int +mc_cpu_callback(struct notifier_block *nb, unsigned long action, void *hcpu) +{ + unsigned int cpu = (unsigned long)hcpu; + struct sys_device *sys_dev; + + sys_dev = get_cpu_sysdev(cpu); + switch (action) { + case CPU_UP_CANCELED_FROZEN: + /* The CPU refused to come up during a system resume */ + microcode_fini_cpu(cpu); + break; + case CPU_ONLINE: + case CPU_DOWN_FAILED: + mc_sysdev_add(sys_dev); + break; + case CPU_ONLINE_FROZEN: + /* System-wide resume is in progress, try to apply microcode */ + if (apply_microcode_check_cpu(cpu)) { + /* The application of microcode failed */ + microcode_fini_cpu(cpu); + __mc_sysdev_add(sys_dev, 1); + break; + } + case CPU_DOWN_FAILED_FROZEN: + if (sysfs_create_group(&sys_dev->kobj, &mc_attr_group)) + printk(KERN_ERR "microcode: Failed to create the sysfs " + "group for CPU%d\n", cpu); + break; + case CPU_DOWN_PREPARE: + mc_sysdev_remove(sys_dev); + break; + case CPU_DOWN_PREPARE_FROZEN: + /* Suspend is in progress, only remove the interface */ + sysfs_remove_group(&sys_dev->kobj, &mc_attr_group); + break; + } + return NOTIFY_OK; +} + +static struct notifier_block __refdata mc_cpu_notifier = { + .notifier_call = mc_cpu_callback, +}; + +static int __init microcode_init (void) +{ + int error; + + printk(KERN_INFO + "IA-32 Microcode Update Driver: v" MICROCODE_VERSION " \n"); + + error = microcode_dev_init(); + if (error) + return error; + microcode_pdev = platform_device_register_simple("microcode", -1, + NULL, 0); + if (IS_ERR(microcode_pdev)) { + microcode_dev_exit(); + return PTR_ERR(microcode_pdev); + } + + get_online_cpus(); + error = sysdev_driver_register(&cpu_sysdev_class, &mc_sysdev_driver); + put_online_cpus(); + if (error) { + microcode_dev_exit(); + platform_device_unregister(microcode_pdev); + return error; + } + + register_hotcpu_notifier(&mc_cpu_notifier); + return 0; +} + +static void __exit microcode_exit (void) +{ + microcode_dev_exit(); + + unregister_hotcpu_notifier(&mc_cpu_notifier); + + get_online_cpus(); + sysdev_driver_unregister(&cpu_sysdev_class, &mc_sysdev_driver); + put_online_cpus(); + + platform_device_unregister(microcode_pdev); +} + +module_init(microcode_init) +module_exit(microcode_exit) -- cgit v1.2.3 From 3e135d887c973b525d43fbb67dfc5972694882f6 Mon Sep 17 00:00:00 2001 From: Peter Oruba Date: Mon, 28 Jul 2008 18:44:17 +0200 Subject: x86: code split to two parts Split off existing code into two seperate files. One file holds general code, the other file vendor specific parts. No functional changes, only refactoring. Temporarily Introduced a new module name 'ucode' for result, due to already taken name 'microcode'. Signed-off-by: Peter Oruba Cc: Tigran Aivazian Signed-off-by: Ingo Molnar --- arch/x86/kernel/Makefile | 3 +- arch/x86/kernel/microcode.c | 463 ++++++++++++++++++++++++++++++++++++++ arch/x86/kernel/microcode_intel.c | 387 ++----------------------------- 3 files changed, 488 insertions(+), 365 deletions(-) create mode 100644 arch/x86/kernel/microcode.c (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index a9be2a0dde8..abb32aed7f4 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -51,7 +51,8 @@ obj-$(CONFIG_X86_BIOS_REBOOT) += reboot.o obj-$(CONFIG_MCA) += mca_32.o obj-$(CONFIG_X86_MSR) += msr.o obj-$(CONFIG_X86_CPUID) += cpuid.o -obj-$(CONFIG_MICROCODE) += microcode_intel.o +obj-$(CONFIG_MICROCODE) += ucode.o +ucode-objs := microcode.o microcode_intel.o obj-$(CONFIG_PCI) += early-quirks.o apm-y := apm_32.o obj-$(CONFIG_APM) += apm.o diff --git a/arch/x86/kernel/microcode.c b/arch/x86/kernel/microcode.c new file mode 100644 index 00000000000..c1047d7f7ed --- /dev/null +++ b/arch/x86/kernel/microcode.c @@ -0,0 +1,463 @@ +/* + * Intel CPU Microcode Update Driver for Linux + * + * Copyright (C) 2000-2006 Tigran Aivazian + * 2006 Shaohua Li + * + * This driver allows to upgrade microcode on Intel processors + * belonging to IA-32 family - PentiumPro, Pentium II, + * Pentium III, Xeon, Pentium 4, etc. + * + * Reference: Section 8.11 of Volume 3a, IA-32 Intel? Architecture + * Software Developer's Manual + * Order Number 253668 or free download from: + * + * http://developer.intel.com/design/pentium4/manuals/253668.htm + * + * For more information, go to http://www.urbanmyth.org/microcode + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * 1.0 16 Feb 2000, Tigran Aivazian + * Initial release. + * 1.01 18 Feb 2000, Tigran Aivazian + * Added read() support + cleanups. + * 1.02 21 Feb 2000, Tigran Aivazian + * Added 'device trimming' support. open(O_WRONLY) zeroes + * and frees the saved copy of applied microcode. + * 1.03 29 Feb 2000, Tigran Aivazian + * Made to use devfs (/dev/cpu/microcode) + cleanups. + * 1.04 06 Jun 2000, Simon Trimmer + * Added misc device support (now uses both devfs and misc). + * Added MICROCODE_IOCFREE ioctl to clear memory. + * 1.05 09 Jun 2000, Simon Trimmer + * Messages for error cases (non Intel & no suitable microcode). + * 1.06 03 Aug 2000, Tigran Aivazian + * Removed ->release(). Removed exclusive open and status bitmap. + * Added microcode_rwsem to serialize read()/write()/ioctl(). + * Removed global kernel lock usage. + * 1.07 07 Sep 2000, Tigran Aivazian + * Write 0 to 0x8B msr and then cpuid before reading revision, + * so that it works even if there were no update done by the + * BIOS. Otherwise, reading from 0x8B gives junk (which happened + * to be 0 on my machine which is why it worked even when I + * disabled update by the BIOS) + * Thanks to Eric W. Biederman for the fix. + * 1.08 11 Dec 2000, Richard Schaal and + * Tigran Aivazian + * Intel Pentium 4 processor support and bugfixes. + * 1.09 30 Oct 2001, Tigran Aivazian + * Bugfix for HT (Hyper-Threading) enabled processors + * whereby processor resources are shared by all logical processors + * in a single CPU package. + * 1.10 28 Feb 2002 Asit K Mallick and + * Tigran Aivazian , + * Serialize updates as required on HT processors due to speculative + * nature of implementation. + * 1.11 22 Mar 2002 Tigran Aivazian + * Fix the panic when writing zero-length microcode chunk. + * 1.12 29 Sep 2003 Nitin Kamble , + * Jun Nakajima + * Support for the microcode updates in the new format. + * 1.13 10 Oct 2003 Tigran Aivazian + * Removed ->read() method and obsoleted MICROCODE_IOCFREE ioctl + * because we no longer hold a copy of applied microcode + * in kernel memory. + * 1.14 25 Jun 2004 Tigran Aivazian + * Fix sigmatch() macro to handle old CPUs with pf == 0. + * Thanks to Stuart Swales for pointing out this bug. + */ + +//#define DEBUG /* pr_debug */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +MODULE_DESCRIPTION("Microcode Update Driver"); +MODULE_AUTHOR("Tigran Aivazian "); +MODULE_LICENSE("GPL"); + +#define MICROCODE_VERSION "1.14a" + +/* no concurrent ->write()s are allowed on /dev/cpu/microcode */ +DEFINE_MUTEX(microcode_mutex); + +struct ucode_cpu_info ucode_cpu_info[NR_CPUS]; + +extern long get_next_ucode(void **mc, long offset); +extern int microcode_sanity_check(void *mc); +extern int get_matching_microcode(void *mc, int cpu); +extern void collect_cpu_info(int cpu_num); +extern int cpu_request_microcode(int cpu); +extern void microcode_fini_cpu(int cpu); +extern void apply_microcode(int cpu); +extern int apply_microcode_check_cpu(int cpu); + +#ifdef CONFIG_MICROCODE_OLD_INTERFACE +void __user *user_buffer; /* user area microcode data buffer */ +unsigned int user_buffer_size; /* it's size */ + +static int do_microcode_update (void) +{ + long cursor = 0; + int error = 0; + void *new_mc = NULL; + int cpu; + cpumask_t old; + cpumask_of_cpu_ptr_declare(newmask); + + old = current->cpus_allowed; + + while ((cursor = get_next_ucode(&new_mc, cursor)) > 0) { + error = microcode_sanity_check(new_mc); + if (error) + goto out; + /* + * It's possible the data file has multiple matching ucode, + * lets keep searching till the latest version + */ + for_each_online_cpu(cpu) { + struct ucode_cpu_info *uci = ucode_cpu_info + cpu; + + if (!uci->valid) + continue; + cpumask_of_cpu_ptr_next(newmask, cpu); + set_cpus_allowed_ptr(current, newmask); + error = get_maching_microcode(new_mc, cpu); + if (error < 0) + goto out; + if (error == 1) + apply_microcode(cpu); + } + vfree(new_mc); + } +out: + if (cursor > 0) + vfree(new_mc); + if (cursor < 0) + error = cursor; + set_cpus_allowed_ptr(current, &old); + return error; +} + +static int microcode_open (struct inode *unused1, struct file *unused2) +{ + cycle_kernel_lock(); + return capable(CAP_SYS_RAWIO) ? 0 : -EPERM; +} + +static ssize_t microcode_write (struct file *file, const char __user *buf, size_t len, loff_t *ppos) +{ + ssize_t ret; + + if ((len >> PAGE_SHIFT) > num_physpages) { + printk(KERN_ERR "microcode: too much data (max %ld pages)\n", num_physpages); + return -EINVAL; + } + + get_online_cpus(); + mutex_lock(µcode_mutex); + + user_buffer = (void __user *) buf; + user_buffer_size = (int) len; + + ret = do_microcode_update(); + if (!ret) + ret = (ssize_t)len; + + mutex_unlock(µcode_mutex); + put_online_cpus(); + + return ret; +} + +static const struct file_operations microcode_fops = { + .owner = THIS_MODULE, + .write = microcode_write, + .open = microcode_open, +}; + +static struct miscdevice microcode_dev = { + .minor = MICROCODE_MINOR, + .name = "microcode", + .fops = µcode_fops, +}; + +static int __init microcode_dev_init (void) +{ + int error; + + error = misc_register(µcode_dev); + if (error) { + printk(KERN_ERR + "microcode: can't misc_register on minor=%d\n", + MICROCODE_MINOR); + return error; + } + + return 0; +} + +static void microcode_dev_exit (void) +{ + misc_deregister(µcode_dev); +} + +MODULE_ALIAS_MISCDEV(MICROCODE_MINOR); +#else +#define microcode_dev_init() 0 +#define microcode_dev_exit() do { } while(0) +#endif + +/* fake device for request_firmware */ +struct platform_device *microcode_pdev; + +static void microcode_init_cpu(int cpu, int resume) +{ + cpumask_t old; + cpumask_of_cpu_ptr(newmask, cpu); + struct ucode_cpu_info *uci = ucode_cpu_info + cpu; + + old = current->cpus_allowed; + + set_cpus_allowed_ptr(current, newmask); + mutex_lock(µcode_mutex); + collect_cpu_info(cpu); + if (uci->valid && system_state == SYSTEM_RUNNING && !resume) + cpu_request_microcode(cpu); + mutex_unlock(µcode_mutex); + set_cpus_allowed_ptr(current, &old); +} + +static ssize_t reload_store(struct sys_device *dev, + struct sysdev_attribute *attr, + const char *buf, size_t sz) +{ + struct ucode_cpu_info *uci = ucode_cpu_info + dev->id; + char *end; + unsigned long val = simple_strtoul(buf, &end, 0); + int err = 0; + int cpu = dev->id; + + if (end == buf) + return -EINVAL; + if (val == 1) { + cpumask_t old; + cpumask_of_cpu_ptr(newmask, cpu); + + old = current->cpus_allowed; + + get_online_cpus(); + set_cpus_allowed_ptr(current, newmask); + + mutex_lock(µcode_mutex); + if (uci->valid) + err = cpu_request_microcode(cpu); + mutex_unlock(µcode_mutex); + put_online_cpus(); + set_cpus_allowed_ptr(current, &old); + } + if (err) + return err; + return sz; +} + +static ssize_t version_show(struct sys_device *dev, + struct sysdev_attribute *attr, char *buf) +{ + struct ucode_cpu_info *uci = ucode_cpu_info + dev->id; + + return sprintf(buf, "0x%x\n", uci->rev); +} + +static ssize_t pf_show(struct sys_device *dev, + struct sysdev_attribute *attr, char *buf) +{ + struct ucode_cpu_info *uci = ucode_cpu_info + dev->id; + + return sprintf(buf, "0x%x\n", uci->pf); +} + +static SYSDEV_ATTR(reload, 0200, NULL, reload_store); +static SYSDEV_ATTR(version, 0400, version_show, NULL); +static SYSDEV_ATTR(processor_flags, 0400, pf_show, NULL); + +static struct attribute *mc_default_attrs[] = { + &attr_reload.attr, + &attr_version.attr, + &attr_processor_flags.attr, + NULL +}; + +static struct attribute_group mc_attr_group = { + .attrs = mc_default_attrs, + .name = "microcode", +}; + +static int __mc_sysdev_add(struct sys_device *sys_dev, int resume) +{ + int err, cpu = sys_dev->id; + struct ucode_cpu_info *uci = ucode_cpu_info + cpu; + + if (!cpu_online(cpu)) + return 0; + + pr_debug("microcode: CPU%d added\n", cpu); + memset(uci, 0, sizeof(*uci)); + + err = sysfs_create_group(&sys_dev->kobj, &mc_attr_group); + if (err) + return err; + + microcode_init_cpu(cpu, resume); + + return 0; +} + +static int mc_sysdev_add(struct sys_device *sys_dev) +{ + return __mc_sysdev_add(sys_dev, 0); +} + +static int mc_sysdev_remove(struct sys_device *sys_dev) +{ + int cpu = sys_dev->id; + + if (!cpu_online(cpu)) + return 0; + + pr_debug("microcode: CPU%d removed\n", cpu); + microcode_fini_cpu(cpu); + sysfs_remove_group(&sys_dev->kobj, &mc_attr_group); + return 0; +} + +static int mc_sysdev_resume(struct sys_device *dev) +{ + int cpu = dev->id; + + if (!cpu_online(cpu)) + return 0; + pr_debug("microcode: CPU%d resumed\n", cpu); + /* only CPU 0 will apply ucode here */ + apply_microcode(0); + return 0; +} + +static struct sysdev_driver mc_sysdev_driver = { + .add = mc_sysdev_add, + .remove = mc_sysdev_remove, + .resume = mc_sysdev_resume, +}; + +static __cpuinit int +mc_cpu_callback(struct notifier_block *nb, unsigned long action, void *hcpu) +{ + unsigned int cpu = (unsigned long)hcpu; + struct sys_device *sys_dev; + + sys_dev = get_cpu_sysdev(cpu); + switch (action) { + case CPU_UP_CANCELED_FROZEN: + /* The CPU refused to come up during a system resume */ + microcode_fini_cpu(cpu); + break; + case CPU_ONLINE: + case CPU_DOWN_FAILED: + mc_sysdev_add(sys_dev); + break; + case CPU_ONLINE_FROZEN: + /* System-wide resume is in progress, try to apply microcode */ + if (apply_microcode_check_cpu(cpu)) { + /* The application of microcode failed */ + microcode_fini_cpu(cpu); + __mc_sysdev_add(sys_dev, 1); + break; + } + case CPU_DOWN_FAILED_FROZEN: + if (sysfs_create_group(&sys_dev->kobj, &mc_attr_group)) + printk(KERN_ERR "microcode: Failed to create the sysfs " + "group for CPU%d\n", cpu); + break; + case CPU_DOWN_PREPARE: + mc_sysdev_remove(sys_dev); + break; + case CPU_DOWN_PREPARE_FROZEN: + /* Suspend is in progress, only remove the interface */ + sysfs_remove_group(&sys_dev->kobj, &mc_attr_group); + break; + } + return NOTIFY_OK; +} + +static struct notifier_block __refdata mc_cpu_notifier = { + .notifier_call = mc_cpu_callback, +}; + +static int __init microcode_init (void) +{ + int error; + + printk(KERN_INFO + "IA-32 Microcode Update Driver: v" MICROCODE_VERSION " \n"); + + error = microcode_dev_init(); + if (error) + return error; + microcode_pdev = platform_device_register_simple("microcode", -1, + NULL, 0); + if (IS_ERR(microcode_pdev)) { + microcode_dev_exit(); + return PTR_ERR(microcode_pdev); + } + + get_online_cpus(); + error = sysdev_driver_register(&cpu_sysdev_class, &mc_sysdev_driver); + put_online_cpus(); + if (error) { + microcode_dev_exit(); + platform_device_unregister(microcode_pdev); + return error; + } + + register_hotcpu_notifier(&mc_cpu_notifier); + return 0; +} + +static void __exit microcode_exit (void) +{ + microcode_dev_exit(); + + unregister_hotcpu_notifier(&mc_cpu_notifier); + + get_online_cpus(); + sysdev_driver_unregister(&cpu_sysdev_class, &mc_sysdev_driver); + put_online_cpus(); + + platform_device_unregister(microcode_pdev); +} + +module_init(microcode_init) +module_exit(microcode_exit) diff --git a/arch/x86/kernel/microcode_intel.c b/arch/x86/kernel/microcode_intel.c index 4e7b2f65fed..eded0a154ea 100644 --- a/arch/x86/kernel/microcode_intel.c +++ b/arch/x86/kernel/microcode_intel.c @@ -95,18 +95,16 @@ #include #include -MODULE_DESCRIPTION("Intel CPU (IA-32) Microcode Update Driver"); +MODULE_DESCRIPTION("Microcode Update Driver"); MODULE_AUTHOR("Tigran Aivazian "); MODULE_LICENSE("GPL"); -#define MICROCODE_VERSION "1.14a" - #define DEFAULT_UCODE_DATASIZE (2000) /* 2000 bytes */ -#define MC_HEADER_SIZE (sizeof (struct microcode_header)) /* 48 bytes */ +#define MC_HEADER_SIZE (sizeof(struct microcode_header)) /* 48 bytes */ #define DEFAULT_UCODE_TOTALSIZE (DEFAULT_UCODE_DATASIZE + MC_HEADER_SIZE) /* 2048 bytes */ -#define EXT_HEADER_SIZE (sizeof (struct extended_sigtable)) /* 20 bytes */ -#define EXT_SIGNATURE_SIZE (sizeof (struct extended_signature)) /* 12 bytes */ -#define DWSIZE (sizeof (u32)) +#define EXT_HEADER_SIZE (sizeof(struct extended_sigtable)) /* 20 bytes */ +#define EXT_SIGNATURE_SIZE (sizeof(struct extended_signature)) /* 12 bytes */ +#define DWSIZE (sizeof(u32)) #define get_totalsize(mc) \ (((struct microcode *)mc)->hdr.totalsize ? \ ((struct microcode *)mc)->hdr.totalsize : DEFAULT_UCODE_TOTALSIZE) @@ -123,11 +121,11 @@ MODULE_LICENSE("GPL"); static DEFINE_SPINLOCK(microcode_update_lock); /* no concurrent ->write()s are allowed on /dev/cpu/microcode */ -static DEFINE_MUTEX(microcode_mutex); +extern struct mutex microcode_mutex; -static struct ucode_cpu_info ucode_cpu_info[NR_CPUS]; +extern struct ucode_cpu_info ucode_cpu_info[NR_CPUS]; -static void collect_cpu_info(int cpu_num) +void collect_cpu_info(int cpu_num) { struct cpuinfo_x86 *c = &cpu_data(cpu_num); struct ucode_cpu_info *uci = ucode_cpu_info + cpu_num; @@ -140,7 +138,7 @@ static void collect_cpu_info(int cpu_num) uci->valid = 1; if (c->x86_vendor != X86_VENDOR_INTEL || c->x86 < 6 || - cpu_has(c, X86_FEATURE_IA64)) { + cpu_has(c, X86_FEATURE_IA64)) { printk(KERN_ERR "microcode: CPU%d not a capable Intel " "processor\n", cpu_num); uci->valid = 0; @@ -175,7 +173,7 @@ static inline int microcode_update_match(int cpu_num, return 1; } -static int microcode_sanity_check(void *mc) +int microcode_sanity_check(void *mc) { struct microcode_header *mc_header = mc; struct extended_sigtable *ext_header = NULL; @@ -259,7 +257,7 @@ static int microcode_sanity_check(void *mc) * return 1 - found update * return < 0 - error */ -static int get_maching_microcode(void *mc, int cpu) +int get_matching_microcode(void *mc, int cpu) { struct ucode_cpu_info *uci = ucode_cpu_info + cpu; struct microcode_header *mc_header = mc; @@ -288,7 +286,7 @@ static int get_maching_microcode(void *mc, int cpu) return 0; find: pr_debug("microcode: CPU%d found a matching microcode update with" - " version 0x%x (current=0x%x)\n", cpu, mc_header->rev,uci->rev); + " version 0x%x (current=0x%x)\n", cpu, mc_header->rev, uci->rev); new_mc = vmalloc(total_size); if (!new_mc) { printk(KERN_ERR "microcode: error! Can not allocate memory\n"); @@ -303,7 +301,7 @@ find: return 1; } -static void apply_microcode(int cpu) +void apply_microcode(int cpu) { unsigned long flags; unsigned int val[2]; @@ -344,10 +342,10 @@ static void apply_microcode(int cpu) } #ifdef CONFIG_MICROCODE_OLD_INTERFACE -static void __user *user_buffer; /* user area microcode data buffer */ -static unsigned int user_buffer_size; /* it's size */ +extern void __user *user_buffer; /* user area microcode data buffer */ +extern unsigned int user_buffer_size; /* it's size */ -static long get_next_ucode(void **mc, long offset) +long get_next_ucode(void **mc, long offset) { struct microcode_header mc_header; unsigned long total_size; @@ -375,117 +373,6 @@ static long get_next_ucode(void **mc, long offset) } return offset + total_size; } - -static int do_microcode_update (void) -{ - long cursor = 0; - int error = 0; - void *new_mc = NULL; - int cpu; - cpumask_t old; - cpumask_of_cpu_ptr_declare(newmask); - - old = current->cpus_allowed; - - while ((cursor = get_next_ucode(&new_mc, cursor)) > 0) { - error = microcode_sanity_check(new_mc); - if (error) - goto out; - /* - * It's possible the data file has multiple matching ucode, - * lets keep searching till the latest version - */ - for_each_online_cpu(cpu) { - struct ucode_cpu_info *uci = ucode_cpu_info + cpu; - - if (!uci->valid) - continue; - cpumask_of_cpu_ptr_next(newmask, cpu); - set_cpus_allowed_ptr(current, newmask); - error = get_maching_microcode(new_mc, cpu); - if (error < 0) - goto out; - if (error == 1) - apply_microcode(cpu); - } - vfree(new_mc); - } -out: - if (cursor > 0) - vfree(new_mc); - if (cursor < 0) - error = cursor; - set_cpus_allowed_ptr(current, &old); - return error; -} - -static int microcode_open (struct inode *unused1, struct file *unused2) -{ - cycle_kernel_lock(); - return capable(CAP_SYS_RAWIO) ? 0 : -EPERM; -} - -static ssize_t microcode_write (struct file *file, const char __user *buf, size_t len, loff_t *ppos) -{ - ssize_t ret; - - if ((len >> PAGE_SHIFT) > num_physpages) { - printk(KERN_ERR "microcode: too much data (max %ld pages)\n", num_physpages); - return -EINVAL; - } - - get_online_cpus(); - mutex_lock(µcode_mutex); - - user_buffer = (void __user *) buf; - user_buffer_size = (int) len; - - ret = do_microcode_update(); - if (!ret) - ret = (ssize_t)len; - - mutex_unlock(µcode_mutex); - put_online_cpus(); - - return ret; -} - -static const struct file_operations microcode_fops = { - .owner = THIS_MODULE, - .write = microcode_write, - .open = microcode_open, -}; - -static struct miscdevice microcode_dev = { - .minor = MICROCODE_MINOR, - .name = "microcode", - .fops = µcode_fops, -}; - -static int __init microcode_dev_init (void) -{ - int error; - - error = misc_register(µcode_dev); - if (error) { - printk(KERN_ERR - "microcode: can't misc_register on minor=%d\n", - MICROCODE_MINOR); - return error; - } - - return 0; -} - -static void microcode_dev_exit (void) -{ - misc_deregister(µcode_dev); -} - -MODULE_ALIAS_MISCDEV(MICROCODE_MINOR); -#else -#define microcode_dev_init() 0 -#define microcode_dev_exit() do { } while(0) #endif static long get_next_ucode_from_buffer(void **mc, const u8 *buf, @@ -515,9 +402,9 @@ static long get_next_ucode_from_buffer(void **mc, const u8 *buf, } /* fake device for request_firmware */ -static struct platform_device *microcode_pdev; +extern struct platform_device *microcode_pdev; -static int cpu_request_microcode(int cpu) +int cpu_request_microcode(int cpu) { char name[30]; struct cpuinfo_x86 *c = &cpu_data(cpu); @@ -530,7 +417,7 @@ static int cpu_request_microcode(int cpu) /* We should bind the task to the CPU */ BUG_ON(cpu != raw_smp_processor_id()); - sprintf(name,"intel-ucode/%02x-%02x-%02x", + sprintf(name, "intel-ucode/%02x-%02x-%02x", c->x86, c->x86_model, c->x86_mask); error = request_firmware(&firmware, name, µcode_pdev->dev); if (error) { @@ -544,7 +431,7 @@ static int cpu_request_microcode(int cpu) error = microcode_sanity_check(mc); if (error) break; - error = get_maching_microcode(mc, cpu); + error = get_matching_microcode(mc, cpu); if (error < 0) break; /* @@ -566,7 +453,7 @@ static int cpu_request_microcode(int cpu) return error; } -static int apply_microcode_check_cpu(int cpu) +int apply_microcode_check_cpu(int cpu) { struct cpuinfo_x86 *c = &cpu_data(cpu); struct ucode_cpu_info *uci = ucode_cpu_info + cpu; @@ -615,241 +502,13 @@ static int apply_microcode_check_cpu(int cpu) return err; } -static void microcode_init_cpu(int cpu, int resume) -{ - cpumask_t old; - cpumask_of_cpu_ptr(newmask, cpu); - struct ucode_cpu_info *uci = ucode_cpu_info + cpu; - - old = current->cpus_allowed; - - set_cpus_allowed_ptr(current, newmask); - mutex_lock(µcode_mutex); - collect_cpu_info(cpu); - if (uci->valid && system_state == SYSTEM_RUNNING && !resume) - cpu_request_microcode(cpu); - mutex_unlock(µcode_mutex); - set_cpus_allowed_ptr(current, &old); -} - -static void microcode_fini_cpu(int cpu) +void microcode_fini_cpu(int cpu) { struct ucode_cpu_info *uci = ucode_cpu_info + cpu; mutex_lock(µcode_mutex); uci->valid = 0; - vfree(uci->mc); + kfree(uci->mc); uci->mc = NULL; mutex_unlock(µcode_mutex); } - -static ssize_t reload_store(struct sys_device *dev, - struct sysdev_attribute *attr, - const char *buf, size_t sz) -{ - struct ucode_cpu_info *uci = ucode_cpu_info + dev->id; - char *end; - unsigned long val = simple_strtoul(buf, &end, 0); - int err = 0; - int cpu = dev->id; - - if (end == buf) - return -EINVAL; - if (val == 1) { - cpumask_t old; - cpumask_of_cpu_ptr(newmask, cpu); - - old = current->cpus_allowed; - - get_online_cpus(); - set_cpus_allowed_ptr(current, newmask); - - mutex_lock(µcode_mutex); - if (uci->valid) - err = cpu_request_microcode(cpu); - mutex_unlock(µcode_mutex); - put_online_cpus(); - set_cpus_allowed_ptr(current, &old); - } - if (err) - return err; - return sz; -} - -static ssize_t version_show(struct sys_device *dev, - struct sysdev_attribute *attr, char *buf) -{ - struct ucode_cpu_info *uci = ucode_cpu_info + dev->id; - - return sprintf(buf, "0x%x\n", uci->rev); -} - -static ssize_t pf_show(struct sys_device *dev, - struct sysdev_attribute *attr, char *buf) -{ - struct ucode_cpu_info *uci = ucode_cpu_info + dev->id; - - return sprintf(buf, "0x%x\n", uci->pf); -} - -static SYSDEV_ATTR(reload, 0200, NULL, reload_store); -static SYSDEV_ATTR(version, 0400, version_show, NULL); -static SYSDEV_ATTR(processor_flags, 0400, pf_show, NULL); - -static struct attribute *mc_default_attrs[] = { - &attr_reload.attr, - &attr_version.attr, - &attr_processor_flags.attr, - NULL -}; - -static struct attribute_group mc_attr_group = { - .attrs = mc_default_attrs, - .name = "microcode", -}; - -static int __mc_sysdev_add(struct sys_device *sys_dev, int resume) -{ - int err, cpu = sys_dev->id; - struct ucode_cpu_info *uci = ucode_cpu_info + cpu; - - if (!cpu_online(cpu)) - return 0; - - pr_debug("microcode: CPU%d added\n", cpu); - memset(uci, 0, sizeof(*uci)); - - err = sysfs_create_group(&sys_dev->kobj, &mc_attr_group); - if (err) - return err; - - microcode_init_cpu(cpu, resume); - - return 0; -} - -static int mc_sysdev_add(struct sys_device *sys_dev) -{ - return __mc_sysdev_add(sys_dev, 0); -} - -static int mc_sysdev_remove(struct sys_device *sys_dev) -{ - int cpu = sys_dev->id; - - if (!cpu_online(cpu)) - return 0; - - pr_debug("microcode: CPU%d removed\n", cpu); - microcode_fini_cpu(cpu); - sysfs_remove_group(&sys_dev->kobj, &mc_attr_group); - return 0; -} - -static int mc_sysdev_resume(struct sys_device *dev) -{ - int cpu = dev->id; - - if (!cpu_online(cpu)) - return 0; - pr_debug("microcode: CPU%d resumed\n", cpu); - /* only CPU 0 will apply ucode here */ - apply_microcode(0); - return 0; -} - -static struct sysdev_driver mc_sysdev_driver = { - .add = mc_sysdev_add, - .remove = mc_sysdev_remove, - .resume = mc_sysdev_resume, -}; - -static __cpuinit int -mc_cpu_callback(struct notifier_block *nb, unsigned long action, void *hcpu) -{ - unsigned int cpu = (unsigned long)hcpu; - struct sys_device *sys_dev; - - sys_dev = get_cpu_sysdev(cpu); - switch (action) { - case CPU_UP_CANCELED_FROZEN: - /* The CPU refused to come up during a system resume */ - microcode_fini_cpu(cpu); - break; - case CPU_ONLINE: - case CPU_DOWN_FAILED: - mc_sysdev_add(sys_dev); - break; - case CPU_ONLINE_FROZEN: - /* System-wide resume is in progress, try to apply microcode */ - if (apply_microcode_check_cpu(cpu)) { - /* The application of microcode failed */ - microcode_fini_cpu(cpu); - __mc_sysdev_add(sys_dev, 1); - break; - } - case CPU_DOWN_FAILED_FROZEN: - if (sysfs_create_group(&sys_dev->kobj, &mc_attr_group)) - printk(KERN_ERR "microcode: Failed to create the sysfs " - "group for CPU%d\n", cpu); - break; - case CPU_DOWN_PREPARE: - mc_sysdev_remove(sys_dev); - break; - case CPU_DOWN_PREPARE_FROZEN: - /* Suspend is in progress, only remove the interface */ - sysfs_remove_group(&sys_dev->kobj, &mc_attr_group); - break; - } - return NOTIFY_OK; -} - -static struct notifier_block __refdata mc_cpu_notifier = { - .notifier_call = mc_cpu_callback, -}; - -static int __init microcode_init (void) -{ - int error; - - printk(KERN_INFO - "IA-32 Microcode Update Driver: v" MICROCODE_VERSION " \n"); - - error = microcode_dev_init(); - if (error) - return error; - microcode_pdev = platform_device_register_simple("microcode", -1, - NULL, 0); - if (IS_ERR(microcode_pdev)) { - microcode_dev_exit(); - return PTR_ERR(microcode_pdev); - } - - get_online_cpus(); - error = sysdev_driver_register(&cpu_sysdev_class, &mc_sysdev_driver); - put_online_cpus(); - if (error) { - microcode_dev_exit(); - platform_device_unregister(microcode_pdev); - return error; - } - - register_hotcpu_notifier(&mc_cpu_notifier); - return 0; -} - -static void __exit microcode_exit (void) -{ - microcode_dev_exit(); - - unregister_hotcpu_notifier(&mc_cpu_notifier); - - get_online_cpus(); - sysdev_driver_unregister(&cpu_sysdev_class, &mc_sysdev_driver); - put_online_cpus(); - - platform_device_unregister(microcode_pdev); -} - -module_init(microcode_init) -module_exit(microcode_exit) -- cgit v1.2.3 From d4ee36686853d5714437c4409f17ad42bfaf4211 Mon Sep 17 00:00:00 2001 From: Peter Oruba Date: Mon, 28 Jul 2008 18:44:18 +0200 Subject: x86: structure declaration renaming Renamed common structures to vendor specific naming scheme so other vendors will be able to use the same naming convention. Signed-off-by: Peter Oruba Cc: Tigran Aivazian Signed-off-by: Ingo Molnar --- arch/x86/kernel/microcode_intel.c | 46 ++++++++++++++++++++------------------- 1 file changed, 24 insertions(+), 22 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/microcode_intel.c b/arch/x86/kernel/microcode_intel.c index eded0a154ea..ca9861bf067 100644 --- a/arch/x86/kernel/microcode_intel.c +++ b/arch/x86/kernel/microcode_intel.c @@ -100,17 +100,19 @@ MODULE_AUTHOR("Tigran Aivazian "); MODULE_LICENSE("GPL"); #define DEFAULT_UCODE_DATASIZE (2000) /* 2000 bytes */ -#define MC_HEADER_SIZE (sizeof(struct microcode_header)) /* 48 bytes */ +#define MC_HEADER_SIZE (sizeof(struct microcode_header_intel)) /* 48 bytes */ #define DEFAULT_UCODE_TOTALSIZE (DEFAULT_UCODE_DATASIZE + MC_HEADER_SIZE) /* 2048 bytes */ #define EXT_HEADER_SIZE (sizeof(struct extended_sigtable)) /* 20 bytes */ #define EXT_SIGNATURE_SIZE (sizeof(struct extended_signature)) /* 12 bytes */ #define DWSIZE (sizeof(u32)) #define get_totalsize(mc) \ - (((struct microcode *)mc)->hdr.totalsize ? \ - ((struct microcode *)mc)->hdr.totalsize : DEFAULT_UCODE_TOTALSIZE) + (((struct microcode_intel *)mc)->hdr.totalsize ? \ + ((struct microcode_intel *)mc)->hdr.totalsize : \ + DEFAULT_UCODE_TOTALSIZE) + #define get_datasize(mc) \ - (((struct microcode *)mc)->hdr.datasize ? \ - ((struct microcode *)mc)->hdr.datasize : DEFAULT_UCODE_DATASIZE) + (((struct microcode_intel *)mc)->hdr.datasize ? \ + ((struct microcode_intel *)mc)->hdr.datasize : DEFAULT_UCODE_DATASIZE) #define sigmatch(s1, s2, p1, p2) \ (((s1) == (s2)) && (((p1) & (p2)) || (((p1) == 0) && ((p2) == 0)))) @@ -134,7 +136,7 @@ void collect_cpu_info(int cpu_num) /* We should bind the task to the CPU */ BUG_ON(raw_smp_processor_id() != cpu_num); uci->pf = uci->rev = 0; - uci->mc = NULL; + uci->mc.mc_intel = NULL; uci->valid = 1; if (c->x86_vendor != X86_VENDOR_INTEL || c->x86 < 6 || @@ -163,7 +165,7 @@ void collect_cpu_info(int cpu_num) } static inline int microcode_update_match(int cpu_num, - struct microcode_header *mc_header, int sig, int pf) + struct microcode_header_intel *mc_header, int sig, int pf) { struct ucode_cpu_info *uci = ucode_cpu_info + cpu_num; @@ -175,7 +177,7 @@ static inline int microcode_update_match(int cpu_num, int microcode_sanity_check(void *mc) { - struct microcode_header *mc_header = mc; + struct microcode_header_intel *mc_header = mc; struct extended_sigtable *ext_header = NULL; struct extended_signature *ext_sig; unsigned long total_size, data_size, ext_table_size; @@ -260,7 +262,7 @@ int microcode_sanity_check(void *mc) int get_matching_microcode(void *mc, int cpu) { struct ucode_cpu_info *uci = ucode_cpu_info + cpu; - struct microcode_header *mc_header = mc; + struct microcode_header_intel *mc_header = mc; struct extended_sigtable *ext_header; unsigned long total_size = get_totalsize(mc_header); int ext_sigcount, i; @@ -294,10 +296,10 @@ find: } /* free previous update file */ - vfree(uci->mc); + vfree(uci->mc.mc_intel); memcpy(new_mc, mc, total_size); - uci->mc = new_mc; + uci->mc.mc_intel = new_mc; return 1; } @@ -311,7 +313,7 @@ void apply_microcode(int cpu) /* We should bind the task to the CPU */ BUG_ON(cpu_num != cpu); - if (uci->mc == NULL) + if (uci->mc.mc_intel == NULL) return; /* serialize access to the physical write to MSR 0x79 */ @@ -319,8 +321,8 @@ void apply_microcode(int cpu) /* write microcode via MSR 0x79 */ wrmsr(MSR_IA32_UCODE_WRITE, - (unsigned long) uci->mc->bits, - (unsigned long) uci->mc->bits >> 16 >> 16); + (unsigned long) uci->mc.mc_intel->bits, + (unsigned long) uci->mc.mc_intel->bits >> 16 >> 16); wrmsr(MSR_IA32_UCODE_REV, 0, 0); /* see notes above for revision 1.07. Apparent chip bug */ @@ -330,14 +332,14 @@ void apply_microcode(int cpu) rdmsr(MSR_IA32_UCODE_REV, val[0], val[1]); spin_unlock_irqrestore(µcode_update_lock, flags); - if (val[1] != uci->mc->hdr.rev) { + if (val[1] != uci->mc.mc_intel->hdr.rev) { printk(KERN_ERR "microcode: CPU%d update from revision " "0x%x to 0x%x failed\n", cpu_num, uci->rev, val[1]); return; } printk(KERN_INFO "microcode: CPU%d updated from revision " "0x%x to 0x%x, date = %08x \n", - cpu_num, uci->rev, val[1], uci->mc->hdr.date); + cpu_num, uci->rev, val[1], uci->mc.mc_intel->hdr.date); uci->rev = val[1]; } @@ -347,7 +349,7 @@ extern unsigned int user_buffer_size; /* it's size */ long get_next_ucode(void **mc, long offset) { - struct microcode_header mc_header; + struct microcode_header_intel mc_header; unsigned long total_size; /* No more data */ @@ -378,13 +380,13 @@ long get_next_ucode(void **mc, long offset) static long get_next_ucode_from_buffer(void **mc, const u8 *buf, unsigned long size, long offset) { - struct microcode_header *mc_header; + struct microcode_header_intel *mc_header; unsigned long total_size; /* No more data */ if (offset >= size) return 0; - mc_header = (struct microcode_header *)(buf + offset); + mc_header = (struct microcode_header_intel *)(buf + offset); total_size = get_totalsize(mc_header); if (offset + total_size > size) { @@ -463,7 +465,7 @@ int apply_microcode_check_cpu(int cpu) int err = 0; /* Check if the microcode is available */ - if (!uci->mc) + if (!uci->mc.mc_intel) return 0; old = current->cpus_allowed; @@ -508,7 +510,7 @@ void microcode_fini_cpu(int cpu) mutex_lock(µcode_mutex); uci->valid = 0; - kfree(uci->mc); - uci->mc = NULL; + kfree(uci->mc.mc_intel); + uci->mc.mc_intel = NULL; mutex_unlock(µcode_mutex); } -- cgit v1.2.3 From 8d86f390d9bb5b39f0a315838d1616de6363e1b9 Mon Sep 17 00:00:00 2001 From: Peter Oruba Date: Mon, 28 Jul 2008 18:44:21 +0200 Subject: x86: major refactoring Refactored code by introducing a two-module solution. There is one general module in which vendor specific modules can hook into. However, that is exclusive, there is only one vendor specific module allowed at a time. A CPU vendor check makes sure only the correct module for the underlying system gets called. Functinally in terms of patch loading itself there are no changes. This refactoring provides a basis for future implementations of other vendors' patch loaders. Signed-off-by: Peter Oruba Cc: Tigran Aivazian Signed-off-by: Ingo Molnar --- arch/x86/kernel/Makefile | 4 +- arch/x86/kernel/microcode.c | 80 +++++++++++++++++++++++---------------- arch/x86/kernel/microcode_intel.c | 50 +++++++++++++++++++----- 3 files changed, 89 insertions(+), 45 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index abb32aed7f4..f2f9f6da8c1 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -51,8 +51,8 @@ obj-$(CONFIG_X86_BIOS_REBOOT) += reboot.o obj-$(CONFIG_MCA) += mca_32.o obj-$(CONFIG_X86_MSR) += msr.o obj-$(CONFIG_X86_CPUID) += cpuid.o -obj-$(CONFIG_MICROCODE) += ucode.o -ucode-objs := microcode.o microcode_intel.o +obj-$(CONFIG_MICROCODE) += microcode.o +obj-$(CONFIG_MICROCODE_INTEL) += microcode_intel.o obj-$(CONFIG_PCI) += early-quirks.o apm-y := apm_32.o obj-$(CONFIG_APM) += apm.o diff --git a/arch/x86/kernel/microcode.c b/arch/x86/kernel/microcode.c index c1047d7f7ed..1e42e79ca69 100644 --- a/arch/x86/kernel/microcode.c +++ b/arch/x86/kernel/microcode.c @@ -99,25 +99,22 @@ MODULE_DESCRIPTION("Microcode Update Driver"); MODULE_AUTHOR("Tigran Aivazian "); MODULE_LICENSE("GPL"); -#define MICROCODE_VERSION "1.14a" +#define MICROCODE_VERSION "2.00" -/* no concurrent ->write()s are allowed on /dev/cpu/microcode */ -DEFINE_MUTEX(microcode_mutex); +struct microcode_ops *microcode_ops; -struct ucode_cpu_info ucode_cpu_info[NR_CPUS]; +/* no concurrent ->write()s are allowed on /dev/cpu/microcode */ +static DEFINE_MUTEX(microcode_mutex); +EXPORT_SYMBOL_GPL(microcode_mutex); -extern long get_next_ucode(void **mc, long offset); -extern int microcode_sanity_check(void *mc); -extern int get_matching_microcode(void *mc, int cpu); -extern void collect_cpu_info(int cpu_num); -extern int cpu_request_microcode(int cpu); -extern void microcode_fini_cpu(int cpu); -extern void apply_microcode(int cpu); -extern int apply_microcode_check_cpu(int cpu); +static struct ucode_cpu_info ucode_cpu_info[NR_CPUS]; +EXPORT_SYMBOL_GPL(ucode_cpu_info); #ifdef CONFIG_MICROCODE_OLD_INTERFACE -void __user *user_buffer; /* user area microcode data buffer */ -unsigned int user_buffer_size; /* it's size */ +static void __user *user_buffer; /* user area microcode data buffer */ +EXPORT_SYMBOL_GPL(user_buffer); +static unsigned int user_buffer_size; /* it's size */ +EXPORT_SYMBOL_GPL(user_buffer_size); static int do_microcode_update (void) { @@ -130,8 +127,8 @@ static int do_microcode_update (void) old = current->cpus_allowed; - while ((cursor = get_next_ucode(&new_mc, cursor)) > 0) { - error = microcode_sanity_check(new_mc); + while ((cursor = microcode_ops->get_next_ucode(&new_mc, cursor)) > 0) { + error = microcode_ops->microcode_sanity_check(new_mc); if (error) goto out; /* @@ -145,11 +142,12 @@ static int do_microcode_update (void) continue; cpumask_of_cpu_ptr_next(newmask, cpu); set_cpus_allowed_ptr(current, newmask); - error = get_maching_microcode(new_mc, cpu); + error = microcode_ops->get_matching_microcode(new_mc, + cpu); if (error < 0) goto out; if (error == 1) - apply_microcode(cpu); + microcode_ops->apply_microcode(cpu); } vfree(new_mc); } @@ -232,7 +230,8 @@ MODULE_ALIAS_MISCDEV(MICROCODE_MINOR); #endif /* fake device for request_firmware */ -struct platform_device *microcode_pdev; +static struct platform_device *microcode_pdev; +EXPORT_SYMBOL_GPL(microcode_pdev); static void microcode_init_cpu(int cpu, int resume) { @@ -244,9 +243,9 @@ static void microcode_init_cpu(int cpu, int resume) set_cpus_allowed_ptr(current, newmask); mutex_lock(µcode_mutex); - collect_cpu_info(cpu); + microcode_ops->collect_cpu_info(cpu); if (uci->valid && system_state == SYSTEM_RUNNING && !resume) - cpu_request_microcode(cpu); + microcode_ops->cpu_request_microcode(cpu); mutex_unlock(µcode_mutex); set_cpus_allowed_ptr(current, &old); } @@ -274,7 +273,7 @@ static ssize_t reload_store(struct sys_device *dev, mutex_lock(µcode_mutex); if (uci->valid) - err = cpu_request_microcode(cpu); + err = microcode_ops->cpu_request_microcode(cpu); mutex_unlock(µcode_mutex); put_online_cpus(); set_cpus_allowed_ptr(current, &old); @@ -349,7 +348,7 @@ static int mc_sysdev_remove(struct sys_device *sys_dev) return 0; pr_debug("microcode: CPU%d removed\n", cpu); - microcode_fini_cpu(cpu); + microcode_ops->microcode_fini_cpu(cpu); sysfs_remove_group(&sys_dev->kobj, &mc_attr_group); return 0; } @@ -362,7 +361,7 @@ static int mc_sysdev_resume(struct sys_device *dev) return 0; pr_debug("microcode: CPU%d resumed\n", cpu); /* only CPU 0 will apply ucode here */ - apply_microcode(0); + microcode_ops->apply_microcode(0); return 0; } @@ -382,7 +381,7 @@ mc_cpu_callback(struct notifier_block *nb, unsigned long action, void *hcpu) switch (action) { case CPU_UP_CANCELED_FROZEN: /* The CPU refused to come up during a system resume */ - microcode_fini_cpu(cpu); + microcode_ops->microcode_fini_cpu(cpu); break; case CPU_ONLINE: case CPU_DOWN_FAILED: @@ -390,9 +389,9 @@ mc_cpu_callback(struct notifier_block *nb, unsigned long action, void *hcpu) break; case CPU_ONLINE_FROZEN: /* System-wide resume is in progress, try to apply microcode */ - if (apply_microcode_check_cpu(cpu)) { + if (microcode_ops->apply_microcode_check_cpu(cpu)) { /* The application of microcode failed */ - microcode_fini_cpu(cpu); + microcode_ops->microcode_fini_cpu(cpu); __mc_sysdev_add(sys_dev, 1); break; } @@ -416,12 +415,17 @@ static struct notifier_block __refdata mc_cpu_notifier = { .notifier_call = mc_cpu_callback, }; -static int __init microcode_init (void) +static int microcode_init(void *opaque, struct module *module) { + struct microcode_ops *ops = (struct microcode_ops *)opaque; int error; - printk(KERN_INFO - "IA-32 Microcode Update Driver: v" MICROCODE_VERSION " \n"); + if (microcode_ops) { + printk(KERN_ERR "microcode: already loaded the other module\n"); + return -EEXIST; + } + + microcode_ops = ops; error = microcode_dev_init(); if (error) @@ -443,8 +447,15 @@ static int __init microcode_init (void) } register_hotcpu_notifier(&mc_cpu_notifier); + + printk(KERN_INFO + "Microcode Update Driver: v" MICROCODE_VERSION + " " + " \n"); + return 0; } +EXPORT_SYMBOL_GPL(microcode_init); static void __exit microcode_exit (void) { @@ -457,7 +468,10 @@ static void __exit microcode_exit (void) put_online_cpus(); platform_device_unregister(microcode_pdev); -} -module_init(microcode_init) -module_exit(microcode_exit) + microcode_ops = NULL; + + printk(KERN_INFO + "Microcode Update Driver: v" MICROCODE_VERSION " removed.\n"); +} +EXPORT_SYMBOL_GPL(microcode_exit); diff --git a/arch/x86/kernel/microcode_intel.c b/arch/x86/kernel/microcode_intel.c index ca9861bf067..831db5363db 100644 --- a/arch/x86/kernel/microcode_intel.c +++ b/arch/x86/kernel/microcode_intel.c @@ -127,7 +127,7 @@ extern struct mutex microcode_mutex; extern struct ucode_cpu_info ucode_cpu_info[NR_CPUS]; -void collect_cpu_info(int cpu_num) +static void collect_cpu_info(int cpu_num) { struct cpuinfo_x86 *c = &cpu_data(cpu_num); struct ucode_cpu_info *uci = ucode_cpu_info + cpu_num; @@ -175,7 +175,7 @@ static inline int microcode_update_match(int cpu_num, return 1; } -int microcode_sanity_check(void *mc) +static int microcode_sanity_check(void *mc) { struct microcode_header_intel *mc_header = mc; struct extended_sigtable *ext_header = NULL; @@ -259,7 +259,7 @@ int microcode_sanity_check(void *mc) * return 1 - found update * return < 0 - error */ -int get_matching_microcode(void *mc, int cpu) +static int get_matching_microcode(void *mc, int cpu) { struct ucode_cpu_info *uci = ucode_cpu_info + cpu; struct microcode_header_intel *mc_header = mc; @@ -288,7 +288,8 @@ int get_matching_microcode(void *mc, int cpu) return 0; find: pr_debug("microcode: CPU%d found a matching microcode update with" - " version 0x%x (current=0x%x)\n", cpu, mc_header->rev, uci->rev); + " version 0x%x (current=0x%x)\n", + cpu, mc_header->rev, uci->rev); new_mc = vmalloc(total_size); if (!new_mc) { printk(KERN_ERR "microcode: error! Can not allocate memory\n"); @@ -303,7 +304,7 @@ find: return 1; } -void apply_microcode(int cpu) +static void apply_microcode(int cpu) { unsigned long flags; unsigned int val[2]; @@ -347,7 +348,7 @@ void apply_microcode(int cpu) extern void __user *user_buffer; /* user area microcode data buffer */ extern unsigned int user_buffer_size; /* it's size */ -long get_next_ucode(void **mc, long offset) +static long get_next_ucode(void **mc, long offset) { struct microcode_header_intel mc_header; unsigned long total_size; @@ -406,7 +407,7 @@ static long get_next_ucode_from_buffer(void **mc, const u8 *buf, /* fake device for request_firmware */ extern struct platform_device *microcode_pdev; -int cpu_request_microcode(int cpu) +static int cpu_request_microcode(int cpu) { char name[30]; struct cpuinfo_x86 *c = &cpu_data(cpu); @@ -455,7 +456,7 @@ int cpu_request_microcode(int cpu) return error; } -int apply_microcode_check_cpu(int cpu) +static int apply_microcode_check_cpu(int cpu) { struct cpuinfo_x86 *c = &cpu_data(cpu); struct ucode_cpu_info *uci = ucode_cpu_info + cpu; @@ -504,13 +505,42 @@ int apply_microcode_check_cpu(int cpu) return err; } -void microcode_fini_cpu(int cpu) +static void microcode_fini_cpu(int cpu) { struct ucode_cpu_info *uci = ucode_cpu_info + cpu; mutex_lock(µcode_mutex); uci->valid = 0; - kfree(uci->mc.mc_intel); + vfree(uci->mc.mc_intel); uci->mc.mc_intel = NULL; mutex_unlock(µcode_mutex); } + +static struct microcode_ops microcode_intel_ops = { + .get_next_ucode = get_next_ucode, + .get_matching_microcode = get_matching_microcode, + .microcode_sanity_check = microcode_sanity_check, + .apply_microcode_check_cpu = apply_microcode_check_cpu, + .cpu_request_microcode = cpu_request_microcode, + .collect_cpu_info = collect_cpu_info, + .apply_microcode = apply_microcode, + .microcode_fini_cpu = microcode_fini_cpu, +}; + +static int __init microcode_intel_module_init(void) +{ + struct cpuinfo_x86 *c = &cpu_data(get_cpu()); + + if (c->x86_vendor == X86_VENDOR_INTEL) + return microcode_init(µcode_intel_ops, THIS_MODULE); + else + return -ENODEV; +} + +static void __exit microcode_intel_module_exit(void) +{ + microcode_exit(); +} + +module_init(microcode_intel_module_init) +module_exit(microcode_intel_module_exit) -- cgit v1.2.3 From 80cc9f1020f49c9e5b50898c102fd444de70a0a3 Mon Sep 17 00:00:00 2001 From: Peter Oruba Date: Mon, 28 Jul 2008 18:44:22 +0200 Subject: x86: AMD microcode patch loading support This patch introduces microcode patch loading for AMD processors. It is based on previous corresponding work for Intel processors. It hooks into the general patch loading module. Main difference is that a container file format is used to hold all patch data for multiple processors as well as an equivalent CPU table, which comes seperately, as opposed to Intel's microcode patching solution. Kconfig and Makefile have been changed provice config and build option for new source file. Signed-off-by: Peter Oruba Cc: Tigran Aivazian Signed-off-by: Ingo Molnar --- arch/x86/kernel/Makefile | 1 + arch/x86/kernel/microcode_amd.c | 521 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 522 insertions(+) create mode 100644 arch/x86/kernel/microcode_amd.c (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index f2f9f6da8c1..be454f348c3 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -53,6 +53,7 @@ obj-$(CONFIG_X86_MSR) += msr.o obj-$(CONFIG_X86_CPUID) += cpuid.o obj-$(CONFIG_MICROCODE) += microcode.o obj-$(CONFIG_MICROCODE_INTEL) += microcode_intel.o +obj-$(CONFIG_MICROCODE_AMD) += microcode_amd.o obj-$(CONFIG_PCI) += early-quirks.o apm-y := apm_32.o obj-$(CONFIG_APM) += apm.o diff --git a/arch/x86/kernel/microcode_amd.c b/arch/x86/kernel/microcode_amd.c new file mode 100644 index 00000000000..db199e3fdd1 --- /dev/null +++ b/arch/x86/kernel/microcode_amd.c @@ -0,0 +1,521 @@ +/* + * AMD CPU Microcode Update Driver for Linux + * Copyright (C) 2008 Advanced Micro Devices Inc. + * + * Author: Peter Oruba + * + * Based on work by: + * Tigran Aivazian + * + * This driver allows to upgrade microcode on AMD + * family 0x10 and 0x11 processors. + * + * Licensed unter the terms of the GNU General Public + * License version 2. See file COPYING for details. +*/ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +MODULE_DESCRIPTION("AMD Microcode Update Driver"); +MODULE_AUTHOR("Peter Oruba "); +MODULE_LICENSE("GPLv2"); + +#define UCODE_MAGIC 0x00414d44 +#define UCODE_EQUIV_CPU_TABLE_TYPE 0x00000000 +#define UCODE_UCODE_TYPE 0x00000001 + +#define UCODE_MAX_SIZE (2048) +#define DEFAULT_UCODE_DATASIZE (896) /* 896 bytes */ +#define MC_HEADER_SIZE (sizeof(struct microcode_header_amd)) /* 64 bytes */ +#define DEFAULT_UCODE_TOTALSIZE (DEFAULT_UCODE_DATASIZE + MC_HEADER_SIZE) /* 960 bytes */ +#define DWSIZE (sizeof(u32)) +/* For now we support a fixed ucode total size only */ +#define get_totalsize(mc) \ + ((((struct microcode_amd *)mc)->hdr.mc_patch_data_len * 28) \ + + MC_HEADER_SIZE) + +extern int microcode_init(void *opaque, struct module *module); +extern void microcode_exit(void); + +/* serialize access to the physical write */ +static DEFINE_SPINLOCK(microcode_update_lock); + +/* no concurrent ->write()s are allowed on /dev/cpu/microcode */ +extern struct mutex (microcode_mutex); + +struct equiv_cpu_entry *equiv_cpu_table; + +extern struct ucode_cpu_info ucode_cpu_info[NR_CPUS]; + +static void collect_cpu_info_amd(int cpu) +{ + struct cpuinfo_x86 *c = &cpu_data(cpu); + struct ucode_cpu_info *uci = ucode_cpu_info + cpu; + + /* We should bind the task to the CPU */ + BUG_ON(raw_smp_processor_id() != cpu); + uci->rev = 0; + uci->pf = 0; + uci->mc.mc_amd = NULL; + uci->valid = 1; + + if (c->x86_vendor != X86_VENDOR_AMD || c->x86 < 0x10) { + printk(KERN_ERR "microcode: CPU%d not a capable AMD processor\n", + cpu); + uci->valid = 0; + return; + } + + asm volatile("movl %1, %%ecx; rdmsr" + : "=a" (uci->rev) + : "i" (0x0000008B) : "ecx"); + + printk(KERN_INFO "microcode: collect_cpu_info_amd : patch_id=0x%x\n", + uci->rev); +} + +static int get_matching_microcode_amd(void *mc, int cpu) +{ + struct ucode_cpu_info *uci = ucode_cpu_info + cpu; + struct microcode_header_amd *mc_header = mc; + unsigned long total_size = get_totalsize(mc_header); + void *new_mc; + struct pci_dev *nb_pci_dev, *sb_pci_dev; + unsigned int current_cpu_id; + unsigned int equiv_cpu_id = 0x00; + unsigned int i = 0; + + /* We should bind the task to the CPU */ + BUG_ON(cpu != raw_smp_processor_id()); + + /* This is a tricky part. We might be called from a write operation */ + /* to the device file instead of the usual process of firmware */ + /* loading. This routine needs to be able to distinguish both */ +/* cases. This is done by checking if there alread is a equivalent */ + /* CPU table installed. If not, we're written through */ + /* /dev/cpu/microcode. */ +/* Since we ignore all checks. The error case in which going through */ +/* firmware loading and that table is not loaded has already been */ + /* checked earlier. */ + if (equiv_cpu_table == NULL) { + printk(KERN_INFO "microcode: CPU%d microcode update with " + "version 0x%x (current=0x%x)\n", + cpu, mc_header->patch_id, uci->rev); + goto out; + } + + current_cpu_id = cpuid_eax(0x00000001); + + while (equiv_cpu_table[i].installed_cpu != 0) { + if (current_cpu_id == equiv_cpu_table[i].installed_cpu) { + equiv_cpu_id = equiv_cpu_table[i].equiv_cpu; + break; + } + i++; + } + + if (!equiv_cpu_id) { + printk(KERN_ERR "microcode: CPU%d cpu_id " + "not found in equivalent cpu table \n", cpu); + return 0; + } + + if ((mc_header->processor_rev_id[0]) != (equiv_cpu_id & 0xff)) { + printk(KERN_ERR + "microcode: CPU%d patch does not match " + "(patch is %x, cpu extended is %x) \n", + cpu, mc_header->processor_rev_id[0], + (equiv_cpu_id & 0xff)); + return 0; + } + + if ((mc_header->processor_rev_id[1]) != ((equiv_cpu_id >> 16) & 0xff)) { + printk(KERN_ERR "microcode: CPU%d patch does not match " + "(patch is %x, cpu base id is %x) \n", + cpu, mc_header->processor_rev_id[1], + ((equiv_cpu_id >> 16) & 0xff)); + + return 0; + } + + /* ucode may be northbridge specific */ + if (mc_header->nb_dev_id) { + nb_pci_dev = pci_get_device(PCI_VENDOR_ID_AMD, + (mc_header->nb_dev_id & 0xff), + NULL); + if ((!nb_pci_dev) || + (mc_header->nb_rev_id != nb_pci_dev->revision)) { + printk(KERN_ERR "microcode: CPU%d NB mismatch \n", cpu); + pci_dev_put(nb_pci_dev); + return 0; + } + pci_dev_put(nb_pci_dev); + } + + /* ucode may be southbridge specific */ + if (mc_header->sb_dev_id) { + sb_pci_dev = pci_get_device(PCI_VENDOR_ID_AMD, + (mc_header->sb_dev_id & 0xff), + NULL); + if ((!sb_pci_dev) || + (mc_header->sb_rev_id != sb_pci_dev->revision)) { + printk(KERN_ERR "microcode: CPU%d SB mismatch \n", cpu); + pci_dev_put(sb_pci_dev); + return 0; + } + pci_dev_put(sb_pci_dev); + } + + if (mc_header->patch_id <= uci->rev) + return 0; + + printk(KERN_INFO "microcode: CPU%d found a matching microcode " + "update with version 0x%x (current=0x%x)\n", + cpu, mc_header->patch_id, uci->rev); + +out: + new_mc = vmalloc(UCODE_MAX_SIZE); + if (!new_mc) { + printk(KERN_ERR "microcode: error, can't allocate memory\n"); + return -ENOMEM; + } + memset(new_mc, 0, UCODE_MAX_SIZE); + + /* free previous update file */ + vfree(uci->mc.mc_amd); + + memcpy(new_mc, mc, total_size); + + uci->mc.mc_amd = new_mc; + return 1; +} + +static void apply_microcode_amd(int cpu) +{ + unsigned long flags; + unsigned int eax, edx; + unsigned int rev; + int cpu_num = raw_smp_processor_id(); + struct ucode_cpu_info *uci = ucode_cpu_info + cpu_num; + + /* We should bind the task to the CPU */ + BUG_ON(cpu_num != cpu); + + if (uci->mc.mc_amd == NULL) + return; + + spin_lock_irqsave(µcode_update_lock, flags); + + edx = (unsigned int)(((unsigned long) + &(uci->mc.mc_amd->hdr.data_code)) >> 32); + eax = (unsigned int)(((unsigned long) + &(uci->mc.mc_amd->hdr.data_code)) & 0xffffffffL); + + asm volatile("movl %0, %%ecx; wrmsr" : + : "i" (0xc0010020), "a" (eax), "d" (edx) : "ecx"); + + /* get patch id after patching */ + asm volatile("movl %1, %%ecx; rdmsr" + : "=a" (rev) + : "i" (0x0000008B) : "ecx"); + + spin_unlock_irqrestore(µcode_update_lock, flags); + + /* check current patch id and patch's id for match */ + if (rev != uci->mc.mc_amd->hdr.patch_id) { + printk(KERN_ERR "microcode: CPU%d update from revision " + "0x%x to 0x%x failed\n", cpu_num, + uci->mc.mc_amd->hdr.patch_id, rev); + return; + } + + printk(KERN_INFO "microcode: CPU%d updated from revision " + "0x%x to 0x%x \n", + cpu_num, uci->rev, uci->mc.mc_amd->hdr.patch_id); + + uci->rev = rev; +} + +#ifdef CONFIG_MICROCODE_OLD_INTERFACE +extern void __user *user_buffer; /* user area microcode data buffer */ +extern unsigned int user_buffer_size; /* it's size */ + +static long get_next_ucode_amd(void **mc, long offset) +{ + struct microcode_header_amd mc_header; + unsigned long total_size; + + /* No more data */ + if (offset >= user_buffer_size) + return 0; + if (copy_from_user(&mc_header, user_buffer + offset, MC_HEADER_SIZE)) { + printk(KERN_ERR "microcode: error! Can not read user data\n"); + return -EFAULT; + } + total_size = get_totalsize(&mc_header); + if (offset + total_size > user_buffer_size) { + printk(KERN_ERR "microcode: error! Bad total size in microcode " + "data file\n"); + return -EINVAL; + } + *mc = vmalloc(UCODE_MAX_SIZE); + if (!*mc) + return -ENOMEM; + memset(*mc, 0, UCODE_MAX_SIZE); + + if (copy_from_user(*mc, user_buffer + offset, total_size)) { + printk(KERN_ERR "microcode: error! Can not read user data\n"); + vfree(*mc); + return -EFAULT; + } + return offset + total_size; +} +#else +#define get_next_ucode_amd() NULL +#endif + +static long get_next_ucode_from_buffer_amd(void **mc, void *buf, + unsigned long size, long offset) +{ + struct microcode_header_amd *mc_header; + unsigned long total_size; + unsigned char *buf_pos = buf; + + /* No more data */ + if (offset >= size) + return 0; + + if (buf_pos[offset] != UCODE_UCODE_TYPE) { + printk(KERN_ERR "microcode: error! " + "Wrong microcode payload type field\n"); + return -EINVAL; + } + + mc_header = (struct microcode_header_amd *)(&buf_pos[offset+8]); + + total_size = (unsigned long) (buf_pos[offset+4] + + (buf_pos[offset+5] << 8)); + + printk(KERN_INFO "microcode: size %lu, total_size %lu, offset %ld\n", + size, total_size, offset); + + if (offset + total_size > size) { + printk(KERN_ERR "microcode: error! Bad data in microcode data file\n"); + return -EINVAL; + } + + *mc = vmalloc(UCODE_MAX_SIZE); + if (!*mc) { + printk(KERN_ERR "microcode: error! " + "Can not allocate memory for microcode patch\n"); + return -ENOMEM; + } + + memset(*mc, 0, UCODE_MAX_SIZE); + memcpy(*mc, buf + offset + 8, total_size); + + return offset + total_size + 8; +} + +static long install_equiv_cpu_table(void *buf, unsigned long size, long offset) +{ + unsigned int *buf_pos = buf; + + /* No more data */ + if (offset >= size) + return 0; + + if (buf_pos[1] != UCODE_EQUIV_CPU_TABLE_TYPE) { + printk(KERN_ERR "microcode: error! " + "Wrong microcode equivalnet cpu table type field\n"); + return 0; + } + + if (size == 0) { + printk(KERN_ERR "microcode: error! " + "Wrong microcode equivalnet cpu table length\n"); + return 0; + } + + equiv_cpu_table = (struct equiv_cpu_entry *) vmalloc(size); + if (!equiv_cpu_table) { + printk(KERN_ERR "microcode: error, can't allocate memory for equiv CPU table\n"); + return 0; + } + + memset(equiv_cpu_table, 0, size); + memcpy(equiv_cpu_table, &buf_pos[3], size); + + return size + 12; /* add header length */ +} + +/* fake device for request_firmware */ +extern struct platform_device *microcode_pdev; + +static int cpu_request_microcode_amd(int cpu) +{ + char name[30]; + const struct firmware *firmware; + void *buf; + unsigned int *buf_pos; + unsigned long size; + long offset = 0; + int error; + void *mc; + + /* We should bind the task to the CPU */ + BUG_ON(cpu != raw_smp_processor_id()); + + sprintf(name, "amd-ucode/microcode_amd.bin"); + error = request_firmware(&firmware, "amd-ucode/microcode_amd.bin", + µcode_pdev->dev); + if (error) { + printk(KERN_ERR "microcode: ucode data file %s load failed\n", + name); + return error; + } + + buf_pos = buf = firmware->data; + size = firmware->size; + + if (buf_pos[0] != UCODE_MAGIC) { + printk(KERN_ERR "microcode: error! Wrong microcode patch file magic\n"); + return -EINVAL; + } + + offset = install_equiv_cpu_table(buf, buf_pos[2], offset); + + if (!offset) { + printk(KERN_ERR "microcode: installing equivalent cpu table failed\n"); + return -EINVAL; + } + + while ((offset = + get_next_ucode_from_buffer_amd(&mc, buf, size, offset)) > 0) { + error = get_matching_microcode_amd(mc, cpu); + if (error < 0) + break; + /* + * It's possible the data file has multiple matching ucode, + * lets keep searching till the latest version + */ + if (error == 1) { + apply_microcode_amd(cpu); + error = 0; + } + vfree(mc); + } + if (offset > 0) { + vfree(mc); + vfree(equiv_cpu_table); + equiv_cpu_table = NULL; + } + if (offset < 0) + error = offset; + release_firmware(firmware); + + return error; +} + +static int apply_microcode_check_cpu_amd(int cpu) +{ + struct cpuinfo_x86 *c = &cpu_data(cpu); + struct ucode_cpu_info *uci = ucode_cpu_info + cpu; + unsigned int rev; + cpumask_t old; + cpumask_of_cpu_ptr(newmask, cpu); + int err = 0; + + /* Check if the microcode is available */ + if (!uci->mc.mc_amd) + return 0; + + old = current->cpus_allowed; + set_cpus_allowed(current, newmask); + + /* Check if the microcode we have in memory matches the CPU */ + if (c->x86_vendor != X86_VENDOR_AMD || c->x86 < 16) + err = -EINVAL; + + if (!err) { + asm volatile("movl %1, %%ecx; rdmsr" + : "=a" (rev) + : "i" (0x0000008B) : "ecx"); + + if (uci->rev != rev) + err = -EINVAL; + } + + if (!err) + apply_microcode_amd(cpu); + else + printk(KERN_ERR "microcode: Could not apply microcode to CPU%d:" + " rev=0x%x\n", + cpu, uci->rev); + + set_cpus_allowed(current, old); + return err; +} + +static void microcode_fini_cpu_amd(int cpu) +{ + struct ucode_cpu_info *uci = ucode_cpu_info + cpu; + + mutex_lock(µcode_mutex); + uci->valid = 0; + vfree(uci->mc.mc_amd); + uci->mc.mc_amd = NULL; + mutex_unlock(µcode_mutex); +} + +static struct microcode_ops microcode_amd_ops = { + .get_next_ucode = get_next_ucode_amd, + .get_matching_microcode = get_matching_microcode_amd, + .microcode_sanity_check = NULL, + .apply_microcode_check_cpu = apply_microcode_check_cpu_amd, + .cpu_request_microcode = cpu_request_microcode_amd, + .collect_cpu_info = collect_cpu_info_amd, + .apply_microcode = apply_microcode_amd, + .microcode_fini_cpu = microcode_fini_cpu_amd, +}; + +static int __init microcode_amd_module_init(void) +{ + struct cpuinfo_x86 *c = &cpu_data(get_cpu()); + + equiv_cpu_table = NULL; + if (c->x86_vendor == X86_VENDOR_AMD) + return microcode_init(µcode_amd_ops, THIS_MODULE); + else + return -ENODEV; +} + +static void __exit microcode_amd_module_exit(void) +{ + microcode_exit(); +} + +module_init(microcode_amd_module_init) +module_exit(microcode_amd_module_exit) -- cgit v1.2.3 From 45b1e23eca1c53fa79a611a2bc8c93697ede6c97 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 29 Jul 2008 09:42:17 +0200 Subject: x86, microcode support: fix build error MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit fix: arch/x86/kernel/microcode.c:412: error: static declaration of ‘microcode_init’ follows non-static declaration include/asm/microcode.h:1: error: previous declaration of ‘microcode_init’ was here arch/x86/kernel/microcode.c:454: error: static declaration of ‘microcode_exit’ follows non-static declaration include/asm/microcode.h:2: error: previous declaration of ‘microcode_exit’ was here Signed-off-by: Ingo Molnar --- arch/x86/kernel/microcode.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/microcode.c b/arch/x86/kernel/microcode.c index 1e42e79ca69..9a881845b35 100644 --- a/arch/x86/kernel/microcode.c +++ b/arch/x86/kernel/microcode.c @@ -415,7 +415,7 @@ static struct notifier_block __refdata mc_cpu_notifier = { .notifier_call = mc_cpu_callback, }; -static int microcode_init(void *opaque, struct module *module) +int microcode_init(void *opaque, struct module *module) { struct microcode_ops *ops = (struct microcode_ops *)opaque; int error; @@ -457,7 +457,7 @@ static int microcode_init(void *opaque, struct module *module) } EXPORT_SYMBOL_GPL(microcode_init); -static void __exit microcode_exit (void) +void __exit microcode_exit (void) { microcode_dev_exit(); -- cgit v1.2.3 From 224e946b81166d732f3e9b414cb69612072fb500 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 29 Jul 2008 09:52:55 +0200 Subject: x86, microcode: fix symbol exports fix tons of build errors: arch/x86/kernel/built-in.o: In function `microcode_fini_cpu': microcode_intel.c:(.text+0x11598): undefined reference to `microcode_mutex' microcode_intel.c:(.text+0x115a4): undefined reference to `ucode_cpu_info' microcode_intel.c:(.text+0x115ae): undefined reference to `ucode_cpu_info' microcode_intel.c:(.text+0x115bc): undefined reference to `microcode_mutex' [...] Signed-off-by: Ingo Molnar --- arch/x86/kernel/microcode.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/microcode.c b/arch/x86/kernel/microcode.c index 9a881845b35..758b958a663 100644 --- a/arch/x86/kernel/microcode.c +++ b/arch/x86/kernel/microcode.c @@ -104,16 +104,16 @@ MODULE_LICENSE("GPL"); struct microcode_ops *microcode_ops; /* no concurrent ->write()s are allowed on /dev/cpu/microcode */ -static DEFINE_MUTEX(microcode_mutex); +DEFINE_MUTEX(microcode_mutex); EXPORT_SYMBOL_GPL(microcode_mutex); -static struct ucode_cpu_info ucode_cpu_info[NR_CPUS]; +struct ucode_cpu_info ucode_cpu_info[NR_CPUS]; EXPORT_SYMBOL_GPL(ucode_cpu_info); #ifdef CONFIG_MICROCODE_OLD_INTERFACE -static void __user *user_buffer; /* user area microcode data buffer */ +void __user *user_buffer; /* user area microcode data buffer */ EXPORT_SYMBOL_GPL(user_buffer); -static unsigned int user_buffer_size; /* it's size */ +unsigned int user_buffer_size; /* it's size */ EXPORT_SYMBOL_GPL(user_buffer_size); static int do_microcode_update (void) @@ -230,7 +230,7 @@ MODULE_ALIAS_MISCDEV(MICROCODE_MINOR); #endif /* fake device for request_firmware */ -static struct platform_device *microcode_pdev; +struct platform_device *microcode_pdev; EXPORT_SYMBOL_GPL(microcode_pdev); static void microcode_init_cpu(int cpu, int resume) -- cgit v1.2.3 From 5d7b605245b1aa1a9cd6549b1f57d69273eb0c37 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 29 Jul 2008 10:07:36 +0200 Subject: x86, microcode: fix module license string fix: FATAL: modpost: GPL-incompatible module microcode_amd.ko uses GPL-only symbol 'set_cpus_allowed_ptr' Signed-off-by: Ingo Molnar --- arch/x86/kernel/microcode_amd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/microcode_amd.c b/arch/x86/kernel/microcode_amd.c index db199e3fdd1..fd9e68e8889 100644 --- a/arch/x86/kernel/microcode_amd.c +++ b/arch/x86/kernel/microcode_amd.c @@ -40,7 +40,7 @@ MODULE_DESCRIPTION("AMD Microcode Update Driver"); MODULE_AUTHOR("Peter Oruba "); -MODULE_LICENSE("GPLv2"); +MODULE_LICENSE("GPL v2"); #define UCODE_MAGIC 0x00414d44 #define UCODE_EQUIV_CPU_TABLE_TYPE 0x00000000 -- cgit v1.2.3 From a648bf4632628c787abb0514277f2a231fca39ca Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Tue, 29 Jul 2008 10:29:18 -0700 Subject: x86, xsave: xsave cpuid feature bits Add xsave CPU feature bits. Signed-off-by: Suresh Siddha Signed-off-by: H. Peter Anvin Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/feature_names.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/feature_names.c b/arch/x86/kernel/cpu/feature_names.c index 0bf4d37a048..74154722565 100644 --- a/arch/x86/kernel/cpu/feature_names.c +++ b/arch/x86/kernel/cpu/feature_names.c @@ -46,7 +46,7 @@ const char * const x86_cap_flags[NCAPINTS*32] = { "pni", NULL, NULL, "monitor", "ds_cpl", "vmx", "smx", "est", "tm2", "ssse3", "cid", NULL, NULL, "cx16", "xtpr", NULL, NULL, NULL, "dca", "sse4_1", "sse4_2", "x2apic", NULL, "popcnt", - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, "xsave", NULL, NULL, NULL, NULL, NULL, /* VIA/Cyrix/Centaur-defined */ NULL, NULL, "rng", "rng_en", NULL, NULL, "ace", "ace_en", -- cgit v1.2.3 From dc1e35c6e95e8923cf1d3510438b63c600fee1e2 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Tue, 29 Jul 2008 10:29:19 -0700 Subject: x86, xsave: enable xsave/xrstor on cpus with xsave support Enables xsave/xrstor by turning on cr4.osxsave on cpu's which have the xsave support. For now, features that OS supports/enabled are FP and SSE. Signed-off-by: Suresh Siddha Signed-off-by: H. Peter Anvin Signed-off-by: Ingo Molnar --- arch/x86/kernel/Makefile | 2 +- arch/x86/kernel/cpu/common.c | 8 ++++ arch/x86/kernel/i387.c | 12 ++++++ arch/x86/kernel/traps_32.c | 1 - arch/x86/kernel/traps_64.c | 4 -- arch/x86/kernel/xsave.c | 87 ++++++++++++++++++++++++++++++++++++++++++++ 6 files changed, 108 insertions(+), 6 deletions(-) create mode 100644 arch/x86/kernel/xsave.c (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index a07ec14f331..d6ea91abaeb 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -38,7 +38,7 @@ obj-y += tsc.o io_delay.o rtc.o obj-$(CONFIG_X86_TRAMPOLINE) += trampoline.o obj-y += process.o -obj-y += i387.o +obj-y += i387.o xsave.o obj-y += ptrace.o obj-y += ds.o obj-$(CONFIG_X86_32) += tls.o diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 80ab20d4fa3..fabbcb7020f 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -712,6 +712,14 @@ void __cpuinit cpu_init(void) current_thread_info()->status = 0; clear_used_math(); mxcsr_feature_mask_init(); + + /* + * Boot processor to setup the FP and extended state context info. + */ + if (!smp_processor_id()) + init_thread_xstate(); + + xsave_init(); } #ifdef CONFIG_HOTPLUG_CPU diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c index eb9ddd8efb8..e22a9a9dce8 100644 --- a/arch/x86/kernel/i387.c +++ b/arch/x86/kernel/i387.c @@ -61,6 +61,11 @@ void __init init_thread_xstate(void) return; } + if (cpu_has_xsave) { + xsave_cntxt_init(); + return; + } + if (cpu_has_fxsr) xstate_size = sizeof(struct i387_fxsave_struct); #ifdef CONFIG_X86_32 @@ -83,6 +88,13 @@ void __cpuinit fpu_init(void) write_cr0(oldcr0 & ~(X86_CR0_TS|X86_CR0_EM)); /* clear TS and EM */ + /* + * Boot processor to setup the FP and extended state context info. + */ + if (!smp_processor_id()) + init_thread_xstate(); + xsave_init(); + mxcsr_feature_mask_init(); /* clean state in init */ current_thread_info()->status = 0; diff --git a/arch/x86/kernel/traps_32.c b/arch/x86/kernel/traps_32.c index 03df8e45e5a..da5a5964fcc 100644 --- a/arch/x86/kernel/traps_32.c +++ b/arch/x86/kernel/traps_32.c @@ -1228,7 +1228,6 @@ void __init trap_init(void) set_bit(SYSCALL_VECTOR, used_vectors); - init_thread_xstate(); /* * Should be a barrier for any external CPU state: */ diff --git a/arch/x86/kernel/traps_64.c b/arch/x86/kernel/traps_64.c index 513caaca711..3580a7938a2 100644 --- a/arch/x86/kernel/traps_64.c +++ b/arch/x86/kernel/traps_64.c @@ -1172,10 +1172,6 @@ void __init trap_init(void) #ifdef CONFIG_IA32_EMULATION set_system_gate(IA32_SYSCALL_VECTOR, ia32_syscall); #endif - /* - * initialize the per thread extended state: - */ - init_thread_xstate(); /* * Should be a barrier for any external CPU state: */ diff --git a/arch/x86/kernel/xsave.c b/arch/x86/kernel/xsave.c new file mode 100644 index 00000000000..c68b7c4ca24 --- /dev/null +++ b/arch/x86/kernel/xsave.c @@ -0,0 +1,87 @@ +/* + * xsave/xrstor support. + * + * Author: Suresh Siddha + */ +#include +#include +#include + +/* + * Supported feature mask by the CPU and the kernel. + */ +unsigned int pcntxt_hmask, pcntxt_lmask; + +/* + * Represents init state for the supported extended state. + */ +struct xsave_struct *init_xstate_buf; + +/* + * Enable the extended processor state save/restore feature + */ +void __cpuinit xsave_init(void) +{ + if (!cpu_has_xsave) + return; + + set_in_cr4(X86_CR4_OSXSAVE); + + /* + * Enable all the features that the HW is capable of + * and the Linux kernel is aware of. + * + * xsetbv(); + */ + asm volatile(".byte 0x0f,0x01,0xd1" : : "c" (0), + "a" (pcntxt_lmask), "d" (pcntxt_hmask)); +} + +/* + * setup the xstate image representing the init state + */ +void setup_xstate_init(void) +{ + init_xstate_buf = alloc_bootmem(xstate_size); + init_xstate_buf->i387.mxcsr = MXCSR_DEFAULT; +} + +/* + * Enable and initialize the xsave feature. + */ +void __init xsave_cntxt_init(void) +{ + unsigned int eax, ebx, ecx, edx; + + cpuid_count(0xd, 0, &eax, &ebx, &ecx, &edx); + + pcntxt_lmask = eax; + pcntxt_hmask = edx; + + if ((pcntxt_lmask & XSTATE_FPSSE) != XSTATE_FPSSE) { + printk(KERN_ERR "FP/SSE not shown under xsave features %x\n", + pcntxt_lmask); + BUG(); + } + + /* + * for now OS knows only about FP/SSE + */ + pcntxt_lmask = pcntxt_lmask & XCNTXT_LMASK; + pcntxt_hmask = pcntxt_hmask & XCNTXT_HMASK; + + xsave_init(); + + /* + * Recompute the context size for enabled features + */ + cpuid_count(0xd, 0, &eax, &ebx, &ecx, &edx); + + xstate_size = ebx; + + setup_xstate_init(); + + printk(KERN_INFO "xsave/xrstor: enabled xstate_bv 0x%Lx, " + "cntxt size 0x%x\n", + (pcntxt_lmask | ((u64) pcntxt_hmask << 32)), xstate_size); +} -- cgit v1.2.3 From b359e8a434cc3d09847010fc4aeccf48d69740e4 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Tue, 29 Jul 2008 10:29:20 -0700 Subject: x86, xsave: context switch support using xsave/xrstor Uses xsave/xrstor (instead of traditional fxsave/fxrstor) in context switch when available. Introduces TS_XSAVE flag, which determine the need to use xsave/xrstor instructions during context switch instead of the legacy fxsave/fxrstor instructions. Thread-synchronous status word is already in L1 cache during this code patch and thus minimizes the performance penality compared to (cpu_has_xsave) checks. Signed-off-by: Suresh Siddha Signed-off-by: H. Peter Anvin Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/common.c | 5 ++++- arch/x86/kernel/i387.c | 5 ++++- arch/x86/kernel/traps_64.c | 2 +- 3 files changed, 9 insertions(+), 3 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index fabbcb7020f..6c2b9e756db 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -709,7 +709,10 @@ void __cpuinit cpu_init(void) /* * Force FPU initialization: */ - current_thread_info()->status = 0; + if (cpu_has_xsave) + current_thread_info()->status = TS_XSAVE; + else + current_thread_info()->status = 0; clear_used_math(); mxcsr_feature_mask_init(); diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c index e22a9a9dce8..b778e17e4b0 100644 --- a/arch/x86/kernel/i387.c +++ b/arch/x86/kernel/i387.c @@ -97,7 +97,10 @@ void __cpuinit fpu_init(void) mxcsr_feature_mask_init(); /* clean state in init */ - current_thread_info()->status = 0; + if (cpu_has_xsave) + current_thread_info()->status = TS_XSAVE; + else + current_thread_info()->status = 0; clear_used_math(); } #endif /* CONFIG_X86_64 */ diff --git a/arch/x86/kernel/traps_64.c b/arch/x86/kernel/traps_64.c index 3580a7938a2..38eb76156a4 100644 --- a/arch/x86/kernel/traps_64.c +++ b/arch/x86/kernel/traps_64.c @@ -1134,7 +1134,7 @@ asmlinkage void math_state_restore(void) /* * Paranoid restore. send a SIGSEGV if we fail to restore the state. */ - if (unlikely(restore_fpu_checking(&me->thread.xstate->fxsave))) { + if (unlikely(restore_fpu_checking(me))) { stts(); force_sig(SIGSEGV, me); return; -- cgit v1.2.3 From 3c1c7f101426cb2ecc79d817a8a65928965fc860 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Tue, 29 Jul 2008 10:29:21 -0700 Subject: x86, xsave: dynamically allocate sigframes fpstate instead of static allocation dynamically allocate fpstate on the stack, instead of static allocation in the current sigframe layout on the user stack. This will allow the fpstate structure to grow in the future, which includes extended state information supporting xsave/xrstor. signal handlers will be able to access the fpstate pointer from the sigcontext structure asusual, with no change. For the non RT sigframe's (which are supported only for 32bit apps), current static fpstate layout in the sigframe will be unused(so that we don't change the extramask[] offset in the sigframe and thus prevent breaking app's which modify extramask[]). Signed-off-by: Suresh Siddha Signed-off-by: H. Peter Anvin Signed-off-by: Ingo Molnar --- arch/x86/kernel/i387.c | 2 ++ arch/x86/kernel/sigframe.h | 14 ++++++++++++-- arch/x86/kernel/signal_32.c | 18 +++++++++++++----- arch/x86/kernel/signal_64.c | 2 +- arch/x86/kernel/xsave.c | 4 ++++ 5 files changed, 32 insertions(+), 8 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c index b778e17e4b0..51fb288a2c9 100644 --- a/arch/x86/kernel/i387.c +++ b/arch/x86/kernel/i387.c @@ -24,6 +24,7 @@ # define save_i387_ia32 save_i387 # define restore_i387_ia32 restore_i387 # define _fpstate_ia32 _fpstate +# define sig_xstate_ia32_size sig_xstate_size # define user_i387_ia32_struct user_i387_struct # define user32_fxsr_struct user_fxsr_struct #endif @@ -36,6 +37,7 @@ static unsigned int mxcsr_feature_mask __read_mostly = 0xffffffffu; unsigned int xstate_size; +unsigned int sig_xstate_ia32_size = sizeof(struct _fpstate_ia32); static struct i387_fxsave_struct fx_scratch __cpuinitdata; void __cpuinit mxcsr_feature_mask_init(void) diff --git a/arch/x86/kernel/sigframe.h b/arch/x86/kernel/sigframe.h index 72bbb519d2d..6dd7e2b70a4 100644 --- a/arch/x86/kernel/sigframe.h +++ b/arch/x86/kernel/sigframe.h @@ -3,9 +3,18 @@ struct sigframe { char __user *pretcode; int sig; struct sigcontext sc; - struct _fpstate fpstate; + /* + * fpstate is unused. fpstate is moved/allocated after + * retcode[] below. This movement allows to have the FP state and the + * future state extensions (xsave) stay together. + * And at the same time retaining the unused fpstate, prevents changing + * the offset of extramask[] in the sigframe and thus prevent any + * legacy application accessing/modifying it. + */ + struct _fpstate fpstate_unused; unsigned long extramask[_NSIG_WORDS-1]; char retcode[8]; + /* fp state follows here */ }; struct rt_sigframe { @@ -15,13 +24,14 @@ struct rt_sigframe { void __user *puc; struct siginfo info; struct ucontext uc; - struct _fpstate fpstate; char retcode[8]; + /* fp state follows here */ }; #else struct rt_sigframe { char __user *pretcode; struct ucontext uc; struct siginfo info; + /* fp state follows here */ }; #endif diff --git a/arch/x86/kernel/signal_32.c b/arch/x86/kernel/signal_32.c index 6fb5bcdd893..19a7a5669b5 100644 --- a/arch/x86/kernel/signal_32.c +++ b/arch/x86/kernel/signal_32.c @@ -306,7 +306,8 @@ setup_sigcontext(struct sigcontext __user *sc, struct _fpstate __user *fpstate, * Determine which stack to use.. */ static inline void __user * -get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size) +get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size, + struct _fpstate **fpstate) { unsigned long sp; @@ -332,6 +333,11 @@ get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size) sp = (unsigned long) ka->sa.sa_restorer; } + if (used_math()) { + sp = sp - sig_xstate_size; + *fpstate = (struct _fpstate *) sp; + } + sp -= frame_size; /* * Align the stack pointer according to the i386 ABI, @@ -350,8 +356,9 @@ setup_frame(int sig, struct k_sigaction *ka, sigset_t *set, void __user *restorer; int err = 0; int usig; + struct _fpstate __user *fpstate = NULL; - frame = get_sigframe(ka, regs, sizeof(*frame)); + frame = get_sigframe(ka, regs, sizeof(*frame), &fpstate); if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame))) goto give_sigsegv; @@ -366,7 +373,7 @@ setup_frame(int sig, struct k_sigaction *ka, sigset_t *set, if (err) goto give_sigsegv; - err = setup_sigcontext(&frame->sc, &frame->fpstate, regs, set->sig[0]); + err = setup_sigcontext(&frame->sc, fpstate, regs, set->sig[0]); if (err) goto give_sigsegv; @@ -427,8 +434,9 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, void __user *restorer; int err = 0; int usig; + struct _fpstate __user *fpstate = NULL; - frame = get_sigframe(ka, regs, sizeof(*frame)); + frame = get_sigframe(ka, regs, sizeof(*frame), &fpstate); if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame))) goto give_sigsegv; @@ -453,7 +461,7 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, err |= __put_user(sas_ss_flags(regs->sp), &frame->uc.uc_stack.ss_flags); err |= __put_user(current->sas_ss_size, &frame->uc.uc_stack.ss_size); - err |= setup_sigcontext(&frame->uc.uc_mcontext, &frame->fpstate, + err |= setup_sigcontext(&frame->uc.uc_mcontext, fpstate, regs, set->sig[0]); err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set)); if (err) diff --git a/arch/x86/kernel/signal_64.c b/arch/x86/kernel/signal_64.c index ca316b5b742..0deab8eff33 100644 --- a/arch/x86/kernel/signal_64.c +++ b/arch/x86/kernel/signal_64.c @@ -281,7 +281,7 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, struct task_struct *me = current; if (used_math()) { - fp = get_stack(ka, regs, sizeof(struct _fpstate)); + fp = get_stack(ka, regs, sig_xstate_size); frame = (void __user *)round_down( (unsigned long)fp - sizeof(struct rt_sigframe), 16) - 8; diff --git a/arch/x86/kernel/xsave.c b/arch/x86/kernel/xsave.c index c68b7c4ca24..7ad169e3352 100644 --- a/arch/x86/kernel/xsave.c +++ b/arch/x86/kernel/xsave.c @@ -17,6 +17,10 @@ unsigned int pcntxt_hmask, pcntxt_lmask; */ struct xsave_struct *init_xstate_buf; +#ifdef CONFIG_X86_64 +unsigned int sig_xstate_size = sizeof(struct _fpstate); +#endif + /* * Enable the extended processor state save/restore feature */ -- cgit v1.2.3 From ab5137015fed9b948fe835a2d99a4cfbd50a0c40 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Tue, 29 Jul 2008 10:29:22 -0700 Subject: x86, xsave: reorganization of signal save/restore fpstate code layout move 64bit routines that saves/restores fpstate in/from user stack from signal_64.c to xsave.c restore_i387_xstate() now handles the condition when user passes NULL fpstate. Other misc changes for prepartion of xsave/xrstor sigcontext support. Signed-off-by: Suresh Siddha Signed-off-by: H. Peter Anvin Signed-off-by: Ingo Molnar --- arch/x86/kernel/i387.c | 44 +++++++++++++++++------- arch/x86/kernel/signal_32.c | 28 ++++----------- arch/x86/kernel/signal_64.c | 83 ++------------------------------------------- arch/x86/kernel/xsave.c | 79 ++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 120 insertions(+), 114 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c index 51fb288a2c9..7daf3a011dd 100644 --- a/arch/x86/kernel/i387.c +++ b/arch/x86/kernel/i387.c @@ -21,9 +21,10 @@ # include # include #else -# define save_i387_ia32 save_i387 -# define restore_i387_ia32 restore_i387 +# define save_i387_xstate_ia32 save_i387_xstate +# define restore_i387_xstate_ia32 restore_i387_xstate # define _fpstate_ia32 _fpstate +# define _xstate_ia32 _xstate # define sig_xstate_ia32_size sig_xstate_size # define user_i387_ia32_struct user_i387_struct # define user32_fxsr_struct user_fxsr_struct @@ -424,7 +425,6 @@ static inline int save_i387_fsave(struct _fpstate_ia32 __user *buf) struct task_struct *tsk = current; struct i387_fsave_struct *fp = &tsk->thread.xstate->fsave; - unlazy_fpu(tsk); fp->status = fp->swd; if (__copy_to_user(buf, fp, sizeof(struct i387_fsave_struct))) return -1; @@ -438,8 +438,6 @@ static int save_i387_fxsave(struct _fpstate_ia32 __user *buf) struct user_i387_ia32_struct env; int err = 0; - unlazy_fpu(tsk); - convert_from_fxsr(&env, tsk); if (__copy_to_user(buf, &env, sizeof(env))) return -1; @@ -455,10 +453,16 @@ static int save_i387_fxsave(struct _fpstate_ia32 __user *buf) return 1; } -int save_i387_ia32(struct _fpstate_ia32 __user *buf) +int save_i387_xstate_ia32(void __user *buf) { + struct _fpstate_ia32 __user *fp = (struct _fpstate_ia32 __user *) buf; + struct task_struct *tsk = current; + if (!used_math()) return 0; + + if (!access_ok(VERIFY_WRITE, buf, sig_xstate_ia32_size)) + return -EACCES; /* * This will cause a "finit" to be triggered by the next * attempted FPU operation by the 'current' process. @@ -468,13 +472,15 @@ int save_i387_ia32(struct _fpstate_ia32 __user *buf) if (!HAVE_HWFP) { return fpregs_soft_get(current, NULL, 0, sizeof(struct user_i387_ia32_struct), - NULL, buf) ? -1 : 1; + NULL, fp) ? -1 : 1; } + unlazy_fpu(tsk); + if (cpu_has_fxsr) - return save_i387_fxsave(buf); + return save_i387_fxsave(fp); else - return save_i387_fsave(buf); + return save_i387_fsave(fp); } static inline int restore_i387_fsave(struct _fpstate_ia32 __user *buf) @@ -502,14 +508,26 @@ static int restore_i387_fxsave(struct _fpstate_ia32 __user *buf) return 0; } -int restore_i387_ia32(struct _fpstate_ia32 __user *buf) +int restore_i387_xstate_ia32(void __user *buf) { int err; struct task_struct *tsk = current; + struct _fpstate_ia32 __user *fp = (struct _fpstate_ia32 __user *) buf; if (HAVE_HWFP) clear_fpu(tsk); + if (!buf) { + if (used_math()) { + clear_fpu(tsk); + clear_used_math(); + } + + return 0; + } else + if (!access_ok(VERIFY_READ, buf, sig_xstate_ia32_size)) + return -EACCES; + if (!used_math()) { err = init_fpu(tsk); if (err) @@ -518,13 +536,13 @@ int restore_i387_ia32(struct _fpstate_ia32 __user *buf) if (HAVE_HWFP) { if (cpu_has_fxsr) - err = restore_i387_fxsave(buf); + err = restore_i387_fxsave(fp); else - err = restore_i387_fsave(buf); + err = restore_i387_fsave(fp); } else { err = fpregs_soft_set(current, NULL, 0, sizeof(struct user_i387_ia32_struct), - NULL, buf) != 0; + NULL, fp) != 0; } set_used_math(); diff --git a/arch/x86/kernel/signal_32.c b/arch/x86/kernel/signal_32.c index 19a7a5669b5..690cc616ac0 100644 --- a/arch/x86/kernel/signal_32.c +++ b/arch/x86/kernel/signal_32.c @@ -159,28 +159,14 @@ restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc, } { - struct _fpstate __user *buf; + void __user *buf; err |= __get_user(buf, &sc->fpstate); - if (buf) { - if (!access_ok(VERIFY_READ, buf, sizeof(*buf))) - goto badframe; - err |= restore_i387(buf); - } else { - struct task_struct *me = current; - - if (used_math()) { - clear_fpu(me); - clear_used_math(); - } - } + err |= restore_i387_xstate(buf); } err |= __get_user(*pax, &sc->ax); return err; - -badframe: - return 1; } asmlinkage unsigned long sys_sigreturn(unsigned long __unused) @@ -262,7 +248,7 @@ badframe: * Set up a signal frame. */ static int -setup_sigcontext(struct sigcontext __user *sc, struct _fpstate __user *fpstate, +setup_sigcontext(struct sigcontext __user *sc, void __user *fpstate, struct pt_regs *regs, unsigned long mask) { int tmp, err = 0; @@ -289,7 +275,7 @@ setup_sigcontext(struct sigcontext __user *sc, struct _fpstate __user *fpstate, err |= __put_user(regs->sp, &sc->sp_at_signal); err |= __put_user(regs->ss, (unsigned int __user *)&sc->ss); - tmp = save_i387(fpstate); + tmp = save_i387_xstate(fpstate); if (tmp < 0) err = 1; else @@ -307,7 +293,7 @@ setup_sigcontext(struct sigcontext __user *sc, struct _fpstate __user *fpstate, */ static inline void __user * get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size, - struct _fpstate **fpstate) + void **fpstate) { unsigned long sp; @@ -356,7 +342,7 @@ setup_frame(int sig, struct k_sigaction *ka, sigset_t *set, void __user *restorer; int err = 0; int usig; - struct _fpstate __user *fpstate = NULL; + void __user *fpstate = NULL; frame = get_sigframe(ka, regs, sizeof(*frame), &fpstate); @@ -434,7 +420,7 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, void __user *restorer; int err = 0; int usig; - struct _fpstate __user *fpstate = NULL; + void __user *fpstate = NULL; frame = get_sigframe(ka, regs, sizeof(*frame), &fpstate); diff --git a/arch/x86/kernel/signal_64.c b/arch/x86/kernel/signal_64.c index 0deab8eff33..ddf6123a55c 100644 --- a/arch/x86/kernel/signal_64.c +++ b/arch/x86/kernel/signal_64.c @@ -53,69 +53,6 @@ sys_sigaltstack(const stack_t __user *uss, stack_t __user *uoss, return do_sigaltstack(uss, uoss, regs->sp); } -/* - * Signal frame handlers. - */ - -static inline int save_i387(struct _fpstate __user *buf) -{ - struct task_struct *tsk = current; - int err = 0; - - BUILD_BUG_ON(sizeof(struct user_i387_struct) != - sizeof(tsk->thread.xstate->fxsave)); - - if ((unsigned long)buf % 16) - printk("save_i387: bad fpstate %p\n", buf); - - if (!used_math()) - return 0; - clear_used_math(); /* trigger finit */ - if (task_thread_info(tsk)->status & TS_USEDFPU) { - err = save_i387_checking((struct i387_fxsave_struct __user *) - buf); - if (err) - return err; - task_thread_info(tsk)->status &= ~TS_USEDFPU; - stts(); - } else { - if (__copy_to_user(buf, &tsk->thread.xstate->fxsave, - sizeof(struct i387_fxsave_struct))) - return -1; - } - return 1; -} - -/* - * This restores directly out of user space. Exceptions are handled. - */ -static inline int restore_i387(struct _fpstate __user *buf) -{ - struct task_struct *tsk = current; - int err; - - if (!used_math()) { - err = init_fpu(tsk); - if (err) - return err; - } - - if (!(task_thread_info(current)->status & TS_USEDFPU)) { - clts(); - task_thread_info(current)->status |= TS_USEDFPU; - } - err = restore_fpu_checking((__force struct i387_fxsave_struct *)buf); - if (unlikely(err)) { - /* - * Encountered an error while doing the restore from the - * user buffer, clear the fpu state. - */ - clear_fpu(tsk); - clear_used_math(); - } - return err; -} - /* * Do a signal return; undo the signal stack. */ @@ -160,25 +97,11 @@ restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc, { struct _fpstate __user * buf; err |= __get_user(buf, &sc->fpstate); - - if (buf) { - if (!access_ok(VERIFY_READ, buf, sizeof(*buf))) - goto badframe; - err |= restore_i387(buf); - } else { - struct task_struct *me = current; - if (used_math()) { - clear_fpu(me); - clear_used_math(); - } - } + err |= restore_i387_xstate(buf); } err |= __get_user(*pax, &sc->ax); return err; - -badframe: - return 1; } asmlinkage long sys_rt_sigreturn(struct pt_regs *regs) @@ -276,7 +199,7 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, sigset_t *set, struct pt_regs * regs) { struct rt_sigframe __user *frame; - struct _fpstate __user *fp = NULL; + void __user *fp = NULL; int err = 0; struct task_struct *me = current; @@ -288,7 +211,7 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, if (!access_ok(VERIFY_WRITE, fp, sizeof(struct _fpstate))) goto give_sigsegv; - if (save_i387(fp) < 0) + if (save_i387_xstate(fp) < 0) err |= -1; } else frame = get_stack(ka, regs, sizeof(struct rt_sigframe)) - 8; diff --git a/arch/x86/kernel/xsave.c b/arch/x86/kernel/xsave.c index 7ad169e3352..608e72d7ca6 100644 --- a/arch/x86/kernel/xsave.c +++ b/arch/x86/kernel/xsave.c @@ -12,6 +12,85 @@ */ unsigned int pcntxt_hmask, pcntxt_lmask; +#ifdef CONFIG_X86_64 +/* + * Signal frame handlers. + */ + +int save_i387_xstate(void __user *buf) +{ + struct task_struct *tsk = current; + int err = 0; + + if (!access_ok(VERIFY_WRITE, buf, sig_xstate_size)) + return -EACCES; + + BUILD_BUG_ON(sizeof(struct user_i387_struct) != + sizeof(tsk->thread.xstate->fxsave)); + + if ((unsigned long)buf % 16) + printk("save_i387_xstate: bad fpstate %p\n", buf); + + if (!used_math()) + return 0; + clear_used_math(); /* trigger finit */ + if (task_thread_info(tsk)->status & TS_USEDFPU) { + err = save_i387_checking((struct i387_fxsave_struct __user *) + buf); + if (err) + return err; + task_thread_info(tsk)->status &= ~TS_USEDFPU; + stts(); + } else { + if (__copy_to_user(buf, &tsk->thread.xstate->fxsave, + xstate_size)) + return -1; + } + return 1; +} + +/* + * This restores directly out of user space. Exceptions are handled. + */ +int restore_i387_xstate(void __user *buf) +{ + struct task_struct *tsk = current; + int err; + + if (!buf) { + if (used_math()) { + clear_fpu(tsk); + clear_used_math(); + } + + return 0; + } else + if (!access_ok(VERIFY_READ, buf, sig_xstate_size)) + return -EACCES; + + if (!used_math()) { + err = init_fpu(tsk); + if (err) + return err; + } + + if (!(task_thread_info(current)->status & TS_USEDFPU)) { + clts(); + task_thread_info(current)->status |= TS_USEDFPU; + } + err = fxrstor_checking((__force struct i387_fxsave_struct *)buf); + if (unlikely(err)) { + /* + * Encountered an error while doing the restore from the + * user buffer, clear the fpu state. + */ + clear_fpu(tsk); + clear_used_math(); + } + return err; +} +#endif + /* * Represents init state for the supported extended state. */ -- cgit v1.2.3 From c37b5efea43f9e500363f9973dd00e3d2cdcc685 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Tue, 29 Jul 2008 10:29:25 -0700 Subject: x86, xsave: save/restore the extended state context in sigframe On cpu's supporting xsave/xrstor, fpstate pointer in the sigcontext, will include the extended state information along with fpstate information. Presence of extended state information is indicated by the presence of FP_XSTATE_MAGIC1 at fpstate.sw_reserved.magic1 and FP_XSTATE_MAGIC2 at fpstate + (fpstate.sw_reserved.extended_size - FP_XSTATE_MAGIC2_SIZE). Extended feature bit mask that is saved in the memory layout is represented by the fpstate.sw_reserved.xstate_bv For RT signal frames, UC_FP_XSTATE in the uc_flags also indicate the presence of extended state information in the sigcontext's fpstate pointer. Signed-off-by: Suresh Siddha Signed-off-by: H. Peter Anvin Signed-off-by: Ingo Molnar --- arch/x86/kernel/i387.c | 82 +++++++++++++++++++-- arch/x86/kernel/signal_32.c | 5 +- arch/x86/kernel/signal_64.c | 7 +- arch/x86/kernel/xsave.c | 172 +++++++++++++++++++++++++++++++++++++++++--- 4 files changed, 247 insertions(+), 19 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c index 7daf3a011dd..cbb9dc474a2 100644 --- a/arch/x86/kernel/i387.c +++ b/arch/x86/kernel/i387.c @@ -26,6 +26,7 @@ # define _fpstate_ia32 _fpstate # define _xstate_ia32 _xstate # define sig_xstate_ia32_size sig_xstate_size +# define fx_sw_reserved_ia32 fx_sw_reserved # define user_i387_ia32_struct user_i387_struct # define user32_fxsr_struct user_fxsr_struct #endif @@ -447,12 +448,30 @@ static int save_i387_fxsave(struct _fpstate_ia32 __user *buf) if (err) return -1; - if (__copy_to_user(&buf->_fxsr_env[0], fx, - sizeof(struct i387_fxsave_struct))) + if (__copy_to_user(&buf->_fxsr_env[0], fx, xstate_size)) return -1; return 1; } +static int save_i387_xsave(void __user *buf) +{ + struct _fpstate_ia32 __user *fx = buf; + int err = 0; + + if (save_i387_fxsave(fx) < 0) + return -1; + + err = __copy_to_user(&fx->sw_reserved, &fx_sw_reserved_ia32, + sizeof(struct _fpx_sw_bytes)); + err |= __put_user(FP_XSTATE_MAGIC2, + (__u32 __user *) (buf + sig_xstate_ia32_size + - FP_XSTATE_MAGIC2_SIZE)); + if (err) + return -1; + + return 1; +} + int save_i387_xstate_ia32(void __user *buf) { struct _fpstate_ia32 __user *fp = (struct _fpstate_ia32 __user *) buf; @@ -477,6 +496,8 @@ int save_i387_xstate_ia32(void __user *buf) unlazy_fpu(tsk); + if (cpu_has_xsave) + return save_i387_xsave(fp); if (cpu_has_fxsr) return save_i387_fxsave(fp); else @@ -491,14 +512,15 @@ static inline int restore_i387_fsave(struct _fpstate_ia32 __user *buf) sizeof(struct i387_fsave_struct)); } -static int restore_i387_fxsave(struct _fpstate_ia32 __user *buf) +static int restore_i387_fxsave(struct _fpstate_ia32 __user *buf, + unsigned int size) { struct task_struct *tsk = current; struct user_i387_ia32_struct env; int err; err = __copy_from_user(&tsk->thread.xstate->fxsave, &buf->_fxsr_env[0], - sizeof(struct i387_fxsave_struct)); + size); /* mxcsr reserved bits must be masked to zero for security reasons */ tsk->thread.xstate->fxsave.mxcsr &= mxcsr_feature_mask; if (err || __copy_from_user(&env, buf, sizeof(env))) @@ -508,6 +530,51 @@ static int restore_i387_fxsave(struct _fpstate_ia32 __user *buf) return 0; } +static int restore_i387_xsave(void __user *buf) +{ + struct _fpx_sw_bytes fx_sw_user; + struct _fpstate_ia32 __user *fx_user = + ((struct _fpstate_ia32 __user *) buf); + struct i387_fxsave_struct __user *fx = + (struct i387_fxsave_struct __user *) &fx_user->_fxsr_env[0]; + struct xsave_hdr_struct *xsave_hdr = + ¤t->thread.xstate->xsave.xsave_hdr; + unsigned int lmask, hmask; + int err; + + if (check_for_xstate(fx, buf, &fx_sw_user)) + goto fx_only; + + lmask = fx_sw_user.xstate_bv; + hmask = fx_sw_user.xstate_bv >> 32; + + err = restore_i387_fxsave(buf, fx_sw_user.xstate_size); + + xsave_hdr->xstate_bv &= (pcntxt_lmask | (((u64) pcntxt_hmask) << 32)); + /* + * These bits must be zero. + */ + xsave_hdr->reserved1[0] = xsave_hdr->reserved1[1] = 0; + + /* + * Init the state that is not present in the memory layout + * and enabled by the OS. + */ + lmask = ~(pcntxt_lmask & ~lmask); + hmask = ~(pcntxt_hmask & ~hmask); + xsave_hdr->xstate_bv &= (lmask | (((u64) hmask) << 32)); + + return err; +fx_only: + /* + * Couldn't find the extended state information in the memory + * layout. Restore the FP/SSE and init the other extended state + * enabled by the OS. + */ + xsave_hdr->xstate_bv = XSTATE_FPSSE; + return restore_i387_fxsave(buf, sizeof(struct i387_fxsave_struct)); +} + int restore_i387_xstate_ia32(void __user *buf) { int err; @@ -535,8 +602,11 @@ int restore_i387_xstate_ia32(void __user *buf) } if (HAVE_HWFP) { - if (cpu_has_fxsr) - err = restore_i387_fxsave(fp); + if (cpu_has_xsave) + err = restore_i387_xsave(buf); + else if (cpu_has_fxsr) + err = restore_i387_fxsave(fp, sizeof(struct + i387_fxsave_struct)); else err = restore_i387_fsave(fp); } else { diff --git a/arch/x86/kernel/signal_32.c b/arch/x86/kernel/signal_32.c index 690cc616ac0..0f98d69fbdb 100644 --- a/arch/x86/kernel/signal_32.c +++ b/arch/x86/kernel/signal_32.c @@ -441,7 +441,10 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, goto give_sigsegv; /* Create the ucontext. */ - err |= __put_user(0, &frame->uc.uc_flags); + if (cpu_has_xsave) + err |= __put_user(UC_FP_XSTATE, &frame->uc.uc_flags); + else + err |= __put_user(0, &frame->uc.uc_flags); err |= __put_user(0, &frame->uc.uc_link); err |= __put_user(current->sas_ss_sp, &frame->uc.uc_stack.ss_sp); err |= __put_user(sas_ss_flags(regs->sp), diff --git a/arch/x86/kernel/signal_64.c b/arch/x86/kernel/signal_64.c index ddf6123a55c..2621b98f5bf 100644 --- a/arch/x86/kernel/signal_64.c +++ b/arch/x86/kernel/signal_64.c @@ -192,7 +192,7 @@ get_stack(struct k_sigaction *ka, struct pt_regs *regs, unsigned long size) sp = current->sas_ss_sp + current->sas_ss_size; } - return (void __user *)round_down(sp - size, 16); + return (void __user *)round_down(sp - size, 64); } static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, @@ -226,7 +226,10 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, } /* Create the ucontext. */ - err |= __put_user(0, &frame->uc.uc_flags); + if (cpu_has_xsave) + err |= __put_user(UC_FP_XSTATE, &frame->uc.uc_flags); + else + err |= __put_user(0, &frame->uc.uc_flags); err |= __put_user(0, &frame->uc.uc_link); err |= __put_user(me->sas_ss_sp, &frame->uc.uc_stack.ss_sp); err |= __put_user(sas_ss_flags(regs->sp), diff --git a/arch/x86/kernel/xsave.c b/arch/x86/kernel/xsave.c index 608e72d7ca6..dd66d0714c1 100644 --- a/arch/x86/kernel/xsave.c +++ b/arch/x86/kernel/xsave.c @@ -6,12 +6,68 @@ #include #include #include +#ifdef CONFIG_IA32_EMULATION +#include +#endif /* * Supported feature mask by the CPU and the kernel. */ unsigned int pcntxt_hmask, pcntxt_lmask; +struct _fpx_sw_bytes fx_sw_reserved; +#ifdef CONFIG_IA32_EMULATION +struct _fpx_sw_bytes fx_sw_reserved_ia32; +#endif + +/* + * Check for the presence of extended state information in the + * user fpstate pointer in the sigcontext. + */ +int check_for_xstate(struct i387_fxsave_struct __user *buf, + void __user *fpstate, + struct _fpx_sw_bytes *fx_sw_user) +{ + int min_xstate_size = sizeof(struct i387_fxsave_struct) + + sizeof(struct xsave_hdr_struct); + unsigned int magic2; + int err; + + err = __copy_from_user(fx_sw_user, &buf->sw_reserved[0], + sizeof(struct _fpx_sw_bytes)); + + if (err) + return err; + + /* + * First Magic check failed. + */ + if (fx_sw_user->magic1 != FP_XSTATE_MAGIC1) + return -1; + + /* + * Check for error scenarios. + */ + if (fx_sw_user->xstate_size < min_xstate_size || + fx_sw_user->xstate_size > xstate_size || + fx_sw_user->xstate_size > fx_sw_user->extended_size) + return -1; + + err = __get_user(magic2, (__u32 *) (((void *)fpstate) + + fx_sw_user->extended_size - + FP_XSTATE_MAGIC2_SIZE)); + /* + * Check for the presence of second magic word at the end of memory + * layout. This detects the case where the user just copied the legacy + * fpstate layout with out copying the extended state information + * in the memory layout. + */ + if (err || magic2 != FP_XSTATE_MAGIC2) + return -1; + + return 0; +} + #ifdef CONFIG_X86_64 /* * Signal frame handlers. @@ -28,15 +84,18 @@ int save_i387_xstate(void __user *buf) BUILD_BUG_ON(sizeof(struct user_i387_struct) != sizeof(tsk->thread.xstate->fxsave)); - if ((unsigned long)buf % 16) + if ((unsigned long)buf % 64) printk("save_i387_xstate: bad fpstate %p\n", buf); if (!used_math()) return 0; clear_used_math(); /* trigger finit */ if (task_thread_info(tsk)->status & TS_USEDFPU) { - err = save_i387_checking((struct i387_fxsave_struct __user *) - buf); + if (task_thread_info(tsk)->status & TS_XSAVE) + err = xsave_user(buf); + else + err = fxsave_user(buf); + if (err) return err; task_thread_info(tsk)->status &= ~TS_USEDFPU; @@ -46,23 +105,77 @@ int save_i387_xstate(void __user *buf) xstate_size)) return -1; } + + if (task_thread_info(tsk)->status & TS_XSAVE) { + struct _fpstate __user *fx = buf; + + err = __copy_to_user(&fx->sw_reserved, &fx_sw_reserved, + sizeof(struct _fpx_sw_bytes)); + + err |= __put_user(FP_XSTATE_MAGIC2, + (__u32 __user *) (buf + sig_xstate_size + - FP_XSTATE_MAGIC2_SIZE)); + } + return 1; } +/* + * Restore the extended state if present. Otherwise, restore the FP/SSE + * state. + */ +int restore_user_xstate(void __user *buf) +{ + struct _fpx_sw_bytes fx_sw_user; + unsigned int lmask, hmask; + int err; + + if (((unsigned long)buf % 64) || + check_for_xstate(buf, buf, &fx_sw_user)) + goto fx_only; + + lmask = fx_sw_user.xstate_bv; + hmask = fx_sw_user.xstate_bv >> 32; + + /* + * restore the state passed by the user. + */ + err = xrestore_user(buf, lmask, hmask); + if (err) + return err; + + /* + * init the state skipped by the user. + */ + lmask = pcntxt_lmask & ~lmask; + hmask = pcntxt_hmask & ~hmask; + + xrstor_state(init_xstate_buf, lmask, hmask); + + return 0; + +fx_only: + /* + * couldn't find the extended state information in the + * memory layout. Restore just the FP/SSE and init all + * the other extended state. + */ + xrstor_state(init_xstate_buf, pcntxt_lmask & ~XSTATE_FPSSE, + pcntxt_hmask); + return fxrstor_checking((__force struct i387_fxsave_struct *)buf); +} + /* * This restores directly out of user space. Exceptions are handled. */ int restore_i387_xstate(void __user *buf) { struct task_struct *tsk = current; - int err; + int err = 0; if (!buf) { - if (used_math()) { - clear_fpu(tsk); - clear_used_math(); - } - + if (used_math()) + goto clear; return 0; } else if (!access_ok(VERIFY_READ, buf, sig_xstate_size)) @@ -78,12 +191,17 @@ int restore_i387_xstate(void __user *buf) clts(); task_thread_info(current)->status |= TS_USEDFPU; } - err = fxrstor_checking((__force struct i387_fxsave_struct *)buf); + if (task_thread_info(tsk)->status & TS_XSAVE) + err = restore_user_xstate(buf); + else + err = fxrstor_checking((__force struct i387_fxsave_struct *) + buf); if (unlikely(err)) { /* * Encountered an error while doing the restore from the * user buffer, clear the fpu state. */ +clear: clear_fpu(tsk); clear_used_math(); } @@ -91,6 +209,38 @@ int restore_i387_xstate(void __user *buf) } #endif +/* + * Prepare the SW reserved portion of the fxsave memory layout, indicating + * the presence of the extended state information in the memory layout + * pointed by the fpstate pointer in the sigcontext. + * This will be saved when ever the FP and extended state context is + * saved on the user stack during the signal handler delivery to the user. + */ +void prepare_fx_sw_frame(void) +{ + int size_extended = (xstate_size - sizeof(struct i387_fxsave_struct)) + + FP_XSTATE_MAGIC2_SIZE; + + sig_xstate_size = sizeof(struct _fpstate) + size_extended; + +#ifdef CONFIG_IA32_EMULATION + sig_xstate_ia32_size = sizeof(struct _fpstate_ia32) + size_extended; +#endif + + memset(&fx_sw_reserved, 0, sizeof(fx_sw_reserved)); + + fx_sw_reserved.magic1 = FP_XSTATE_MAGIC1; + fx_sw_reserved.extended_size = sig_xstate_size; + fx_sw_reserved.xstate_bv = pcntxt_lmask | + (((u64) (pcntxt_hmask)) << 32); + fx_sw_reserved.xstate_size = xstate_size; +#ifdef CONFIG_IA32_EMULATION + memcpy(&fx_sw_reserved_ia32, &fx_sw_reserved, + sizeof(struct _fpx_sw_bytes)); + fx_sw_reserved_ia32.extended_size = sig_xstate_ia32_size; +#endif +} + /* * Represents init state for the supported extended state. */ @@ -162,6 +312,8 @@ void __init xsave_cntxt_init(void) xstate_size = ebx; + prepare_fx_sw_frame(); + setup_xstate_init(); printk(KERN_INFO "xsave/xrstor: enabled xstate_bv 0x%Lx, " -- cgit v1.2.3 From 42deec6f2c3688fdaf986225ac901b817cd91568 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Tue, 29 Jul 2008 10:29:26 -0700 Subject: x86, xsave: update xsave header bits during ptrace fpregs set FP/SSE bits may be zero in the xsave header(representing the init state). Update these bits during the ptrace fpregs set operation, to indicate the non-init state. Signed-off-by: Suresh Siddha Signed-off-by: H. Peter Anvin Signed-off-by: Ingo Molnar --- arch/x86/kernel/i387.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c index cbb9dc474a2..e0ed59f5c19 100644 --- a/arch/x86/kernel/i387.c +++ b/arch/x86/kernel/i387.c @@ -214,6 +214,13 @@ int xfpregs_set(struct task_struct *target, const struct user_regset *regset, */ target->thread.xstate->fxsave.mxcsr &= mxcsr_feature_mask; + /* + * update the header bits in the xsave header, indicating the + * presence of FP and SSE state. + */ + if (cpu_has_xsave) + target->thread.xstate->xsave.xsave_hdr.xstate_bv |= XSTATE_FPSSE; + return ret; } @@ -414,6 +421,12 @@ int fpregs_set(struct task_struct *target, const struct user_regset *regset, if (!ret) convert_to_fxsr(target, &env); + /* + * update the header bit in the xsave header, indicating the + * presence of FP. + */ + if (cpu_has_xsave) + target->thread.xstate->xsave.xsave_hdr.xstate_bv |= XSTATE_FP; return ret; } -- cgit v1.2.3 From 6152e4b1c99a3689fc318d092cd144597f7dbd14 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Tue, 29 Jul 2008 17:23:16 -0700 Subject: x86, xsave: keep the XSAVE feature mask as an u64 The XSAVE feature mask is a 64-bit number; keep it that way, in order to avoid the mistake done with rdmsr/wrmsr. Use the xsetbv() function provided in the previous patch. Signed-off-by: H. Peter Anvin Signed-off-by: Ingo Molnar --- arch/x86/kernel/i387.c | 12 +++++------- arch/x86/kernel/xsave.c | 45 +++++++++++++++++---------------------------- 2 files changed, 22 insertions(+), 35 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c index e0ed59f5c19..45723f1fe19 100644 --- a/arch/x86/kernel/i387.c +++ b/arch/x86/kernel/i387.c @@ -552,18 +552,17 @@ static int restore_i387_xsave(void __user *buf) (struct i387_fxsave_struct __user *) &fx_user->_fxsr_env[0]; struct xsave_hdr_struct *xsave_hdr = ¤t->thread.xstate->xsave.xsave_hdr; - unsigned int lmask, hmask; + u64 mask; int err; if (check_for_xstate(fx, buf, &fx_sw_user)) goto fx_only; - lmask = fx_sw_user.xstate_bv; - hmask = fx_sw_user.xstate_bv >> 32; + mask = fx_sw_user.xstate_bv; err = restore_i387_fxsave(buf, fx_sw_user.xstate_size); - xsave_hdr->xstate_bv &= (pcntxt_lmask | (((u64) pcntxt_hmask) << 32)); + xsave_hdr->xstate_bv &= pcntxt_mask; /* * These bits must be zero. */ @@ -573,9 +572,8 @@ static int restore_i387_xsave(void __user *buf) * Init the state that is not present in the memory layout * and enabled by the OS. */ - lmask = ~(pcntxt_lmask & ~lmask); - hmask = ~(pcntxt_hmask & ~hmask); - xsave_hdr->xstate_bv &= (lmask | (((u64) hmask) << 32)); + mask = ~(pcntxt_mask & ~mask); + xsave_hdr->xstate_bv &= mask; return err; fx_only: diff --git a/arch/x86/kernel/xsave.c b/arch/x86/kernel/xsave.c index dd66d0714c1..7415f3e38a5 100644 --- a/arch/x86/kernel/xsave.c +++ b/arch/x86/kernel/xsave.c @@ -9,11 +9,12 @@ #ifdef CONFIG_IA32_EMULATION #include #endif +#include /* * Supported feature mask by the CPU and the kernel. */ -unsigned int pcntxt_hmask, pcntxt_lmask; +u64 pcntxt_mask; struct _fpx_sw_bytes fx_sw_reserved; #ifdef CONFIG_IA32_EMULATION @@ -127,30 +128,28 @@ int save_i387_xstate(void __user *buf) int restore_user_xstate(void __user *buf) { struct _fpx_sw_bytes fx_sw_user; - unsigned int lmask, hmask; + u64 mask; int err; if (((unsigned long)buf % 64) || check_for_xstate(buf, buf, &fx_sw_user)) goto fx_only; - lmask = fx_sw_user.xstate_bv; - hmask = fx_sw_user.xstate_bv >> 32; + mask = fx_sw_user.xstate_bv; /* * restore the state passed by the user. */ - err = xrestore_user(buf, lmask, hmask); + err = xrestore_user(buf, mask); if (err) return err; /* * init the state skipped by the user. */ - lmask = pcntxt_lmask & ~lmask; - hmask = pcntxt_hmask & ~hmask; + mask = pcntxt_mask & ~mask; - xrstor_state(init_xstate_buf, lmask, hmask); + xrstor_state(init_xstate_buf, mask); return 0; @@ -160,8 +159,7 @@ fx_only: * memory layout. Restore just the FP/SSE and init all * the other extended state. */ - xrstor_state(init_xstate_buf, pcntxt_lmask & ~XSTATE_FPSSE, - pcntxt_hmask); + xrstor_state(init_xstate_buf, pcntxt_mask & ~XSTATE_FPSSE); return fxrstor_checking((__force struct i387_fxsave_struct *)buf); } @@ -231,8 +229,7 @@ void prepare_fx_sw_frame(void) fx_sw_reserved.magic1 = FP_XSTATE_MAGIC1; fx_sw_reserved.extended_size = sig_xstate_size; - fx_sw_reserved.xstate_bv = pcntxt_lmask | - (((u64) (pcntxt_hmask)) << 32); + fx_sw_reserved.xstate_bv = pcntxt_mask; fx_sw_reserved.xstate_size = xstate_size; #ifdef CONFIG_IA32_EMULATION memcpy(&fx_sw_reserved_ia32, &fx_sw_reserved, @@ -263,11 +260,8 @@ void __cpuinit xsave_init(void) /* * Enable all the features that the HW is capable of * and the Linux kernel is aware of. - * - * xsetbv(); */ - asm volatile(".byte 0x0f,0x01,0xd1" : : "c" (0), - "a" (pcntxt_lmask), "d" (pcntxt_hmask)); + xsetbv(XCR_XFEATURE_ENABLED_MASK, pcntxt_mask); } /* @@ -287,36 +281,31 @@ void __init xsave_cntxt_init(void) unsigned int eax, ebx, ecx, edx; cpuid_count(0xd, 0, &eax, &ebx, &ecx, &edx); + pcntxt_mask = eax + ((u64)edx << 32); - pcntxt_lmask = eax; - pcntxt_hmask = edx; - - if ((pcntxt_lmask & XSTATE_FPSSE) != XSTATE_FPSSE) { - printk(KERN_ERR "FP/SSE not shown under xsave features %x\n", - pcntxt_lmask); + if ((pcntxt_mask & XSTATE_FPSSE) != XSTATE_FPSSE) { + printk(KERN_ERR "FP/SSE not shown under xsave features 0x%llx\n", + pcntxt_mask); BUG(); } /* * for now OS knows only about FP/SSE */ - pcntxt_lmask = pcntxt_lmask & XCNTXT_LMASK; - pcntxt_hmask = pcntxt_hmask & XCNTXT_HMASK; - + pcntxt_mask = pcntxt_mask & XCNTXT_MASK; xsave_init(); /* * Recompute the context size for enabled features */ cpuid_count(0xd, 0, &eax, &ebx, &ecx, &edx); - xstate_size = ebx; prepare_fx_sw_frame(); setup_xstate_init(); - printk(KERN_INFO "xsave/xrstor: enabled xstate_bv 0x%Lx, " + printk(KERN_INFO "xsave/xrstor: enabled xstate_bv 0x%llx, " "cntxt size 0x%x\n", - (pcntxt_lmask | ((u64) pcntxt_hmask << 32)), xstate_size); + pcntxt_mask, xstate_size); } -- cgit v1.2.3 From 7de08b4e1ed8d80e6086f71b7e99fc4b397aae39 Mon Sep 17 00:00:00 2001 From: "Gustavo F. Padovan" Date: Tue, 29 Jul 2008 02:48:51 -0300 Subject: x86: coding styles fixes to arch/x86/kernel/process_64.c Fix about 50 errors and many warnings without change process_64.o arch/x86/kernel/process_64.o: text data bss dec hex filename 5236 8 24 5268 1494 process_64.o.after 5236 8 24 5268 1494 process_64.o.before md5: 9c35e9debdea4e471288c6e8ca267a75 process_64.o.after 9c35e9debdea4e471288c6e8ca267a75 process_64.o.before Signed-off-by: Gustavo F. Padovan Signed-off-by: Ingo Molnar --- arch/x86/kernel/process_64.c | 101 +++++++++++++++++++++---------------------- 1 file changed, 50 insertions(+), 51 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 3fb62a7d9a1..4da8514dd25 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -37,11 +37,11 @@ #include #include #include +#include +#include -#include #include #include -#include #include #include #include @@ -88,7 +88,7 @@ void exit_idle(void) #ifdef CONFIG_HOTPLUG_CPU DECLARE_PER_CPU(int, cpu_state); -#include +#include /* We halt the CPU with physical CPU hotplug */ static inline void play_dead(void) { @@ -152,7 +152,7 @@ void cpu_idle(void) } /* Prints also some state that isn't saved in the pt_regs */ -void __show_regs(struct pt_regs * regs) +void __show_regs(struct pt_regs *regs) { unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L, fs, gs, shadowgs; unsigned long d0, d1, d2, d3, d6, d7; @@ -177,28 +177,28 @@ void __show_regs(struct pt_regs * regs) printk("RBP: %016lx R08: %016lx R09: %016lx\n", regs->bp, regs->r8, regs->r9); printk("R10: %016lx R11: %016lx R12: %016lx\n", - regs->r10, regs->r11, regs->r12); + regs->r10, regs->r11, regs->r12); printk("R13: %016lx R14: %016lx R15: %016lx\n", - regs->r13, regs->r14, regs->r15); + regs->r13, regs->r14, regs->r15); - asm("movl %%ds,%0" : "=r" (ds)); - asm("movl %%cs,%0" : "=r" (cs)); - asm("movl %%es,%0" : "=r" (es)); + asm("movl %%ds,%0" : "=r" (ds)); + asm("movl %%cs,%0" : "=r" (cs)); + asm("movl %%es,%0" : "=r" (es)); asm("movl %%fs,%0" : "=r" (fsindex)); asm("movl %%gs,%0" : "=r" (gsindex)); rdmsrl(MSR_FS_BASE, fs); - rdmsrl(MSR_GS_BASE, gs); - rdmsrl(MSR_KERNEL_GS_BASE, shadowgs); + rdmsrl(MSR_GS_BASE, gs); + rdmsrl(MSR_KERNEL_GS_BASE, shadowgs); cr0 = read_cr0(); cr2 = read_cr2(); cr3 = read_cr3(); cr4 = read_cr4(); - printk("FS: %016lx(%04x) GS:%016lx(%04x) knlGS:%016lx\n", - fs,fsindex,gs,gsindex,shadowgs); - printk("CS: %04x DS: %04x ES: %04x CR0: %016lx\n", cs, ds, es, cr0); + printk("FS: %016lx(%04x) GS:%016lx(%04x) knlGS:%016lx\n", + fs, fsindex, gs, gsindex, shadowgs); + printk("CS: %04x DS: %04x ES: %04x CR0: %016lx\n", cs, ds, es, cr0); printk("CR2: %016lx CR3: %016lx CR4: %016lx\n", cr2, cr3, cr4); get_debugreg(d0, 0); @@ -314,10 +314,10 @@ void prepare_to_copy(struct task_struct *tsk) int copy_thread(int nr, unsigned long clone_flags, unsigned long sp, unsigned long unused, - struct task_struct * p, struct pt_regs * regs) + struct task_struct *p, struct pt_regs *regs) { int err; - struct pt_regs * childregs; + struct pt_regs *childregs; struct task_struct *me = current; childregs = ((struct pt_regs *) @@ -362,10 +362,10 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long sp, if (test_thread_flag(TIF_IA32)) err = do_set_thread_area(p, -1, (struct user_desc __user *)childregs->si, 0); - else -#endif - err = do_arch_prctl(p, ARCH_SET_FS, childregs->r8); - if (err) + else +#endif + err = do_arch_prctl(p, ARCH_SET_FS, childregs->r8); + if (err) goto out; } err = 0; @@ -544,7 +544,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) unsigned fsindex, gsindex; /* we're going to use this soon, after a few expensive things */ - if (next_p->fpu_counter>5) + if (next_p->fpu_counter > 5) prefetch(next->xstate); /* @@ -552,13 +552,13 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) */ load_sp0(tss, next); - /* + /* * Switch DS and ES. * This won't pick up thread selector changes, but I guess that is ok. */ savesegment(es, prev->es); if (unlikely(next->es | prev->es)) - loadsegment(es, next->es); + loadsegment(es, next->es); savesegment(ds, prev->ds); if (unlikely(next->ds | prev->ds)) @@ -584,7 +584,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) */ arch_leave_lazy_cpu_mode(); - /* + /* * Switch FS and GS. * * Segment register != 0 always requires a reload. Also @@ -593,13 +593,13 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) */ if (unlikely(fsindex | next->fsindex | prev->fs)) { loadsegment(fs, next->fsindex); - /* + /* * Check if the user used a selector != 0; if yes * clear 64bit base, since overloaded base is always * mapped to the Null selector */ if (fsindex) - prev->fs = 0; + prev->fs = 0; } /* when next process has a 64bit base use it */ if (next->fs) @@ -609,7 +609,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) if (unlikely(gsindex | next->gsindex | prev->gs)) { load_gs_index(next->gsindex); if (gsindex) - prev->gs = 0; + prev->gs = 0; } if (next->gs) wrmsrl(MSR_KERNEL_GS_BASE, next->gs); @@ -618,12 +618,12 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) /* Must be after DS reload */ unlazy_fpu(prev_p); - /* + /* * Switch the PDA and FPU contexts. */ prev->usersp = read_pda(oldrsp); write_pda(oldrsp, next->usersp); - write_pda(pcurrent, next_p); + write_pda(pcurrent, next_p); write_pda(kernelstack, (unsigned long)task_stack_page(next_p) + @@ -664,7 +664,7 @@ long sys_execve(char __user *name, char __user * __user *argv, char __user * __user *envp, struct pt_regs *regs) { long error; - char * filename; + char *filename; filename = getname(name); error = PTR_ERR(filename); @@ -722,55 +722,55 @@ asmlinkage long sys_vfork(struct pt_regs *regs) unsigned long get_wchan(struct task_struct *p) { unsigned long stack; - u64 fp,ip; + u64 fp, ip; int count = 0; - if (!p || p == current || p->state==TASK_RUNNING) - return 0; + if (!p || p == current || p->state == TASK_RUNNING) + return 0; stack = (unsigned long)task_stack_page(p); if (p->thread.sp < stack || p->thread.sp > stack+THREAD_SIZE) return 0; fp = *(u64 *)(p->thread.sp); - do { + do { if (fp < (unsigned long)stack || fp > (unsigned long)stack+THREAD_SIZE) - return 0; + return 0; ip = *(u64 *)(fp+8); if (!in_sched_functions(ip)) return ip; - fp = *(u64 *)fp; - } while (count++ < 16); + fp = *(u64 *)fp; + } while (count++ < 16); return 0; } long do_arch_prctl(struct task_struct *task, int code, unsigned long addr) -{ - int ret = 0; +{ + int ret = 0; int doit = task == current; int cpu; - switch (code) { + switch (code) { case ARCH_SET_GS: if (addr >= TASK_SIZE_OF(task)) - return -EPERM; + return -EPERM; cpu = get_cpu(); - /* handle small bases via the GDT because that's faster to + /* handle small bases via the GDT because that's faster to switch. */ - if (addr <= 0xffffffff) { - set_32bit_tls(task, GS_TLS, addr); - if (doit) { + if (addr <= 0xffffffff) { + set_32bit_tls(task, GS_TLS, addr); + if (doit) { load_TLS(&task->thread, cpu); - load_gs_index(GS_TLS_SEL); + load_gs_index(GS_TLS_SEL); } - task->thread.gsindex = GS_TLS_SEL; + task->thread.gsindex = GS_TLS_SEL; task->thread.gs = 0; - } else { + } else { task->thread.gsindex = 0; task->thread.gs = addr; if (doit) { load_gs_index(0); ret = checking_wrmsrl(MSR_KERNEL_GS_BASE, addr); - } + } } put_cpu(); break; @@ -824,8 +824,7 @@ long do_arch_prctl(struct task_struct *task, int code, unsigned long addr) rdmsrl(MSR_KERNEL_GS_BASE, base); else base = task->thread.gs; - } - else + } else base = task->thread.gs; ret = put_user(base, (unsigned long __user *)addr); break; -- cgit v1.2.3 From 8092c654de9a964c14d89da56834f73a80548a58 Mon Sep 17 00:00:00 2001 From: "Gustavo F. Padovan" Date: Tue, 29 Jul 2008 02:48:52 -0300 Subject: x86: add KERN_INFO to printks on process_64.c Fix many coding style warnings. Signed-off-by: Gustavo F. Padovan Signed-off-by: Ingo Molnar --- arch/x86/kernel/process_64.c | 32 +++++++++++++++++--------------- 1 file changed, 17 insertions(+), 15 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 4da8514dd25..3560d7f4d74 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -161,24 +161,24 @@ void __show_regs(struct pt_regs *regs) printk("\n"); print_modules(); - printk("Pid: %d, comm: %.20s %s %s %.*s\n", + printk(KERN_INFO "Pid: %d, comm: %.20s %s %s %.*s\n", current->pid, current->comm, print_tainted(), init_utsname()->release, (int)strcspn(init_utsname()->version, " "), init_utsname()->version); - printk("RIP: %04lx:[<%016lx>] ", regs->cs & 0xffff, regs->ip); + printk(KERN_INFO "RIP: %04lx:[<%016lx>] ", regs->cs & 0xffff, regs->ip); printk_address(regs->ip, 1); - printk("RSP: %04lx:%016lx EFLAGS: %08lx\n", regs->ss, regs->sp, - regs->flags); - printk("RAX: %016lx RBX: %016lx RCX: %016lx\n", + printk(KERN_INFO "RSP: %04lx:%016lx EFLAGS: %08lx\n", regs->ss, + regs->sp, regs->flags); + printk(KERN_INFO "RAX: %016lx RBX: %016lx RCX: %016lx\n", regs->ax, regs->bx, regs->cx); - printk("RDX: %016lx RSI: %016lx RDI: %016lx\n", + printk(KERN_INFO "RDX: %016lx RSI: %016lx RDI: %016lx\n", regs->dx, regs->si, regs->di); - printk("RBP: %016lx R08: %016lx R09: %016lx\n", + printk(KERN_INFO "RBP: %016lx R08: %016lx R09: %016lx\n", regs->bp, regs->r8, regs->r9); - printk("R10: %016lx R11: %016lx R12: %016lx\n", + printk(KERN_INFO "R10: %016lx R11: %016lx R12: %016lx\n", regs->r10, regs->r11, regs->r12); - printk("R13: %016lx R14: %016lx R15: %016lx\n", + printk(KERN_INFO "R13: %016lx R14: %016lx R15: %016lx\n", regs->r13, regs->r14, regs->r15); asm("movl %%ds,%0" : "=r" (ds)); @@ -196,24 +196,26 @@ void __show_regs(struct pt_regs *regs) cr3 = read_cr3(); cr4 = read_cr4(); - printk("FS: %016lx(%04x) GS:%016lx(%04x) knlGS:%016lx\n", + printk(KERN_INFO "FS: %016lx(%04x) GS:%016lx(%04x) knlGS:%016lx\n", fs, fsindex, gs, gsindex, shadowgs); - printk("CS: %04x DS: %04x ES: %04x CR0: %016lx\n", cs, ds, es, cr0); - printk("CR2: %016lx CR3: %016lx CR4: %016lx\n", cr2, cr3, cr4); + printk(KERN_INFO "CS: %04x DS: %04x ES: %04x CR0: %016lx\n", cs, ds, + es, cr0); + printk(KERN_INFO "CR2: %016lx CR3: %016lx CR4: %016lx\n", cr2, cr3, + cr4); get_debugreg(d0, 0); get_debugreg(d1, 1); get_debugreg(d2, 2); - printk("DR0: %016lx DR1: %016lx DR2: %016lx\n", d0, d1, d2); + printk(KERN_INFO "DR0: %016lx DR1: %016lx DR2: %016lx\n", d0, d1, d2); get_debugreg(d3, 3); get_debugreg(d6, 6); get_debugreg(d7, 7); - printk("DR3: %016lx DR6: %016lx DR7: %016lx\n", d3, d6, d7); + printk(KERN_INFO "DR3: %016lx DR6: %016lx DR7: %016lx\n", d3, d6, d7); } void show_regs(struct pt_regs *regs) { - printk("CPU %d:", smp_processor_id()); + printk(KERN_INFO "CPU %d:", smp_processor_id()); __show_regs(regs); show_trace(NULL, regs, (void *)(regs + 1), regs->bp); } -- cgit v1.2.3 From 08aadf069d0482ade033badefa8f03eb2fcddd9c Mon Sep 17 00:00:00 2001 From: "Gustavo F. Padovan" Date: Tue, 29 Jul 2008 02:48:53 -0300 Subject: x86: coding style fixes to arch/x86/kernel/crash_dump_64.c Fix conding style without change crash_dump_64.o arch/x86/kernel/crash_dump_64.o text data bss dec hex filename 129 0 0 129 81 crash_dump_64.o.after 129 0 0 129 81 crash_dump_64.o.before md5: 885b52c1b92737e6b12e5107e90fc1f1 crash_dump_64.o.after 885b52c1b92737e6b12e5107e90fc1f1 crash_dump_64.o.before Signed-off-by: Gustavo F. Padovan Signed-off-by: Ingo Molnar --- arch/x86/kernel/crash_dump_64.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/crash_dump_64.c b/arch/x86/kernel/crash_dump_64.c index 15e6c6bc4a4..d3e524c8452 100644 --- a/arch/x86/kernel/crash_dump_64.c +++ b/arch/x86/kernel/crash_dump_64.c @@ -7,9 +7,8 @@ #include #include - -#include -#include +#include +#include /** * copy_oldmem_page - copy one page from "oldmem" @@ -25,7 +24,7 @@ * in the current kernel. We stitch up a pte, similar to kmap_atomic. */ ssize_t copy_oldmem_page(unsigned long pfn, char *buf, - size_t csize, unsigned long offset, int userbuf) + size_t csize, unsigned long offset, int userbuf) { void *vaddr; -- cgit v1.2.3 From caa007dd3687d38a0252484d9d0a8f9d929ba932 Mon Sep 17 00:00:00 2001 From: "Gustavo F. Padovan" Date: Tue, 29 Jul 2008 02:48:54 -0300 Subject: x86: coding style fixes to arch/x86/kernel/signal_64.c Fix all errors and many warnings reported by checkpatch.pl without change signal_64.o arch/x86/kernel/signal_64.o text data bss dec hex filename 5143 0 8 5151 141f signal_64.o.after 5143 0 8 5151 141f signal_64.o.before md5: e68718092b3641cb27e79e55ce57e3ad signal_64.o.after e68718092b3641cb27e79e55ce57e3ad signal_64.o.before Signed-off-by: Gustavo F. Padovan Signed-off-by: Ingo Molnar --- arch/x86/kernel/sigframe.h | 5 ++++ arch/x86/kernel/signal_64.c | 62 ++++++++++++++++++++++----------------------- 2 files changed, 35 insertions(+), 32 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/sigframe.h b/arch/x86/kernel/sigframe.h index 72bbb519d2d..8b4956e800a 100644 --- a/arch/x86/kernel/sigframe.h +++ b/arch/x86/kernel/sigframe.h @@ -24,4 +24,9 @@ struct rt_sigframe { struct ucontext uc; struct siginfo info; }; + +int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, + sigset_t *set, struct pt_regs *regs); +int ia32_setup_frame(int sig, struct k_sigaction *ka, + sigset_t *set, struct pt_regs *regs); #endif diff --git a/arch/x86/kernel/signal_64.c b/arch/x86/kernel/signal_64.c index b45ef8ddd65..87a9c2f28d9 100644 --- a/arch/x86/kernel/signal_64.c +++ b/arch/x86/kernel/signal_64.c @@ -19,9 +19,10 @@ #include #include #include +#include + #include #include -#include #include #include #include @@ -41,11 +42,6 @@ # define FIX_EFLAGS __FIX_EFLAGS #endif -int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, - sigset_t *set, struct pt_regs * regs); -int ia32_setup_frame(int sig, struct k_sigaction *ka, - sigset_t *set, struct pt_regs * regs); - asmlinkage long sys_sigaltstack(const stack_t __user *uss, stack_t __user *uoss, struct pt_regs *regs) @@ -119,7 +115,7 @@ restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc, /* Always make any pending restarted system calls return -EINTR */ current_thread_info()->restart_block.fn = do_no_restart_syscall; -#define COPY(x) err |= __get_user(regs->x, &sc->x) +#define COPY(x) (err |= __get_user(regs->x, &sc->x)) COPY(di); COPY(si); COPY(bp); COPY(sp); COPY(bx); COPY(dx); COPY(cx); COPY(ip); @@ -149,7 +145,7 @@ restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc, } { - struct _fpstate __user * buf; + struct _fpstate __user *buf; err |= __get_user(buf, &sc->fpstate); if (buf) { @@ -189,7 +185,7 @@ asmlinkage long sys_rt_sigreturn(struct pt_regs *regs) current->blocked = set; recalc_sigpending(); spin_unlock_irq(¤t->sighand->siglock); - + if (restore_sigcontext(regs, &frame->uc.uc_mcontext, &ax)) goto badframe; @@ -199,16 +195,17 @@ asmlinkage long sys_rt_sigreturn(struct pt_regs *regs) return ax; badframe: - signal_fault(regs,frame,"sigreturn"); + signal_fault(regs, frame, "sigreturn"); return 0; -} +} /* * Set up a signal frame. */ static inline int -setup_sigcontext(struct sigcontext __user *sc, struct pt_regs *regs, unsigned long mask, struct task_struct *me) +setup_sigcontext(struct sigcontext __user *sc, struct pt_regs *regs, + unsigned long mask, struct task_struct *me) { int err = 0; @@ -264,35 +261,35 @@ get_stack(struct k_sigaction *ka, struct pt_regs *regs, unsigned long size) } static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, - sigset_t *set, struct pt_regs * regs) + sigset_t *set, struct pt_regs *regs) { struct rt_sigframe __user *frame; - struct _fpstate __user *fp = NULL; + struct _fpstate __user *fp = NULL; int err = 0; struct task_struct *me = current; if (used_math()) { - fp = get_stack(ka, regs, sizeof(struct _fpstate)); + fp = get_stack(ka, regs, sizeof(struct _fpstate)); frame = (void __user *)round_down( (unsigned long)fp - sizeof(struct rt_sigframe), 16) - 8; if (!access_ok(VERIFY_WRITE, fp, sizeof(struct _fpstate))) goto give_sigsegv; - if (save_i387(fp) < 0) - err |= -1; + if (save_i387(fp) < 0) + err |= -1; } else frame = get_stack(ka, regs, sizeof(struct rt_sigframe)) - 8; if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame))) goto give_sigsegv; - if (ka->sa.sa_flags & SA_SIGINFO) { + if (ka->sa.sa_flags & SA_SIGINFO) { err |= copy_siginfo_to_user(&frame->info, info); if (err) goto give_sigsegv; } - + /* Create the ucontext. */ err |= __put_user(0, &frame->uc.uc_flags); err |= __put_user(0, &frame->uc.uc_link); @@ -302,9 +299,9 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, err |= __put_user(me->sas_ss_size, &frame->uc.uc_stack.ss_size); err |= setup_sigcontext(&frame->uc.uc_mcontext, regs, set->sig[0], me); err |= __put_user(fp, &frame->uc.uc_mcontext.fpstate); - if (sizeof(*set) == 16) { + if (sizeof(*set) == 16) { __put_user(set->sig[0], &frame->uc.uc_sigmask.sig[0]); - __put_user(set->sig[1], &frame->uc.uc_sigmask.sig[1]); + __put_user(set->sig[1], &frame->uc.uc_sigmask.sig[1]); } else err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set)); @@ -315,7 +312,7 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, err |= __put_user(ka->sa.sa_restorer, &frame->pretcode); } else { /* could use a vstub here */ - goto give_sigsegv; + goto give_sigsegv; } if (err) @@ -323,7 +320,7 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, /* Set up registers for signal handler */ regs->di = sig; - /* In case the signal handler was declared without prototypes */ + /* In case the signal handler was declared without prototypes */ regs->ax = 0; /* This also works for non SA_SIGINFO handlers because they expect the @@ -376,7 +373,7 @@ static long current_syscall_ret(struct pt_regs *regs) /* * OK, we're invoking a handler - */ + */ static int handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka, @@ -420,7 +417,7 @@ handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka, ret = ia32_setup_rt_frame(sig, ka, info, oldset, regs); else ret = ia32_setup_frame(sig, ka, oldset, regs); - } else + } else #endif ret = setup_rt_frame(sig, ka, info, oldset, regs); @@ -448,9 +445,9 @@ handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka, ptrace_notify(SIGTRAP); spin_lock_irq(¤t->sighand->siglock); - sigorsets(¤t->blocked,¤t->blocked,&ka->sa.sa_mask); + sigorsets(¤t->blocked, ¤t->blocked, &ka->sa.sa_mask); if (!(ka->sa.sa_flags & SA_NODEFER)) - sigaddset(¤t->blocked,sig); + sigaddset(¤t->blocked, sig); recalc_sigpending(); spin_unlock_irq(¤t->sighand->siglock); } @@ -552,14 +549,15 @@ void do_notify_resume(struct pt_regs *regs, void *unused, } void signal_fault(struct pt_regs *regs, void __user *frame, char *where) -{ - struct task_struct *me = current; +{ + struct task_struct *me = current; if (show_unhandled_signals && printk_ratelimit()) { printk("%s[%d] bad frame in %s frame:%p ip:%lx sp:%lx orax:%lx", - me->comm,me->pid,where,frame,regs->ip,regs->sp,regs->orig_ax); + me->comm, me->pid, where, frame, regs->ip, + regs->sp, regs->orig_ax); print_vma_addr(" in ", regs->ip); printk("\n"); } - force_sig(SIGSEGV, me); -} + force_sig(SIGSEGV, me); +} -- cgit v1.2.3 From 4df9e510a9fda29aca71d8acac853b98aa6884d1 Mon Sep 17 00:00:00 2001 From: "Gustavo F. Padovan" Date: Tue, 29 Jul 2008 02:48:55 -0300 Subject: x86: coding style fixes to arch/x86/kernel/traps_64.c Fix all errors and many warnings reported by checkpath.pl. Except the change of include to the traps.o before and after changes are the same. Signed-off-by: Gustavo F. Padovan Signed-off-by: Ingo Molnar --- arch/x86/kernel/traps_64.c | 59 +++++++++++++++++++++++++--------------------- 1 file changed, 32 insertions(+), 27 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/traps_64.c b/arch/x86/kernel/traps_64.c index 3f18d73f420..fe36d96ba70 100644 --- a/arch/x86/kernel/traps_64.c +++ b/arch/x86/kernel/traps_64.c @@ -32,6 +32,8 @@ #include #include #include +#include +#include #if defined(CONFIG_EDAC) #include @@ -45,9 +47,6 @@ #include #include #include -#include -#include -#include #include #include #include @@ -85,7 +84,8 @@ static inline void preempt_conditional_cli(struct pt_regs *regs) void printk_address(unsigned long address, int reliable) { - printk(" [<%016lx>] %s%pS\n", address, reliable ? "": "? ", (void *) address); + printk(" [<%016lx>] %s%pS\n", address, reliable ? + "" : "? ", (void *) address); } static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack, @@ -98,7 +98,8 @@ static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack, [STACKFAULT_STACK - 1] = "#SS", [MCE_STACK - 1] = "#MC", #if DEBUG_STKSZ > EXCEPTION_STKSZ - [N_EXCEPTION_STACKS ... N_EXCEPTION_STACKS + DEBUG_STKSZ / EXCEPTION_STKSZ - 2] = "#DB[?]" + [N_EXCEPTION_STACKS ... N_EXCEPTION_STACKS + DEBUG_STKSZ / + EXCEPTION_STKSZ - 2] = "#DB[?]" #endif }; unsigned k; @@ -163,7 +164,7 @@ static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack, } /* - * x86-64 can have up to three kernel stacks: + * x86-64 can have up to three kernel stacks: * process stack * interrupt stack * severe exception (double fault, nmi, stack fault, debug, mce) hardware stack @@ -219,7 +220,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs, const struct stacktrace_ops *ops, void *data) { const unsigned cpu = get_cpu(); - unsigned long *irqstack_end = (unsigned long*)cpu_pda(cpu)->irqstackptr; + unsigned long *irqstack_end = (unsigned long *)cpu_pda(cpu)->irqstackptr; unsigned used = 0; struct thread_info *tinfo; @@ -237,7 +238,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs, if (!bp) { if (task == current) { /* Grab bp right from our regs */ - asm("movq %%rbp, %0" : "=r" (bp) :); + asm("movq %%rbp, %0" : "=r" (bp) : ); } else { /* bp is the last reg pushed by switch_to */ bp = *(unsigned long *) task->thread.sp; @@ -357,11 +358,13 @@ show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, unsigned long *stack; int i; const int cpu = smp_processor_id(); - unsigned long *irqstack_end = (unsigned long *) (cpu_pda(cpu)->irqstackptr); - unsigned long *irqstack = (unsigned long *) (cpu_pda(cpu)->irqstackptr - IRQSTACKSIZE); + unsigned long *irqstack_end = + (unsigned long *) (cpu_pda(cpu)->irqstackptr); + unsigned long *irqstack = + (unsigned long *) (cpu_pda(cpu)->irqstackptr - IRQSTACKSIZE); - // debugging aid: "show_stack(NULL, NULL);" prints the - // back trace for this cpu. + /* debugging aid: "show_stack(NULL, NULL);" prints the + back trace for this cpu. */ if (sp == NULL) { if (task) @@ -404,7 +407,7 @@ void dump_stack(void) #ifdef CONFIG_FRAME_POINTER if (!bp) - asm("movq %%rbp, %0" : "=r" (bp):); + asm("movq %%rbp, %0" : "=r" (bp) : ); #endif printk("Pid: %d, comm: %.20s %s %s %.*s\n", @@ -414,7 +417,6 @@ void dump_stack(void) init_utsname()->version); show_trace(NULL, NULL, &stack, bp); } - EXPORT_SYMBOL(dump_stack); void show_registers(struct pt_regs *regs) @@ -493,7 +495,7 @@ unsigned __kprobes long oops_begin(void) raw_local_irq_save(flags); cpu = smp_processor_id(); if (!__raw_spin_trylock(&die_lock)) { - if (cpu == die_owner) + if (cpu == die_owner) /* nested oops. should stop eventually */; else __raw_spin_lock(&die_lock); @@ -638,7 +640,7 @@ kernel_trap: } #define DO_ERROR(trapnr, signr, str, name) \ -asmlinkage void do_##name(struct pt_regs * regs, long error_code) \ +asmlinkage void do_##name(struct pt_regs *regs, long error_code) \ { \ if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \ == NOTIFY_STOP) \ @@ -648,7 +650,7 @@ asmlinkage void do_##name(struct pt_regs * regs, long error_code) \ } #define DO_ERROR_INFO(trapnr, signr, str, name, sicode, siaddr) \ -asmlinkage void do_##name(struct pt_regs * regs, long error_code) \ +asmlinkage void do_##name(struct pt_regs *regs, long error_code) \ { \ siginfo_t info; \ info.si_signo = signr; \ @@ -683,7 +685,7 @@ asmlinkage void do_stack_segment(struct pt_regs *regs, long error_code) preempt_conditional_cli(regs); } -asmlinkage void do_double_fault(struct pt_regs * regs, long error_code) +asmlinkage void do_double_fault(struct pt_regs *regs, long error_code) { static const char str[] = "double fault"; struct task_struct *tsk = current; @@ -778,9 +780,10 @@ io_check_error(unsigned char reason, struct pt_regs *regs) } static notrace __kprobes void -unknown_nmi_error(unsigned char reason, struct pt_regs * regs) +unknown_nmi_error(unsigned char reason, struct pt_regs *regs) { - if (notify_die(DIE_NMIUNKNOWN, "nmi", regs, reason, 2, SIGINT) == NOTIFY_STOP) + if (notify_die(DIE_NMIUNKNOWN, "nmi", regs, reason, 2, SIGINT) == + NOTIFY_STOP) return; printk(KERN_EMERG "Uhhuh. NMI received for unknown reason %02x.\n", reason); @@ -882,7 +885,7 @@ asmlinkage __kprobes struct pt_regs *sync_regs(struct pt_regs *eregs) else if (user_mode(eregs)) regs = task_pt_regs(current); /* Exception from kernel and interrupts are enabled. Move to - kernel process stack. */ + kernel process stack. */ else if (eregs->flags & X86_EFLAGS_IF) regs = (struct pt_regs *)(eregs->sp -= sizeof(struct pt_regs)); if (eregs != regs) @@ -891,7 +894,7 @@ asmlinkage __kprobes struct pt_regs *sync_regs(struct pt_regs *eregs) } /* runs on IST stack. */ -asmlinkage void __kprobes do_debug(struct pt_regs * regs, +asmlinkage void __kprobes do_debug(struct pt_regs *regs, unsigned long error_code) { struct task_struct *tsk = current; @@ -1035,7 +1038,7 @@ asmlinkage void do_coprocessor_error(struct pt_regs *regs) asmlinkage void bad_intr(void) { - printk("bad interrupt"); + printk("bad interrupt"); } asmlinkage void do_simd_coprocessor_error(struct pt_regs *regs) @@ -1047,7 +1050,7 @@ asmlinkage void do_simd_coprocessor_error(struct pt_regs *regs) conditional_sti(regs); if (!user_mode(regs) && - kernel_math_error(regs, "kernel simd math error", 19)) + kernel_math_error(regs, "kernel simd math error", 19)) return; /* @@ -1092,7 +1095,7 @@ asmlinkage void do_simd_coprocessor_error(struct pt_regs *regs) force_sig_info(SIGFPE, &info, task); } -asmlinkage void do_spurious_interrupt_bug(struct pt_regs * regs) +asmlinkage void do_spurious_interrupt_bug(struct pt_regs *regs) { } @@ -1142,8 +1145,10 @@ void __init trap_init(void) set_intr_gate(0, ÷_error); set_intr_gate_ist(1, &debug, DEBUG_STACK); set_intr_gate_ist(2, &nmi, NMI_STACK); - set_system_gate_ist(3, &int3, DEBUG_STACK); /* int3 can be called from all */ - set_system_gate(4, &overflow); /* int4 can be called from all */ + /* int3 can be called from all */ + set_system_gate_ist(3, &int3, DEBUG_STACK); + /* int4 can be called from all */ + set_system_gate(4, &overflow); set_intr_gate(5, &bounds); set_intr_gate(6, &invalid_op); set_intr_gate(7, &device_not_available); -- cgit v1.2.3 From e9c8abb66cc37801bdb5d4360bb78d180c3bbb73 Mon Sep 17 00:00:00 2001 From: "Gustavo F. Padovan" Date: Tue, 29 Jul 2008 02:48:56 -0300 Subject: x86: coding style fixes to arch/x86/kernel/sys_x86_64.c Fix all errors and many warnings reported by checkpatch.pl without change sys_x86_64.o arch/x86/kernel/sys_x86_64.o: text data bss dec hex filename 1567 0 0 1567 61f sys_x86_64.o.after 1567 0 0 1567 61f sys_x86_64.o.before md5: de28ffedcb5851dfd7ec87a03afec1fd sys_x86_64.o.after de28ffedcb5851dfd7ec87a03afec1fd sys_x86_64.o.before Signed-off-by: Gustavo F. Padovan Signed-off-by: Ingo Molnar --- arch/x86/kernel/sys_x86_64.c | 43 ++++++++++++++++++++++--------------------- 1 file changed, 22 insertions(+), 21 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/sys_x86_64.c b/arch/x86/kernel/sys_x86_64.c index 3b360ef3381..56eb8f916e9 100644 --- a/arch/x86/kernel/sys_x86_64.c +++ b/arch/x86/kernel/sys_x86_64.c @@ -13,15 +13,16 @@ #include #include #include +#include -#include #include -asmlinkage long sys_mmap(unsigned long addr, unsigned long len, unsigned long prot, unsigned long flags, - unsigned long fd, unsigned long off) +asmlinkage long sys_mmap(unsigned long addr, unsigned long len, + unsigned long prot, unsigned long flags, + unsigned long fd, unsigned long off) { long error; - struct file * file; + struct file *file; error = -EINVAL; if (off & ~PAGE_MASK) @@ -56,9 +57,9 @@ static void find_start_end(unsigned long flags, unsigned long *begin, unmapped base down for this case. This can give conflicts with the heap, but we assume that glibc malloc knows how to fall back to mmap. Give it 1GB - of playground for now. -AK */ - *begin = 0x40000000; - *end = 0x80000000; + of playground for now. -AK */ + *begin = 0x40000000; + *end = 0x80000000; if (current->flags & PF_RANDOMIZE) { new_begin = randomize_range(*begin, *begin + 0x02000000, 0); if (new_begin) @@ -66,9 +67,9 @@ static void find_start_end(unsigned long flags, unsigned long *begin, } } else { *begin = TASK_UNMAPPED_BASE; - *end = TASK_SIZE; + *end = TASK_SIZE; } -} +} unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr, @@ -78,11 +79,11 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr, struct vm_area_struct *vma; unsigned long start_addr; unsigned long begin, end; - + if (flags & MAP_FIXED) return addr; - find_start_end(flags, &begin, &end); + find_start_end(flags, &begin, &end); if (len > end) return -ENOMEM; @@ -96,12 +97,12 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr, } if (((flags & MAP_32BIT) || test_thread_flag(TIF_IA32)) && len <= mm->cached_hole_size) { - mm->cached_hole_size = 0; + mm->cached_hole_size = 0; mm->free_area_cache = begin; } addr = mm->free_area_cache; - if (addr < begin) - addr = begin; + if (addr < begin) + addr = begin; start_addr = addr; full_search: @@ -127,7 +128,7 @@ full_search: return addr; } if (addr + mm->cached_hole_size < vma->vm_start) - mm->cached_hole_size = vma->vm_start - addr; + mm->cached_hole_size = vma->vm_start - addr; addr = vma->vm_end; } @@ -177,7 +178,7 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0, vma = find_vma(mm, addr-len); if (!vma || addr <= vma->vm_start) /* remember the address as a hint for next time */ - return (mm->free_area_cache = addr-len); + return mm->free_area_cache = addr-len; } if (mm->mmap_base < len) @@ -194,7 +195,7 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0, vma = find_vma(mm, addr); if (!vma || addr+len <= vma->vm_start) /* remember the address as a hint for next time */ - return (mm->free_area_cache = addr); + return mm->free_area_cache = addr; /* remember the largest hole we saw so far */ if (addr + mm->cached_hole_size < vma->vm_start) @@ -224,13 +225,13 @@ bottomup: } -asmlinkage long sys_uname(struct new_utsname __user * name) +asmlinkage long sys_uname(struct new_utsname __user *name) { int err; down_read(&uts_sem); - err = copy_to_user(name, utsname(), sizeof (*name)); + err = copy_to_user(name, utsname(), sizeof(*name)); up_read(&uts_sem); - if (personality(current->personality) == PER_LINUX32) - err |= copy_to_user(&name->machine, "i686", 5); + if (personality(current->personality) == PER_LINUX32) + err |= copy_to_user(&name->machine, "i686", 5); return err ? -EFAULT : 0; } -- cgit v1.2.3 From 64a76f667d987a559ad0726b4692c987800b22bc Mon Sep 17 00:00:00 2001 From: David Brownell Date: Tue, 29 Jul 2008 12:47:38 -0700 Subject: hpet: /dev/hpet - fixes and cleanup Minor /dev/hpet updates and bugfixes: * Remove dead code, mostly remnants of an incomplete/unusable kernel interface ... noted when addressing "sparse" warnings: + hpet_unregister() and a routine it calls + hpet_task and all references, including hpet_task_lock + hpet_data.hd_flags (and HPET_DATA_PLATFORM) * Correct and improve boot message: + displays *counter* (shared between comparators) bit width, not *timer* bit widths (which are often mixed) + relabel "timers" as "comparators"; this is less confusing, they are not independent like normal timers are (sigh) + display MHz not Hz; it's never less than 10 MHz. * Tighten and correct the userspace interface code + don't accidentally program comparators in 64-bit mode using 32-bit values ... always force comparators into 32-bit mode + provide the correct bit definition flagging comparators with periodic capability ... the ABI is unchanged * Update Documentation/hpet.txt + be more correct and current + expand description a bit + don't mention that now-gone kernel interface Plus, add a FIXME comment for something that could cause big trouble on systems with more capable HPETs than at least Intel seems to ship. It seems that few folk use this userspace interface; it's not very usable given the general lack of HPET IRQ routing. I'm told that the only real point of it any more is to mmap for fast timestamps; IMO that's handled better through the gettimeofday() vsyscall. Signed-off-by: David Brownell Acked-by: Clemens Ladisch Signed-off-by: Ingo Molnar --- arch/x86/kernel/hpet.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c index ad2b15a1334..82d459186fd 100644 --- a/arch/x86/kernel/hpet.c +++ b/arch/x86/kernel/hpet.c @@ -115,13 +115,17 @@ static void hpet_reserve_platform_timers(unsigned long id) hd.hd_phys_address = hpet_address; hd.hd_address = hpet; hd.hd_nirqs = nrtimers; - hd.hd_flags = HPET_DATA_PLATFORM; hpet_reserve_timer(&hd, 0); #ifdef CONFIG_HPET_EMULATE_RTC hpet_reserve_timer(&hd, 1); #endif + /* + * NOTE that hd_irq[] reflects IOAPIC input pins (LEGACY_8254 + * is wrong for i8259!) not the output IRQ. Many BIOS writers + * don't bother configuring *any* comparator interrupts. + */ hd.hd_irq[0] = HPET_LEGACY_8254; hd.hd_irq[1] = HPET_LEGACY_RTC; -- cgit v1.2.3 From a677f58a8c8c541bf7d02c658545084040f3708d Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Tue, 29 Jul 2008 00:37:10 -0700 Subject: x86: print per_cpu data address to make sure per_cpu data on correct node. Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/setup_percpu.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index f7745f94c00..61f3966632a 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -180,9 +180,16 @@ void __init setup_per_cpu_areas(void) printk(KERN_INFO "cpu %d has no node %d or node-local memory\n", cpu, node); + if (ptr) + printk(KERN_DEBUG "per cpu data for cpu%d at %016lx\n", + cpu, __pa(ptr)); } - else + else { ptr = alloc_bootmem_pages_node(NODE_DATA(node), size); + if (ptr) + printk(KERN_DEBUG "per cpu data for cpu%d on node%d at %016lx\n", + cpu, node, __pa(ptr)); + } #endif per_cpu_offset(cpu) = ptr - __per_cpu_start; memcpy(ptr, __per_cpu_start, __per_cpu_end - __per_cpu_start); -- cgit v1.2.3 From a0ac87d61ba20932e54f08464d3f3439d78fa878 Mon Sep 17 00:00:00 2001 From: Peter Oruba Date: Tue, 29 Jul 2008 17:41:04 +0200 Subject: x86: AMD microcode patch loader style corrections Signed-off-by: Peter Oruba Cc: Tigran Aivazian Signed-off-by: Ingo Molnar --- arch/x86/kernel/microcode_amd.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/microcode_amd.c b/arch/x86/kernel/microcode_amd.c index 07e52beacb4..6789530ef33 100644 --- a/arch/x86/kernel/microcode_amd.c +++ b/arch/x86/kernel/microcode_amd.c @@ -47,9 +47,9 @@ MODULE_LICENSE("GPL v2"); #define UCODE_UCODE_TYPE 0x00000001 #define UCODE_MAX_SIZE (2048) -#define DEFAULT_UCODE_DATASIZE (896) /* 896 bytes */ -#define MC_HEADER_SIZE (sizeof(struct microcode_header_amd)) /* 64 bytes */ -#define DEFAULT_UCODE_TOTALSIZE (DEFAULT_UCODE_DATASIZE + MC_HEADER_SIZE) /* 960 bytes */ +#define DEFAULT_UCODE_DATASIZE (896) +#define MC_HEADER_SIZE (sizeof(struct microcode_header_amd)) +#define DEFAULT_UCODE_TOTALSIZE (DEFAULT_UCODE_DATASIZE + MC_HEADER_SIZE) #define DWSIZE (sizeof(u32)) /* For now we support a fixed ucode total size only */ #define get_totalsize(mc) \ -- cgit v1.2.3 From f516526febb75500f280def2108688d388df4e1e Mon Sep 17 00:00:00 2001 From: Peter Oruba Date: Tue, 29 Jul 2008 17:41:05 +0200 Subject: x86: Intel microcode patch loader style corrections Signed-off-by: Peter Oruba Cc: Tigran Aivazian Signed-off-by: Ingo Molnar --- arch/x86/kernel/microcode_intel.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/microcode_intel.c b/arch/x86/kernel/microcode_intel.c index 6da4a85ff46..a61986e8b69 100644 --- a/arch/x86/kernel/microcode_intel.c +++ b/arch/x86/kernel/microcode_intel.c @@ -55,8 +55,8 @@ * in a single CPU package. * 1.10 28 Feb 2002 Asit K Mallick and * Tigran Aivazian , - * Serialize updates as required on HT processors due to speculative - * nature of implementation. + * Serialize updates as required on HT processors due to + * speculative nature of implementation. * 1.11 22 Mar 2002 Tigran Aivazian * Fix the panic when writing zero-length microcode chunk. * 1.12 29 Sep 2003 Nitin Kamble , @@ -71,7 +71,7 @@ * Thanks to Stuart Swales for pointing out this bug. */ -//#define DEBUG /* pr_debug */ +/* #define DEBUG */ /* pr_debug */ #include #include #include @@ -99,11 +99,11 @@ MODULE_DESCRIPTION("Microcode Update Driver"); MODULE_AUTHOR("Tigran Aivazian "); MODULE_LICENSE("GPL"); -#define DEFAULT_UCODE_DATASIZE (2000) /* 2000 bytes */ -#define MC_HEADER_SIZE (sizeof(struct microcode_header_intel)) /* 48 bytes */ -#define DEFAULT_UCODE_TOTALSIZE (DEFAULT_UCODE_DATASIZE + MC_HEADER_SIZE) /* 2048 bytes */ -#define EXT_HEADER_SIZE (sizeof(struct extended_sigtable)) /* 20 bytes */ -#define EXT_SIGNATURE_SIZE (sizeof(struct extended_signature)) /* 12 bytes */ +#define DEFAULT_UCODE_DATASIZE (2000) +#define MC_HEADER_SIZE (sizeof(struct microcode_header_intel)) +#define DEFAULT_UCODE_TOTALSIZE (DEFAULT_UCODE_DATASIZE + MC_HEADER_SIZE) +#define EXT_HEADER_SIZE (sizeof(struct extended_sigtable)) +#define EXT_SIGNATURE_SIZE (sizeof(struct extended_signature)) #define DWSIZE (sizeof(u32)) #define get_totalsize(mc) \ (((struct microcode_intel *)mc)->hdr.totalsize ? \ -- cgit v1.2.3 From 831f9bd3151ebd22959a0cb2a20b44a06b26d4ed Mon Sep 17 00:00:00 2001 From: Peter Oruba Date: Tue, 29 Jul 2008 17:41:06 +0200 Subject: x86: moved function declarations out from AMD microcode patch loader to heade file Signed-off-by: Peter Oruba Cc: Peter Oruba Cc: Tigran Aivazian Signed-off-by: Ingo Molnar --- arch/x86/kernel/microcode_amd.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/microcode_amd.c b/arch/x86/kernel/microcode_amd.c index 6789530ef33..2b98e6ab1bf 100644 --- a/arch/x86/kernel/microcode_amd.c +++ b/arch/x86/kernel/microcode_amd.c @@ -56,9 +56,6 @@ MODULE_LICENSE("GPL v2"); ((((struct microcode_amd *)mc)->hdr.mc_patch_data_len * 28) \ + MC_HEADER_SIZE) -extern int microcode_init(void *opaque, struct module *module); -extern void microcode_exit(void); - /* serialize access to the physical write */ static DEFINE_SPINLOCK(microcode_update_lock); -- cgit v1.2.3 From daa9c0fee1cbe1d49fbe29af3d4bb16312043b1e Mon Sep 17 00:00:00 2001 From: Peter Oruba Date: Tue, 29 Jul 2008 17:41:07 +0200 Subject: x86: minor pointer type cast in AMD microcode patch loader Signed-off-by: Peter Oruba Cc: Peter Oruba Cc: Tigran Aivazian Signed-off-by: Ingo Molnar --- arch/x86/kernel/microcode_amd.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/microcode_amd.c b/arch/x86/kernel/microcode_amd.c index 2b98e6ab1bf..33b2a217a8c 100644 --- a/arch/x86/kernel/microcode_amd.c +++ b/arch/x86/kernel/microcode_amd.c @@ -394,7 +394,8 @@ static int cpu_request_microcode_amd(int cpu) return error; } - buf_pos = buf = firmware->data; + buf_pos = (unsigned int *)firmware->data; + buf = (void *)firmware->data; size = firmware->size; if (buf_pos[0] != UCODE_MAGIC) { -- cgit v1.2.3 From 7ab6af7ab69df8c9c5fbc380004fb81187742096 Mon Sep 17 00:00:00 2001 From: Hiroshi Shimamoto Date: Wed, 30 Jul 2008 17:36:48 -0700 Subject: x86_32: use apic_ops at print_local_APIC() Use apic_icr_read at print_local_APIC() in io_apic_32.c Signed-off-by: Hiroshi Shimamoto Cc: Suresh Siddha Cc: Yinghai Lu Signed-off-by: Ingo Molnar Cc: Suresh Siddha Cc: Yinghai Lu --- arch/x86/kernel/io_apic_32.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/io_apic_32.c b/arch/x86/kernel/io_apic_32.c index 98e4db5373f..39e063a9a28 100644 --- a/arch/x86/kernel/io_apic_32.c +++ b/arch/x86/kernel/io_apic_32.c @@ -1486,6 +1486,7 @@ static void print_APIC_bitfield(int base) void /*__init*/ print_local_APIC(void *dummy) { unsigned int v, ver, maxlvt; + u64 icr; if (apic_verbosity == APIC_QUIET) return; @@ -1536,10 +1537,9 @@ void /*__init*/ print_local_APIC(void *dummy) printk(KERN_DEBUG "... APIC ESR: %08x\n", v); } - v = apic_read(APIC_ICR); - printk(KERN_DEBUG "... APIC ICR: %08x\n", v); - v = apic_read(APIC_ICR2); - printk(KERN_DEBUG "... APIC ICR2: %08x\n", v); + icr = apic_icr_read(); + printk(KERN_DEBUG "... APIC ICR: %08x\n", icr); + printk(KERN_DEBUG "... APIC ICR2: %08x\n", icr >> 32); v = apic_read(APIC_LVTT); printk(KERN_DEBUG "... APIC LVTT: %08x\n", v); -- cgit v1.2.3 From 90936cfe6c8f7e90a6f8b0c5cb44d3a012dfd313 Mon Sep 17 00:00:00 2001 From: Marcin Slusarz Date: Mon, 11 Aug 2008 00:07:44 +0200 Subject: x86, tsc: fix section mismatch warning WARNING: vmlinux.o(.text+0x7950): Section mismatch in reference from the function native_calibrate_tsc() to the function .init.text:tsc_read_refs() The function native_calibrate_tsc() references the function __init tsc_read_refs(). This is often because native_calibrate_tsc lacks a __init annotation or the annotation of tsc_read_refs is wrong. tsc_read_refs is called from native_calibrate_tsc which is not __init and native_calibrate_tsc cannot be marked __init Signed-off-by: Marcin Slusarz Cc: Thomas Gleixner Cc: Ingo Molnar Cc: H. Peter Anvin Signed-off-by: H. Peter Anvin --- arch/x86/kernel/tsc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 7603c055390..46af7167673 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -104,7 +104,7 @@ __setup("notsc", notsc_setup); /* * Read TSC and the reference counters. Take care of SMI disturbance */ -static u64 __init tsc_read_refs(u64 *pm, u64 *hpet) +static u64 tsc_read_refs(u64 *pm, u64 *hpet) { u64 t1, t2; int i; -- cgit v1.2.3 From 85a14437ed24244c78f9a70d58b8299753b03c92 Mon Sep 17 00:00:00 2001 From: Marcin Slusarz Date: Mon, 11 Aug 2008 00:12:37 +0200 Subject: x86: fix MP_processor_info section mismatch warning WARNING: arch/x86/kernel/built-in.o(.cpuinit.text+0x1fe7): Section mismatch in reference from the function MP_processor_info() to the variable .init.data:x86_quirks The function __cpuinit MP_processor_info() references a variable __initdata x86_quirks. If x86_quirks is only used by MP_processor_info then annotate x86_quirks with a matching annotation. MP_processor_info uses x86_quirks which is __init and is used only from smp_read_mpc and construct_default_ISA_mptable which are __init Signed-off-by: Marcin Slusarz Cc: Thomas Gleixner Cc: Ingo Molnar Cc: H. Peter Anvin Signed-off-by: H. Peter Anvin --- arch/x86/kernel/mpparse.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c index 6ae005ccaed..78509ee8cc7 100644 --- a/arch/x86/kernel/mpparse.c +++ b/arch/x86/kernel/mpparse.c @@ -49,7 +49,7 @@ static int __init mpf_checksum(unsigned char *mp, int len) return sum & 0xFF; } -static void __cpuinit MP_processor_info(struct mpc_config_processor *m) +static void __init MP_processor_info(struct mpc_config_processor *m) { int apicid; char *bootup_cpu = ""; -- cgit v1.2.3 From bafc1dae8215c862c2e6ae913ddadc20581e59b9 Mon Sep 17 00:00:00 2001 From: Marcin Slusarz Date: Mon, 11 Aug 2008 00:11:13 +0200 Subject: x86: mmconf: fix section mismatch warning WARNING: arch/x86/kernel/built-in.o(.cpuinit.text+0x1591): Section mismatch in reference from the function init_amd() to the function .init.text:check_enable_amd_mmconf_dmi() The function __cpuinit init_amd() references a function __init check_enable_amd_mmconf_dmi(). If check_enable_amd_mmconf_dmi is only used by init_amd then annotate check_enable_amd_mmconf_dmi with a matching annotation. check_enable_amd_mmconf_dmi is only called from init_amd which is __cpuinit Signed-off-by: Marcin Slusarz Cc: Thomas Gleixner Cc: Ingo Molnar Cc: H. Peter Anvin Signed-off-by: H. Peter Anvin --- arch/x86/kernel/mmconf-fam10h_64.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/mmconf-fam10h_64.c b/arch/x86/kernel/mmconf-fam10h_64.c index fdfdc550b36..efc2f361fe8 100644 --- a/arch/x86/kernel/mmconf-fam10h_64.c +++ b/arch/x86/kernel/mmconf-fam10h_64.c @@ -238,7 +238,7 @@ static struct dmi_system_id __devinitdata mmconf_dmi_table[] = { {} }; -void __init check_enable_amd_mmconf_dmi(void) +void __cpuinit check_enable_amd_mmconf_dmi(void) { dmi_check_system(mmconf_dmi_table); } -- cgit v1.2.3 From d406d21d90dce2e66c7eb4a44605aac947fe55fb Mon Sep 17 00:00:00 2001 From: Marcin Slusarz Date: Mon, 11 Aug 2008 00:09:38 +0200 Subject: x86: mpparse.c: fix section mismatch warning WARNING: vmlinux.o(.text+0x118f7): Section mismatch in reference from the function construct_ioapic_table() to the function .init.text:MP_bus_info() The function construct_ioapic_table() references the function __init MP_bus_info(). This is often because construct_ioapic_table lacks a __init annotation or the annotation of MP_bus_info is wrong. construct_ioapic_table is called only from construct_default_ISA_mptable which is __init Signed-off-by: Marcin Slusarz Cc: Thomas Gleixner Cc: Ingo Molnar Cc: H. Peter Anvin Signed-off-by: H. Peter Anvin --- arch/x86/kernel/mpparse.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c index 78509ee8cc7..2c1963b3962 100644 --- a/arch/x86/kernel/mpparse.c +++ b/arch/x86/kernel/mpparse.c @@ -484,7 +484,7 @@ static void __init construct_default_ioirq_mptable(int mpc_default_type) } -static void construct_ioapic_table(int mpc_default_type) +static void __init construct_ioapic_table(int mpc_default_type) { struct mpc_config_ioapic ioapic; struct mpc_config_bus bus; @@ -529,7 +529,7 @@ static void construct_ioapic_table(int mpc_default_type) construct_default_ioirq_mptable(mpc_default_type); } #else -static inline void construct_ioapic_table(int mpc_default_type) { } +static inline void __init construct_ioapic_table(int mpc_default_type) { } #endif static inline void __init construct_default_ISA_mptable(int mpc_default_type) -- cgit v1.2.3 From 4c942654a4514d7d0a9b592a7d1b198a212e8a03 Mon Sep 17 00:00:00 2001 From: Huang Weiyi Date: Sun, 10 Aug 2008 20:57:45 +0800 Subject: arch/x86/kernel/acpi/boot.c: removed duplicated #include Removed duplicated include file in arch/x86/kernel/acpi/boot.c. Signed-off-by: Huang Weiyi Signed-off-by: Ingo Molnar --- arch/x86/kernel/acpi/boot.c | 1 - 1 file changed, 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index fa88a1d7129..d8d118935b0 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -58,7 +58,6 @@ EXPORT_SYMBOL(acpi_disabled); #ifdef CONFIG_X86_64 #include -#include #else /* X86 */ -- cgit v1.2.3 From 8aeb4022633f7d0eca5e13a9622bd73df92bbf2a Mon Sep 17 00:00:00 2001 From: Huang Weiyi Date: Sun, 10 Aug 2008 21:09:22 +0800 Subject: arch/x86/kernel/cpuid.c: removed duplicated #include Removed duplicated include file in arch/x86/kernel/cpuid.c. Signed-off-by: Huang Weiyi Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpuid.c | 1 - 1 file changed, 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpuid.c b/arch/x86/kernel/cpuid.c index 14b11b3be31..3fa4e926b51 100644 --- a/arch/x86/kernel/cpuid.c +++ b/arch/x86/kernel/cpuid.c @@ -36,7 +36,6 @@ #include #include #include -#include #include #include #include -- cgit v1.2.3 From 2ae111cdd8d83ebf9de72e36e68a8c84b6ebbeea Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Mon, 11 Aug 2008 18:34:08 +0400 Subject: x86: apic interrupts - move assignments to irqinit_32.c, v2 64bit mode APIC interrupt handlers are set within irqinit_64.c. Lets do tha same for 32bit mode which would help in furter code merging. Signed-off-by: Cyrill Gorcunov Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic_32.c | 48 ------------------------------------------- arch/x86/kernel/irqinit_32.c | 49 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 49 insertions(+), 48 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c index f93c18f5b79..9e341c9d941 100644 --- a/arch/x86/kernel/apic_32.c +++ b/arch/x86/kernel/apic_32.c @@ -1354,54 +1354,6 @@ void smp_error_interrupt(struct pt_regs *regs) irq_exit(); } -#ifdef CONFIG_SMP -void __init smp_intr_init(void) -{ - /* - * IRQ0 must be given a fixed assignment and initialized, - * because it's used before the IO-APIC is set up. - */ - set_intr_gate(FIRST_DEVICE_VECTOR, interrupt[0]); - - /* - * The reschedule interrupt is a CPU-to-CPU reschedule-helper - * IPI, driven by wakeup. - */ - alloc_intr_gate(RESCHEDULE_VECTOR, reschedule_interrupt); - - /* IPI for invalidation */ - alloc_intr_gate(INVALIDATE_TLB_VECTOR, invalidate_interrupt); - - /* IPI for generic function call */ - alloc_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt); - - /* IPI for single call function */ - set_intr_gate(CALL_FUNCTION_SINGLE_VECTOR, - call_function_single_interrupt); -} -#endif - -/* - * Initialize APIC interrupts - */ -void __init apic_intr_init(void) -{ -#ifdef CONFIG_SMP - smp_intr_init(); -#endif - /* self generated IPI for local APIC timer */ - alloc_intr_gate(LOCAL_TIMER_VECTOR, apic_timer_interrupt); - - /* IPI vectors for APIC spurious and error interrupts */ - alloc_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt); - alloc_intr_gate(ERROR_APIC_VECTOR, error_interrupt); - - /* thermal monitor LVT interrupt */ -#ifdef CONFIG_X86_MCE_P4THERMAL - alloc_intr_gate(THERMAL_APIC_VECTOR, thermal_interrupt); -#endif -} - /** * connect_bsp_APIC - attach the APIC to the interrupt system */ diff --git a/arch/x86/kernel/irqinit_32.c b/arch/x86/kernel/irqinit_32.c index d66914287ee..9200a1e2752 100644 --- a/arch/x86/kernel/irqinit_32.c +++ b/arch/x86/kernel/irqinit_32.c @@ -74,6 +74,15 @@ void __init init_ISA_irqs (void) } } +/* + * IRQ2 is cascade interrupt to second interrupt controller + */ +static struct irqaction irq2 = { + .handler = no_action, + .mask = CPU_MASK_NONE, + .name = "cascade", +}; + /* Overridden in paravirt.c */ void init_IRQ(void) __attribute__((weak, alias("native_init_IRQ"))); @@ -98,6 +107,46 @@ void __init native_init_IRQ(void) set_intr_gate(vector, interrupt[i]); } +#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_SMP) + /* + * IRQ0 must be given a fixed assignment and initialized, + * because it's used before the IO-APIC is set up. + */ + set_intr_gate(FIRST_DEVICE_VECTOR, interrupt[0]); + + /* + * The reschedule interrupt is a CPU-to-CPU reschedule-helper + * IPI, driven by wakeup. + */ + alloc_intr_gate(RESCHEDULE_VECTOR, reschedule_interrupt); + + /* IPI for invalidation */ + alloc_intr_gate(INVALIDATE_TLB_VECTOR, invalidate_interrupt); + + /* IPI for generic function call */ + alloc_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt); + + /* IPI for single call function */ + set_intr_gate(CALL_FUNCTION_SINGLE_VECTOR, call_function_single_interrupt); +#endif + +#ifdef CONFIG_X86_LOCAL_APIC + /* self generated IPI for local APIC timer */ + alloc_intr_gate(LOCAL_TIMER_VECTOR, apic_timer_interrupt); + + /* IPI vectors for APIC spurious and error interrupts */ + alloc_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt); + alloc_intr_gate(ERROR_APIC_VECTOR, error_interrupt); +#endif + +#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86_MCE_P4THERMAL) + /* thermal monitor LVT interrupt */ + alloc_intr_gate(THERMAL_APIC_VECTOR, thermal_interrupt); +#endif + + if (!acpi_ioapic) + setup_irq(2, &irq2); + /* setup after call gates are initialised (usually add in * the architecture specific gates) */ -- cgit v1.2.3 From 49800efcb17afdf973f33e8aa8807b7f83993cc6 Mon Sep 17 00:00:00 2001 From: Andreas Herrmann Date: Wed, 6 Aug 2008 10:27:30 +0200 Subject: x86: pda_init(): fix memory leak when using CPU hotplug pda->irqstackptr is allocated whenever a CPU is set online. But it is never freed. This results in a memory leak of 16K for each CPU offline/online cycle. Fix is to allocate pda->irqstackptr only once. Signed-off-by: Andreas Herrmann Cc: akpm@linux-foundation.org Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/common_64.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/common_64.c b/arch/x86/kernel/cpu/common_64.c index 6f9b8924bdc..8396fd7628a 100644 --- a/arch/x86/kernel/cpu/common_64.c +++ b/arch/x86/kernel/cpu/common_64.c @@ -493,17 +493,20 @@ void pda_init(int cpu) /* others are initialized in smpboot.c */ pda->pcurrent = &init_task; pda->irqstackptr = boot_cpu_stack; + pda->irqstackptr += IRQSTACKSIZE - 64; } else { - pda->irqstackptr = (char *) - __get_free_pages(GFP_ATOMIC, IRQSTACK_ORDER); - if (!pda->irqstackptr) - panic("cannot allocate irqstack for cpu %d", cpu); + if (!pda->irqstackptr) { + pda->irqstackptr = (char *) + __get_free_pages(GFP_ATOMIC, IRQSTACK_ORDER); + if (!pda->irqstackptr) + panic("cannot allocate irqstack for cpu %d", + cpu); + pda->irqstackptr += IRQSTACKSIZE - 64; + } if (pda->nodenumber == 0 && cpu_to_node(cpu) != NUMA_NO_NODE) pda->nodenumber = cpu_to_node(cpu); } - - pda->irqstackptr += IRQSTACKSIZE-64; } char boot_exception_stacks[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + -- cgit v1.2.3 From b55793f7528ce1b73c25b3ac8a86a6cda2a0f9a4 Mon Sep 17 00:00:00 2001 From: Andreas Herrmann Date: Wed, 6 Aug 2008 10:29:37 +0200 Subject: x86: cpu_init(): fix memory leak when using CPU hotplug Exception stacks are allocated each time a CPU is set online. But the allocated space is never freed. Thus with one CPU hotplug offline/online cycle there is a memory leak of 24K (6 pages) for a CPU. Fix is to allocate exception stacks only once -- when the CPU is set online for the first time. Signed-off-by: Andreas Herrmann Cc: akpm@linux-foundation.org Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/common_64.c | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/common_64.c b/arch/x86/kernel/cpu/common_64.c index 8396fd7628a..cc6efe86249 100644 --- a/arch/x86/kernel/cpu/common_64.c +++ b/arch/x86/kernel/cpu/common_64.c @@ -606,19 +606,22 @@ void __cpuinit cpu_init(void) /* * set up and load the per-CPU TSS */ - for (v = 0; v < N_EXCEPTION_STACKS; v++) { + if (!orig_ist->ist[0]) { static const unsigned int order[N_EXCEPTION_STACKS] = { - [0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STACK_ORDER, - [DEBUG_STACK - 1] = DEBUG_STACK_ORDER + [0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STACK_ORDER, + [DEBUG_STACK - 1] = DEBUG_STACK_ORDER }; - if (cpu) { - estacks = (char *)__get_free_pages(GFP_ATOMIC, order[v]); - if (!estacks) - panic("Cannot allocate exception stack %ld %d\n", - v, cpu); + for (v = 0; v < N_EXCEPTION_STACKS; v++) { + if (cpu) { + estacks = (char *)__get_free_pages(GFP_ATOMIC, order[v]); + if (!estacks) + panic("Cannot allocate exception " + "stack %ld %d\n", v, cpu); + } + estacks += PAGE_SIZE << order[v]; + orig_ist->ist[v] = t->x86_tss.ist[v] = + (unsigned long)estacks; } - estacks += PAGE_SIZE << order[v]; - orig_ist->ist[v] = t->x86_tss.ist[v] = (unsigned long)estacks; } t->x86_tss.io_bitmap_base = offsetof(struct tss_struct, io_bitmap); -- cgit v1.2.3 From 34a1b9fc4995102ecbb3a980b348aebf168d8196 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Tue, 12 Aug 2008 13:25:44 +0100 Subject: Fix date output in x86 microcode driver. The microcode stores its date in a uint32_t in some weird order approximating pdp-endian. Rather than printing it like that, print it properly in ISO standard form. Signed-off-by: David Woodhouse Cc: Shaohua Li Signed-off-by: Ingo Molnar --- arch/x86/kernel/microcode_intel.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/microcode_intel.c b/arch/x86/kernel/microcode_intel.c index a61986e8b69..d2d9d74f4cb 100644 --- a/arch/x86/kernel/microcode_intel.c +++ b/arch/x86/kernel/microcode_intel.c @@ -339,8 +339,11 @@ static void apply_microcode(int cpu) return; } printk(KERN_INFO "microcode: CPU%d updated from revision " - "0x%x to 0x%x, date = %08x \n", - cpu_num, uci->rev, val[1], uci->mc.mc_intel->hdr.date); + "0x%x to 0x%x, date = %04x-%02x-%02x \n", + cpu_num, uci->rev, val[1], + uci->mc.mc_intel->hdr.date & 0xffff, + uci->mc.mc_intel->hdr.date >> 24, + (uci->mc.mc_intel->hdr.date >> 16) & 0xff); uci->rev = val[1]; } -- cgit v1.2.3 From ee2b92a8201a40021ecd1aee6f0625dc03bacc54 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Wed, 13 Aug 2008 11:38:13 -0700 Subject: x86, xsave: remove the redundant access_ok() in setup_rt_frame() save_i387_xstate() is already doing the required access_ok(). Remove the redundant access_ok() before it. Signed-off-by: Suresh Siddha Signed-off-by: Ingo Molnar --- arch/x86/kernel/signal_64.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/signal_64.c b/arch/x86/kernel/signal_64.c index 2621b98f5bf..6c581698ab5 100644 --- a/arch/x86/kernel/signal_64.c +++ b/arch/x86/kernel/signal_64.c @@ -208,9 +208,6 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, frame = (void __user *)round_down( (unsigned long)fp - sizeof(struct rt_sigframe), 16) - 8; - if (!access_ok(VERIFY_WRITE, fp, sizeof(struct _fpstate))) - goto give_sigsegv; - if (save_i387_xstate(fp) < 0) err |= -1; } else -- cgit v1.2.3 From ed405958057ca6a8c4c9178a7a3b1167fabb45f5 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Wed, 13 Aug 2008 11:38:14 -0700 Subject: x86, xsave: clear the user buffer before doing fxsave/xsave fxsave/xsave instructions will not touch all the bytes in the fxsave/xsave frame. Clear the user buffer before doing fxsave/xsave directly to user buffer during the sigcontext setup. This is essentially needed in the context of xsave(for example, some of the fields in the xsave header are not touched by the xsave and defined as must be zero). This will also present uniform and clean context to the user (from which user can safely do fxrstor/xrstor). Signed-off-by: Suresh Siddha Signed-off-by: Ingo Molnar --- arch/x86/kernel/xsave.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/xsave.c b/arch/x86/kernel/xsave.c index 7415f3e38a5..bb097b1644d 100644 --- a/arch/x86/kernel/xsave.c +++ b/arch/x86/kernel/xsave.c @@ -92,6 +92,12 @@ int save_i387_xstate(void __user *buf) return 0; clear_used_math(); /* trigger finit */ if (task_thread_info(tsk)->status & TS_USEDFPU) { + /* + * Start with clearing the user buffer. This will present a + * clean context for the bytes not touched by the fxsave/xsave. + */ + __clear_user(buf, sig_xstate_size); + if (task_thread_info(tsk)->status & TS_XSAVE) err = xsave_user(buf); else -- cgit v1.2.3 From f65bc214e042916135256620f900e9599d65e0cb Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Wed, 13 Aug 2008 11:38:15 -0700 Subject: x86, xsave: use BUG_ON() instead of BUILD_BUG_ON() All these structure sizes are runtime determined. So use a runtime bug check. Signed-off-by: Suresh Siddha Signed-off-by: Ingo Molnar --- arch/x86/kernel/xsave.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/xsave.c b/arch/x86/kernel/xsave.c index bb097b1644d..07713d64deb 100644 --- a/arch/x86/kernel/xsave.c +++ b/arch/x86/kernel/xsave.c @@ -82,8 +82,7 @@ int save_i387_xstate(void __user *buf) if (!access_ok(VERIFY_WRITE, buf, sig_xstate_size)) return -EACCES; - BUILD_BUG_ON(sizeof(struct user_i387_struct) != - sizeof(tsk->thread.xstate->fxsave)); + BUG_ON(sig_xstate_size < xstate_size); if ((unsigned long)buf % 64) printk("save_i387_xstate: bad fpstate %p\n", buf); -- cgit v1.2.3 From ed4e5ec177d20504c51aebb93db12d57716cde9c Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Fri, 15 Aug 2008 13:51:20 +0200 Subject: x86: apic - use SET_APIC_DEST_FIELD instead of hardcoded shift Signed-off-by: Cyrill Gorcunov Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic_64.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c index 69a876be506..77c5e5eb820 100644 --- a/arch/x86/kernel/apic_64.c +++ b/arch/x86/kernel/apic_64.c @@ -150,7 +150,7 @@ u32 safe_xapic_wait_icr_idle(void) void xapic_icr_write(u32 low, u32 id) { - apic_write(APIC_ICR2, id << 24); + apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(id)); apic_write(APIC_ICR, low); } -- cgit v1.2.3 From 36fef0949c14445d231a68663e8a01860f7caca3 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Fri, 15 Aug 2008 13:51:20 +0200 Subject: x86: apic - unify disable_apic_timer Get rid of local_apic_timer_disabled and use disable_apic_timer instead. Signed-off-by: Cyrill Gorcunov Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic_32.c | 17 ++++++++++++----- arch/x86/kernel/apic_64.c | 16 +++++++++++----- 2 files changed, 23 insertions(+), 10 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c index 12b154822bc..6af20dd12c9 100644 --- a/arch/x86/kernel/apic_32.c +++ b/arch/x86/kernel/apic_32.c @@ -63,7 +63,7 @@ int disable_apic; /* Local APIC timer verification ok */ static int local_apic_timer_verify_ok; /* Disable local APIC timer from the kernel commandline or via dmi quirk */ -static int local_apic_timer_disabled; +static int disable_apic_timer __cpuinitdata; /* Local APIC timer works in C2 */ int local_apic_timer_c2_ok; EXPORT_SYMBOL_GPL(local_apic_timer_c2_ok); @@ -574,7 +574,7 @@ void __init setup_boot_APIC_clock(void) * timer as a dummy clock event source on SMP systems, so the * broadcast mechanism is used. On UP systems simply ignore it. */ - if (local_apic_timer_disabled) { + if (disable_apic_timer) { /* No broadcast on UP ! */ if (num_possible_cpus() > 1) { lapic_clockevent.mult = 1; @@ -1699,12 +1699,19 @@ static int __init parse_nolapic(char *arg) } early_param("nolapic", parse_nolapic); -static int __init parse_disable_lapic_timer(char *arg) +static int __init parse_disable_apic_timer(char *arg) { - local_apic_timer_disabled = 1; + disable_apic_timer = 1; return 0; } -early_param("nolapic_timer", parse_disable_lapic_timer); +early_param("noapictimer", parse_disable_apic_timer); + +static int __init parse_nolapic_timer(char *arg) +{ + disable_apic_timer = 1; + return 0; +} +early_param("nolapic_timer", parse_nolapic_timer); static int __init parse_lapic_timer_c2_ok(char *arg) { diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c index 77c5e5eb820..1e925be861e 100644 --- a/arch/x86/kernel/apic_64.c +++ b/arch/x86/kernel/apic_64.c @@ -45,6 +45,7 @@ #include #include +/* Disable local APIC timer from the kernel commandline or via dmi quirk */ static int disable_apic_timer __cpuinitdata; static int apic_calibrate_pmtmr __initdata; int disable_apic; @@ -1583,14 +1584,19 @@ static int __init parse_lapic_timer_c2_ok(char *arg) } early_param("lapic_timer_c2_ok", parse_lapic_timer_c2_ok); -static __init int setup_noapictimer(char *str) +static int __init parse_disable_apic_timer(char *arg) { - if (str[0] != ' ' && str[0] != 0) - return 0; disable_apic_timer = 1; - return 1; + return 0; +} +early_param("noapictimer", parse_disable_apic_timer); + +static int __init parse_nolapic_timer(char *arg) +{ + disable_apic_timer = 1; + return 0; } -__setup("noapictimer", setup_noapictimer); +early_param("nolapic_timer", parse_nolapic_timer); static __init int setup_apicpmtimer(char *s) { -- cgit v1.2.3 From f07f4f9046121ac803bc2f0ded3d77b7c2ab481b Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Fri, 15 Aug 2008 13:51:21 +0200 Subject: x86: apic - unify __setup_APIC_LVTT To be able to unify this function we RE-introduce APIC_DIVISOR for 64bit mode. This snipped was eliminated in some time ago in a sake of clenup but now we need it again since it allow up to get rid of #ifdef(s). And lapic_is_integrated call is added in apic_64.c but since we always have APIC integrated on 64bit cpu compiler will ignore this call. Signed-off-by: Cyrill Gorcunov Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic_64.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c index 1e925be861e..ac399d0f65c 100644 --- a/arch/x86/kernel/apic_64.c +++ b/arch/x86/kernel/apic_64.c @@ -244,6 +244,9 @@ int lapic_get_maxlvt(void) return APIC_INTEGRATED(GET_APIC_VERSION(v)) ? GET_APIC_MAXLVT(v) : 2; } +/* Clock divisor is set to 1 */ +#define APIC_DIVISOR 1 + /* * This function sets up the local APIC timer, with a timeout of * 'clocks' APIC bus clock. During calibration we actually call @@ -262,6 +265,9 @@ static void __setup_APIC_LVTT(unsigned int clocks, int oneshot, int irqen) lvtt_value = LOCAL_TIMER_VECTOR; if (!oneshot) lvtt_value |= APIC_LVT_TIMER_PERIODIC; + if (!lapic_is_integrated()) + lvtt_value |= SET_APIC_TIMER_BASE(APIC_TIMER_BASE_DIV); + if (!irqen) lvtt_value |= APIC_LVT_MASKED; @@ -276,7 +282,7 @@ static void __setup_APIC_LVTT(unsigned int clocks, int oneshot, int irqen) | APIC_TDR_DIV_16); if (!oneshot) - apic_write(APIC_TMICT, clocks); + apic_write(APIC_TMICT, clocks / APIC_DIVISOR); } /* @@ -446,7 +452,7 @@ static int __init calibrate_APIC_clock(void) lapic_clockevent.min_delta_ns = clockevent_delta2ns(0xF, &lapic_clockevent); - calibration_result = result / HZ; + calibration_result = (result * APIC_DIVISOR) / HZ; /* * Do a sanity check on the APIC calibration result -- cgit v1.2.3 From 9ce122c6e55c44ae9a4c4c777579b87d83e7f898 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Fri, 15 Aug 2008 13:51:21 +0200 Subject: x86: apic - do not clear APIC twice in lapic_shutdown There is no need to clear APIC twice since disable_local_APIC will clear it anyway. Signed-off-by: Cyrill Gorcunov Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic_32.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c index 6af20dd12c9..a151d66f948 100644 --- a/arch/x86/kernel/apic_32.c +++ b/arch/x86/kernel/apic_32.c @@ -830,10 +830,11 @@ void lapic_shutdown(void) return; local_irq_save(flags); - clear_local_APIC(); if (enabled_via_apicbase) disable_local_APIC(); + else + clear_local_APIC(); local_irq_restore(flags); } -- cgit v1.2.3 From 64e474d168e3cf31e44890b4947644d361f84e43 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Fri, 15 Aug 2008 13:51:22 +0200 Subject: x86: apic - get rid of local_apic_timer_verify_ok We are able to use clock_event_device as it's done in 64bit apic code so lets get rid of local_apic_timer_verify_ok variable. Signed-off-by: Cyrill Gorcunov Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic_32.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c index a151d66f948..7783c113be2 100644 --- a/arch/x86/kernel/apic_32.c +++ b/arch/x86/kernel/apic_32.c @@ -60,8 +60,6 @@ unsigned long mp_lapic_addr; static int force_enable_local_apic; int disable_apic; -/* Local APIC timer verification ok */ -static int local_apic_timer_verify_ok; /* Disable local APIC timer from the kernel commandline or via dmi quirk */ static int disable_apic_timer __cpuinitdata; /* Local APIC timer works in C2 */ @@ -301,7 +299,7 @@ static void lapic_timer_setup(enum clock_event_mode mode, unsigned int v; /* Lapic used for broadcast ? */ - if (!local_apic_timer_verify_ok) + if (evt->features & CLOCK_EVT_FEAT_DUMMY) return; local_irq_save(flags); @@ -514,7 +512,7 @@ static int __init calibrate_APIC_clock(void) return -1; } - local_apic_timer_verify_ok = 1; + levt->features &= ~CLOCK_EVT_FEAT_DUMMY; /* We trust the pm timer based calibration */ if (!pm_referenced) { @@ -548,11 +546,11 @@ static int __init calibrate_APIC_clock(void) if (deltaj >= LAPIC_CAL_LOOPS-2 && deltaj <= LAPIC_CAL_LOOPS+2) apic_printk(APIC_VERBOSE, "... jiffies result ok\n"); else - local_apic_timer_verify_ok = 0; + levt->features |= CLOCK_EVT_FEAT_DUMMY; } else local_irq_enable(); - if (!local_apic_timer_verify_ok) { + if (levt->features & CLOCK_EVT_FEAT_DUMMY) { printk(KERN_WARNING "APIC timer disabled due to verification failure.\n"); return -1; -- cgit v1.2.3 From c93baa1ae51cdba25a5f5ad37b2348e700e75daf Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Fri, 15 Aug 2008 13:51:22 +0200 Subject: x86: apic - unify verify_local_APIC Signed-off-by: Cyrill Gorcunov Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic_32.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c index 7783c113be2..7ef7c782a42 100644 --- a/arch/x86/kernel/apic_32.c +++ b/arch/x86/kernel/apic_32.c @@ -878,6 +878,12 @@ int __init verify_local_APIC(void) */ reg0 = apic_read(APIC_ID); apic_printk(APIC_DEBUG, "Getting ID: %x\n", reg0); + apic_write(APIC_ID, reg0 ^ APIC_ID_MASK); + reg1 = apic_read(APIC_ID); + apic_printk(APIC_DEBUG, "Getting ID: %x\n", reg1); + apic_write(APIC_ID, reg0); + if (reg1 != (reg0 ^ APIC_ID_MASK)) + return 0; /* * The next two are just to see if we have sane values. -- cgit v1.2.3 From 296cb9511dcc3895fda84d0cd5b411bd926e4bb3 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Fri, 15 Aug 2008 13:51:23 +0200 Subject: x86: apic - unify sync_Arb_IDs Signed-off-by: Cyrill Gorcunov Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic_32.c | 3 +-- arch/x86/kernel/apic_64.c | 10 ++++++---- 2 files changed, 7 insertions(+), 6 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c index 7ef7c782a42..d07488993ee 100644 --- a/arch/x86/kernel/apic_32.c +++ b/arch/x86/kernel/apic_32.c @@ -915,8 +915,7 @@ void __init sync_Arb_IDs(void) apic_wait_icr_idle(); apic_printk(APIC_DEBUG, "Synchronizing Arb IDs.\n"); - apic_write(APIC_ICR, - APIC_DEST_ALLINC | APIC_INT_LEVELTRIG | APIC_DM_INIT); + apic_write(APIC_ICR, APIC_DEST_ALLINC | APIC_INT_LEVELTRIG | APIC_DM_INIT); } /* diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c index ac399d0f65c..41aff346063 100644 --- a/arch/x86/kernel/apic_64.c +++ b/arch/x86/kernel/apic_64.c @@ -742,8 +742,11 @@ int __init verify_local_APIC(void) */ void __init sync_Arb_IDs(void) { - /* Unsupported on P4 - see Intel Dev. Manual Vol. 3, Ch. 8.6.1 */ - if (modern_apic()) + /* + * Unsupported on P4 - see Intel Dev. Manual Vol. 3, Ch. 8.6.1 And not + * needed on AMD. + */ + if (modern_apic() || boot_cpu_data.x86_vendor == X86_VENDOR_AMD) return; /* @@ -752,8 +755,7 @@ void __init sync_Arb_IDs(void) apic_wait_icr_idle(); apic_printk(APIC_DEBUG, "Synchronizing Arb IDs.\n"); - apic_write(APIC_ICR, APIC_DEST_ALLINC | APIC_INT_LEVELTRIG - | APIC_DM_INIT); + apic_write(APIC_ICR, APIC_DEST_ALLINC | APIC_INT_LEVELTRIG | APIC_DM_INIT); } /* -- cgit v1.2.3 From 516cbf3730c49739629d66313b20bdc50c98aa2c Mon Sep 17 00:00:00 2001 From: Tim Bird Date: Tue, 12 Aug 2008 12:52:36 -0700 Subject: x86, bootup: add built-in kernel command line for x86 (v2) Allow x86 to support a built-in kernel command line. The built-in command line can override the one provided by the boot loader, for those cases where the boot loader is broken or it is difficult to change the command line in the the boot loader. H. Peter Anvin wrote: > Ingo Molnar wrote: >> Best would be to make it really apparent in the code that nothing >> changes if this config option is not set. Preferably there should be >> no extra code at all in that case. >> > > I would like to see this: [...Nested ifdefs...] OK. This version changes absolutely nothing if CONFIG_CMDLINE_BOOL is not set (the default). Also, no space is appended even when CONFIG_CMDLINE_BOOL is set, but the builtin string is empty. This is less sloppy all the way around, IMHO. Note that I use the same option names as on other arches for this feature. [ mingo@elte.hu: build fix ] Signed-off-by: Tim Bird Cc: Matt Mackall Signed-off-by: Ingo Molnar --- arch/x86/kernel/setup.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 68b48e3fbcb..2f31cddd27b 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -223,6 +223,9 @@ unsigned long saved_video_mode; #define RAMDISK_LOAD_FLAG 0x4000 static char __initdata command_line[COMMAND_LINE_SIZE]; +#ifdef CONFIG_CMDLINE_BOOL +static char __initdata builtin_cmdline[COMMAND_LINE_SIZE] = CONFIG_CMDLINE; +#endif #if defined(CONFIG_EDD) || defined(CONFIG_EDD_MODULE) struct edd edd; @@ -673,6 +676,19 @@ void __init setup_arch(char **cmdline_p) bss_resource.start = virt_to_phys(&__bss_start); bss_resource.end = virt_to_phys(&__bss_stop)-1; +#ifdef CONFIG_CMDLINE_BOOL +#ifdef CONFIG_CMDLINE_OVERRIDE + strlcpy(boot_command_line, builtin_cmdline, COMMAND_LINE_SIZE); +#else + if (builtin_cmdline[0]) { + /* append boot loader cmdline to builtin */ + strlcat(builtin_cmdline, " ", COMMAND_LINE_SIZE); + strlcat(builtin_cmdline, boot_command_line, COMMAND_LINE_SIZE); + strlcpy(boot_command_line, builtin_cmdline, COMMAND_LINE_SIZE); + } +#endif +#endif + strlcpy(command_line, boot_command_line, COMMAND_LINE_SIZE); *cmdline_p = command_line; -- cgit v1.2.3 From 2bd455dbfebfd632a8dcf1d3d1612737986fde0a Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Mon, 4 Aug 2008 11:26:38 +0800 Subject: x86: remove nesting CONFIG_HOTPLUG_CPU prefill_possible_map() is defined inside CONFIG_HOTPLUG_CPU, so the nesting CONFIG_HOTPLUG_CPU is just redundant. Signed-off-by: Li Zefan Signed-off-by: Ingo Molnar --- arch/x86/kernel/smpboot.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 332512767f4..d5e19c36204 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -1271,16 +1271,13 @@ __init void prefill_possible_map(void) if (!num_processors) num_processors = 1; -#ifdef CONFIG_HOTPLUG_CPU if (additional_cpus == -1) { if (disabled_cpus > 0) additional_cpus = disabled_cpus; else additional_cpus = 0; } -#else - additional_cpus = 0; -#endif + possible = num_processors + additional_cpus; if (possible > NR_CPUS) possible = NR_CPUS; -- cgit v1.2.3 From 2070dae10f50ec244f58292436ace9a3f9dc1d71 Mon Sep 17 00:00:00 2001 From: Paolo Ciarrocchi Date: Sat, 2 Aug 2008 21:24:06 +0200 Subject: x86: coding style fixes to arch/x86/kernel/bios_uv.c paolo@paolo-desktop:~/linux.trees.git$ md5sum /tmp/bios_uv.o.* 9afe794594831166704744184e192ed8 /tmp/bios_uv.o.after 9afe794594831166704744184e192ed8 /tmp/bios_uv.o.before Signed-off-by: Paolo Ciarrocchi Signed-off-by: Ingo Molnar --- arch/x86/kernel/bios_uv.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/bios_uv.c b/arch/x86/kernel/bios_uv.c index c639bd55391..fdd585f9c53 100644 --- a/arch/x86/kernel/bios_uv.c +++ b/arch/x86/kernel/bios_uv.c @@ -25,11 +25,11 @@ x86_bios_strerror(long status) { const char *str; switch (status) { - case 0: str = "Call completed without error"; break; - case -1: str = "Not implemented"; break; - case -2: str = "Invalid argument"; break; - case -3: str = "Call completed with error"; break; - default: str = "Unknown BIOS status code"; break; + case 0: str = "Call completed without error"; break; + case -1: str = "Not implemented"; break; + case -2: str = "Invalid argument"; break; + case -3: str = "Call completed with error"; break; + default: str = "Unknown BIOS status code"; break; } return str; } -- cgit v1.2.3 From d9336a9b47d57db835453968efbd0d5cedfe0260 Mon Sep 17 00:00:00 2001 From: Paolo Ciarrocchi Date: Sat, 2 Aug 2008 21:25:43 +0200 Subject: x86: coding style fixes to arch/x86/kernel/paravirt_patch_32.c Before: total: 3 errors, 1 warnings, 49 lines checked After: total: 2 errors, 1 warnings, 49 lines checked paolo@paolo-desktop:~/linux.trees.git$ md5sum /tmp/paravirt_patch_32.o.* a78eea4264723e18c49dcfbe0ee0aae7 /tmp/paravirt_patch_32.o.after a78eea4264723e18c49dcfbe0ee0aae7 /tmp/paravirt_patch_32.o.before Signed-off-by: Paolo Ciarrocchi Signed-off-by: Ingo Molnar --- arch/x86/kernel/paravirt_patch_32.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/paravirt_patch_32.c b/arch/x86/kernel/paravirt_patch_32.c index 58262218781..9fe644f4861 100644 --- a/arch/x86/kernel/paravirt_patch_32.c +++ b/arch/x86/kernel/paravirt_patch_32.c @@ -23,7 +23,7 @@ unsigned native_patch(u8 type, u16 clobbers, void *ibuf, start = start_##ops##_##x; \ end = end_##ops##_##x; \ goto patch_site - switch(type) { + switch (type) { PATCH_SITE(pv_irq_ops, irq_disable); PATCH_SITE(pv_irq_ops, irq_enable); PATCH_SITE(pv_irq_ops, restore_fl); -- cgit v1.2.3 From d33dcb9e7d272cc3171dcc3a21049902185ab03d Mon Sep 17 00:00:00 2001 From: Peter Oruba Date: Fri, 1 Aug 2008 12:46:45 +0200 Subject: x86: Microcode patch loader style corrections Style corrections to main microcode module. Signed-off-by: Peter Oruba Cc: Peter Oruba Cc: Tigran Aivazian Signed-off-by: Ingo Molnar --- arch/x86/kernel/microcode.c | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/microcode.c b/arch/x86/kernel/microcode.c index 28dba1a6e4a..39961bb8329 100644 --- a/arch/x86/kernel/microcode.c +++ b/arch/x86/kernel/microcode.c @@ -55,8 +55,8 @@ * in a single CPU package. * 1.10 28 Feb 2002 Asit K Mallick and * Tigran Aivazian , - * Serialize updates as required on HT processors due to speculative - * nature of implementation. + * Serialize updates as required on HT processors due to + * speculative nature of implementation. * 1.11 22 Mar 2002 Tigran Aivazian * Fix the panic when writing zero-length microcode chunk. * 1.12 29 Sep 2003 Nitin Kamble , @@ -71,7 +71,7 @@ * Thanks to Stuart Swales for pointing out this bug. */ -//#define DEBUG /* pr_debug */ +/*#define DEBUG pr_debug */ #include #include #include @@ -116,7 +116,7 @@ EXPORT_SYMBOL_GPL(user_buffer); unsigned int user_buffer_size; /* it's size */ EXPORT_SYMBOL_GPL(user_buffer_size); -static int do_microcode_update (void) +static int do_microcode_update(void) { long cursor = 0; int error = 0; @@ -158,18 +158,20 @@ out: return error; } -static int microcode_open (struct inode *unused1, struct file *unused2) +static int microcode_open(struct inode *unused1, struct file *unused2) { cycle_kernel_lock(); return capable(CAP_SYS_RAWIO) ? 0 : -EPERM; } -static ssize_t microcode_write (struct file *file, const char __user *buf, size_t len, loff_t *ppos) +static ssize_t microcode_write(struct file *file, const char __user *buf, + size_t len, loff_t *ppos) { ssize_t ret; if ((len >> PAGE_SHIFT) > num_physpages) { - printk(KERN_ERR "microcode: too much data (max %ld pages)\n", num_physpages); + printk(KERN_ERR "microcode: too much data (max %ld pages)\n", + num_physpages); return -EINVAL; } @@ -201,7 +203,7 @@ static struct miscdevice microcode_dev = { .fops = µcode_fops, }; -static int __init microcode_dev_init (void) +static int __init microcode_dev_init(void) { int error; @@ -216,7 +218,7 @@ static int __init microcode_dev_init (void) return 0; } -static void microcode_dev_exit (void) +static void microcode_dev_exit(void) { misc_deregister(µcode_dev); } @@ -224,7 +226,7 @@ static void microcode_dev_exit (void) MODULE_ALIAS_MISCDEV(MICROCODE_MINOR); #else #define microcode_dev_init() 0 -#define microcode_dev_exit() do { } while(0) +#define microcode_dev_exit() do { } while (0) #endif /* fake device for request_firmware */ @@ -451,7 +453,7 @@ int microcode_init(void *opaque, struct module *module) } EXPORT_SYMBOL_GPL(microcode_init); -void __exit microcode_exit (void) +void __exit microcode_exit(void) { microcode_dev_exit(); -- cgit v1.2.3 From 636a31781684d0f49208aa163f1a5a3a74210eb4 Mon Sep 17 00:00:00 2001 From: Peter Oruba Date: Fri, 1 Aug 2008 12:46:46 +0200 Subject: x86: Fixed NULL function pointer dereference in AMD microcode patch loader. Dereference took place in code part responsible for manual installation of microcode patches through /dev/cpu/microcode. Signed-off-by: Peter Oruba Cc: Peter Oruba Cc: Tigran Aivazian Signed-off-by: Ingo Molnar --- arch/x86/kernel/microcode.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/microcode.c b/arch/x86/kernel/microcode.c index 39961bb8329..ad136ad99cb 100644 --- a/arch/x86/kernel/microcode.c +++ b/arch/x86/kernel/microcode.c @@ -127,7 +127,8 @@ static int do_microcode_update(void) old = current->cpus_allowed; while ((cursor = microcode_ops->get_next_ucode(&new_mc, cursor)) > 0) { - error = microcode_ops->microcode_sanity_check(new_mc); + if (microcode_ops->microcode_sanity_check != NULL) + error = microcode_ops->microcode_sanity_check(new_mc); if (error) goto out; /* -- cgit v1.2.3 From c9c3dddd8f9a05b25d4ce53e8e80cc0ea1759d18 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Fri, 1 Aug 2008 03:51:38 +0400 Subject: x86_64: remove empty lines from stack traces/oopses Signed-off-by: Alexey Dobriyan Cc: ak@suse.de Cc: akpm@osdl.org Signed-off-by: Ingo Molnar --- arch/x86/kernel/traps_64.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/traps_64.c b/arch/x86/kernel/traps_64.c index 3f18d73f420..8b5b3b81d43 100644 --- a/arch/x86/kernel/traps_64.c +++ b/arch/x86/kernel/traps_64.c @@ -339,9 +339,8 @@ static void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, unsigned long *stack, unsigned long bp, char *log_lvl) { - printk("\nCall Trace:\n"); + printk("Call Trace:\n"); dump_trace(task, regs, stack, bp, &print_trace_ops, log_lvl); - printk("\n"); } void show_trace(struct task_struct *task, struct pt_regs *regs, @@ -386,6 +385,7 @@ show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, printk(" %016lx", *stack++); touch_nmi_watchdog(); } + printk("\n"); show_trace_log_lvl(task, regs, sp, bp, log_lvl); } @@ -443,7 +443,6 @@ void show_registers(struct pt_regs *regs) printk("Stack: "); show_stack_log_lvl(NULL, regs, (unsigned long *)sp, regs->bp, ""); - printk("\n"); printk(KERN_EMERG "Code: "); -- cgit v1.2.3 From 48e2bd56b1d1ae4b95fb21be778927b64d5c4235 Mon Sep 17 00:00:00 2001 From: "Gustavo F. Padovan" Date: Sat, 2 Aug 2008 12:50:37 -0300 Subject: x86: coding style fixes to arch/x86/kernel/traps_64.c Fix coding style of traps_64.c with improvements suggested by Ingo. Signed-off-by: Gustavo F. Padovan Signed-off-by: Ingo Molnar --- arch/x86/kernel/traps_64.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/traps_64.c b/arch/x86/kernel/traps_64.c index fe36d96ba70..5df5d2e97ec 100644 --- a/arch/x86/kernel/traps_64.c +++ b/arch/x86/kernel/traps_64.c @@ -84,8 +84,8 @@ static inline void preempt_conditional_cli(struct pt_regs *regs) void printk_address(unsigned long address, int reliable) { - printk(" [<%016lx>] %s%pS\n", address, reliable ? - "" : "? ", (void *) address); + printk(" [<%016lx>] %s%pS\n", + address, reliable ? "" : "? ", (void *) address); } static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack, @@ -98,8 +98,8 @@ static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack, [STACKFAULT_STACK - 1] = "#SS", [MCE_STACK - 1] = "#MC", #if DEBUG_STKSZ > EXCEPTION_STKSZ - [N_EXCEPTION_STACKS ... N_EXCEPTION_STACKS + DEBUG_STKSZ / - EXCEPTION_STKSZ - 2] = "#DB[?]" + [N_EXCEPTION_STACKS ... + N_EXCEPTION_STACKS + DEBUG_STKSZ / EXCEPTION_STKSZ - 2] = "#DB[?]" #endif }; unsigned k; @@ -363,8 +363,10 @@ show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, unsigned long *irqstack = (unsigned long *) (cpu_pda(cpu)->irqstackptr - IRQSTACKSIZE); - /* debugging aid: "show_stack(NULL, NULL);" prints the - back trace for this cpu. */ + /* + * debugging aid: "show_stack(NULL, NULL);" prints the + * back trace for this cpu. + */ if (sp == NULL) { if (task) -- cgit v1.2.3 From f88f07e0f0fd6376e081b10930d272a08fbf082f Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Thu, 14 Aug 2008 16:58:15 -0400 Subject: x86: alternatives : fix LOCK_PREFIX race with preemptible kernel and CPU hotplug MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If a kernel thread is preempted in single-cpu mode right after the NOP (nop about to be turned into a lock prefix), then we CPU hotplug a CPU, and then the thread is scheduled back again, a SMP-unsafe atomic operation will be used on shared SMP variables, leading to corruption. No corruption would happen in the reverse case : going from SMP to UP is ok because we split a bit instruction into tiny pieces, which does not present this condition. Changing the 0x90 (single-byte nop) currently used into a 0x3E DS segment override prefix should fix this issue. Since the default of the atomic instructions is to use the DS segment anyway, it should not affect the behavior. The exception to this are references that use ESP/RSP and EBP/RBP as the base register (they will use the SS segment), however, in Linux (a) DS == SS at all times, and (b) we do not distinguish between segment violations reported as #SS as opposed to #GP, so there is no need to disassemble the instruction to figure out the suitable segment. This patch assumes that the 0x3E prefix will leave atomic operations as-is (thus assuming they normally touch data in the DS segment). Since there seem to be no obvious ill-use of other segment override prefixes for atomic operations, it should be safe. It can be verified with a quick grep -r LOCK_PREFIX include/asm-x86/ grep -A 1 -r LOCK_PREFIX arch/x86/ Taken from This source : AMD64 Architecture Programmer's Manual Volume 3: General-Purpose and System Instructions States "Instructions that Reference a Non-Stack Segment—If an instruction encoding references any base register other than rBP or rSP, or if an instruction contains an immediate offset, the default segment is the data segment (DS). These instructions can use the segment-override prefix to select one of the non-default segments, as shown in Table 1-5." Therefore, forcing the DS segment on the atomic operations, which already use the DS segment, should not change. This source : http://wiki.osdev.org/X86_Instruction_Encoding States "In 64-bit the CS, SS, DS and ES segment overrides are ignored." Confirmed by "AMD 64-Bit Technology" A.7 http://www.amd.com/us-en/assets/content_type/white_papers_and_tech_docs/x86-64_overview.pdf "In 64-bit mode, the DS, ES, SS and CS segment-override prefixes have no effect. These four prefixes are no longer treated as segment-override prefixes in the context of multipleprefix rules. Instead, they are treated as null prefixes." This patch applies to 2.6.27-rc2, but would also have to be applied to earlier kernels (2.6.26, 2.6.25, ...). Performance impact of the fix : tests done on "xaddq" and "xaddl" shows it actually improves performances on Intel Xeon, AMD64, Pentium M. It does not change the performance on Pentium II, Pentium 3 and Pentium 4. Xeon E5405 2.0GHz : NR_TESTS 10000000 test empty cycles : 162207948 test test 1-byte nop xadd cycles : 170755422 test test DS override prefix xadd cycles : 170000118 * test test LOCK xadd cycles : 472012134 AMD64 2.0GHz : NR_TESTS 10000000 test empty cycles : 146674549 test test 1-byte nop xadd cycles : 150273860 test test DS override prefix xadd cycles : 149982382 * test test LOCK xadd cycles : 270000690 Pentium 4 3.0GHz NR_TESTS 10000000 test empty cycles : 290001195 test test 1-byte nop xadd cycles : 310000560 test test DS override prefix xadd cycles : 310000575 * test test LOCK xadd cycles : 1050103740 Pentium M 2.0GHz NR_TESTS 10000000 test empty cycles : 180000523 test test 1-byte nop xadd cycles : 320000345 test test DS override prefix xadd cycles : 310000374 * test test LOCK xadd cycles : 480000357 Pentium 3 550MHz NR_TESTS 10000000 test empty cycles : 510000231 test test 1-byte nop xadd cycles : 620000128 test test DS override prefix xadd cycles : 620000110 * test test LOCK xadd cycles : 800000088 Pentium II 350MHz NR_TESTS 10000000 test empty cycles : 200833494 test test 1-byte nop xadd cycles : 340000130 test test DS override prefix xadd cycles : 340000126 * test test LOCK xadd cycles : 530000078 Speed test modules can be found at http://ltt.polymtl.ca/svn/trunk/tests/kernel/test-prefix-speed-32.c http://ltt.polymtl.ca/svn/trunk/tests/kernel/test-prefix-speed.c Macro-benchmarks 2.0GHz E5405 Core 2 dual Quad-Core Xeon Summary * replace smp lock prefixes with DS segment selector prefixes no lock prefix (s) with lock prefix (s) Speedup make -j1 kernel/ 33.94 +/- 0.07 34.91 +/- 0.27 2.8 % hackbench 50 2.99 +/- 0.01 3.74 +/- 0.01 25.1 % * replace smp lock prefixes with 0x90 nops no lock prefix (s) with lock prefix (s) Speedup make -j1 kernel/ 34.16 +/- 0.32 34.91 +/- 0.27 2.2 % hackbench 50 3.00 +/- 0.01 3.74 +/- 0.01 24.7 % Detail : 1 CPU, replace smp lock prefixes with DS segment selector prefixes make -j1 kernel/ real 0m34.067s user 0m30.630s sys 0m2.980s real 0m33.867s user 0m30.582s sys 0m3.024s real 0m33.939s user 0m30.738s sys 0m2.876s real 0m33.913s user 0m30.806s sys 0m2.808s avg : 33.94s std. dev. : 0.07s hackbench 50 Time: 2.978 Time: 2.982 Time: 3.010 Time: 2.984 Time: 2.982 avg : 2.99 std. dev. : 0.01 1 CPU, noreplace-smp make -j1 kernel/ real 0m35.326s user 0m30.630s sys 0m3.260s real 0m34.325s user 0m30.802s sys 0m3.084s real 0m35.568s user 0m30.722s sys 0m3.168s real 0m34.435s user 0m30.886s sys 0m2.996s avg.: 34.91s std. dev. : 0.27s hackbench 50 Time: 3.733 Time: 3.750 Time: 3.761 Time: 3.737 Time: 3.741 avg : 3.74 std. dev. : 0.01 1 CPU, replace smp lock prefixes with 0x90 nops make -j1 kernel/ real 0m34.139s user 0m30.782s sys 0m2.820s real 0m34.010s user 0m30.630s sys 0m2.976s real 0m34.777s user 0m30.658s sys 0m2.916s real 0m33.924s user 0m30.634s sys 0m2.924s real 0m33.962s user 0m30.774s sys 0m2.800s real 0m34.141s user 0m30.770s sys 0m2.828s avg : 34.16 std. dev. : 0.32 hackbench 50 Time: 2.999 Time: 2.994 Time: 3.004 Time: 2.991 Time: 2.988 avg : 3.00 std. dev. : 0.01 I did more runs (20 runs of each) to compare the nop case to the DS prefix case. Results in seconds. They actually does not seems to show a significant difference. NOP 34.155 33.955 34.012 35.299 35.679 34.141 33.995 35.016 34.254 33.957 33.957 34.008 35.013 34.494 33.893 34.295 34.314 34.854 33.991 34.132 DS 34.080 34.304 34.374 35.095 34.291 34.135 33.940 34.208 35.276 34.288 33.861 33.898 34.610 34.709 33.851 34.256 35.161 34.283 33.865 35.078 Used http://www.graphpad.com/quickcalcs/ttest1.cfm?Format=C to do the T-test (yeah, I'm lazy) : Group Group One (DS prefix) Group Two (nops) Mean 34.37815 34.37070 SD 0.46108 0.51905 SEM 0.10310 0.11606 N 20 20 P value and statistical significance: The two-tailed P value equals 0.9620 By conventional criteria, this difference is considered to be not statistically significant. Confidence interval: The mean of Group One minus Group Two equals 0.00745 95% confidence interval of this difference: From -0.30682 to 0.32172 Intermediate values used in calculations: t = 0.0480 df = 38 standard error of difference = 0.155 So, unless these calculus are completely bogus, the difference between the nop and the DS case seems not to be statistically significant. Signed-off-by: Mathieu Desnoyers Acked-by: H. Peter Anvin CC: Linus Torvalds CC: Jeremy Fitzhardinge CC: Roland McGrath CC: Ingo Molnar Cc: Steven Rostedt CC: Steven Rostedt CC: Thomas Gleixner CC: Peter Zijlstra CC: Andrew Morton CC: David Miller CC: Ulrich Drepper CC: Rusty Russell CC: Gregory Haskins CC: Arnaldo Carvalho de Melo CC: "Luis Claudio R. Goncalves" CC: Clark Williams CC: Christoph Lameter CC: Andi Kleen CC: Harvey Harrison Signed-off-by: H. Peter Anvin --- arch/x86/kernel/alternative.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index 2763cb37b55..7ead11f3732 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -241,25 +241,25 @@ static void alternatives_smp_lock(u8 **start, u8 **end, u8 *text, u8 *text_end) continue; if (*ptr > text_end) continue; - text_poke(*ptr, ((unsigned char []){0xf0}), 1); /* add lock prefix */ + /* turn DS segment override prefix into lock prefix */ + text_poke(*ptr, ((unsigned char []){0xf0}), 1); }; } static void alternatives_smp_unlock(u8 **start, u8 **end, u8 *text, u8 *text_end) { u8 **ptr; - char insn[1]; if (noreplace_smp) return; - add_nops(insn, 1); for (ptr = start; ptr < end; ptr++) { if (*ptr < text) continue; if (*ptr > text_end) continue; - text_poke(*ptr, insn, 1); + /* turn lock prefix into DS segment override prefix */ + text_poke(*ptr, ((unsigned char []){0x3E}), 1); }; } -- cgit v1.2.3 From 6f6da97faf29f87b3980a6992fb8cab44b4c444d Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Fri, 15 Aug 2008 23:05:19 +0400 Subject: x86: apic - sync_Arb_IDs style fixup No changes on binary level Signed-off-by: Cyrill Gorcunov Acked-by: Maciej W. Rozycki Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic_32.c | 4 +++- arch/x86/kernel/apic_64.c | 3 ++- 2 files changed, 5 insertions(+), 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c index d07488993ee..60575c05151 100644 --- a/arch/x86/kernel/apic_32.c +++ b/arch/x86/kernel/apic_32.c @@ -909,13 +909,15 @@ void __init sync_Arb_IDs(void) */ if (modern_apic() || boot_cpu_data.x86_vendor == X86_VENDOR_AMD) return; + /* * Wait for idle. */ apic_wait_icr_idle(); apic_printk(APIC_DEBUG, "Synchronizing Arb IDs.\n"); - apic_write(APIC_ICR, APIC_DEST_ALLINC | APIC_INT_LEVELTRIG | APIC_DM_INIT); + apic_write(APIC_ICR, APIC_DEST_ALLINC | + APIC_INT_LEVELTRIG | APIC_DM_INIT); } /* diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c index 41aff346063..72e94ab0e36 100644 --- a/arch/x86/kernel/apic_64.c +++ b/arch/x86/kernel/apic_64.c @@ -755,7 +755,8 @@ void __init sync_Arb_IDs(void) apic_wait_icr_idle(); apic_printk(APIC_DEBUG, "Synchronizing Arb IDs.\n"); - apic_write(APIC_ICR, APIC_DEST_ALLINC | APIC_INT_LEVELTRIG | APIC_DM_INIT); + apic_write(APIC_ICR, APIC_DEST_ALLINC | + APIC_INT_LEVELTRIG | APIC_DM_INIT); } /* -- cgit v1.2.3 From 638c0411922540deaf8797cacf73513b17618405 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Fri, 15 Aug 2008 23:05:18 +0400 Subject: x86: apic - unify init_bsp_APIC - remove redundant read of APIC_LVR register in 64bit mode - APIC is always integrated for 64bit mode so gcc will eliminate lapic_is_integrated call Signed-off-by: Cyrill Gorcunov Acked-by: Maciej W. Rozycki Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic_32.c | 4 +++- arch/x86/kernel/apic_64.c | 14 +++++++++++--- 2 files changed, 14 insertions(+), 4 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c index 60575c05151..16d9721788c 100644 --- a/arch/x86/kernel/apic_32.c +++ b/arch/x86/kernel/apic_32.c @@ -925,7 +925,7 @@ void __init sync_Arb_IDs(void) */ void __init init_bsp_APIC(void) { - unsigned long value; + unsigned int value; /* * Don't do the setup now if we have a SMP BIOS as the @@ -946,11 +946,13 @@ void __init init_bsp_APIC(void) value &= ~APIC_VECTOR_MASK; value |= APIC_SPIV_APIC_ENABLED; +#ifdef CONFIG_X86_32 /* This bit is reserved on P4/Xeon and should be cleared */ if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) && (boot_cpu_data.x86 == 15)) value &= ~APIC_SPIV_FOCUS_DISABLED; else +#endif value |= APIC_SPIV_FOCUS_DISABLED; value |= SPURIOUS_APIC_VECTOR; apic_write(APIC_SPIV, value); diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c index 72e94ab0e36..99d18b8976a 100644 --- a/arch/x86/kernel/apic_64.c +++ b/arch/x86/kernel/apic_64.c @@ -773,8 +773,6 @@ void __init init_bsp_APIC(void) if (smp_found_config || !cpu_has_apic) return; - value = apic_read(APIC_LVR); - /* * Do not trust the local APIC being empty at bootup. */ @@ -786,7 +784,15 @@ void __init init_bsp_APIC(void) value = apic_read(APIC_SPIV); value &= ~APIC_VECTOR_MASK; value |= APIC_SPIV_APIC_ENABLED; - value |= APIC_SPIV_FOCUS_DISABLED; + +#ifdef CONFIG_X86_32 + /* This bit is reserved on P4/Xeon and should be cleared */ + if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) && + (boot_cpu_data.x86 == 15)) + value &= ~APIC_SPIV_FOCUS_DISABLED; + else +#endif + value |= APIC_SPIV_FOCUS_DISABLED; value |= SPURIOUS_APIC_VECTOR; apic_write(APIC_SPIV, value); @@ -795,6 +801,8 @@ void __init init_bsp_APIC(void) */ apic_write(APIC_LVT0, APIC_DM_EXTINT); value = APIC_DM_NMI; + if (!lapic_is_integrated()) /* 82489DX */ + value |= APIC_LVT_LEVEL_TRIGGER; apic_write(APIC_LVT1, value); } -- cgit v1.2.3 From 6764014bc8bb4849f6a4f336477e873ad5861ed2 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Sat, 16 Aug 2008 23:21:50 +0400 Subject: x86: apic - unify clear_local_APIC - Remove redundant masking of APIC_LVTTHMR register in apic_32.c - Add masking of APIC_LVTTHMR register to apic_64.c. We use a bit complicated #ifdef here: CONFIG_X86_MCE_P4THERMAL is 32bit specific and X86_MCE_INTEL is 64bit specific so the appropriate config variable will be set by Kconfig. - the APIC_ESR register clearing in apic_64.c now uses not straightforward way but this is allowed tradeoff. Signed-off-by: Cyrill Gorcunov Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic_32.c | 6 +----- arch/x86/kernel/apic_64.c | 17 +++++++++++++++-- 2 files changed, 16 insertions(+), 7 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c index 16d9721788c..3131603a4d6 100644 --- a/arch/x86/kernel/apic_32.c +++ b/arch/x86/kernel/apic_32.c @@ -754,7 +754,7 @@ void clear_local_APIC(void) } /* lets not touch this if we didn't frob it */ -#ifdef CONFIG_X86_MCE_P4THERMAL +#if defined(CONFIG_X86_MCE_P4THERMAL) || defined(X86_MCE_INTEL) if (maxlvt >= 5) { v = apic_read(APIC_LVTTHMR); apic_write(APIC_LVTTHMR, v | APIC_LVT_MASKED); @@ -771,10 +771,6 @@ void clear_local_APIC(void) if (maxlvt >= 4) apic_write(APIC_LVTPC, APIC_LVT_MASKED); -#ifdef CONFIG_X86_MCE_P4THERMAL - if (maxlvt >= 5) - apic_write(APIC_LVTTHMR, APIC_LVT_MASKED); -#endif /* Integrated APIC (!82489DX) ? */ if (lapic_is_integrated()) { if (maxlvt > 3) diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c index 99d18b8976a..d834b758362 100644 --- a/arch/x86/kernel/apic_64.c +++ b/arch/x86/kernel/apic_64.c @@ -630,6 +630,13 @@ void clear_local_APIC(void) apic_write(APIC_LVTPC, v | APIC_LVT_MASKED); } + /* lets not touch this if we didn't frob it */ +#if defined(CONFIG_X86_MCE_P4THERMAL) || defined(X86_MCE_INTEL) + if (maxlvt >= 5) { + v = apic_read(APIC_LVTTHMR); + apic_write(APIC_LVTTHMR, v | APIC_LVT_MASKED); + } +#endif /* * Clean APIC state for other OSs: */ @@ -640,8 +647,14 @@ void clear_local_APIC(void) apic_write(APIC_LVTERR, APIC_LVT_MASKED); if (maxlvt >= 4) apic_write(APIC_LVTPC, APIC_LVT_MASKED); - apic_write(APIC_ESR, 0); - apic_read(APIC_ESR); + + /* Integrated APIC (!82489DX) ? */ + if (lapic_is_integrated()) { + if (maxlvt > 3) + /* Clear ESR due to Pentium errata 3AP and 11AP */ + apic_write(APIC_ESR, 0); + apic_read(APIC_ESR); + } } /** -- cgit v1.2.3 From 92206c909ad1d4f9c355b4842722d5a355d6b76b Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Sat, 16 Aug 2008 23:21:51 +0400 Subject: x86: apic - unify lapic_resume Signed-off-by: Cyrill Gorcunov Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic_32.c | 29 ++++++++++++++++++----------- arch/x86/kernel/apic_64.c | 19 +++++++++++++++---- 2 files changed, 33 insertions(+), 15 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c index 3131603a4d6..3d40213d3af 100644 --- a/arch/x86/kernel/apic_32.c +++ b/arch/x86/kernel/apic_32.c @@ -1606,16 +1606,21 @@ static int lapic_resume(struct sys_device *dev) local_irq_save(flags); - /* - * Make sure the APICBASE points to the right address - * - * FIXME! This will be wrong if we ever support suspend on - * SMP! We'll need to do this as part of the CPU restore! - */ - rdmsr(MSR_IA32_APICBASE, l, h); - l &= ~MSR_IA32_APICBASE_BASE; - l |= MSR_IA32_APICBASE_ENABLE | mp_lapic_addr; - wrmsr(MSR_IA32_APICBASE, l, h); +#ifdef CONFIG_X86_64 + if (x2apic) + enable_x2apic(); + else +#endif + /* + * Make sure the APICBASE points to the right address + * + * FIXME! This will be wrong if we ever support suspend on + * SMP! We'll need to do this as part of the CPU restore! + */ + rdmsr(MSR_IA32_APICBASE, l, h); + l &= ~MSR_IA32_APICBASE_BASE; + l |= MSR_IA32_APICBASE_ENABLE | mp_lapic_addr; + wrmsr(MSR_IA32_APICBASE, l, h); apic_write(APIC_LVTERR, ERROR_APIC_VECTOR | APIC_LVT_MASKED); apic_write(APIC_ID, apic_pm_state.apic_id); @@ -1625,7 +1630,7 @@ static int lapic_resume(struct sys_device *dev) apic_write(APIC_SPIV, apic_pm_state.apic_spiv); apic_write(APIC_LVT0, apic_pm_state.apic_lvt0); apic_write(APIC_LVT1, apic_pm_state.apic_lvt1); -#ifdef CONFIG_X86_MCE_P4THERMAL +#if defined(CONFIG_X86_MCE_P4THERMAL) || defined(CONFIG_X86_MCE_INTEL) if (maxlvt >= 5) apic_write(APIC_LVTTHMR, apic_pm_state.apic_thmr); #endif @@ -1639,7 +1644,9 @@ static int lapic_resume(struct sys_device *dev) apic_write(APIC_LVTERR, apic_pm_state.apic_lvterr); apic_write(APIC_ESR, 0); apic_read(APIC_ESR); + local_irq_restore(flags); + return 0; } diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c index d834b758362..e542a2d08de 100644 --- a/arch/x86/kernel/apic_64.c +++ b/arch/x86/kernel/apic_64.c @@ -1412,13 +1412,22 @@ static int lapic_resume(struct sys_device *dev) maxlvt = lapic_get_maxlvt(); local_irq_save(flags); - if (!x2apic) { + +#ifdef CONFIG_X86_64 + if (x2apic) + enable_x2apic(); + else +#endif + /* + * Make sure the APICBASE points to the right address + * + * FIXME! This will be wrong if we ever support suspend on + * SMP! We'll need to do this as part of the CPU restore! + */ rdmsr(MSR_IA32_APICBASE, l, h); l &= ~MSR_IA32_APICBASE_BASE; l |= MSR_IA32_APICBASE_ENABLE | mp_lapic_addr; wrmsr(MSR_IA32_APICBASE, l, h); - } else - enable_x2apic(); apic_write(APIC_LVTERR, ERROR_APIC_VECTOR | APIC_LVT_MASKED); apic_write(APIC_ID, apic_pm_state.apic_id); @@ -1428,7 +1437,7 @@ static int lapic_resume(struct sys_device *dev) apic_write(APIC_SPIV, apic_pm_state.apic_spiv); apic_write(APIC_LVT0, apic_pm_state.apic_lvt0); apic_write(APIC_LVT1, apic_pm_state.apic_lvt1); -#ifdef CONFIG_X86_MCE_INTEL +#if defined(CONFIG_X86_MCE_P4THERMAL) || defined(CONFIG_X86_MCE_INTEL) if (maxlvt >= 5) apic_write(APIC_LVTTHMR, apic_pm_state.apic_thmr); #endif @@ -1442,7 +1451,9 @@ static int lapic_resume(struct sys_device *dev) apic_write(APIC_LVTERR, apic_pm_state.apic_lvterr); apic_write(APIC_ESR, 0); apic_read(APIC_ESR); + local_irq_restore(flags); + return 0; } -- cgit v1.2.3 From 24968cfdd4c3cf61338495dd0f9bb8a6907d2087 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Sat, 16 Aug 2008 23:21:52 +0400 Subject: x86: apic - unify lapic_suspend Signed-off-by: Cyrill Gorcunov Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic_32.c | 2 +- arch/x86/kernel/apic_64.c | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c index 3d40213d3af..6cb8aaaf10f 100644 --- a/arch/x86/kernel/apic_32.c +++ b/arch/x86/kernel/apic_32.c @@ -1582,7 +1582,7 @@ static int lapic_suspend(struct sys_device *dev, pm_message_t state) apic_pm_state.apic_lvterr = apic_read(APIC_LVTERR); apic_pm_state.apic_tmict = apic_read(APIC_TMICT); apic_pm_state.apic_tdcr = apic_read(APIC_TDCR); -#ifdef CONFIG_X86_MCE_P4THERMAL +#if defined(CONFIG_X86_MCE_P4THERMAL) || defined(CONFIG_X86_MCE_INTEL) if (maxlvt >= 5) apic_pm_state.apic_thmr = apic_read(APIC_LVTTHMR); #endif diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c index e542a2d08de..13dea935b4e 100644 --- a/arch/x86/kernel/apic_64.c +++ b/arch/x86/kernel/apic_64.c @@ -1390,10 +1390,11 @@ static int lapic_suspend(struct sys_device *dev, pm_message_t state) apic_pm_state.apic_lvterr = apic_read(APIC_LVTERR); apic_pm_state.apic_tmict = apic_read(APIC_TMICT); apic_pm_state.apic_tdcr = apic_read(APIC_TDCR); -#ifdef CONFIG_X86_MCE_INTEL +#if defined(CONFIG_X86_MCE_P4THERMAL) || defined(CONFIG_X86_MCE_INTEL) if (maxlvt >= 5) apic_pm_state.apic_thmr = apic_read(APIC_LVTTHMR); #endif + local_irq_save(flags); disable_local_APIC(); local_irq_restore(flags); -- cgit v1.2.3 From 274cfe5912eebe9d4e1a4c451fe617289a181fcf Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Sat, 16 Aug 2008 23:21:53 +0400 Subject: x86: apic - rearrange functions and comments Rearrange functions and comments to find differences easier. Also use apic_printk in setup_boot_APIC_clock for 64bit mode. Signed-off-by: Cyrill Gorcunov Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic_32.c | 69 ++++++++++++++++++++++++++--------------------- arch/x86/kernel/apic_64.c | 48 ++++++++++++++++++++++++++------- 2 files changed, 77 insertions(+), 40 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c index 6cb8aaaf10f..9e8702eebd4 100644 --- a/arch/x86/kernel/apic_32.c +++ b/arch/x86/kernel/apic_32.c @@ -251,6 +251,9 @@ int lapic_get_maxlvt(void) * this function twice on the boot CPU, once with a bogus timeout * value, second time for real. The other (noncalibrating) CPUs * call this function only once, with the real, calibrated value. + * + * We do reads before writes even if unnecessary, to get around the + * P5 APIC double write bug. */ static void __setup_APIC_LVTT(unsigned int clocks, int oneshot, int irqen) { @@ -279,6 +282,36 @@ static void __setup_APIC_LVTT(unsigned int clocks, int oneshot, int irqen) apic_write(APIC_TMICT, clocks / APIC_DIVISOR); } +/* + * Setup extended LVT, AMD specific (K8, family 10h) + * + * Vector mappings are hard coded. On K8 only offset 0 (APIC500) and + * MCE interrupts are supported. Thus MCE offset must be set to 0. + */ + +#define APIC_EILVT_LVTOFF_MCE 0 +#define APIC_EILVT_LVTOFF_IBS 1 + +static void setup_APIC_eilvt(u8 lvt_off, u8 vector, u8 msg_type, u8 mask) +{ + unsigned long reg = (lvt_off << 4) + APIC_EILVT0; + unsigned int v = (mask << 16) | (msg_type << 8) | vector; + + apic_write(reg, v); +} + +u8 setup_APIC_eilvt_mce(u8 vector, u8 msg_type, u8 mask) +{ + setup_APIC_eilvt(APIC_EILVT_LVTOFF_MCE, vector, msg_type, mask); + return APIC_EILVT_LVTOFF_MCE; +} + +u8 setup_APIC_eilvt_ibs(u8 vector, u8 msg_type, u8 mask) +{ + setup_APIC_eilvt(APIC_EILVT_LVTOFF_IBS, vector, msg_type, mask); + return APIC_EILVT_LVTOFF_IBS; +} + /* * Program the next event, relative to now */ @@ -298,7 +331,7 @@ static void lapic_timer_setup(enum clock_event_mode mode, unsigned long flags; unsigned int v; - /* Lapic used for broadcast ? */ + /* Lapic used as dummy for broadcast ? */ if (evt->features & CLOCK_EVT_FEAT_DUMMY) return; @@ -680,35 +713,6 @@ int setup_profiling_timer(unsigned int multiplier) return -EINVAL; } -/* - * Setup extended LVT, AMD specific (K8, family 10h) - * - * Vector mappings are hard coded. On K8 only offset 0 (APIC500) and - * MCE interrupts are supported. Thus MCE offset must be set to 0. - */ - -#define APIC_EILVT_LVTOFF_MCE 0 -#define APIC_EILVT_LVTOFF_IBS 1 - -static void setup_APIC_eilvt(u8 lvt_off, u8 vector, u8 msg_type, u8 mask) -{ - unsigned long reg = (lvt_off << 4) + APIC_EILVT0; - unsigned int v = (mask << 16) | (msg_type << 8) | vector; - apic_write(reg, v); -} - -u8 setup_APIC_eilvt_mce(u8 vector, u8 msg_type, u8 mask) -{ - setup_APIC_eilvt(APIC_EILVT_LVTOFF_MCE, vector, msg_type, mask); - return APIC_EILVT_LVTOFF_MCE; -} - -u8 setup_APIC_eilvt_ibs(u8 vector, u8 msg_type, u8 mask) -{ - setup_APIC_eilvt(APIC_EILVT_LVTOFF_IBS, vector, msg_type, mask); - return APIC_EILVT_LVTOFF_IBS; -} - /* * Local APIC start and shutdown */ @@ -1542,6 +1546,11 @@ void __cpuinit generic_processor_info(int apicid, int version) #ifdef CONFIG_PM static struct { + /* + * 'active' is true if the local APIC was enabled by us and + * not the BIOS; this signifies that we are also responsible + * for disabling it before entering apm/acpi suspend + */ int active; /* r/w apic fields */ unsigned int apic_id; diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c index 13dea935b4e..46523c0cc6f 100644 --- a/arch/x86/kernel/apic_64.c +++ b/arch/x86/kernel/apic_64.c @@ -81,6 +81,9 @@ static void lapic_timer_setup(enum clock_event_mode mode, static void lapic_timer_broadcast(cpumask_t mask); static void apic_pm_activate(void); +/* + * The local apic timer can be used for any function which is CPU local. + */ static struct clock_event_device lapic_clockevent = { .name = "lapic", .features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT @@ -127,6 +130,11 @@ static int modern_apic(void) return lapic_get_version() >= 0x14; } +/* + * Paravirt kernels also might be using these below ops. So we still + * use generic apic_read()/apic_write(), which might be pointing to different + * ops in PARAVIRT case. + */ void xapic_wait_icr_idle(void) { while (apic_read(APIC_ICR) & APIC_ICR_BUSY) @@ -175,7 +183,6 @@ static struct apic_ops xapic_ops = { }; struct apic_ops __read_mostly *apic_ops = &xapic_ops; - EXPORT_SYMBOL_GPL(apic_ops); static void x2apic_wait_icr_idle(void) @@ -244,6 +251,10 @@ int lapic_get_maxlvt(void) return APIC_INTEGRATED(GET_APIC_VERSION(v)) ? GET_APIC_MAXLVT(v) : 2; } +/* + * Local APIC timer + */ + /* Clock divisor is set to 1 */ #define APIC_DIVISOR 1 @@ -257,7 +268,6 @@ int lapic_get_maxlvt(void) * We do reads before writes even if unnecessary, to get around the * P5 APIC double write bug. */ - static void __setup_APIC_LVTT(unsigned int clocks, int oneshot, int irqen) { unsigned int lvtt_value, tmp_value; @@ -474,10 +484,10 @@ static int __init calibrate_APIC_clock(void) void __init setup_boot_APIC_clock(void) { /* - * The local apic timer can be disabled via the kernel commandline. - * Register the lapic timer as a dummy clock event source on SMP - * systems, so the broadcast mechanism is used. On UP systems simply - * ignore it. + * The local apic timer can be disabled via the kernel + * commandline or from the CPU detection code. Register the lapic + * timer as a dummy clock event source on SMP systems, so the + * broadcast mechanism is used. On UP systems simply ignore it. */ if (disable_apic_timer) { printk(KERN_INFO "Disabling APIC timer\n"); @@ -489,7 +499,9 @@ void __init setup_boot_APIC_clock(void) return; } - printk(KERN_INFO "Using local APIC timer interrupts.\n"); + apic_printk(APIC_VERBOSE, "Using local APIC timer interrupts.\n" + "calibrating APIC timer ...\n"); + if (calibrate_APIC_clock()) { /* No broadcast on UP ! */ if (num_possible_cpus() > 1) @@ -508,6 +520,7 @@ void __init setup_boot_APIC_clock(void) printk(KERN_WARNING "APIC timer registered as dummy," " due to nmi_watchdog=%d!\n", nmi_watchdog); + /* Setup the lapic or request the broadcast */ setup_APIC_timer(); } @@ -577,6 +590,7 @@ void smp_apic_timer_interrupt(struct pt_regs *regs) irq_enter(); local_apic_timer_interrupt(); irq_exit(); + set_irq_regs(old_regs); } @@ -1248,6 +1262,13 @@ void __init connect_bsp_APIC(void) enable_apic_mode(); } +/** + * disconnect_bsp_APIC - detach the APIC from the interrupt system + * @virt_wire_setup: indicates, whether virtual wire mode is selected + * + * Virtual wire mode is necessary to deliver legacy interrupts even when the + * APIC is disabled. + */ void disconnect_bsp_APIC(int virt_wire_setup) { /* Go back to Virtual Wire compatibility mode */ @@ -1347,9 +1368,11 @@ int hard_smp_processor_id(void) #ifdef CONFIG_PM static struct { - /* 'active' is true if the local APIC was enabled by us and - not the BIOS; this signifies that we are also responsible - for disabling it before entering apm/acpi suspend */ + /* + * 'active' is true if the local APIC was enabled by us and + * not the BIOS; this signifies that we are also responsible + * for disabling it before entering apm/acpi suspend + */ int active; /* r/w apic fields */ unsigned int apic_id; @@ -1458,6 +1481,11 @@ static int lapic_resume(struct sys_device *dev) return 0; } +/* + * This device has no shutdown method - fully functioning local APICs + * are needed on every CPU up until machine_halt/restart/poweroff. + */ + static struct sysdev_class lapic_sysclass = { .name = "lapic", .resume = lapic_resume, -- cgit v1.2.3 From 9c803869f5f5e3d7ad94b2926474b2882e30073b Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Sat, 16 Aug 2008 23:21:54 +0400 Subject: x86: apic - unify lapic_is_integrated Signed-off-by: Cyrill Gorcunov Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic_32.c | 4 ++++ arch/x86/kernel/apic_64.c | 6 +++++- 2 files changed, 9 insertions(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c index 9e8702eebd4..41134d4e6a6 100644 --- a/arch/x86/kernel/apic_32.c +++ b/arch/x86/kernel/apic_32.c @@ -128,7 +128,11 @@ static inline int lapic_get_version(void) */ static inline int lapic_is_integrated(void) { +#ifdef CONFIG_X86_64 + return 1; +#else return APIC_INTEGRATED(lapic_get_version()); +#endif } /* diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c index 46523c0cc6f..18c0b8cd237 100644 --- a/arch/x86/kernel/apic_64.c +++ b/arch/x86/kernel/apic_64.c @@ -111,11 +111,15 @@ static inline int lapic_get_version(void) } /* - * Check, if the APIC is integrated or a seperate chip + * Check, if the APIC is integrated or a separate chip */ static inline int lapic_is_integrated(void) { +#ifdef CONFIG_X86_64 return 1; +#else + return APIC_INTEGRATED(lapic_get_version()); +#endif } /* -- cgit v1.2.3 From cf9768d7514d59b3179a886c4a47908d72e6cf76 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Sat, 16 Aug 2008 23:21:55 +0400 Subject: x86: apic - unify xapic_icr_read Signed-off-by: Cyrill Gorcunov Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic_64.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c index 18c0b8cd237..5e0738131e5 100644 --- a/arch/x86/kernel/apic_64.c +++ b/arch/x86/kernel/apic_64.c @@ -174,7 +174,7 @@ u64 xapic_icr_read(void) icr2 = apic_read(APIC_ICR2); icr1 = apic_read(APIC_ICR); - return (icr1 | ((u64)icr2 << 32)); + return icr1 | ((u64)icr2 << 32); } static struct apic_ops xapic_ops = { -- cgit v1.2.3 From 7b22ff5344fda666e0938e5261ea7b9a3dfce497 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Mon, 18 Aug 2008 00:36:18 +0900 Subject: x86 gart: allocate size-aligned address for alloc_coherent, v2 This patch changes GART IOMMU to return a size aligned address wrt dma_alloc_coherent, as DMA-mapping.txt defines: The cpu return address and the DMA bus master address are both guaranteed to be aligned to the smallest PAGE_SIZE order which is greater than or equal to the requested size. This invariant exists (for example) to guarantee that if you allocate a chunk which is smaller than or equal to 64 kilobytes, the extent of the buffer you receive will not cross a 64K boundary. Signed-off-by: FUJITA Tomonori Signed-off-by: Ingo Molnar --- arch/x86/kernel/pci-gart_64.c | 25 ++++++++++++++++--------- 1 file changed, 16 insertions(+), 9 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c index cdab6784907..4d8efb05428 100644 --- a/arch/x86/kernel/pci-gart_64.c +++ b/arch/x86/kernel/pci-gart_64.c @@ -82,7 +82,8 @@ AGPEXTERN __u32 *agp_gatt_table; static unsigned long next_bit; /* protected by iommu_bitmap_lock */ static int need_flush; /* global flush state. set for each gart wrap */ -static unsigned long alloc_iommu(struct device *dev, int size) +static unsigned long alloc_iommu(struct device *dev, int size, + unsigned long align_mask) { unsigned long offset, flags; unsigned long boundary_size; @@ -95,11 +96,12 @@ static unsigned long alloc_iommu(struct device *dev, int size) spin_lock_irqsave(&iommu_bitmap_lock, flags); offset = iommu_area_alloc(iommu_gart_bitmap, iommu_pages, next_bit, - size, base_index, boundary_size, 0); + size, base_index, boundary_size, align_mask); if (offset == -1) { need_flush = 1; offset = iommu_area_alloc(iommu_gart_bitmap, iommu_pages, 0, - size, base_index, boundary_size, 0); + size, base_index, boundary_size, + align_mask); } if (offset != -1) { next_bit = offset+size; @@ -236,10 +238,10 @@ nonforced_iommu(struct device *dev, unsigned long addr, size_t size) * Caller needs to check if the iommu is needed and flush. */ static dma_addr_t dma_map_area(struct device *dev, dma_addr_t phys_mem, - size_t size, int dir) + size_t size, int dir, unsigned long align_mask) { unsigned long npages = iommu_num_pages(phys_mem, size); - unsigned long iommu_page = alloc_iommu(dev, npages); + unsigned long iommu_page = alloc_iommu(dev, npages, align_mask); int i; if (iommu_page == -1) { @@ -262,7 +264,11 @@ static dma_addr_t dma_map_area(struct device *dev, dma_addr_t phys_mem, static dma_addr_t gart_map_simple(struct device *dev, phys_addr_t paddr, size_t size, int dir) { - dma_addr_t map = dma_map_area(dev, paddr, size, dir); + dma_addr_t map; + unsigned long align_mask; + + align_mask = (1UL << get_order(size)) - 1; + map = dma_map_area(dev, paddr, size, dir, align_mask); flush_gart(); @@ -281,7 +287,8 @@ gart_map_single(struct device *dev, phys_addr_t paddr, size_t size, int dir) if (!need_iommu(dev, paddr, size)) return paddr; - bus = gart_map_simple(dev, paddr, size, dir); + bus = dma_map_area(dev, paddr, size, dir, 0); + flush_gart(); return bus; } @@ -340,7 +347,7 @@ static int dma_map_sg_nonforce(struct device *dev, struct scatterlist *sg, unsigned long addr = sg_phys(s); if (nonforced_iommu(dev, addr, s->length)) { - addr = dma_map_area(dev, addr, s->length, dir); + addr = dma_map_area(dev, addr, s->length, dir, 0); if (addr == bad_dma_address) { if (i > 0) gart_unmap_sg(dev, sg, i, dir); @@ -362,7 +369,7 @@ static int __dma_map_cont(struct device *dev, struct scatterlist *start, int nelems, struct scatterlist *sout, unsigned long pages) { - unsigned long iommu_start = alloc_iommu(dev, pages); + unsigned long iommu_start = alloc_iommu(dev, pages, 0); unsigned long iommu_page = iommu_start; struct scatterlist *s; int i; -- cgit v1.2.3 From d5e629a6f88137fb77c4cc857be5ea7c3f27110d Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Sun, 17 Aug 2008 21:12:27 -0700 Subject: x86: apic - unify lapic_resume - fix Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic_64.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c index 5e0738131e5..ad532dccd11 100644 --- a/arch/x86/kernel/apic_64.c +++ b/arch/x86/kernel/apic_64.c @@ -1446,6 +1446,7 @@ static int lapic_resume(struct sys_device *dev) enable_x2apic(); else #endif + { /* * Make sure the APICBASE points to the right address * @@ -1456,6 +1457,7 @@ static int lapic_resume(struct sys_device *dev) l &= ~MSR_IA32_APICBASE_BASE; l |= MSR_IA32_APICBASE_ENABLE | mp_lapic_addr; wrmsr(MSR_IA32_APICBASE, l, h); + } apic_write(APIC_LVTERR, ERROR_APIC_VECTOR | APIC_LVT_MASKED); apic_write(APIC_ID, apic_pm_state.apic_id); -- cgit v1.2.3 From 8bfcb3960fde049b863266dab8c3617bb5a541aa Mon Sep 17 00:00:00 2001 From: Thomas Petazzoni Date: Mon, 18 Aug 2008 12:33:20 +0200 Subject: x86: make movsl_mask definition non-CPU specific movsl_mask is currently defined in arch/x86/kernel/cpu/intel.c, which contains code specific to Intel CPUs. However, movsl_mask is used in the non-CPU specific code in arch/x86/lib/usercopy_32.c, which breaks the compilation when support for Intel CPUs is compiled out. This patch solves this problem by moving movsl_mask's definition close to its users in arch/x86/lib/usercopy_32.c. Signed-off-by: Thomas Petazzoni Cc: michael@free-electrons.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/intel.c | 7 ------- 1 file changed, 7 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index b75f2569b8f..5c8959b8a42 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -23,13 +23,6 @@ #include #endif -#ifdef CONFIG_X86_INTEL_USERCOPY -/* - * Alignment at which movsl is preferred for bulk memory copies. - */ -struct movsl_mask movsl_mask __read_mostly; -#endif - static void __cpuinit early_init_intel(struct cpuinfo_x86 *c) { /* Netburst reports 64 bytes clflush size, but does IO in 128 bytes */ -- cgit v1.2.3 From 774400a3ba23b63f4de39e67ce6c4e48935809dc Mon Sep 17 00:00:00 2001 From: Thomas Petazzoni Date: Mon, 18 Aug 2008 12:33:21 +0200 Subject: x86: move cmpxchg fallbacks to a generic place arch/x86/kernel/cpu/intel.c defines a few fallback functions (cmpxchg_*()) that are used when the CPU doesn't support cmpxchg and/or cmpxchg64 natively. However, while defined in an Intel-specific file, these functions are also used for CPUs from other vendors when they don't support cmpxchg and/or cmpxchg64. This breaks the compilation when support for Intel CPUs is disabled. This patch moves these functions to a new arch/x86/kernel/cpu/cmpxchg.c file, unconditionally compiled when X86_32 is enabled. Signed-off-by: Thomas Petazzoni Cc: michael@free-electrons.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/Makefile | 2 +- arch/x86/kernel/cpu/cmpxchg.c | 72 +++++++++++++++++++++++++++++++++++++++++++ arch/x86/kernel/cpu/intel.c | 64 -------------------------------------- 3 files changed, 73 insertions(+), 65 deletions(-) create mode 100644 arch/x86/kernel/cpu/cmpxchg.c (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile index ee76eaad300..25b4c063fbf 100644 --- a/arch/x86/kernel/cpu/Makefile +++ b/arch/x86/kernel/cpu/Makefile @@ -5,7 +5,7 @@ obj-y := intel_cacheinfo.o addon_cpuid_features.o obj-y += proc.o feature_names.o -obj-$(CONFIG_X86_32) += common.o bugs.o +obj-$(CONFIG_X86_32) += common.o bugs.o cmpxchg.o obj-$(CONFIG_X86_64) += common_64.o bugs_64.o obj-$(CONFIG_X86_32) += amd.o obj-$(CONFIG_X86_64) += amd_64.o diff --git a/arch/x86/kernel/cpu/cmpxchg.c b/arch/x86/kernel/cpu/cmpxchg.c new file mode 100644 index 00000000000..2056ccf572c --- /dev/null +++ b/arch/x86/kernel/cpu/cmpxchg.c @@ -0,0 +1,72 @@ +/* + * cmpxchg*() fallbacks for CPU not supporting these instructions + */ + +#include +#include +#include + +#ifndef CONFIG_X86_CMPXCHG +unsigned long cmpxchg_386_u8(volatile void *ptr, u8 old, u8 new) +{ + u8 prev; + unsigned long flags; + + /* Poor man's cmpxchg for 386. Unsuitable for SMP */ + local_irq_save(flags); + prev = *(u8 *)ptr; + if (prev == old) + *(u8 *)ptr = new; + local_irq_restore(flags); + return prev; +} +EXPORT_SYMBOL(cmpxchg_386_u8); + +unsigned long cmpxchg_386_u16(volatile void *ptr, u16 old, u16 new) +{ + u16 prev; + unsigned long flags; + + /* Poor man's cmpxchg for 386. Unsuitable for SMP */ + local_irq_save(flags); + prev = *(u16 *)ptr; + if (prev == old) + *(u16 *)ptr = new; + local_irq_restore(flags); + return prev; +} +EXPORT_SYMBOL(cmpxchg_386_u16); + +unsigned long cmpxchg_386_u32(volatile void *ptr, u32 old, u32 new) +{ + u32 prev; + unsigned long flags; + + /* Poor man's cmpxchg for 386. Unsuitable for SMP */ + local_irq_save(flags); + prev = *(u32 *)ptr; + if (prev == old) + *(u32 *)ptr = new; + local_irq_restore(flags); + return prev; +} +EXPORT_SYMBOL(cmpxchg_386_u32); +#endif + +#ifndef CONFIG_X86_CMPXCHG64 +unsigned long long cmpxchg_486_u64(volatile void *ptr, u64 old, u64 new) +{ + u64 prev; + unsigned long flags; + + /* Poor man's cmpxchg8b for 386 and 486. Unsuitable for SMP */ + local_irq_save(flags); + prev = *(u64 *)ptr; + if (prev == old) + *(u64 *)ptr = new; + local_irq_restore(flags); + return prev; +} +EXPORT_SYMBOL(cmpxchg_486_u64); +#endif + diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 5c8959b8a42..77618c717d7 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -307,69 +307,5 @@ static struct cpu_dev intel_cpu_dev __cpuinitdata = { cpu_vendor_dev_register(X86_VENDOR_INTEL, &intel_cpu_dev); -#ifndef CONFIG_X86_CMPXCHG -unsigned long cmpxchg_386_u8(volatile void *ptr, u8 old, u8 new) -{ - u8 prev; - unsigned long flags; - - /* Poor man's cmpxchg for 386. Unsuitable for SMP */ - local_irq_save(flags); - prev = *(u8 *)ptr; - if (prev == old) - *(u8 *)ptr = new; - local_irq_restore(flags); - return prev; -} -EXPORT_SYMBOL(cmpxchg_386_u8); - -unsigned long cmpxchg_386_u16(volatile void *ptr, u16 old, u16 new) -{ - u16 prev; - unsigned long flags; - - /* Poor man's cmpxchg for 386. Unsuitable for SMP */ - local_irq_save(flags); - prev = *(u16 *)ptr; - if (prev == old) - *(u16 *)ptr = new; - local_irq_restore(flags); - return prev; -} -EXPORT_SYMBOL(cmpxchg_386_u16); - -unsigned long cmpxchg_386_u32(volatile void *ptr, u32 old, u32 new) -{ - u32 prev; - unsigned long flags; - - /* Poor man's cmpxchg for 386. Unsuitable for SMP */ - local_irq_save(flags); - prev = *(u32 *)ptr; - if (prev == old) - *(u32 *)ptr = new; - local_irq_restore(flags); - return prev; -} -EXPORT_SYMBOL(cmpxchg_386_u32); -#endif - -#ifndef CONFIG_X86_CMPXCHG64 -unsigned long long cmpxchg_486_u64(volatile void *ptr, u64 old, u64 new) -{ - u64 prev; - unsigned long flags; - - /* Poor man's cmpxchg8b for 386 and 486. Unsuitable for SMP */ - local_irq_save(flags); - prev = *(u64 *)ptr; - if (prev == old) - *(u64 *)ptr = new; - local_irq_restore(flags); - return prev; -} -EXPORT_SYMBOL(cmpxchg_486_u64); -#endif - /* arch_initcall(intel_cpu_init); */ -- cgit v1.2.3 From 8d02c2110b3fb8e2700b31596a582a2989fd72ba Mon Sep 17 00:00:00 2001 From: Thomas Petazzoni Date: Tue, 5 Aug 2008 11:45:19 +0200 Subject: x86: configuration options to compile out x86 CPU support code This patch adds some configuration options that allow to compile out CPU vendor-specific code in x86 kernels (in arch/x86/kernel/cpu). The new configuration options are only visible when CONFIG_EMBEDDED is selected, as they are mostly interesting for space savings reasons. An example of size saving, on x86 with only Intel CPU support: text data bss dec hex filename 1125479 118760 212992 1457231 163c4f vmlinux.old 1121355 116536 212992 1450883 162383 vmlinux -4124 -2224 0 -6348 -18CC +/- However, I'm not exactly sure that the Kconfig wording is correct with regard to !64BIT / 64BIT. [ mingo@elte.hu: convert macro to inline ] Signed-off-by: Thomas Petazzoni Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/Makefile | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile index 25b4c063fbf..a0fc6c14438 100644 --- a/arch/x86/kernel/cpu/Makefile +++ b/arch/x86/kernel/cpu/Makefile @@ -7,15 +7,16 @@ obj-y += proc.o feature_names.o obj-$(CONFIG_X86_32) += common.o bugs.o cmpxchg.o obj-$(CONFIG_X86_64) += common_64.o bugs_64.o -obj-$(CONFIG_X86_32) += amd.o -obj-$(CONFIG_X86_64) += amd_64.o -obj-$(CONFIG_X86_32) += cyrix.o -obj-$(CONFIG_X86_32) += centaur.o -obj-$(CONFIG_X86_64) += centaur_64.o -obj-$(CONFIG_X86_32) += transmeta.o -obj-$(CONFIG_X86_32) += intel.o -obj-$(CONFIG_X86_64) += intel_64.o -obj-$(CONFIG_X86_32) += umc.o + +obj-$(CONFIG_CPU_SUP_AMD_32) += amd.o +obj-$(CONFIG_CPU_SUP_AMD_64) += amd_64.o +obj-$(CONFIG_CPU_SUP_CYRIX_32) += cyrix.o +obj-$(CONFIG_CPU_SUP_CENTAUR_32) += centaur.o +obj-$(CONFIG_CPU_SUP_CENTAUR_64) += centaur_64.o +obj-$(CONFIG_CPU_SUP_TRANSMETA_32) += transmeta.o +obj-$(CONFIG_CPU_SUP_INTEL_32) += intel.o +obj-$(CONFIG_CPU_SUP_INTEL_64) += intel_64.o +obj-$(CONFIG_CPU_SUP_UMC_32) += umc.o obj-$(CONFIG_X86_MCE) += mcheck/ obj-$(CONFIG_MTRR) += mtrr/ -- cgit v1.2.3 From b6c8051311e1a14d229df05ea39d0a1b2ce90cdd Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Mon, 18 Aug 2008 20:45:49 +0400 Subject: x86: apic - rearrange maxcpu definition Signed-off-by: Cyrill Gorcunov Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic_32.c | 4 ++-- arch/x86/kernel/apic_64.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c index 41134d4e6a6..8d1febf05da 100644 --- a/arch/x86/kernel/apic_32.c +++ b/arch/x86/kernel/apic_32.c @@ -114,6 +114,8 @@ static DEFINE_PER_CPU(struct clock_event_device, lapic_events); static int enabled_via_apicbase; static unsigned long apic_phys; +unsigned int __cpuinitdata maxcpus = NR_CPUS; + /* * Get the LAPIC version @@ -1459,8 +1461,6 @@ void disconnect_bsp_APIC(int virt_wire_setup) } } -unsigned int __cpuinitdata maxcpus = NR_CPUS; - void __cpuinit generic_processor_info(int apicid, int version) { int cpu; diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c index ad532dccd11..46acb9b47ae 100644 --- a/arch/x86/kernel/apic_64.c +++ b/arch/x86/kernel/apic_64.c @@ -98,10 +98,10 @@ static struct clock_event_device lapic_clockevent = { static DEFINE_PER_CPU(struct clock_event_device, lapic_events); static unsigned long apic_phys; +unsigned int __cpuinitdata maxcpus = NR_CPUS; unsigned long mp_lapic_addr; -unsigned int __cpuinitdata maxcpus = NR_CPUS; /* * Get the LAPIC version */ -- cgit v1.2.3 From f1ee37891dab6014f6b0ec77b18bc07e2369a55f Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Mon, 18 Aug 2008 20:45:50 +0400 Subject: x86: apic - unify setup_boot_APIC_clock Signed-off-by: Cyrill Gorcunov Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic_32.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c index 8d1febf05da..80db3e0426c 100644 --- a/arch/x86/kernel/apic_32.c +++ b/arch/x86/kernel/apic_32.c @@ -612,6 +612,7 @@ void __init setup_boot_APIC_clock(void) * broadcast mechanism is used. On UP systems simply ignore it. */ if (disable_apic_timer) { + printk(KERN_INFO "Disabling APIC timer\n"); /* No broadcast on UP ! */ if (num_possible_cpus() > 1) { lapic_clockevent.mult = 1; -- cgit v1.2.3 From 990b183e58cb513a62492b6218987750e106cbfb Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Mon, 18 Aug 2008 20:45:51 +0400 Subject: x86: apic - unify disable_local_APIC Signed-off-by: Cyrill Gorcunov Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic_32.c | 4 +++- arch/x86/kernel/apic_64.c | 14 ++++++++++++++ 2 files changed, 17 insertions(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c index 80db3e0426c..13c4b79441d 100644 --- a/arch/x86/kernel/apic_32.c +++ b/arch/x86/kernel/apic_32.c @@ -796,7 +796,7 @@ void clear_local_APIC(void) */ void disable_local_APIC(void) { - unsigned long value; + unsigned int value; clear_local_APIC(); @@ -808,6 +808,7 @@ void disable_local_APIC(void) value &= ~APIC_SPIV_APIC_ENABLED; apic_write(APIC_SPIV, value); +#ifdef CONFIG_X86_32 /* * When LAPIC was disabled by the BIOS and enabled by the kernel, * restore the disabled state. @@ -819,6 +820,7 @@ void disable_local_APIC(void) l &= ~MSR_IA32_APICBASE_ENABLE; wrmsr(MSR_IA32_APICBASE, l, h); } +#endif } /* diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c index 46acb9b47ae..4fb903b2fc3 100644 --- a/arch/x86/kernel/apic_64.c +++ b/arch/x86/kernel/apic_64.c @@ -691,6 +691,20 @@ void disable_local_APIC(void) value = apic_read(APIC_SPIV); value &= ~APIC_SPIV_APIC_ENABLED; apic_write(APIC_SPIV, value); + +#ifdef CONFIG_X86_32 + /* + * When LAPIC was disabled by the BIOS and enabled by the kernel, + * restore the disabled state. + */ + if (enabled_via_apicbase) { + unsigned int l, h; + + rdmsr(MSR_IA32_APICBASE, l, h); + l &= ~MSR_IA32_APICBASE_ENABLE; + wrmsr(MSR_IA32_APICBASE, l, h); + } +#endif } void lapic_shutdown(void) -- cgit v1.2.3 From fe4024dcb0c01e5399394d2807406a2c13fb1eb7 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Mon, 18 Aug 2008 20:45:52 +0400 Subject: x86: apic - unify lapic_shutdown Signed-off-by: Cyrill Gorcunov Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic_32.c | 9 ++++++--- arch/x86/kernel/apic_64.c | 14 +++++++++++++- 2 files changed, 19 insertions(+), 4 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c index 13c4b79441d..d4efe86adc7 100644 --- a/arch/x86/kernel/apic_32.c +++ b/arch/x86/kernel/apic_32.c @@ -838,10 +838,13 @@ void lapic_shutdown(void) local_irq_save(flags); - if (enabled_via_apicbase) - disable_local_APIC(); - else +#ifdef CONFIG_X86_32 + if (!enabled_via_apicbase) clear_local_APIC(); + else +#endif + disable_local_APIC(); + local_irq_restore(flags); } diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c index 4fb903b2fc3..48806546d49 100644 --- a/arch/x86/kernel/apic_64.c +++ b/arch/x86/kernel/apic_64.c @@ -707,6 +707,12 @@ void disable_local_APIC(void) #endif } +/* + * If Linux enabled the LAPIC against the BIOS default disable it down before + * re-entering the BIOS on shutdown. Otherwise the BIOS may get confused and + * not power-off. Additionally clear all LVT entries before disable_local_APIC + * for the case where Linux didn't enable the LAPIC. + */ void lapic_shutdown(void) { unsigned long flags; @@ -716,7 +722,13 @@ void lapic_shutdown(void) local_irq_save(flags); - disable_local_APIC(); +#ifdef CONFIG_X86_32 + if (!enabled_via_apicbase) + clear_local_APIC(); + else +#endif + disable_local_APIC(); + local_irq_restore(flags); } -- cgit v1.2.3 From 36c9d6742897fa414f51c4e9d0f20ab4e6bf942c Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Mon, 18 Aug 2008 20:45:53 +0400 Subject: x86: apic - unify connect_bsp_APIC Signed-off-by: Cyrill Gorcunov Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic_32.c | 2 ++ arch/x86/kernel/apic_64.c | 20 ++++++++++++++++++-- 2 files changed, 20 insertions(+), 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c index d4efe86adc7..6d230e9d0a7 100644 --- a/arch/x86/kernel/apic_32.c +++ b/arch/x86/kernel/apic_32.c @@ -1387,6 +1387,7 @@ void smp_error_interrupt(struct pt_regs *regs) */ void __init connect_bsp_APIC(void) { +#ifdef CONFIG_X86_32 if (pic_mode) { /* * Do not trust the local APIC being empty at bootup. @@ -1401,6 +1402,7 @@ void __init connect_bsp_APIC(void) outb(0x70, 0x22); outb(0x01, 0x23); } +#endif enable_apic_mode(); } diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c index 48806546d49..5579e213b5d 100644 --- a/arch/x86/kernel/apic_64.c +++ b/arch/x86/kernel/apic_64.c @@ -1285,10 +1285,26 @@ asmlinkage void smp_error_interrupt(void) } /** - * * connect_bsp_APIC - attach the APIC to the interrupt system - * */ + * connect_bsp_APIC - attach the APIC to the interrupt system + */ void __init connect_bsp_APIC(void) { +#ifdef CONFIG_X86_32 + if (pic_mode) { + /* + * Do not trust the local APIC being empty at bootup. + */ + clear_local_APIC(); + /* + * PIC mode, enable APIC mode in the IMCR, i.e. connect BSP's + * local APIC to INT and NMI lines. + */ + apic_printk(APIC_VERBOSE, "leaving PIC mode, " + "enabling APIC mode.\n"); + outb(0x70, 0x22); + outb(0x01, 0x23); + } +#endif enable_apic_mode(); } -- cgit v1.2.3 From c43da2f5e92fe3bcc256f0c0d6cb858368da5bd9 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Mon, 18 Aug 2008 20:45:54 +0400 Subject: x86: apic - unify lapic_setup_esr We use 32bit code former for 64bit mode since it's much better implementation and easier to merge. Signed-off-by: Cyrill Gorcunov Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic_32.c | 21 +++++++++---------- arch/x86/kernel/apic_64.c | 51 ++++++++++++++++++++++++++++++++++++----------- 2 files changed, 50 insertions(+), 22 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c index 6d230e9d0a7..3c1562afa27 100644 --- a/arch/x86/kernel/apic_32.c +++ b/arch/x86/kernel/apic_32.c @@ -983,6 +983,16 @@ static void __cpuinit lapic_setup_esr(void) { unsigned long oldvalue, value, maxlvt; if (lapic_is_integrated() && !esr_disable) { + if (esr_disable) { + /* + * Something untraceable is creating bad interrupts on + * secondary quads ... for the moment, just leave the + * ESR disabled - we can't do anything useful with the + * errors anyway - mbligh + */ + printk(KERN_INFO "Leaving ESR disabled.\n"); + return; + } /* !82489DX */ maxlvt = lapic_get_maxlvt(); if (maxlvt > 3) /* Due to the Pentium erratum 3AP. */ @@ -1003,16 +1013,7 @@ static void __cpuinit lapic_setup_esr(void) "vector: 0x%08lx after: 0x%08lx\n", oldvalue, value); } else { - if (esr_disable) - /* - * Something untraceable is creating bad interrupts on - * secondary quads ... for the moment, just leave the - * ESR disabled - we can't do anything useful with the - * errors anyway - mbligh - */ - printk(KERN_INFO "Leaving ESR disabled.\n"); - else - printk(KERN_INFO "No ESR for 82489DX.\n"); + printk(KERN_INFO "No ESR for 82489DX.\n"); } } diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c index 5579e213b5d..d74abf7e92f 100644 --- a/arch/x86/kernel/apic_64.c +++ b/arch/x86/kernel/apic_64.c @@ -863,6 +863,45 @@ void __init init_bsp_APIC(void) apic_write(APIC_LVT1, value); } +static void __cpuinit lapic_setup_esr(void) +{ + unsigned long oldvalue, value, maxlvt; + if (lapic_is_integrated() && !esr_disable) { + if (esr_disable) { + /* + * Something untraceable is creating bad interrupts on + * secondary quads ... for the moment, just leave the + * ESR disabled - we can't do anything useful with the + * errors anyway - mbligh + */ + printk(KERN_INFO "Leaving ESR disabled.\n"); + return; + } + /* !82489DX */ + maxlvt = lapic_get_maxlvt(); + if (maxlvt > 3) /* Due to the Pentium erratum 3AP. */ + apic_write(APIC_ESR, 0); + oldvalue = apic_read(APIC_ESR); + + /* enables sending errors */ + value = ERROR_APIC_VECTOR; + apic_write(APIC_LVTERR, value); + /* + * spec says clear errors after enabling vector. + */ + if (maxlvt > 3) + apic_write(APIC_ESR, 0); + value = apic_read(APIC_ESR); + if (value != oldvalue) + apic_printk(APIC_VERBOSE, "ESR value before enabling " + "vector: 0x%08lx after: 0x%08lx\n", + oldvalue, value); + } else { + printk(KERN_INFO "No ESR for 82489DX.\n"); + } +} + + /** * setup_local_APIC - setup the local APIC */ @@ -968,18 +1007,6 @@ void __cpuinit setup_local_APIC(void) preempt_enable(); } -static void __cpuinit lapic_setup_esr(void) -{ - unsigned maxlvt = lapic_get_maxlvt(); - - apic_write(APIC_LVTERR, ERROR_APIC_VECTOR); - /* - * spec says clear errors after enabling vector. - */ - if (maxlvt > 3) - apic_write(APIC_ESR, 0); -} - void __cpuinit end_local_APIC_setup(void) { lapic_setup_esr(); -- cgit v1.2.3 From c40aaec6868401671a0ca14ed77e9b2da2d1f223 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Mon, 18 Aug 2008 20:45:55 +0400 Subject: x86: apic - unify __setup_APIC_LVTT Signed-off-by: Cyrill Gorcunov Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic_32.c | 10 +++++++--- arch/x86/kernel/apic_64.c | 12 ++++++++---- 2 files changed, 15 insertions(+), 7 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c index 3c1562afa27..65419c7d437 100644 --- a/arch/x86/kernel/apic_32.c +++ b/arch/x86/kernel/apic_32.c @@ -248,8 +248,12 @@ int lapic_get_maxlvt(void) * Local APIC timer */ -/* Clock divisor is set to 16 */ +/* Clock divisor */ +#ifdef CONFG_X86_64 +#define APIC_DIVISOR 1 +#else #define APIC_DIVISOR 16 +#endif /* * This function sets up the local APIC timer, with a timeout of @@ -281,8 +285,8 @@ static void __setup_APIC_LVTT(unsigned int clocks, int oneshot, int irqen) */ tmp_value = apic_read(APIC_TDCR); apic_write(APIC_TDCR, - (tmp_value & ~(APIC_TDR_DIV_1 | APIC_TDR_DIV_TMBASE)) | - APIC_TDR_DIV_16); + (tmp_value & ~(APIC_TDR_DIV_1 | APIC_TDR_DIV_TMBASE)) | + APIC_TDR_DIV_16); if (!oneshot) apic_write(APIC_TMICT, clocks / APIC_DIVISOR); diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c index d74abf7e92f..fe57db9f3fb 100644 --- a/arch/x86/kernel/apic_64.c +++ b/arch/x86/kernel/apic_64.c @@ -259,8 +259,12 @@ int lapic_get_maxlvt(void) * Local APIC timer */ -/* Clock divisor is set to 1 */ +/* Clock divisor */ +#ifdef CONFG_X86_64 #define APIC_DIVISOR 1 +#else +#define APIC_DIVISOR 16 +#endif /* * This function sets up the local APIC timer, with a timeout of @@ -291,9 +295,9 @@ static void __setup_APIC_LVTT(unsigned int clocks, int oneshot, int irqen) * Divide PICLK by 16 */ tmp_value = apic_read(APIC_TDCR); - apic_write(APIC_TDCR, (tmp_value - & ~(APIC_TDR_DIV_1 | APIC_TDR_DIV_TMBASE)) - | APIC_TDR_DIV_16); + apic_write(APIC_TDCR, + (tmp_value & ~(APIC_TDR_DIV_1 | APIC_TDR_DIV_TMBASE)) | + APIC_TDR_DIV_16); if (!oneshot) apic_write(APIC_TMICT, clocks / APIC_DIVISOR); -- cgit v1.2.3 From c177b0bc03e0e11623e2099db42903fb0caf0fd3 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Mon, 18 Aug 2008 20:45:56 +0400 Subject: x86: apic - unify disconnect_bsp_APIC - just #ifdef added Signed-off-by: Cyrill Gorcunov Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic_32.c | 66 ++++++++++++++++++++++++----------------------- arch/x86/kernel/apic_64.c | 23 +++++++++++++++-- 2 files changed, 55 insertions(+), 34 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c index 65419c7d437..3095bb71eae 100644 --- a/arch/x86/kernel/apic_32.c +++ b/arch/x86/kernel/apic_32.c @@ -1420,6 +1420,7 @@ void __init connect_bsp_APIC(void) */ void disconnect_bsp_APIC(int virt_wire_setup) { +#ifdef CONFIG_X86_32 if (pic_mode) { /* * Put the board back into PIC mode (has an effect only on @@ -1431,47 +1432,48 @@ void disconnect_bsp_APIC(int virt_wire_setup) "entering PIC mode.\n"); outb(0x70, 0x22); outb(0x00, 0x23); - } else { - /* Go back to Virtual Wire compatibility mode */ - unsigned long value; + return; + } +#endif - /* For the spurious interrupt use vector F, and enable it */ - value = apic_read(APIC_SPIV); - value &= ~APIC_VECTOR_MASK; - value |= APIC_SPIV_APIC_ENABLED; - value |= 0xf; - apic_write(APIC_SPIV, value); + /* Go back to Virtual Wire compatibility mode */ + unsigned int value; - if (!virt_wire_setup) { - /* - * For LVT0 make it edge triggered, active high, - * external and enabled - */ - value = apic_read(APIC_LVT0); - value &= ~(APIC_MODE_MASK | APIC_SEND_PENDING | - APIC_INPUT_POLARITY | APIC_LVT_REMOTE_IRR | - APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED); - value |= APIC_LVT_REMOTE_IRR | APIC_SEND_PENDING; - value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_EXTINT); - apic_write(APIC_LVT0, value); - } else { - /* Disable LVT0 */ - apic_write(APIC_LVT0, APIC_LVT_MASKED); - } + /* For the spurious interrupt use vector F, and enable it */ + value = apic_read(APIC_SPIV); + value &= ~APIC_VECTOR_MASK; + value |= APIC_SPIV_APIC_ENABLED; + value |= 0xf; + apic_write(APIC_SPIV, value); + if (!virt_wire_setup) { /* - * For LVT1 make it edge triggered, active high, nmi and - * enabled + * For LVT0 make it edge triggered, active high, + * external and enabled */ - value = apic_read(APIC_LVT1); - value &= ~( - APIC_MODE_MASK | APIC_SEND_PENDING | + value = apic_read(APIC_LVT0); + value &= ~(APIC_MODE_MASK | APIC_SEND_PENDING | APIC_INPUT_POLARITY | APIC_LVT_REMOTE_IRR | APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED); value |= APIC_LVT_REMOTE_IRR | APIC_SEND_PENDING; - value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_NMI); - apic_write(APIC_LVT1, value); + value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_EXTINT); + apic_write(APIC_LVT0, value); + } else { + /* Disable LVT0 */ + apic_write(APIC_LVT0, APIC_LVT_MASKED); } + + /* + * For LVT1 make it edge triggered, active high, + * nmi and enabled + */ + value = apic_read(APIC_LVT1); + value &= ~(APIC_MODE_MASK | APIC_SEND_PENDING | + APIC_INPUT_POLARITY | APIC_LVT_REMOTE_IRR | + APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED); + value |= APIC_LVT_REMOTE_IRR | APIC_SEND_PENDING; + value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_NMI); + apic_write(APIC_LVT1, value); } void __cpuinit generic_processor_info(int apicid, int version) diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c index fe57db9f3fb..0d969103fce 100644 --- a/arch/x86/kernel/apic_64.c +++ b/arch/x86/kernel/apic_64.c @@ -1348,8 +1348,24 @@ void __init connect_bsp_APIC(void) */ void disconnect_bsp_APIC(int virt_wire_setup) { +#ifdef CONFIG_X86_32 + if (pic_mode) { + /* + * Put the board back into PIC mode (has an effect only on + * certain older boards). Note that APIC interrupts, including + * IPIs, won't work beyond this point! The only exception are + * INIT IPIs. + */ + apic_printk(APIC_VERBOSE, "disabling APIC mode, " + "entering PIC mode.\n"); + outb(0x70, 0x22); + outb(0x00, 0x23); + return; + } +#endif + /* Go back to Virtual Wire compatibility mode */ - unsigned long value; + unsigned int value; /* For the spurious interrupt use vector F, and enable it */ value = apic_read(APIC_SPIV); @@ -1375,7 +1391,10 @@ void disconnect_bsp_APIC(int virt_wire_setup) apic_write(APIC_LVT0, APIC_LVT_MASKED); } - /* For LVT1 make it edge triggered, active high, nmi and enabled */ + /* + * For LVT1 make it edge triggered, active high, + * nmi and enabled + */ value = apic_read(APIC_LVT1); value &= ~(APIC_MODE_MASK | APIC_SEND_PENDING | APIC_INPUT_POLARITY | APIC_LVT_REMOTE_IRR | -- cgit v1.2.3 From 1b313f4a6d7bee7b2c034b3f1e203bc360a71cca Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Mon, 18 Aug 2008 20:45:57 +0400 Subject: x86: apic - generic_processor_info - use physid_set instead of phys_cpu and physids_or - set phys_cpu_present_map bit AFTER check for allowed number of processors - add checking for APIC valid version in 64bit mode (mostly not needed but added for merging purpose) - add apic_version definition for 64bit mode which is used now Signed-off-by: Cyrill Gorcunov Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic_32.c | 15 ++++++++------- arch/x86/kernel/apic_64.c | 41 +++++++++++++++++++++++++++++++++++++++-- 2 files changed, 47 insertions(+), 9 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c index 3095bb71eae..c3a252b1a8b 100644 --- a/arch/x86/kernel/apic_32.c +++ b/arch/x86/kernel/apic_32.c @@ -1480,7 +1480,6 @@ void __cpuinit generic_processor_info(int apicid, int version) { int cpu; cpumask_t tmp_map; - physid_mask_t phys_cpu; /* * Validate version @@ -1493,9 +1492,6 @@ void __cpuinit generic_processor_info(int apicid, int version) } apic_version[apicid] = version; - phys_cpu = apicid_to_cpu_present(apicid); - physids_or(phys_cpu_present_map, phys_cpu_present_map, phys_cpu); - if (num_processors >= NR_CPUS) { printk(KERN_WARNING "WARNING: NR_CPUS limit of %i reached." " Processor ignored.\n", NR_CPUS); @@ -1512,17 +1508,19 @@ void __cpuinit generic_processor_info(int apicid, int version) cpus_complement(tmp_map, cpu_present_map); cpu = first_cpu(tmp_map); - if (apicid == boot_cpu_physical_apicid) + physid_set(apicid, phys_cpu_present_map); + if (apicid == boot_cpu_physical_apicid) { /* * x86_bios_cpu_apicid is required to have processors listed * in same order as logical cpu numbers. Hence the first * entry is BSP, and so on. */ cpu = 0; - + } if (apicid > max_physical_apicid) max_physical_apicid = apicid; +#ifdef CONFIG_X86_32 /* * Would be preferable to switch to bigsmp when CONFIG_HOTPLUG_CPU=y * but we need to work other dependencies like SMP_SUSPEND etc @@ -1542,7 +1540,9 @@ void __cpuinit generic_processor_info(int apicid, int version) def_to_bigsmp = 1; } } -#ifdef CONFIG_SMP +#endif + +#if defined(CONFIG_X86_SMP) || defined(CONFIG_X86_64) /* are we being called early in kernel startup? */ if (early_per_cpu_ptr(x86_cpu_to_apicid)) { u16 *cpu_to_apicid = early_per_cpu_ptr(x86_cpu_to_apicid); @@ -1555,6 +1555,7 @@ void __cpuinit generic_processor_info(int apicid, int version) per_cpu(x86_bios_cpu_apicid, cpu) = apicid; } #endif + cpu_set(cpu, cpu_possible_map); cpu_set(cpu, cpu_present_map); } diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c index 0d969103fce..76c20773bed 100644 --- a/arch/x86/kernel/apic_64.c +++ b/arch/x86/kernel/apic_64.c @@ -1215,6 +1215,8 @@ void __init init_apic_mappings(void) * This initializes the IO-APIC and APIC hardware if this is * a UP kernel. */ +int apic_version[MAX_APICS]; + int __init APIC_init_uniprocessor(void) { if (disable_apic) { @@ -1409,15 +1411,26 @@ void __cpuinit generic_processor_info(int apicid, int version) int cpu; cpumask_t tmp_map; + /* + * Validate version + */ + if (version == 0x0) { + printk(KERN_WARNING "BIOS bug, APIC version is 0 for CPU#%d! " + "fixing up to 0x10. (tell your hw vendor)\n", + version); + version = 0x10; + } + apic_version[apicid] = version; + if (num_processors >= NR_CPUS) { printk(KERN_WARNING "WARNING: NR_CPUS limit of %i reached." - " Processor ignored.\n", NR_CPUS); + " Processor ignored.\n", NR_CPUS); return; } if (num_processors >= maxcpus) { printk(KERN_WARNING "WARNING: maxcpus limit of %i reached." - " Processor ignored.\n", maxcpus); + " Processor ignored.\n", maxcpus); return; } @@ -1437,6 +1450,29 @@ void __cpuinit generic_processor_info(int apicid, int version) if (apicid > max_physical_apicid) max_physical_apicid = apicid; +#ifdef CONFIG_X86_32 + /* + * Would be preferable to switch to bigsmp when CONFIG_HOTPLUG_CPU=y + * but we need to work other dependencies like SMP_SUSPEND etc + * before this can be done without some confusion. + * if (CPU_HOTPLUG_ENABLED || num_processors > 8) + * - Ashok Raj + */ + if (max_physical_apicid >= 8) { + switch (boot_cpu_data.x86_vendor) { + case X86_VENDOR_INTEL: + if (!APIC_XAPIC(version)) { + def_to_bigsmp = 0; + break; + } + /* If P4 and above fall through */ + case X86_VENDOR_AMD: + def_to_bigsmp = 1; + } + } +#endif + +#if defined(CONFIG_X86_SMP) || defined(CONFIG_X86_64) /* are we being called early in kernel startup? */ if (early_per_cpu_ptr(x86_cpu_to_apicid)) { u16 *cpu_to_apicid = early_per_cpu_ptr(x86_cpu_to_apicid); @@ -1448,6 +1484,7 @@ void __cpuinit generic_processor_info(int apicid, int version) per_cpu(x86_cpu_to_apicid, cpu) = apicid; per_cpu(x86_bios_cpu_apicid, cpu) = apicid; } +#endif cpu_set(cpu, cpu_possible_map); cpu_set(cpu, cpu_present_map); -- cgit v1.2.3 From fa6b95fc7c6f2f3eb1560e1f33cd13197546c5a0 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Mon, 18 Aug 2008 20:45:58 +0400 Subject: x86: apic - unify end_local_APIC_setup Signed-off-by: Cyrill Gorcunov Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic_32.c | 6 ++++-- arch/x86/kernel/apic_64.c | 9 +++++++++ 2 files changed, 13 insertions(+), 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c index c3a252b1a8b..f1882329b9c 100644 --- a/arch/x86/kernel/apic_32.c +++ b/arch/x86/kernel/apic_32.c @@ -1155,13 +1155,15 @@ void __cpuinit setup_local_APIC(void) void __cpuinit end_local_APIC_setup(void) { - unsigned long value; - lapic_setup_esr(); + +#ifdef CONFIG_X86_32 + unsigned int value; /* Disable the local apic timer */ value = apic_read(APIC_LVTT); value |= (APIC_LVT_MASKED | LOCAL_TIMER_VECTOR); apic_write(APIC_LVTT, value); +#endif setup_apic_nmi_watchdog(NULL); apic_pm_activate(); diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c index 76c20773bed..eec10b34dc4 100644 --- a/arch/x86/kernel/apic_64.c +++ b/arch/x86/kernel/apic_64.c @@ -1014,6 +1014,15 @@ void __cpuinit setup_local_APIC(void) void __cpuinit end_local_APIC_setup(void) { lapic_setup_esr(); + +#ifdef CONFIG_X86_32 + unsigned int value; + /* Disable the local apic timer */ + value = apic_read(APIC_LVTT); + value |= (APIC_LVT_MASKED | LOCAL_TIMER_VECTOR); + apic_write(APIC_LVTT, value); +#endif + setup_apic_nmi_watchdog(NULL); apic_pm_activate(); } -- cgit v1.2.3 From 0b23e8cf553f5e706b0057363f1319867bcd1a7d Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Mon, 18 Aug 2008 20:45:59 +0400 Subject: x86: apic - unify local_apic_timer_interrupt Signed-off-by: Cyrill Gorcunov Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic_32.c | 4 ++++ arch/x86/kernel/apic_64.c | 4 ++++ 2 files changed, 8 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c index f1882329b9c..af227bce23b 100644 --- a/arch/x86/kernel/apic_32.c +++ b/arch/x86/kernel/apic_32.c @@ -685,7 +685,11 @@ static void local_apic_timer_interrupt(void) /* * the NMI deadlock-detector uses this. */ +#ifdef CONFIG_X86_64 + add_pda(apic_timer_irqs, 1); +#else per_cpu(irq_stat, cpu).apic_timer_irqs++; +#endif evt->event_handler(evt); } diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c index eec10b34dc4..a9ad2cb4ff9 100644 --- a/arch/x86/kernel/apic_64.c +++ b/arch/x86/kernel/apic_64.c @@ -567,7 +567,11 @@ static void local_apic_timer_interrupt(void) /* * the NMI deadlock-detector uses this. */ +#ifdef CONFIG_X86_64 add_pda(apic_timer_irqs, 1); +#else + per_cpu(irq_stat, cpu).apic_timer_irqs++; +#endif evt->event_handler(evt); } -- cgit v1.2.3 From 79af9bec60e6e218a1e48e8830d603d64a7fc441 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Mon, 18 Aug 2008 20:46:00 +0400 Subject: x86: apic - unify apic_set_verbosity Signed-off-by: Cyrill Gorcunov Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic_32.c | 17 ++++++++++++++--- arch/x86/kernel/apic_64.c | 46 +++++++++++++++++++++++++--------------------- 2 files changed, 39 insertions(+), 24 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c index af227bce23b..acbc4dea2ee 100644 --- a/arch/x86/kernel/apic_32.c +++ b/arch/x86/kernel/apic_32.c @@ -1768,13 +1768,24 @@ early_param("lapic_timer_c2_ok", parse_lapic_timer_c2_ok); static int __init apic_set_verbosity(char *arg) { - if (!arg) + if (!arg) { +#ifdef CONFIG_X86_64 + skip_ioapic_setup = 0; + ioapic_force = 1; + return 0; +#endif return -EINVAL; + } - if (strcmp(arg, "debug") == 0) + if (strcmp("debug", arg) == 0) apic_verbosity = APIC_DEBUG; - else if (strcmp(arg, "verbose") == 0) + else if (strcmp("verbose", arg) == 0) apic_verbosity = APIC_VERBOSE; + else { + printk(KERN_WARNING "APIC Verbosity level %s not recognised" + " use apic=verbose or apic=debug\n", arg); + return -EINVAL; + } return 0; } diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c index a9ad2cb4ff9..999781810c0 100644 --- a/arch/x86/kernel/apic_64.c +++ b/arch/x86/kernel/apic_64.c @@ -1759,27 +1759,6 @@ early_param("nox2apic", setup_nox2apic); /* * APIC command line parameters */ -static int __init apic_set_verbosity(char *str) -{ - if (str == NULL) { - skip_ioapic_setup = 0; - ioapic_force = 1; - return 0; - } - if (strcmp("debug", str) == 0) - apic_verbosity = APIC_DEBUG; - else if (strcmp("verbose", str) == 0) - apic_verbosity = APIC_VERBOSE; - else { - printk(KERN_WARNING "APIC Verbosity level %s not recognised" - " use apic=verbose or apic=debug\n", str); - return -EINVAL; - } - - return 0; -} -early_param("apic", apic_set_verbosity); - static __init int setup_disableapic(char *str) { disable_apic = 1; @@ -1824,6 +1803,31 @@ static __init int setup_apicpmtimer(char *s) } __setup("apicpmtimer", setup_apicpmtimer); +static int __init apic_set_verbosity(char *arg) +{ + if (!arg) { +#ifdef CONFIG_X86_64 + skip_ioapic_setup = 0; + ioapic_force = 1; + return 0; +#endif + return -EINVAL; + } + + if (strcmp("debug", arg) == 0) + apic_verbosity = APIC_DEBUG; + else if (strcmp("verbose", arg) == 0) + apic_verbosity = APIC_VERBOSE; + else { + printk(KERN_WARNING "APIC Verbosity level %s not recognised" + " use apic=verbose or apic=debug\n", arg); + return -EINVAL; + } + + return 0; +} +early_param("apic", apic_set_verbosity); + static int __init lapic_insert_resource(void) { if (!apic_phys) -- cgit v1.2.3 From 789fa735712d726b5cfa5c6be57171b5637a4872 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Mon, 18 Aug 2008 20:46:01 +0400 Subject: x86: apic - unify disableapic and nolapic setup handlers Signed-off-by: Cyrill Gorcunov Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic_32.c | 11 +++++++++-- arch/x86/kernel/apic_64.c | 6 +++--- 2 files changed, 12 insertions(+), 5 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c index acbc4dea2ee..5680bda62f7 100644 --- a/arch/x86/kernel/apic_32.c +++ b/arch/x86/kernel/apic_32.c @@ -1737,13 +1737,20 @@ static int __init parse_lapic(char *arg) } early_param("lapic", parse_lapic); -static int __init parse_nolapic(char *arg) +static int __init setup_disableapic(char *arg) { disable_apic = 1; setup_clear_cpu_cap(X86_FEATURE_APIC); return 0; } -early_param("nolapic", parse_nolapic); +early_param("disableapic", setup_disableapic); + +/* same as disableapic, for compatibility */ +static int __init setup_nolapic(char *arg) +{ + return setup_disableapic(arg); +} +early_param("nolapic", setup_nolapic); static int __init parse_disable_apic_timer(char *arg) { diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c index 999781810c0..7a317180ba2 100644 --- a/arch/x86/kernel/apic_64.c +++ b/arch/x86/kernel/apic_64.c @@ -1759,7 +1759,7 @@ early_param("nox2apic", setup_nox2apic); /* * APIC command line parameters */ -static __init int setup_disableapic(char *str) +static int __init setup_disableapic(char *arg) { disable_apic = 1; setup_clear_cpu_cap(X86_FEATURE_APIC); @@ -1768,9 +1768,9 @@ static __init int setup_disableapic(char *str) early_param("disableapic", setup_disableapic); /* same as disableapic, for compatibility */ -static __init int setup_nolapic(char *str) +static int __init setup_nolapic(char *arg) { - return setup_disableapic(str); + return setup_disableapic(arg); } early_param("nolapic", setup_nolapic); -- cgit v1.2.3 From 3415610b8e38b26b52a430b8846665c2d6bb3558 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Mon, 18 Aug 2008 20:46:02 +0400 Subject: x86: apic - rearrange parse_lapic_timer_c2_ok Signed-off-by: Cyrill Gorcunov Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic_32.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c index 5680bda62f7..885e306420a 100644 --- a/arch/x86/kernel/apic_32.c +++ b/arch/x86/kernel/apic_32.c @@ -1752,6 +1752,13 @@ static int __init setup_nolapic(char *arg) } early_param("nolapic", setup_nolapic); +static int __init parse_lapic_timer_c2_ok(char *arg) +{ + local_apic_timer_c2_ok = 1; + return 0; +} +early_param("lapic_timer_c2_ok", parse_lapic_timer_c2_ok); + static int __init parse_disable_apic_timer(char *arg) { disable_apic_timer = 1; @@ -1766,13 +1773,6 @@ static int __init parse_nolapic_timer(char *arg) } early_param("nolapic_timer", parse_nolapic_timer); -static int __init parse_lapic_timer_c2_ok(char *arg) -{ - local_apic_timer_c2_ok = 1; - return 0; -} -early_param("lapic_timer_c2_ok", parse_lapic_timer_c2_ok); - static int __init apic_set_verbosity(char *arg) { if (!arg) { -- cgit v1.2.3 From e75bedf415f300a08e9bbcc755784e488574a73e Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Mon, 18 Aug 2008 20:46:03 +0400 Subject: x86: apic - lapic_resume 32bit - unification fix Just add parenthesis to be identical of current 64bit implementation (so diff will not complain). Signed-off-by: Cyrill Gorcunov Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic_32.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c index 885e306420a..e975562a76a 100644 --- a/arch/x86/kernel/apic_32.c +++ b/arch/x86/kernel/apic_32.c @@ -1646,6 +1646,7 @@ static int lapic_resume(struct sys_device *dev) enable_x2apic(); else #endif + { /* * Make sure the APICBASE points to the right address * @@ -1656,6 +1657,7 @@ static int lapic_resume(struct sys_device *dev) l &= ~MSR_IA32_APICBASE_BASE; l |= MSR_IA32_APICBASE_ENABLE | mp_lapic_addr; wrmsr(MSR_IA32_APICBASE, l, h); + } apic_write(APIC_LVTERR, ERROR_APIC_VECTOR | APIC_LVT_MASKED); apic_write(APIC_ID, apic_pm_state.apic_id); -- cgit v1.2.3 From 1b4ee4e4096d430c4c12516c1d30a6b0b4f9e9e4 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Mon, 18 Aug 2008 23:12:33 +0400 Subject: x86: apic - compilation warnings fix Signed-off-by: Cyrill Gorcunov Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic_32.c | 15 +++++++++------ arch/x86/kernel/apic_64.c | 15 +++++++++------ 2 files changed, 18 insertions(+), 12 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c index e975562a76a..b8d80c29165 100644 --- a/arch/x86/kernel/apic_32.c +++ b/arch/x86/kernel/apic_32.c @@ -1162,11 +1162,13 @@ void __cpuinit end_local_APIC_setup(void) lapic_setup_esr(); #ifdef CONFIG_X86_32 - unsigned int value; - /* Disable the local apic timer */ - value = apic_read(APIC_LVTT); - value |= (APIC_LVT_MASKED | LOCAL_TIMER_VECTOR); - apic_write(APIC_LVTT, value); + { + unsigned int value; + /* Disable the local apic timer */ + value = apic_read(APIC_LVTT); + value |= (APIC_LVT_MASKED | LOCAL_TIMER_VECTOR); + apic_write(APIC_LVTT, value); + } #endif setup_apic_nmi_watchdog(NULL); @@ -1426,6 +1428,8 @@ void __init connect_bsp_APIC(void) */ void disconnect_bsp_APIC(int virt_wire_setup) { + unsigned int value; + #ifdef CONFIG_X86_32 if (pic_mode) { /* @@ -1443,7 +1447,6 @@ void disconnect_bsp_APIC(int virt_wire_setup) #endif /* Go back to Virtual Wire compatibility mode */ - unsigned int value; /* For the spurious interrupt use vector F, and enable it */ value = apic_read(APIC_SPIV); diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c index 7a317180ba2..37e037606f3 100644 --- a/arch/x86/kernel/apic_64.c +++ b/arch/x86/kernel/apic_64.c @@ -1020,11 +1020,13 @@ void __cpuinit end_local_APIC_setup(void) lapic_setup_esr(); #ifdef CONFIG_X86_32 - unsigned int value; - /* Disable the local apic timer */ - value = apic_read(APIC_LVTT); - value |= (APIC_LVT_MASKED | LOCAL_TIMER_VECTOR); - apic_write(APIC_LVTT, value); + { + unsigned int value; + /* Disable the local apic timer */ + value = apic_read(APIC_LVTT); + value |= (APIC_LVT_MASKED | LOCAL_TIMER_VECTOR); + apic_write(APIC_LVTT, value); + } #endif setup_apic_nmi_watchdog(NULL); @@ -1363,6 +1365,8 @@ void __init connect_bsp_APIC(void) */ void disconnect_bsp_APIC(int virt_wire_setup) { + unsigned int value; + #ifdef CONFIG_X86_32 if (pic_mode) { /* @@ -1380,7 +1384,6 @@ void disconnect_bsp_APIC(int virt_wire_setup) #endif /* Go back to Virtual Wire compatibility mode */ - unsigned int value; /* For the spurious interrupt use vector F, and enable it */ value = apic_read(APIC_SPIV); -- cgit v1.2.3 From 467cd0529a480c9c25f06154b788d1d1ba77828a Mon Sep 17 00:00:00 2001 From: Harvey Harrison Date: Mon, 18 Aug 2008 16:56:29 -0700 Subject: x86: early_printk.c trivial sparse fixes arch/x86/kernel/early_printk.c:404:13: warning: incorrect type in assignment (different base types) arch/x86/kernel/early_printk.c:404:13: expected restricted __le16 [assigned] [usertype] wValue arch/x86/kernel/early_printk.c:404:13: got int [signed] value arch/x86/kernel/early_printk.c:405:13: warning: incorrect type in assignment (different base types) arch/x86/kernel/early_printk.c:405:13: expected restricted __le16 [assigned] [usertype] wIndex arch/x86/kernel/early_printk.c:405:13: got int [signed] index arch/x86/kernel/early_printk.c:406:14: warning: incorrect type in assignment (different base types) arch/x86/kernel/early_printk.c:406:14: expected restricted __le16 [assigned] [usertype] wLength arch/x86/kernel/early_printk.c:406:14: got int [signed] size arch/x86/kernel/early_printk.c:845:16: warning: Using plain integer as NULL pointer arch/x86/kernel/early_printk.c:992:13: warning: symbol 'enable_debug_console' was not declared. Should it be static? Signed-off-by: Harvey Harrison Signed-off-by: Ingo Molnar --- arch/x86/kernel/early_printk.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/early_printk.c b/arch/x86/kernel/early_printk.c index 28155acf706..825e9136b02 100644 --- a/arch/x86/kernel/early_printk.c +++ b/arch/x86/kernel/early_printk.c @@ -401,9 +401,9 @@ static int dbgp_control_msg(unsigned devnum, int requesttype, int request, /* Compute the control message */ req.bRequestType = requesttype; req.bRequest = request; - req.wValue = value; - req.wIndex = index; - req.wLength = size; + req.wValue = cpu_to_le16(value); + req.wIndex = cpu_to_le16(index); + req.wLength = cpu_to_le16(size); pids = DBGP_PID_SET(USB_PID_DATA0, USB_PID_SETUP); addr = DBGP_EPADDR(devnum, 0); @@ -842,7 +842,7 @@ static int __init early_dbgp_init(char *s) ret = ehci_setup(); if (ret < 0) { dbgp_printk("ehci_setup failed\n"); - ehci_debug = 0; + ehci_debug = NULL; return -1; } @@ -989,7 +989,7 @@ static int __init setup_early_printk(char *buf) return 0; } -void __init enable_debug_console(char *buf) +static void __init enable_debug_console(char *buf) { #ifdef DBGP_DEBUG struct console *old_early_console = NULL; -- cgit v1.2.3 From 7e00df5818964298c9821365a6cb7a8304227c5c Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Mon, 18 Aug 2008 17:39:32 -0700 Subject: x86: add NOPL as a synthetic CPU feature bit The long noops ("NOPL") are supposed to be detected by family >= 6. Unfortunately, several non-Intel x86 implementations, both hardware and software, don't obey this dictum. Instead, probe for NOPL directly by executing a NOPL instruction and see if we get #UD. Signed-off-by: H. Peter Anvin --- arch/x86/kernel/cpu/common.c | 32 +++++++++++++++++++++++++++++++- arch/x86/kernel/cpu/common_64.c | 36 ++++++++++++++++++++++++++++++++++++ arch/x86/kernel/cpu/feature_names.c | 3 ++- 3 files changed, 69 insertions(+), 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 80ab20d4fa3..0785b3c8d04 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -13,6 +13,7 @@ #include #include #include +#include #ifdef CONFIG_X86_LOCAL_APIC #include #include @@ -341,6 +342,35 @@ static void __init early_cpu_detect(void) early_get_cap(c); } +/* + * The NOPL instruction is supposed to exist on all CPUs with + * family >= 6, unfortunately, that's not true in practice because + * of early VIA chips and (more importantly) broken virtualizers that + * are not easy to detect. Hence, probe for it based on first + * principles. + */ +static void __cpuinit detect_nopl(struct cpuinfo_x86 *c) +{ + const u32 nopl_signature = 0x888c53b1; /* Random number */ + u32 has_nopl = nopl_signature; + + clear_cpu_cap(c, X86_FEATURE_NOPL); + if (c->x86 >= 6) { + asm volatile("\n" + "1: .byte 0x0f,0x1f,0xc0\n" /* nopl %eax */ + "2:\n" + " .section .fixup,\"ax\"\n" + "3: xor %0,%0\n" + " jmp 2b\n" + " .previous\n" + _ASM_EXTABLE(1b,3b) + : "+a" (has_nopl)); + + if (has_nopl == nopl_signature) + set_cpu_cap(c, X86_FEATURE_NOPL); + } +} + static void __cpuinit generic_identify(struct cpuinfo_x86 *c) { u32 tfms, xlvl; @@ -395,8 +425,8 @@ static void __cpuinit generic_identify(struct cpuinfo_x86 *c) } init_scattered_cpuid_features(c); + detect_nopl(c); } - } static void __cpuinit squash_the_stupid_serial_number(struct cpuinfo_x86 *c) diff --git a/arch/x86/kernel/cpu/common_64.c b/arch/x86/kernel/cpu/common_64.c index dd6e3f15017..c3afba5a81a 100644 --- a/arch/x86/kernel/cpu/common_64.c +++ b/arch/x86/kernel/cpu/common_64.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #ifdef CONFIG_X86_LOCAL_APIC #include @@ -215,6 +216,39 @@ static void __init early_cpu_support_print(void) } } +/* + * The NOPL instruction is supposed to exist on all CPUs with + * family >= 6, unfortunately, that's not true in practice because + * of early VIA chips and (more importantly) broken virtualizers that + * are not easy to detect. Hence, probe for it based on first + * principles. + * + * Note: no 64-bit chip is known to lack these, but put the code here + * for consistency with 32 bits, and to make it utterly trivial to + * diagnose the problem should it ever surface. + */ +static void __cpuinit detect_nopl(struct cpuinfo_x86 *c) +{ + const u32 nopl_signature = 0x888c53b1; /* Random number */ + u32 has_nopl = nopl_signature; + + clear_cpu_cap(c, X86_FEATURE_NOPL); + if (c->x86 >= 6) { + asm volatile("\n" + "1: .byte 0x0f,0x1f,0xc0\n" /* nopl %eax */ + "2:\n" + " .section .fixup,\"ax\"\n" + "3: xor %0,%0\n" + " jmp 2b\n" + " .previous\n" + _ASM_EXTABLE(1b,3b) + : "+a" (has_nopl)); + + if (has_nopl == nopl_signature) + set_cpu_cap(c, X86_FEATURE_NOPL); + } +} + static void __cpuinit early_identify_cpu(struct cpuinfo_x86 *c); void __init early_cpu_init(void) @@ -313,6 +347,8 @@ static void __cpuinit early_identify_cpu(struct cpuinfo_x86 *c) c->x86_phys_bits = eax & 0xff; } + detect_nopl(c); + if (c->x86_vendor != X86_VENDOR_UNKNOWN && cpu_devs[c->x86_vendor]->c_early_init) cpu_devs[c->x86_vendor]->c_early_init(c); diff --git a/arch/x86/kernel/cpu/feature_names.c b/arch/x86/kernel/cpu/feature_names.c index e43ad4ad4cb..c9017799497 100644 --- a/arch/x86/kernel/cpu/feature_names.c +++ b/arch/x86/kernel/cpu/feature_names.c @@ -39,7 +39,8 @@ const char * const x86_cap_flags[NCAPINTS*32] = { NULL, NULL, NULL, NULL, "constant_tsc", "up", NULL, "arch_perfmon", "pebs", "bts", NULL, NULL, - "rep_good", NULL, NULL, NULL, NULL, NULL, NULL, NULL, + "rep_good", NULL, NULL, NULL, + "nopl", NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, /* Intel-defined (#2) */ -- cgit v1.2.3 From f1c5d30e1d79bbfb60eaf189db862d3cb2bcac92 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Mon, 18 Aug 2008 17:50:33 -0700 Subject: x86: use X86_FEATURE_NOPL in alternatives Use X86_FEATURE_NOPL to determine if it is safe to use P6 NOPs in alternatives. Also, replace table and loop with simple if statement. Signed-off-by: H. Peter Anvin --- arch/x86/kernel/alternative.c | 36 +++++++++++++----------------------- 1 file changed, 13 insertions(+), 23 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index 2763cb37b55..65a0c1b4869 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -145,35 +145,25 @@ static const unsigned char *const p6_nops[ASM_NOP_MAX+1] = { extern char __vsyscall_0; const unsigned char *const *find_nop_table(void) { - return boot_cpu_data.x86_vendor != X86_VENDOR_INTEL || - boot_cpu_data.x86 < 6 ? k8_nops : p6_nops; + if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL && + boot_cpu_has(X86_FEATURE_NOPL)) + return p6_nops; + else + return k8_nops; } #else /* CONFIG_X86_64 */ -static const struct nop { - int cpuid; - const unsigned char *const *noptable; -} noptypes[] = { - { X86_FEATURE_K8, k8_nops }, - { X86_FEATURE_K7, k7_nops }, - { X86_FEATURE_P4, p6_nops }, - { X86_FEATURE_P3, p6_nops }, - { -1, NULL } -}; - const unsigned char *const *find_nop_table(void) { - const unsigned char *const *noptable = intel_nops; - int i; - - for (i = 0; noptypes[i].cpuid >= 0; i++) { - if (boot_cpu_has(noptypes[i].cpuid)) { - noptable = noptypes[i].noptable; - break; - } - } - return noptable; + if (boot_cpu_has(X86_FEATURE_K8)) + return k8_nops; + else if (boot_cpu_has(X86_FEATURE_K7)) + return k7_nops; + else if (boot_cpu_has(X86_FEATURE_NOPL)) + return p6_nops; + else + return intel_nops; } #endif /* CONFIG_X86_64 */ -- cgit v1.2.3 From 8343ef2437c599d30568e6b5a257a40bf2f4902b Mon Sep 17 00:00:00 2001 From: Dmitry Adamushko Date: Wed, 20 Aug 2008 00:16:13 +0200 Subject: x86-microcode: fix unbalanced use of get_cpu() Don't use get_cpu() at all. Resort to checking a boot-up CPU (#0) in microcode_{intel,amd}_module_init(). Signed-off-by: Dmitry Adamushko Signed-off-by: Ingo Molnar --- arch/x86/kernel/microcode_amd.c | 10 ++++++---- arch/x86/kernel/microcode_intel.c | 10 ++++++---- 2 files changed, 12 insertions(+), 8 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/microcode_amd.c b/arch/x86/kernel/microcode_amd.c index 33b2a217a8c..a6e76ccf815 100644 --- a/arch/x86/kernel/microcode_amd.c +++ b/arch/x86/kernel/microcode_amd.c @@ -500,13 +500,15 @@ static struct microcode_ops microcode_amd_ops = { static int __init microcode_amd_module_init(void) { - struct cpuinfo_x86 *c = &cpu_data(get_cpu()); + struct cpuinfo_x86 *c = &cpu_data(0); equiv_cpu_table = NULL; - if (c->x86_vendor == X86_VENDOR_AMD) - return microcode_init(µcode_amd_ops, THIS_MODULE); - else + if (c->x86_vendor != X86_VENDOR_AMD) { + printk(KERN_ERR "microcode: CPU platform is not AMD-capable\n"); return -ENODEV; + } + + return microcode_init(µcode_amd_ops, THIS_MODULE); } static void __exit microcode_amd_module_exit(void) diff --git a/arch/x86/kernel/microcode_intel.c b/arch/x86/kernel/microcode_intel.c index d2d9d74f4cb..6dd8907ff22 100644 --- a/arch/x86/kernel/microcode_intel.c +++ b/arch/x86/kernel/microcode_intel.c @@ -531,12 +531,14 @@ static struct microcode_ops microcode_intel_ops = { static int __init microcode_intel_module_init(void) { - struct cpuinfo_x86 *c = &cpu_data(get_cpu()); + struct cpuinfo_x86 *c = &cpu_data(0); - if (c->x86_vendor == X86_VENDOR_INTEL) - return microcode_init(µcode_intel_ops, THIS_MODULE); - else + if (c->x86_vendor != X86_VENDOR_INTEL) { + printk(KERN_ERR "microcode: CPU platform is not Intel-capable\n"); return -ENODEV; + } + + return microcode_init(µcode_intel_ops, THIS_MODULE); } static void __exit microcode_intel_module_exit(void) -- cgit v1.2.3 From d45de40934897c6ee5b05141f7895bbb28512395 Mon Sep 17 00:00:00 2001 From: Dmitry Adamushko Date: Wed, 20 Aug 2008 00:22:26 +0200 Subject: x86-microcode: generic interface refactoring This is the 1st patch in the series. Here the aim was to avoid any significant changes, logically-wise. So it's mainly about generic interface refactoring: e.g. make microcode_{intel,amd}.c more about arch-specific details and less about policies like make-sure-we-run-on-a-target-cpu (no more set_cpus_allowed_ptr() here) and generic synchronization (no more microcode_mutex here). All in all, more line have been deleted than added. 4 files changed, 145 insertions(+), 198 deletions(-) Signed-off-by: Dmitry Adamushko Signed-off-by: Ingo Molnar --- arch/x86/kernel/microcode.c | 155 +++++++++++++++++++++++++------------- arch/x86/kernel/microcode_amd.c | 77 +++---------------- arch/x86/kernel/microcode_intel.c | 91 ++++------------------ 3 files changed, 129 insertions(+), 194 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/microcode.c b/arch/x86/kernel/microcode.c index ad136ad99cb..b2f84ce5eed 100644 --- a/arch/x86/kernel/microcode.c +++ b/arch/x86/kernel/microcode.c @@ -71,7 +71,7 @@ * Thanks to Stuart Swales for pointing out this bug. */ -/*#define DEBUG pr_debug */ +/* #define DEBUG pr_debug */ #include #include #include @@ -104,8 +104,7 @@ MODULE_LICENSE("GPL"); struct microcode_ops *microcode_ops; /* no concurrent ->write()s are allowed on /dev/cpu/microcode */ -DEFINE_MUTEX(microcode_mutex); -EXPORT_SYMBOL_GPL(microcode_mutex); +static DEFINE_MUTEX(microcode_mutex); struct ucode_cpu_info ucode_cpu_info[NR_CPUS]; EXPORT_SYMBOL_GPL(ucode_cpu_info); @@ -234,22 +233,6 @@ MODULE_ALIAS_MISCDEV(MICROCODE_MINOR); struct platform_device *microcode_pdev; EXPORT_SYMBOL_GPL(microcode_pdev); -static void microcode_init_cpu(int cpu, int resume) -{ - cpumask_t old; - struct ucode_cpu_info *uci = ucode_cpu_info + cpu; - - old = current->cpus_allowed; - - set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu)); - mutex_lock(µcode_mutex); - microcode_ops->collect_cpu_info(cpu); - if (uci->valid && system_state == SYSTEM_RUNNING && !resume) - microcode_ops->cpu_request_microcode(cpu); - mutex_unlock(µcode_mutex); - set_cpus_allowed_ptr(current, &old); -} - static ssize_t reload_store(struct sys_device *dev, struct sysdev_attribute *attr, const char *buf, size_t sz) @@ -266,14 +249,15 @@ static ssize_t reload_store(struct sys_device *dev, cpumask_t old = current->cpus_allowed; get_online_cpus(); - set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu)); - - mutex_lock(µcode_mutex); - if (uci->valid) - err = microcode_ops->cpu_request_microcode(cpu); - mutex_unlock(µcode_mutex); + if (cpu_online(cpu)) { + set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu)); + mutex_lock(µcode_mutex); + if (uci->valid) + err = microcode_ops->cpu_request_microcode(cpu); + mutex_unlock(µcode_mutex); + set_cpus_allowed_ptr(current, &old); + } put_online_cpus(); - set_cpus_allowed_ptr(current, &old); } if (err) return err; @@ -285,7 +269,7 @@ static ssize_t version_show(struct sys_device *dev, { struct ucode_cpu_info *uci = ucode_cpu_info + dev->id; - return sprintf(buf, "0x%x\n", uci->rev); + return sprintf(buf, "0x%x\n", uci->cpu_sig.rev); } static ssize_t pf_show(struct sys_device *dev, @@ -293,7 +277,7 @@ static ssize_t pf_show(struct sys_device *dev, { struct ucode_cpu_info *uci = ucode_cpu_info + dev->id; - return sprintf(buf, "0x%x\n", uci->pf); + return sprintf(buf, "0x%x\n", uci->cpu_sig.pf); } static SYSDEV_ATTR(reload, 0200, NULL, reload_store); @@ -312,7 +296,85 @@ static struct attribute_group mc_attr_group = { .name = "microcode", }; -static int __mc_sysdev_add(struct sys_device *sys_dev, int resume) +static void microcode_fini_cpu(int cpu) +{ + struct ucode_cpu_info *uci = ucode_cpu_info + cpu; + + mutex_lock(µcode_mutex); + microcode_ops->microcode_fini_cpu(cpu); + uci->valid = 0; + mutex_unlock(µcode_mutex); +} + +static void collect_cpu_info(int cpu) +{ + struct ucode_cpu_info *uci = ucode_cpu_info + cpu; + + memset(uci, 0, sizeof(*uci)); + if (!microcode_ops->collect_cpu_info(cpu, &uci->cpu_sig)) + uci->valid = 1; +} + +static void microcode_resume_cpu(int cpu) +{ + struct ucode_cpu_info *uci = ucode_cpu_info + cpu; + struct cpu_signature nsig; + + pr_debug("microcode: CPU%d resumed\n", cpu); + + if (!uci->mc.valid_mc) + return; + + /* + * Let's verify that the 'cached' ucode does belong + * to this cpu (a bit of paranoia): + */ + if (microcode_ops->collect_cpu_info(cpu, &nsig)) { + microcode_fini_cpu(cpu); + return; + } + + if (memcmp(&nsig, &uci->cpu_sig, sizeof(nsig))) { + microcode_fini_cpu(cpu); + /* Should we look for a new ucode here? */ + return; + } + + microcode_ops->apply_microcode(cpu); +} + +void microcode_update_cpu(int cpu) +{ + struct ucode_cpu_info *uci = ucode_cpu_info + cpu; + + /* We should bind the task to the CPU */ + BUG_ON(raw_smp_processor_id() != cpu); + + mutex_lock(µcode_mutex); + /* + * Check if the system resume is in progress (uci->valid != NULL), + * otherwise just request a firmware: + */ + if (uci->valid) { + microcode_resume_cpu(cpu); + } else { + collect_cpu_info(cpu); + if (uci->valid && system_state == SYSTEM_RUNNING) + microcode_ops->cpu_request_microcode(cpu); + } + mutex_unlock(µcode_mutex); +} + +static void microcode_init_cpu(int cpu) +{ + cpumask_t old = current->cpus_allowed; + + set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu)); + microcode_update_cpu(cpu); + set_cpus_allowed_ptr(current, &old); +} + +static int mc_sysdev_add(struct sys_device *sys_dev) { int err, cpu = sys_dev->id; struct ucode_cpu_info *uci = ucode_cpu_info + cpu; @@ -327,16 +389,10 @@ static int __mc_sysdev_add(struct sys_device *sys_dev, int resume) if (err) return err; - microcode_init_cpu(cpu, resume); - + microcode_init_cpu(cpu); return 0; } -static int mc_sysdev_add(struct sys_device *sys_dev) -{ - return __mc_sysdev_add(sys_dev, 0); -} - static int mc_sysdev_remove(struct sys_device *sys_dev) { int cpu = sys_dev->id; @@ -345,7 +401,7 @@ static int mc_sysdev_remove(struct sys_device *sys_dev) return 0; pr_debug("microcode: CPU%d removed\n", cpu); - microcode_ops->microcode_fini_cpu(cpu); + microcode_fini_cpu(cpu); sysfs_remove_group(&sys_dev->kobj, &mc_attr_group); return 0; } @@ -376,33 +432,26 @@ mc_cpu_callback(struct notifier_block *nb, unsigned long action, void *hcpu) sys_dev = get_cpu_sysdev(cpu); switch (action) { - case CPU_UP_CANCELED_FROZEN: - /* The CPU refused to come up during a system resume */ - microcode_ops->microcode_fini_cpu(cpu); - break; case CPU_ONLINE: - case CPU_DOWN_FAILED: - mc_sysdev_add(sys_dev); - break; case CPU_ONLINE_FROZEN: - /* System-wide resume is in progress, try to apply microcode */ - if (microcode_ops->apply_microcode_check_cpu(cpu)) { - /* The application of microcode failed */ - microcode_ops->microcode_fini_cpu(cpu); - __mc_sysdev_add(sys_dev, 1); - break; - } + microcode_init_cpu(cpu); + case CPU_DOWN_FAILED: case CPU_DOWN_FAILED_FROZEN: + pr_debug("microcode: CPU%d added\n", cpu); if (sysfs_create_group(&sys_dev->kobj, &mc_attr_group)) printk(KERN_ERR "microcode: Failed to create the sysfs " "group for CPU%d\n", cpu); break; case CPU_DOWN_PREPARE: - mc_sysdev_remove(sys_dev); - break; case CPU_DOWN_PREPARE_FROZEN: /* Suspend is in progress, only remove the interface */ sysfs_remove_group(&sys_dev->kobj, &mc_attr_group); + pr_debug("microcode: CPU%d removed\n", cpu); + break; + case CPU_DEAD: + case CPU_UP_CANCELED_FROZEN: + /* The CPU refused to come up during a system resume */ + microcode_fini_cpu(cpu); break; } return NOTIFY_OK; diff --git a/arch/x86/kernel/microcode_amd.c b/arch/x86/kernel/microcode_amd.c index a6e76ccf815..4006e5e3adf 100644 --- a/arch/x86/kernel/microcode_amd.c +++ b/arch/x86/kernel/microcode_amd.c @@ -59,38 +59,28 @@ MODULE_LICENSE("GPL v2"); /* serialize access to the physical write */ static DEFINE_SPINLOCK(microcode_update_lock); -/* no concurrent ->write()s are allowed on /dev/cpu/microcode */ -extern struct mutex (microcode_mutex); - struct equiv_cpu_entry *equiv_cpu_table; -extern struct ucode_cpu_info ucode_cpu_info[NR_CPUS]; - -static void collect_cpu_info_amd(int cpu) +static int collect_cpu_info_amd(int cpu, struct cpu_signature *csig) { struct cpuinfo_x86 *c = &cpu_data(cpu); - struct ucode_cpu_info *uci = ucode_cpu_info + cpu; - /* We should bind the task to the CPU */ - BUG_ON(raw_smp_processor_id() != cpu); - uci->rev = 0; - uci->pf = 0; - uci->mc.mc_amd = NULL; - uci->valid = 1; + memset(csig, 0, sizeof(*csig)); if (c->x86_vendor != X86_VENDOR_AMD || c->x86 < 0x10) { printk(KERN_ERR "microcode: CPU%d not a capable AMD processor\n", cpu); - uci->valid = 0; - return; + return -1; } asm volatile("movl %1, %%ecx; rdmsr" - : "=a" (uci->rev) + : "=a" (csig->rev) : "i" (0x0000008B) : "ecx"); printk(KERN_INFO "microcode: collect_cpu_info_amd : patch_id=0x%x\n", - uci->rev); + csig->rev); + + return 0; } static int get_matching_microcode_amd(void *mc, int cpu) @@ -119,7 +109,7 @@ static int get_matching_microcode_amd(void *mc, int cpu) if (equiv_cpu_table == NULL) { printk(KERN_INFO "microcode: CPU%d microcode update with " "version 0x%x (current=0x%x)\n", - cpu, mc_header->patch_id, uci->rev); + cpu, mc_header->patch_id, uci->cpu_sig.rev); goto out; } @@ -185,12 +175,12 @@ static int get_matching_microcode_amd(void *mc, int cpu) pci_dev_put(sb_pci_dev); } - if (mc_header->patch_id <= uci->rev) + if (mc_header->patch_id <= uci->cpu_sig.rev) return 0; printk(KERN_INFO "microcode: CPU%d found a matching microcode " "update with version 0x%x (current=0x%x)\n", - cpu, mc_header->patch_id, uci->rev); + cpu, mc_header->patch_id, uci->cpu_sig.rev); out: new_mc = vmalloc(UCODE_MAX_SIZE); @@ -250,9 +240,9 @@ static void apply_microcode_amd(int cpu) printk(KERN_INFO "microcode: CPU%d updated from revision " "0x%x to 0x%x \n", - cpu_num, uci->rev, uci->mc.mc_amd->hdr.patch_id); + cpu_num, uci->cpu_sig.rev, uci->mc.mc_amd->hdr.patch_id); - uci->rev = rev; + uci->cpu_sig.rev = rev; } #ifdef CONFIG_MICROCODE_OLD_INTERFACE @@ -437,61 +427,18 @@ static int cpu_request_microcode_amd(int cpu) return error; } -static int apply_microcode_check_cpu_amd(int cpu) -{ - struct cpuinfo_x86 *c = &cpu_data(cpu); - struct ucode_cpu_info *uci = ucode_cpu_info + cpu; - unsigned int rev; - cpumask_t old; - int err = 0; - - /* Check if the microcode is available */ - if (!uci->mc.mc_amd) - return 0; - - old = current->cpus_allowed; - set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu)); - - /* Check if the microcode we have in memory matches the CPU */ - if (c->x86_vendor != X86_VENDOR_AMD || c->x86 < 16) - err = -EINVAL; - - if (!err) { - asm volatile("movl %1, %%ecx; rdmsr" - : "=a" (rev) - : "i" (0x0000008B) : "ecx"); - - if (uci->rev != rev) - err = -EINVAL; - } - - if (!err) - apply_microcode_amd(cpu); - else - printk(KERN_ERR "microcode: Could not apply microcode to CPU%d:" - " rev=0x%x\n", - cpu, uci->rev); - - set_cpus_allowed(current, old); - return err; -} - static void microcode_fini_cpu_amd(int cpu) { struct ucode_cpu_info *uci = ucode_cpu_info + cpu; - mutex_lock(µcode_mutex); - uci->valid = 0; vfree(uci->mc.mc_amd); uci->mc.mc_amd = NULL; - mutex_unlock(µcode_mutex); } static struct microcode_ops microcode_amd_ops = { .get_next_ucode = get_next_ucode_amd, .get_matching_microcode = get_matching_microcode_amd, .microcode_sanity_check = NULL, - .apply_microcode_check_cpu = apply_microcode_check_cpu_amd, .cpu_request_microcode = cpu_request_microcode_amd, .collect_cpu_info = collect_cpu_info_amd, .apply_microcode = apply_microcode_amd, diff --git a/arch/x86/kernel/microcode_intel.c b/arch/x86/kernel/microcode_intel.c index 6dd8907ff22..c9b53202ba3 100644 --- a/arch/x86/kernel/microcode_intel.c +++ b/arch/x86/kernel/microcode_intel.c @@ -122,46 +122,37 @@ MODULE_LICENSE("GPL"); /* serialize access to the physical write to MSR 0x79 */ static DEFINE_SPINLOCK(microcode_update_lock); -/* no concurrent ->write()s are allowed on /dev/cpu/microcode */ -extern struct mutex microcode_mutex; - -extern struct ucode_cpu_info ucode_cpu_info[NR_CPUS]; - -static void collect_cpu_info(int cpu_num) +static int collect_cpu_info(int cpu_num, struct cpu_signature *csig) { struct cpuinfo_x86 *c = &cpu_data(cpu_num); - struct ucode_cpu_info *uci = ucode_cpu_info + cpu_num; unsigned int val[2]; - /* We should bind the task to the CPU */ - BUG_ON(raw_smp_processor_id() != cpu_num); - uci->pf = uci->rev = 0; - uci->mc.mc_intel = NULL; - uci->valid = 1; + memset(csig, 0, sizeof(*csig)); if (c->x86_vendor != X86_VENDOR_INTEL || c->x86 < 6 || cpu_has(c, X86_FEATURE_IA64)) { printk(KERN_ERR "microcode: CPU%d not a capable Intel " "processor\n", cpu_num); - uci->valid = 0; - return; + return -1; } - uci->sig = cpuid_eax(0x00000001); + csig->sig = cpuid_eax(0x00000001); if ((c->x86_model >= 5) || (c->x86 > 6)) { /* get processor flags from MSR 0x17 */ rdmsr(MSR_IA32_PLATFORM_ID, val[0], val[1]); - uci->pf = 1 << ((val[1] >> 18) & 7); + csig->pf = 1 << ((val[1] >> 18) & 7); } wrmsr(MSR_IA32_UCODE_REV, 0, 0); /* see notes above for revision 1.07. Apparent chip bug */ sync_core(); /* get the current revision from MSR 0x8B */ - rdmsr(MSR_IA32_UCODE_REV, val[0], uci->rev); + rdmsr(MSR_IA32_UCODE_REV, val[0], csig->rev); pr_debug("microcode: collect_cpu_info : sig=0x%x, pf=0x%x, rev=0x%x\n", - uci->sig, uci->pf, uci->rev); + csig->sig, csig->pf, csig->rev); + + return 0; } static inline int microcode_update_match(int cpu_num, @@ -169,8 +160,8 @@ static inline int microcode_update_match(int cpu_num, { struct ucode_cpu_info *uci = ucode_cpu_info + cpu_num; - if (!sigmatch(sig, uci->sig, pf, uci->pf) - || mc_header->rev <= uci->rev) + if (!sigmatch(sig, uci->cpu_sig.sig, pf, uci->cpu_sig.pf) + || mc_header->rev <= uci->cpu_sig.rev) return 0; return 1; } @@ -289,7 +280,7 @@ static int get_matching_microcode(void *mc, int cpu) find: pr_debug("microcode: CPU%d found a matching microcode update with" " version 0x%x (current=0x%x)\n", - cpu, mc_header->rev, uci->rev); + cpu, mc_header->rev, uci->cpu_sig.rev); new_mc = vmalloc(total_size); if (!new_mc) { printk(KERN_ERR "microcode: error! Can not allocate memory\n"); @@ -335,16 +326,16 @@ static void apply_microcode(int cpu) spin_unlock_irqrestore(µcode_update_lock, flags); if (val[1] != uci->mc.mc_intel->hdr.rev) { printk(KERN_ERR "microcode: CPU%d update from revision " - "0x%x to 0x%x failed\n", cpu_num, uci->rev, val[1]); + "0x%x to 0x%x failed\n", cpu_num, uci->cpu_sig.rev, val[1]); return; } printk(KERN_INFO "microcode: CPU%d updated from revision " "0x%x to 0x%x, date = %04x-%02x-%02x \n", - cpu_num, uci->rev, val[1], + cpu_num, uci->cpu_sig.rev, val[1], uci->mc.mc_intel->hdr.date & 0xffff, uci->mc.mc_intel->hdr.date >> 24, (uci->mc.mc_intel->hdr.date >> 16) & 0xff); - uci->rev = val[1]; + uci->cpu_sig.rev = val[1]; } #ifdef CONFIG_MICROCODE_OLD_INTERFACE @@ -459,70 +450,18 @@ static int cpu_request_microcode(int cpu) return error; } -static int apply_microcode_check_cpu(int cpu) -{ - struct cpuinfo_x86 *c = &cpu_data(cpu); - struct ucode_cpu_info *uci = ucode_cpu_info + cpu; - cpumask_t old; - unsigned int val[2]; - int err = 0; - - /* Check if the microcode is available */ - if (!uci->mc.mc_intel) - return 0; - - old = current->cpus_allowed; - set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu)); - - /* Check if the microcode we have in memory matches the CPU */ - if (c->x86_vendor != X86_VENDOR_INTEL || c->x86 < 6 || - cpu_has(c, X86_FEATURE_IA64) || uci->sig != cpuid_eax(0x00000001)) - err = -EINVAL; - - if (!err && ((c->x86_model >= 5) || (c->x86 > 6))) { - /* get processor flags from MSR 0x17 */ - rdmsr(MSR_IA32_PLATFORM_ID, val[0], val[1]); - if (uci->pf != (1 << ((val[1] >> 18) & 7))) - err = -EINVAL; - } - - if (!err) { - wrmsr(MSR_IA32_UCODE_REV, 0, 0); - /* see notes above for revision 1.07. Apparent chip bug */ - sync_core(); - /* get the current revision from MSR 0x8B */ - rdmsr(MSR_IA32_UCODE_REV, val[0], val[1]); - if (uci->rev != val[1]) - err = -EINVAL; - } - - if (!err) - apply_microcode(cpu); - else - printk(KERN_ERR "microcode: Could not apply microcode to CPU%d:" - " sig=0x%x, pf=0x%x, rev=0x%x\n", - cpu, uci->sig, uci->pf, uci->rev); - - set_cpus_allowed_ptr(current, &old); - return err; -} - static void microcode_fini_cpu(int cpu) { struct ucode_cpu_info *uci = ucode_cpu_info + cpu; - mutex_lock(µcode_mutex); - uci->valid = 0; vfree(uci->mc.mc_intel); uci->mc.mc_intel = NULL; - mutex_unlock(µcode_mutex); } static struct microcode_ops microcode_intel_ops = { .get_next_ucode = get_next_ucode, .get_matching_microcode = get_matching_microcode, .microcode_sanity_check = microcode_sanity_check, - .apply_microcode_check_cpu = apply_microcode_check_cpu, .cpu_request_microcode = cpu_request_microcode, .collect_cpu_info = collect_cpu_info, .apply_microcode = apply_microcode, -- cgit v1.2.3 From 63d3a75d6f1fcf2f33e6abbe84e1f428c3586152 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Tue, 19 Aug 2008 13:19:36 -0700 Subject: x86/paravirt: add spin_lock_flags lock op It is useful for a pv_lock_ops backend to know whether interrupts are enabled or not in the context a spin_lock is being called. This allows it to enable interrupts while spinning, which could be particularly helpful when spinning becomes blocking. The default implementation just calls the normal spin_lock op, ignoring the flags. Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Ingo Molnar --- arch/x86/kernel/paravirt-spinlocks.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/paravirt-spinlocks.c b/arch/x86/kernel/paravirt-spinlocks.c index 38d7f7f1dbc..206359a8e43 100644 --- a/arch/x86/kernel/paravirt-spinlocks.c +++ b/arch/x86/kernel/paravirt-spinlocks.c @@ -7,12 +7,18 @@ #include +static void default_spin_lock_flags(struct raw_spinlock *lock, unsigned long flags) +{ + __raw_spin_lock(lock); +} + struct pv_lock_ops pv_lock_ops = { #ifdef CONFIG_SMP .spin_is_locked = __ticket_spin_is_locked, .spin_is_contended = __ticket_spin_is_contended, .spin_lock = __ticket_spin_lock, + .spin_lock_flags = default_spin_lock_flags, .spin_trylock = __ticket_spin_trylock, .spin_unlock = __ticket_spin_unlock, #endif -- cgit v1.2.3 From 11494547b1754c4f3bd7f707ab869e2adf54d52f Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 21 Aug 2008 01:01:19 -0700 Subject: x86: fix apic version warning after following patch, commit 1b313f4a6d7bee7b2c034b3f1e203bc360a71cca Author: Cyrill Gorcunov Date: Mon Aug 18 20:45:57 2008 +0400 x86: apic - generic_processor_info - use physid_set instead of phys_cpu and physids_or - set phys_cpu_present_map bit AFTER check for allowed number of processors - add checking for APIC valid version in 64bit mode (mostly not needed but added for merging purpose) - add apic_version definition for 64bit mode which is used now we are getting warning for acpi path on 64 bit system. make the 64-bit side fill in apic_version[] as well. [ mingo@elte.hu: build fix ] Signed-off-by: Ingo Molnar --- arch/x86/kernel/acpi/boot.c | 4 ---- 1 file changed, 4 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index 12e260e8fb2..d9770a56511 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -239,10 +239,8 @@ static void __cpuinit acpi_register_lapic(int id, u8 enabled) return; } -#ifdef CONFIG_X86_32 if (boot_cpu_physical_apicid != -1U) ver = apic_version[boot_cpu_physical_apicid]; -#endif generic_processor_info(id, ver); } @@ -762,10 +760,8 @@ static void __init acpi_register_lapic_address(unsigned long address) set_fixmap_nocache(FIX_APIC_BASE, address); if (boot_cpu_physical_apicid == -1U) { boot_cpu_physical_apicid = read_apic_id(); -#ifdef CONFIG_X86_32 apic_version[boot_cpu_physical_apicid] = GET_APIC_VERSION(apic_read(APIC_LVR)); -#endif } } -- cgit v1.2.3 From 4e1d112cac08049e764fe3c4f6d3ec92529f9f68 Mon Sep 17 00:00:00 2001 From: Huang Weiyi Date: Wed, 20 Aug 2008 16:43:07 -0700 Subject: arch/x86/kernel/apm_32.c: remove duplicated #include Removed duplicated include file in arch/x86/kernel/apm_32.c. Signed-off-by: Huang Weiyi Acked-by: Pavel Machek Signed-off-by: Ingo Molnar Signed-off-by: Andrew Morton --- arch/x86/kernel/apm_32.c | 1 - 1 file changed, 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c index 9ee24e6bc4b..b93d069aea7 100644 --- a/arch/x86/kernel/apm_32.c +++ b/arch/x86/kernel/apm_32.c @@ -228,7 +228,6 @@ #include #include #include -#include #include #include -- cgit v1.2.3 From 671eef85a3e885dff4ce210d8774ad50a91d5967 Mon Sep 17 00:00:00 2001 From: "Cihula, Joseph" Date: Wed, 20 Aug 2008 16:43:07 -0700 Subject: x86, e820: add support for AddressRangeUnusuable ACPI memory type Add support for the E820_UNUSABLE memory type, which is defined in Revision 3.0b (Oct. 10, 2006) of the ACPI Specification on p. 394 Table 14-1: AddressRangeUnusuable This range of address contains memory in which errors have been detected. This range must not be used by the OSPM. Signed-off-by: Joseph Cihula Signed-off-by: Shane Wang Signed-off-by: Gang Wei Signed-off-by: Andrew Morton Signed-off-by: Ingo Molnar --- arch/x86/kernel/e820.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c index 9af89078f7b..291e6cd9f9c 100644 --- a/arch/x86/kernel/e820.c +++ b/arch/x86/kernel/e820.c @@ -148,6 +148,9 @@ void __init e820_print_map(char *who) case E820_NVS: printk(KERN_CONT "(ACPI NVS)\n"); break; + case E820_UNUSABLE: + printk("(unusable)\n"); + break; default: printk(KERN_CONT "type %u\n", e820.map[i].type); break; @@ -1260,6 +1263,7 @@ static inline const char *e820_type_to_string(int e820_type) case E820_RAM: return "System RAM"; case E820_ACPI: return "ACPI Tables"; case E820_NVS: return "ACPI Non-volatile Storage"; + case E820_UNUSABLE: return "Unusable memory"; default: return "reserved"; } } -- cgit v1.2.3 From f86399396ce7a4f4069828b7dceac5aa5113dfb5 Mon Sep 17 00:00:00 2001 From: Eduardo Habkost Date: Wed, 30 Jul 2008 18:32:27 -0300 Subject: x86, paravirt_ops: use unsigned long instead of u32 for alloc_p*() pfn args This patch changes the pfn args from 'u32' to 'unsigned long' on alloc_p*() functions on paravirt_ops, and the corresponding implementations for Xen and VMI. The prototypes for CONFIG_PARAVIRT=n are already using unsigned long, so paravirt.h now matches the prototypes on asm-x86/pgalloc.h. It shouldn't result in any changes on generated code on 32-bit, with or without CONFIG_PARAVIRT. On both cases, 'codiff -f' didn't show any change after applying this patch. On 64-bit, there are (expected) binary changes only when CONFIG_PARAVIRT is enabled, as the patch is really supposed to change the size of the pfn args. [ v2: KVM_GUEST: use the right parameter type on kvm_release_pt() ] Signed-off-by: Eduardo Habkost Acked-by: Jeremy Fitzhardinge Acked-by: Zachary Amsden Signed-off-by: Ingo Molnar --- arch/x86/kernel/kvm.c | 2 +- arch/x86/kernel/vmi_32.c | 10 +++++----- 2 files changed, 6 insertions(+), 6 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index 8b7a3cf37d2..478bca986ec 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -178,7 +178,7 @@ static void kvm_flush_tlb(void) kvm_deferred_mmu_op(&ftlb, sizeof ftlb); } -static void kvm_release_pt(u32 pfn) +static void kvm_release_pt(unsigned long pfn) { struct kvm_mmu_op_release_pt rpt = { .header.op = KVM_MMU_OP_RELEASE_PT, diff --git a/arch/x86/kernel/vmi_32.c b/arch/x86/kernel/vmi_32.c index 0a1b1a9d922..340b03643b9 100644 --- a/arch/x86/kernel/vmi_32.c +++ b/arch/x86/kernel/vmi_32.c @@ -392,13 +392,13 @@ static void *vmi_kmap_atomic_pte(struct page *page, enum km_type type) } #endif -static void vmi_allocate_pte(struct mm_struct *mm, u32 pfn) +static void vmi_allocate_pte(struct mm_struct *mm, unsigned long pfn) { vmi_set_page_type(pfn, VMI_PAGE_L1); vmi_ops.allocate_page(pfn, VMI_PAGE_L1, 0, 0, 0); } -static void vmi_allocate_pmd(struct mm_struct *mm, u32 pfn) +static void vmi_allocate_pmd(struct mm_struct *mm, unsigned long pfn) { /* * This call comes in very early, before mem_map is setup. @@ -409,20 +409,20 @@ static void vmi_allocate_pmd(struct mm_struct *mm, u32 pfn) vmi_ops.allocate_page(pfn, VMI_PAGE_L2, 0, 0, 0); } -static void vmi_allocate_pmd_clone(u32 pfn, u32 clonepfn, u32 start, u32 count) +static void vmi_allocate_pmd_clone(unsigned long pfn, unsigned long clonepfn, unsigned long start, unsigned long count) { vmi_set_page_type(pfn, VMI_PAGE_L2 | VMI_PAGE_CLONE); vmi_check_page_type(clonepfn, VMI_PAGE_L2); vmi_ops.allocate_page(pfn, VMI_PAGE_L2 | VMI_PAGE_CLONE, clonepfn, start, count); } -static void vmi_release_pte(u32 pfn) +static void vmi_release_pte(unsigned long pfn) { vmi_ops.release_page(pfn, VMI_PAGE_L1); vmi_set_page_type(pfn, VMI_PAGE_NORMAL); } -static void vmi_release_pmd(u32 pfn) +static void vmi_release_pmd(unsigned long pfn) { vmi_ops.release_page(pfn, VMI_PAGE_L2); vmi_set_page_type(pfn, VMI_PAGE_NORMAL); -- cgit v1.2.3 From 5b792d320f28ff83dd4c13f984807e26235f7703 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Thu, 21 Aug 2008 13:43:51 -0700 Subject: x86, microcode_amd: fix shift warning microcode_amd.c uses ">> 32" on a 32-bit value, so gcc warns about that. The code could use something like this *untested* patch. linux-next-20080821/arch/x86/kernel/microcode_amd.c:229: warning: right shift count >= width of type Signed-off-by: Randy Dunlap Signed-off-by: Ingo Molnar --- arch/x86/kernel/microcode_amd.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/microcode_amd.c b/arch/x86/kernel/microcode_amd.c index 4006e5e3adf..d606a05545c 100644 --- a/arch/x86/kernel/microcode_amd.c +++ b/arch/x86/kernel/microcode_amd.c @@ -206,6 +206,7 @@ static void apply_microcode_amd(int cpu) unsigned int rev; int cpu_num = raw_smp_processor_id(); struct ucode_cpu_info *uci = ucode_cpu_info + cpu_num; + unsigned long addr; /* We should bind the task to the CPU */ BUG_ON(cpu_num != cpu); @@ -215,10 +216,9 @@ static void apply_microcode_amd(int cpu) spin_lock_irqsave(µcode_update_lock, flags); - edx = (unsigned int)(((unsigned long) - &(uci->mc.mc_amd->hdr.data_code)) >> 32); - eax = (unsigned int)(((unsigned long) - &(uci->mc.mc_amd->hdr.data_code)) & 0xffffffffL); + addr = (unsigned long)&uci->mc.mc_amd->hdr.data_code; + edx = (unsigned int)(((unsigned long)upper_32_bits(addr))); + eax = (unsigned int)(((unsigned long)lower_32_bits(addr))); asm volatile("movl %0, %%ecx; wrmsr" : : "i" (0xc0010020), "a" (eax), "d" (edx) : "ecx"); -- cgit v1.2.3 From 94581094e774402a11887719bac10505236c2d51 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Tue, 19 Aug 2008 16:32:39 +0200 Subject: x86: add alloc_coherent dma_ops callback to GART driver [ v2 - x86: make gart_alloc_coherent return zeroed memory FUJITA Tomonori pointed it out that the dma_alloc_coherent function should return memory set to zero. This patch adds this to the GART implementation too. ] Signed-off-by: Joerg Roedel Signed-off-by: Ingo Molnar --- arch/x86/kernel/pci-gart_64.c | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c index 49285f8fd4d..076e64b2d4f 100644 --- a/arch/x86/kernel/pci-gart_64.c +++ b/arch/x86/kernel/pci-gart_64.c @@ -499,6 +499,26 @@ error: return 0; } +/* allocate and map a coherent mapping */ +static void * +gart_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_addr, + gfp_t flag) +{ + void *vaddr; + + vaddr = (void *)__get_free_pages(flag | __GFP_ZERO, get_order(size)); + if (!vaddr) + return NULL; + + *dma_addr = gart_map_single(dev, __pa(vaddr), size, DMA_BIDIRECTIONAL); + if (*dma_addr != bad_dma_address) + return vaddr; + + free_pages((unsigned long)vaddr, get_order(size)); + + return NULL; +} + static int no_agp; static __init unsigned long check_iommu_size(unsigned long aper, u64 aper_size) @@ -701,6 +721,7 @@ static struct dma_mapping_ops gart_dma_ops = { .sync_sg_for_device = NULL, .map_sg = gart_map_sg, .unmap_sg = gart_unmap_sg, + .alloc_coherent = gart_alloc_coherent, }; void gart_iommu_shutdown(void) -- cgit v1.2.3 From 43a5a5a09b8cfd56047706cf904718d073ccfd33 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Tue, 19 Aug 2008 16:32:40 +0200 Subject: x86: add free_coherent dma_ops callback to GART driver Signed-off-by: Joerg Roedel Signed-off-by: Ingo Molnar --- arch/x86/kernel/pci-gart_64.c | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c index 076e64b2d4f..ef753e23358 100644 --- a/arch/x86/kernel/pci-gart_64.c +++ b/arch/x86/kernel/pci-gart_64.c @@ -519,6 +519,15 @@ gart_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_addr, return NULL; } +/* free a coherent mapping */ +static void +gart_free_coherent(struct device *dev, size_t size, void *vaddr, + dma_addr_t dma_addr) +{ + gart_unmap_single(dev, dma_addr, size, DMA_BIDIRECTIONAL); + free_pages((unsigned long)vaddr, get_order(size)); +} + static int no_agp; static __init unsigned long check_iommu_size(unsigned long aper, u64 aper_size) @@ -722,6 +731,7 @@ static struct dma_mapping_ops gart_dma_ops = { .map_sg = gart_map_sg, .unmap_sg = gart_unmap_sg, .alloc_coherent = gart_alloc_coherent, + .free_coherent = gart_free_coherent, }; void gart_iommu_shutdown(void) -- cgit v1.2.3 From e4ad68b651f22fa71820c0b30bb2807999b2b49f Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Tue, 19 Aug 2008 16:32:41 +0200 Subject: x86: add free_coherent dma_ops callback to Calgary IOMMU driver Signed-off-by: Joerg Roedel Acked-by: Muli Ben-Yehuda Signed-off-by: Ingo Molnar --- arch/x86/kernel/pci-calgary_64.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/pci-calgary_64.c b/arch/x86/kernel/pci-calgary_64.c index 218d783ed7a..afb020fdb19 100644 --- a/arch/x86/kernel/pci-calgary_64.c +++ b/arch/x86/kernel/pci-calgary_64.c @@ -511,8 +511,22 @@ error: return ret; } +static void calgary_free_coherent(struct device *dev, size_t size, + void *vaddr, dma_addr_t dma_handle) +{ + unsigned int npages; + struct iommu_table *tbl = find_iommu_table(dev); + + size = PAGE_ALIGN(size); + npages = size >> PAGE_SHIFT; + + iommu_free(tbl, dma_handle, npages); + free_pages((unsigned long)vaddr, get_order(size)); +} + static struct dma_mapping_ops calgary_dma_ops = { .alloc_coherent = calgary_alloc_coherent, + .free_coherent = calgary_free_coherent, .map_single = calgary_map_single, .unmap_single = calgary_unmap_single, .map_sg = calgary_map_sg, -- cgit v1.2.3 From c5e835f9641e9fcb95d1afb24906821e98b2c6a8 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Tue, 19 Aug 2008 16:32:42 +0200 Subject: x86: add alloc_coherent dma_ops callback to NOMMU driver Signed-off-by: Joerg Roedel Signed-off-by: Ingo Molnar --- arch/x86/kernel/pci-nommu.c | 55 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 55 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/pci-nommu.c b/arch/x86/kernel/pci-nommu.c index 3f91f71cdc3..b8ce83c9821 100644 --- a/arch/x86/kernel/pci-nommu.c +++ b/arch/x86/kernel/pci-nommu.c @@ -72,7 +72,62 @@ static int nommu_map_sg(struct device *hwdev, struct scatterlist *sg, return nents; } +static void * +nommu_alloc_coherent(struct device *hwdev, size_t size, + dma_addr_t *dma_addr, gfp_t gfp) +{ + unsigned long dma_mask; + int node; + struct page *page; + + if (hwdev->dma_mask == NULL) + return NULL; + + gfp &= ~(__GFP_DMA | __GFP_HIGHMEM | __GFP_DMA32); + gfp |= __GFP_ZERO; + + dma_mask = hwdev->coherent_dma_mask; + if (!dma_mask) + dma_mask = *(hwdev->dma_mask); + + if (dma_mask < DMA_24BIT_MASK) + return NULL; + + node = dev_to_node(hwdev); + +#ifdef CONFIG_X86_64 + if (dma_mask <= DMA_32BIT_MASK) + gfp |= GFP_DMA32; +#endif + + /* No alloc-free penalty for ISA devices */ + if (dma_mask == DMA_24BIT_MASK) + gfp |= GFP_DMA; + +again: + page = alloc_pages_node(node, gfp, get_order(size)); + if (!page) + return NULL; + + if ((page_to_phys(page) + size > dma_mask) && !(gfp & GFP_DMA)) { + free_pages((unsigned long)page_address(page), get_order(size)); + gfp |= GFP_DMA; + goto again; + } + + *dma_addr = page_to_phys(page); + if (check_addr("alloc_coherent", hwdev, *dma_addr, size)) { + flush_write_buffers(); + return page_address(page); + } + + free_pages((unsigned long)page_address(page), get_order(size)); + + return NULL; +} + struct dma_mapping_ops nommu_dma_ops = { + .alloc_coherent = nommu_alloc_coherent, .map_single = nommu_map_single, .map_sg = nommu_map_sg, .is_phys = 1, -- cgit v1.2.3 From a3a76532e0caa093c279806d8fe8608232538af0 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Tue, 19 Aug 2008 16:32:43 +0200 Subject: x86: add free_coherent dma_ops callback to NOMMU driver Signed-off-by: Joerg Roedel Signed-off-by: Ingo Molnar --- arch/x86/kernel/pci-nommu.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/pci-nommu.c b/arch/x86/kernel/pci-nommu.c index b8ce83c9821..73853d3fdca 100644 --- a/arch/x86/kernel/pci-nommu.c +++ b/arch/x86/kernel/pci-nommu.c @@ -126,8 +126,15 @@ again: return NULL; } +static void nommu_free_coherent(struct device *dev, size_t size, void *vaddr, + dma_addr_t dma_addr) +{ + free_pages((unsigned long)vaddr, get_order(size)); +} + struct dma_mapping_ops nommu_dma_ops = { .alloc_coherent = nommu_alloc_coherent, + .free_coherent = nommu_free_coherent, .map_single = nommu_map_single, .map_sg = nommu_map_sg, .is_phys = 1, -- cgit v1.2.3 From c647c3bb2d16246a87f49035985ddb7c1eb030df Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Tue, 19 Aug 2008 16:32:44 +0200 Subject: x86: cleanup dma_*_coherent functions All dma_ops implementations support the alloc_coherent and free_coherent callbacks now. This allows a big simplification of the dma_alloc_coherent function which is done with this patch. The dma_free_coherent functions is also cleaned up and calls now the free_coherent callback of the dma_ops implementation. Signed-off-by: Joerg Roedel Signed-off-by: Ingo Molnar --- arch/x86/kernel/pci-dma.c | 121 +++++----------------------------------------- 1 file changed, 12 insertions(+), 109 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index 87d4d6964ec..613332b26e3 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c @@ -241,33 +241,15 @@ int dma_supported(struct device *dev, u64 mask) } EXPORT_SYMBOL(dma_supported); -/* Allocate DMA memory on node near device */ -static noinline struct page * -dma_alloc_pages(struct device *dev, gfp_t gfp, unsigned order) -{ - int node; - - node = dev_to_node(dev); - - return alloc_pages_node(node, gfp, order); -} - /* * Allocate memory for a coherent mapping. */ -void * + void * dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t gfp) { struct dma_mapping_ops *ops = get_dma_ops(dev); - void *memory = NULL; - struct page *page; - unsigned long dma_mask = 0; - dma_addr_t bus; - int noretry = 0; - - /* ignore region specifiers */ - gfp &= ~(__GFP_DMA | __GFP_HIGHMEM | __GFP_DMA32); + void *memory; if (dma_alloc_from_coherent(dev, size, dma_handle, &memory)) return memory; @@ -276,89 +258,10 @@ dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, dev = &fallback_dev; gfp |= GFP_DMA; } - dma_mask = dev->coherent_dma_mask; - if (dma_mask == 0) - dma_mask = (gfp & GFP_DMA) ? DMA_24BIT_MASK : DMA_32BIT_MASK; - - /* Device not DMA able */ - if (dev->dma_mask == NULL) - return NULL; - - /* Don't invoke OOM killer or retry in lower 16MB DMA zone */ - if (gfp & __GFP_DMA) - noretry = 1; - -#ifdef CONFIG_X86_64 - /* Why <=? Even when the mask is smaller than 4GB it is often - larger than 16MB and in this case we have a chance of - finding fitting memory in the next higher zone first. If - not retry with true GFP_DMA. -AK */ - if (dma_mask <= DMA_32BIT_MASK && !(gfp & GFP_DMA)) { - gfp |= GFP_DMA32; - if (dma_mask < DMA_32BIT_MASK) - noretry = 1; - } -#endif - again: - page = dma_alloc_pages(dev, - noretry ? gfp | __GFP_NORETRY : gfp, get_order(size)); - if (page == NULL) - return NULL; - - { - int high, mmu; - bus = page_to_phys(page); - memory = page_address(page); - high = (bus + size) >= dma_mask; - mmu = high; - if (force_iommu && !(gfp & GFP_DMA)) - mmu = 1; - else if (high) { - free_pages((unsigned long)memory, - get_order(size)); - - /* Don't use the 16MB ZONE_DMA unless absolutely - needed. It's better to use remapping first. */ - if (dma_mask < DMA_32BIT_MASK && !(gfp & GFP_DMA)) { - gfp = (gfp & ~GFP_DMA32) | GFP_DMA; - goto again; - } - - /* Let low level make its own zone decisions */ - gfp &= ~(GFP_DMA32|GFP_DMA); - - if (ops->alloc_coherent) - return ops->alloc_coherent(dev, size, - dma_handle, gfp); - return NULL; - } - - memset(memory, 0, size); - if (!mmu) { - *dma_handle = bus; - return memory; - } - } - - if (ops->alloc_coherent) { - free_pages((unsigned long)memory, get_order(size)); - gfp &= ~(GFP_DMA|GFP_DMA32); - return ops->alloc_coherent(dev, size, dma_handle, gfp); - } - - if (ops->map_simple) { - *dma_handle = ops->map_simple(dev, virt_to_phys(memory), - size, - PCI_DMA_BIDIRECTIONAL); - if (*dma_handle != bad_dma_address) - return memory; - } - - if (panic_on_overflow) - panic("dma_alloc_coherent: IOMMU overflow by %lu bytes\n", - (unsigned long)size); - free_pages((unsigned long)memory, get_order(size)); + if (ops->alloc_coherent) + return ops->alloc_coherent(dev, size, + dma_handle, gfp); return NULL; } EXPORT_SYMBOL(dma_alloc_coherent); @@ -368,17 +271,17 @@ EXPORT_SYMBOL(dma_alloc_coherent); * The caller must ensure that the device has finished accessing the mapping. */ void dma_free_coherent(struct device *dev, size_t size, - void *vaddr, dma_addr_t bus) + void *vaddr, dma_addr_t bus) { struct dma_mapping_ops *ops = get_dma_ops(dev); - int order = get_order(size); - WARN_ON(irqs_disabled()); /* for portability */ - if (dma_release_from_coherent(dev, order, vaddr)) + WARN_ON(irqs_disabled()); /* for portability */ + + if (dma_release_from_coherent(dev, get_order(size), vaddr)) return; - if (ops->unmap_single) - ops->unmap_single(dev, bus, size, 0); - free_pages((unsigned long)vaddr, order); + + if (ops->free_coherent) + ops->free_coherent(dev, size, vaddr, bus); } EXPORT_SYMBOL(dma_free_coherent); -- cgit v1.2.3 From 6c505ce3930c6a6b455cda53fab3e88ae44f8221 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Tue, 19 Aug 2008 16:32:45 +0200 Subject: x86: move dma_*_coherent functions to include file All the x86 DMA-API functions are defined in asm/dma-mapping.h. This patch moves the dma_*_coherent functions also to this header file because they are now small enough to do so. This is done as a separate patch because it also includes some renaming and restructuring of the dma-mapping.h file. Signed-off-by: Joerg Roedel Signed-off-by: Ingo Molnar --- arch/x86/kernel/pci-dma.c | 49 +++---------------------------------------- arch/x86/kernel/pci-gart_64.c | 4 ++-- 2 files changed, 5 insertions(+), 48 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index 613332b26e3..0a1408abcc6 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c @@ -41,11 +41,12 @@ EXPORT_SYMBOL(bad_dma_address); /* Dummy device used for NULL arguments (normally ISA). Better would be probably a smaller DMA mask, but this is bug-to-bug compatible to older i386. */ -struct device fallback_dev = { +struct device x86_dma_fallback_dev = { .bus_id = "fallback device", .coherent_dma_mask = DMA_32BIT_MASK, - .dma_mask = &fallback_dev.coherent_dma_mask, + .dma_mask = &x86_dma_fallback_dev.coherent_dma_mask, }; +EXPORT_SYMBOL(x86_dma_fallback_dev); int dma_set_mask(struct device *dev, u64 mask) { @@ -241,50 +242,6 @@ int dma_supported(struct device *dev, u64 mask) } EXPORT_SYMBOL(dma_supported); -/* - * Allocate memory for a coherent mapping. - */ - void * -dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, - gfp_t gfp) -{ - struct dma_mapping_ops *ops = get_dma_ops(dev); - void *memory; - - if (dma_alloc_from_coherent(dev, size, dma_handle, &memory)) - return memory; - - if (!dev) { - dev = &fallback_dev; - gfp |= GFP_DMA; - } - - if (ops->alloc_coherent) - return ops->alloc_coherent(dev, size, - dma_handle, gfp); - return NULL; -} -EXPORT_SYMBOL(dma_alloc_coherent); - -/* - * Unmap coherent memory. - * The caller must ensure that the device has finished accessing the mapping. - */ -void dma_free_coherent(struct device *dev, size_t size, - void *vaddr, dma_addr_t bus) -{ - struct dma_mapping_ops *ops = get_dma_ops(dev); - - WARN_ON(irqs_disabled()); /* for portability */ - - if (dma_release_from_coherent(dev, get_order(size), vaddr)) - return; - - if (ops->free_coherent) - ops->free_coherent(dev, size, vaddr, bus); -} -EXPORT_SYMBOL(dma_free_coherent); - static int __init pci_iommu_init(void) { calgary_iommu_init(); diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c index ef753e23358..e81df25dc06 100644 --- a/arch/x86/kernel/pci-gart_64.c +++ b/arch/x86/kernel/pci-gart_64.c @@ -276,7 +276,7 @@ gart_map_single(struct device *dev, phys_addr_t paddr, size_t size, int dir) unsigned long bus; if (!dev) - dev = &fallback_dev; + dev = &x86_dma_fallback_dev; if (!need_iommu(dev, paddr, size)) return paddr; @@ -427,7 +427,7 @@ gart_map_sg(struct device *dev, struct scatterlist *sg, int nents, int dir) return 0; if (!dev) - dev = &fallback_dev; + dev = &x86_dma_fallback_dev; out = 0; start = 0; -- cgit v1.2.3 From 2cd54961caff9fe9109807c6603a0af0729b9591 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Tue, 19 Aug 2008 16:32:46 +0200 Subject: x86, AMD IOMMU: remove obsolete FIXME comment Signed-off-by: Joerg Roedel Signed-off-by: Ingo Molnar --- arch/x86/kernel/amd_iommu.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index de39e1f2ede..d15081c3823 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -1038,8 +1038,6 @@ out: /* * The exported free_coherent function for dma_ops. - * FIXME: fix the generic x86 DMA layer so that it actually calls that - * function. */ static void free_coherent(struct device *dev, size_t size, void *virt_addr, dma_addr_t dma_addr) -- cgit v1.2.3 From 766af9fa812f49feb4a3e62cf92f3d37f33c7fb6 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Fri, 22 Aug 2008 00:12:09 +0900 Subject: dma-mapping.h, x86: remove last user of dma_mapping_ops->map_simple pci-dma.c doesn't use map_simple hook any more so we can remove it from struct dma_mapping_ops now. Signed-off-by: FUJITA Tomonori Signed-off-by: Ingo Molnar --- arch/x86/kernel/pci-gart_64.c | 1 - 1 file changed, 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c index e81df25dc06..cfd7ec25e37 100644 --- a/arch/x86/kernel/pci-gart_64.c +++ b/arch/x86/kernel/pci-gart_64.c @@ -720,7 +720,6 @@ extern int agp_amd64_init(void); static struct dma_mapping_ops gart_dma_ops = { .map_single = gart_map_single, - .map_simple = gart_map_simple, .unmap_single = gart_unmap_single, .sync_single_for_cpu = NULL, .sync_single_for_device = NULL, -- cgit v1.2.3 From 421076e2bec1b917bdcf83da35b24f6349bf35db Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Fri, 22 Aug 2008 16:29:10 +0900 Subject: x86: dma_*_coherent rework patchset v2, fix alloc_coherent dma_ops callback was added to GART, however, it doesn't return a size aligned address wrt dma_alloc_coherent, as DMA-mapping.txt defines. This patch fixes it. This patch also removes unused gart_map_simple (dma_mapping_ops->map_simple has gone). Signed-off-by: FUJITA Tomonori Signed-off-by: Ingo Molnar --- arch/x86/kernel/pci-gart_64.c | 25 ++++++++++--------------- 1 file changed, 10 insertions(+), 15 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c index 338c4f24155..4d0864900b8 100644 --- a/arch/x86/kernel/pci-gart_64.c +++ b/arch/x86/kernel/pci-gart_64.c @@ -261,20 +261,6 @@ static dma_addr_t dma_map_area(struct device *dev, dma_addr_t phys_mem, return iommu_bus_base + iommu_page*PAGE_SIZE + (phys_mem & ~PAGE_MASK); } -static dma_addr_t -gart_map_simple(struct device *dev, phys_addr_t paddr, size_t size, int dir) -{ - dma_addr_t map; - unsigned long align_mask; - - align_mask = (1UL << get_order(size)) - 1; - map = dma_map_area(dev, paddr, size, dir, align_mask); - - flush_gart(); - - return map; -} - /* Map a single area into the IOMMU */ static dma_addr_t gart_map_single(struct device *dev, phys_addr_t paddr, size_t size, int dir) @@ -512,12 +498,21 @@ gart_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_addr, gfp_t flag) { void *vaddr; + unsigned long align_mask; vaddr = (void *)__get_free_pages(flag | __GFP_ZERO, get_order(size)); if (!vaddr) return NULL; - *dma_addr = gart_map_single(dev, __pa(vaddr), size, DMA_BIDIRECTIONAL); + align_mask = (1UL << get_order(size)) - 1; + + if (!dev) + dev = &x86_dma_fallback_dev; + + *dma_addr = dma_map_area(dev, __pa(vaddr), size, DMA_BIDIRECTIONAL, + align_mask); + flush_gart(); + if (*dma_addr != bad_dma_address) return vaddr; -- cgit v1.2.3 From b05f78f5c713eda2c34e495d92495ee4f1c3b5e1 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Fri, 22 Aug 2008 01:32:50 -0700 Subject: x86_64: printout msr -v2 commandline show_msr=1 for bsp, show_msr=32 for all 32 cpus. [ mingo@elte.hu: added documentation ] Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/common_64.c | 51 +++++++++++++++++++++++++++++++++++++++++ arch/x86/kernel/paravirt.c | 1 + 2 files changed, 52 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/common_64.c b/arch/x86/kernel/cpu/common_64.c index dd6e3f15017..bca2d6980e8 100644 --- a/arch/x86/kernel/cpu/common_64.c +++ b/arch/x86/kernel/cpu/common_64.c @@ -394,6 +394,49 @@ static __init int setup_noclflush(char *arg) } __setup("noclflush", setup_noclflush); +struct msr_range { + unsigned min; + unsigned max; +}; + +static struct msr_range msr_range_array[] __cpuinitdata = { + { 0x00000000, 0x00000418}, + { 0xc0000000, 0xc000040b}, + { 0xc0010000, 0xc0010142}, + { 0xc0011000, 0xc001103b}, +}; + +static void __cpuinit print_cpu_msr(void) +{ + unsigned index; + u64 val; + int i; + unsigned index_min, index_max; + + for (i = 0; i < ARRAY_SIZE(msr_range_array); i++) { + index_min = msr_range_array[i].min; + index_max = msr_range_array[i].max; + for (index = index_min; index < index_max; index++) { + if (rdmsrl_amd_safe(index, &val)) + continue; + printk(KERN_INFO " MSR%08x: %016llx\n", index, val); + } + } +} + +static int show_msr __cpuinitdata; +static __init int setup_show_msr(char *arg) +{ + int num; + + get_option(&arg, &num); + + if (num > 0) + show_msr = num; + return 1; +} +__setup("show_msr=", setup_show_msr); + void __cpuinit print_cpu_info(struct cpuinfo_x86 *c) { if (c->x86_model_id[0]) @@ -403,6 +446,14 @@ void __cpuinit print_cpu_info(struct cpuinfo_x86 *c) printk(KERN_CONT " stepping %02x\n", c->x86_mask); else printk(KERN_CONT "\n"); + +#ifdef CONFIG_SMP + if (c->cpu_index < show_msr) + print_cpu_msr(); +#else + if (show_msr) + print_cpu_msr(); +#endif } static __init int setup_disablecpuid(char *arg) diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index 94da4d52d79..c6044682e1e 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c @@ -330,6 +330,7 @@ struct pv_cpu_ops pv_cpu_ops = { #endif .wbinvd = native_wbinvd, .read_msr = native_read_msr_safe, + .read_msr_amd = native_read_msr_amd_safe, .write_msr = native_write_msr_safe, .read_tsc = native_read_tsc, .read_pmc = native_read_pmc, -- cgit v1.2.3 From 25258ef762bc4a05fa9c4523f7dae56e3fd01864 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Wed, 20 Aug 2008 11:31:07 -0700 Subject: x86: export pv_lock_ops non-GPL None of the spinlock API is exported GPL, so there's no reason for pv_lock_ops to be. Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Ingo Molnar Cc: drago01 --- arch/x86/kernel/paravirt-spinlocks.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/paravirt-spinlocks.c b/arch/x86/kernel/paravirt-spinlocks.c index 206359a8e43..0e9f1982b1d 100644 --- a/arch/x86/kernel/paravirt-spinlocks.c +++ b/arch/x86/kernel/paravirt-spinlocks.c @@ -23,7 +23,7 @@ struct pv_lock_ops pv_lock_ops = { .spin_unlock = __ticket_spin_unlock, #endif }; -EXPORT_SYMBOL_GPL(pv_lock_ops); +EXPORT_SYMBOL(pv_lock_ops); void __init paravirt_use_bytelocks(void) { -- cgit v1.2.3 From bbb65d2d365efe9951290e61678dcf81ec60add4 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Sat, 23 Aug 2008 17:47:10 +0200 Subject: x86: use cpuid vector 0xb when available for detecting cpu topology cpuid leaf 0xb provides extended topology enumeration. This interface provides the 32-bit x2APIC id of the logical processor and it also provides a new mechanism to detect SMT and core siblings (which provides increased addressability). Signed-off-by: Suresh Siddha Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/addon_cpuid_features.c | 86 ++++++++++++++++++++++++++++++ arch/x86/kernel/cpu/common_64.c | 3 ++ arch/x86/kernel/cpu/intel.c | 13 +++-- arch/x86/kernel/cpu/intel_64.c | 5 +- 4 files changed, 103 insertions(+), 4 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/addon_cpuid_features.c b/arch/x86/kernel/cpu/addon_cpuid_features.c index 84a8220a607..aa9641ae670 100644 --- a/arch/x86/kernel/cpu/addon_cpuid_features.c +++ b/arch/x86/kernel/cpu/addon_cpuid_features.c @@ -7,6 +7,8 @@ #include #include +#include + struct cpuid_bit { u16 feature; u8 reg; @@ -48,6 +50,90 @@ void __cpuinit init_scattered_cpuid_features(struct cpuinfo_x86 *c) } } +/* leaf 0xb SMT level */ +#define SMT_LEVEL 0 + +/* leaf 0xb sub-leaf types */ +#define INVALID_TYPE 0 +#define SMT_TYPE 1 +#define CORE_TYPE 2 + +#define LEAFB_SUBTYPE(ecx) (((ecx) >> 8) & 0xff) +#define BITS_SHIFT_NEXT_LEVEL(eax) ((eax) & 0x1f) +#define LEVEL_MAX_SIBLINGS(ebx) ((ebx) & 0xffff) + +/* + * Check for extended topology enumeration cpuid leaf 0xb and if it + * exists, use it for populating initial_apicid and cpu topology + * detection. + */ +void __cpuinit detect_extended_topology(struct cpuinfo_x86 *c) +{ + unsigned int eax, ebx, ecx, edx, sub_index; + unsigned int ht_mask_width, core_plus_mask_width; + unsigned int core_select_mask, core_level_siblings; + + if (c->cpuid_level < 0xb) + return; + + cpuid_count(0xb, SMT_LEVEL, &eax, &ebx, &ecx, &edx); + + /* + * check if the cpuid leaf 0xb is actually implemented. + */ + if (ebx == 0 || (LEAFB_SUBTYPE(ecx) != SMT_TYPE)) + return; + + set_cpu_cap(c, X86_FEATURE_XTOPOLOGY); + + /* + * initial apic id, which also represents 32-bit extended x2apic id. + */ + c->initial_apicid = edx; + + /* + * Populate HT related information from sub-leaf level 0. + */ + core_level_siblings = smp_num_siblings = LEVEL_MAX_SIBLINGS(ebx); + core_plus_mask_width = ht_mask_width = BITS_SHIFT_NEXT_LEVEL(eax); + + sub_index = 1; + do { + cpuid_count(0xb, sub_index, &eax, &ebx, &ecx, &edx); + + /* + * Check for the Core type in the implemented sub leaves. + */ + if (LEAFB_SUBTYPE(ecx) == CORE_TYPE) { + core_level_siblings = LEVEL_MAX_SIBLINGS(ebx); + core_plus_mask_width = BITS_SHIFT_NEXT_LEVEL(eax); + break; + } + + sub_index++; + } while (LEAFB_SUBTYPE(ecx) != INVALID_TYPE); + + core_select_mask = (~(-1 << core_plus_mask_width)) >> ht_mask_width; + +#ifdef CONFIG_X86_32 + c->cpu_core_id = phys_pkg_id(c->initial_apicid, ht_mask_width) + & core_select_mask; + c->phys_proc_id = phys_pkg_id(c->initial_apicid, core_plus_mask_width); +#else + c->cpu_core_id = phys_pkg_id(ht_mask_width) & core_select_mask; + c->phys_proc_id = phys_pkg_id(core_plus_mask_width); +#endif + c->x86_max_cores = (core_level_siblings / smp_num_siblings); + + + printk(KERN_INFO "CPU: Physical Processor ID: %d\n", + c->phys_proc_id); + if (c->x86_max_cores > 1) + printk(KERN_INFO "CPU: Processor Core ID: %d\n", + c->cpu_core_id); + return; +} + #ifdef CONFIG_X86_PAT void __cpuinit validate_pat_support(struct cpuinfo_x86 *c) { diff --git a/arch/x86/kernel/cpu/common_64.c b/arch/x86/kernel/cpu/common_64.c index af569a964e7..a2888c7b56a 100644 --- a/arch/x86/kernel/cpu/common_64.c +++ b/arch/x86/kernel/cpu/common_64.c @@ -128,6 +128,9 @@ void __cpuinit detect_ht(struct cpuinfo_x86 *c) u32 eax, ebx, ecx, edx; int index_msb, core_bits; + if (cpu_has(c, X86_FEATURE_XTOPOLOGY)) + return; + cpuid(1, &eax, &ebx, &ecx, &edx); diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 77618c717d7..58a6f1a0b29 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -176,9 +176,16 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c) if (p) strcpy(c->x86_model_id, p); - c->x86_max_cores = num_cpu_cores(c); - - detect_ht(c); + detect_extended_topology(c); + + if (!cpu_has(c, X86_FEATURE_XTOPOLOGY)) { + /* + * let's use the legacy cpuid vector 0x1 and 0x4 for topology + * detection. + */ + c->x86_max_cores = num_cpu_cores(c); + detect_ht(c); + } /* Work around errata */ Intel_errata_workarounds(c); diff --git a/arch/x86/kernel/cpu/intel_64.c b/arch/x86/kernel/cpu/intel_64.c index 1019c58d39f..42d501a8c41 100644 --- a/arch/x86/kernel/cpu/intel_64.c +++ b/arch/x86/kernel/cpu/intel_64.c @@ -80,7 +80,10 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c) if (c->x86 == 6) set_cpu_cap(c, X86_FEATURE_REP_GOOD); set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC); - c->x86_max_cores = intel_num_cpu_cores(c); + + detect_extended_topology(c); + if (!cpu_has(c, X86_FEATURE_XTOPOLOGY)) + c->x86_max_cores = intel_num_cpu_cores(c); srat_detect_node(); } -- cgit v1.2.3 From e17941b0c140562d92e5a3bc12b4ad88281c7926 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Sat, 23 Aug 2008 17:47:11 +0200 Subject: x86: use x2apic id reported by cpuid during topology discovery use x2apic id reported by cpuid during topology discovery, instead of the apic id configured in the APIC. For most of the systems, x2apic id reported by cpuid leaf 0xb will be same as the physical apic id reported by the APIC_ID register of the APIC. We follow the suggested guidelines and use the apic id reported by the cpuid. No change to non-generic UV platforms, will use the apic id reported in the APIC_ID register as the cpuid reported apic id's may not be unique. Signed-off-by: Suresh Siddha Signed-off-by: Ingo Molnar --- arch/x86/kernel/genx2apic_cluster.c | 7 +------ arch/x86/kernel/genx2apic_phys.c | 7 +------ 2 files changed, 2 insertions(+), 12 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/genx2apic_cluster.c b/arch/x86/kernel/genx2apic_cluster.c index ef3f3182d50..0bf0a8624b8 100644 --- a/arch/x86/kernel/genx2apic_cluster.c +++ b/arch/x86/kernel/genx2apic_cluster.c @@ -120,14 +120,9 @@ static unsigned long set_apic_id(unsigned int id) return x; } -static unsigned int x2apic_read_id(void) -{ - return apic_read(APIC_ID); -} - static unsigned int phys_pkg_id(int index_msb) { - return x2apic_read_id() >> index_msb; + return current_cpu_data.initial_apicid >> index_msb; } static void x2apic_send_IPI_self(int vector) diff --git a/arch/x86/kernel/genx2apic_phys.c b/arch/x86/kernel/genx2apic_phys.c index 3229c68aedd..e2401ab0c46 100644 --- a/arch/x86/kernel/genx2apic_phys.c +++ b/arch/x86/kernel/genx2apic_phys.c @@ -118,14 +118,9 @@ static unsigned long set_apic_id(unsigned int id) return x; } -static unsigned int x2apic_read_id(void) -{ - return apic_read(APIC_ID); -} - static unsigned int phys_pkg_id(int index_msb) { - return x2apic_read_id() >> index_msb; + return current_cpu_data.initial_apicid >> index_msb; } void x2apic_send_IPI_self(int vector) -- cgit v1.2.3 From 93be71b672f167b1e8c23725114f86305354f0ac Mon Sep 17 00:00:00 2001 From: Alex Nixon Date: Fri, 22 Aug 2008 11:52:11 +0100 Subject: x86: add cpu hotplug hooks into smp_ops Signed-off-by: Alex Nixon Acked-by: Jeremy Fitzhardinge Signed-off-by: Ingo Molnar --- arch/x86/kernel/process_32.c | 4 ++-- arch/x86/kernel/process_64.c | 4 ++-- arch/x86/kernel/smp.c | 6 +++++- arch/x86/kernel/smpboot.c | 8 ++++---- 4 files changed, 13 insertions(+), 9 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 3b7a1ddcc0b..e382fe0ccd6 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -91,7 +91,7 @@ static void cpu_exit_clear(void) } /* We don't actually take CPU down, just spin without interrupts. */ -static inline void play_dead(void) +void native_play_dead(void) { /* This must be done before dead CPU ack */ cpu_exit_clear(); @@ -107,7 +107,7 @@ static inline void play_dead(void) wbinvd_halt(); } #else -static inline void play_dead(void) +void native_play_dead(void) { BUG(); } diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 71553b664e2..dfd3f575208 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -90,7 +90,7 @@ DECLARE_PER_CPU(int, cpu_state); #include /* We halt the CPU with physical CPU hotplug */ -static inline void play_dead(void) +void native_play_dead(void) { idle_task_exit(); mb(); @@ -102,7 +102,7 @@ static inline void play_dead(void) wbinvd_halt(); } #else -static inline void play_dead(void) +void native_play_dead(void) { BUG(); } diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c index 361b7a4c640..18f9b19f5f8 100644 --- a/arch/x86/kernel/smp.c +++ b/arch/x86/kernel/smp.c @@ -214,12 +214,16 @@ void smp_call_function_single_interrupt(struct pt_regs *regs) struct smp_ops smp_ops = { .smp_prepare_boot_cpu = native_smp_prepare_boot_cpu, .smp_prepare_cpus = native_smp_prepare_cpus, - .cpu_up = native_cpu_up, .smp_cpus_done = native_smp_cpus_done, .smp_send_stop = native_smp_send_stop, .smp_send_reschedule = native_smp_send_reschedule, + .cpu_up = native_cpu_up, + .cpu_die = native_cpu_die, + .cpu_disable = native_cpu_disable, + .play_dead = native_play_dead, + .send_call_func_ipi = native_send_call_func_ipi, .send_call_func_single_ipi = native_send_call_func_single_ipi, }; diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 7985c5b3f91..c414cee296b 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -1346,7 +1346,7 @@ static void __ref remove_cpu_from_maps(int cpu) numa_remove_cpu(cpu); } -int __cpu_disable(void) +int native_cpu_disable(void) { int cpu = smp_processor_id(); @@ -1385,7 +1385,7 @@ int __cpu_disable(void) return 0; } -void __cpu_die(unsigned int cpu) +void native_cpu_die(unsigned int cpu) { /* We don't do anything here: idle task is faking death itself. */ unsigned int i; @@ -1403,12 +1403,12 @@ void __cpu_die(unsigned int cpu) printk(KERN_ERR "CPU %u didn't die...\n", cpu); } #else /* ... !CONFIG_HOTPLUG_CPU */ -int __cpu_disable(void) +int native_cpu_disable(void) { return -ENOSYS; } -void __cpu_die(unsigned int cpu) +void native_cpu_die(unsigned int cpu) { /* We said "no" in __cpu_disable */ BUG(); -- cgit v1.2.3 From 379002586368ae22916f668011c9118c8ce8189c Mon Sep 17 00:00:00 2001 From: Alex Nixon Date: Fri, 22 Aug 2008 11:52:12 +0100 Subject: x86_32: clean up play_dead The removal of the CPU from the various maps was redundant as it already happened in cpu_disable. After cleaning this up, cpu_uninit only resets the tlb state, so rename it and create a noop version for the X86_64 case (so the two play_deads can be unified later). Signed-off-by: Alex Nixon Acked-by: Jeremy Fitzhardinge Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/common.c | 6 +----- arch/x86/kernel/process_32.c | 17 ++++------------- 2 files changed, 5 insertions(+), 18 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 80ab20d4fa3..18f5551b32d 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -714,14 +714,10 @@ void __cpuinit cpu_init(void) mxcsr_feature_mask_init(); } -#ifdef CONFIG_HOTPLUG_CPU -void __cpuinit cpu_uninit(void) +void reset_lazy_tlbstate(void) { int cpu = raw_smp_processor_id(); - cpu_clear(cpu, cpu_initialized); - /* lazy TLB state */ per_cpu(cpu_tlbstate, cpu).state = 0; per_cpu(cpu_tlbstate, cpu).active_mm = &init_mm; } -#endif diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index e382fe0ccd6..d55eb49584b 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -75,26 +75,17 @@ unsigned long thread_saved_pc(struct task_struct *tsk) #ifdef CONFIG_HOTPLUG_CPU #include -static void cpu_exit_clear(void) +/* We don't actually take CPU down, just spin without interrupts. */ +void native_play_dead(void) { int cpu = raw_smp_processor_id(); idle_task_exit(); - cpu_uninit(); - irq_ctx_exit(cpu); + reset_lazy_tlbstate(); - cpu_clear(cpu, cpu_callout_map); - cpu_clear(cpu, cpu_callin_map); - - numa_remove_cpu(cpu); -} + irq_ctx_exit(cpu); -/* We don't actually take CPU down, just spin without interrupts. */ -void native_play_dead(void) -{ - /* This must be done before dead CPU ack */ - cpu_exit_clear(); mb(); /* Ack it */ __get_cpu_var(cpu_state) = CPU_DEAD; -- cgit v1.2.3 From a21f5d88c17a40941f6239d1959d89e8493e8e01 Mon Sep 17 00:00:00 2001 From: Alex Nixon Date: Fri, 22 Aug 2008 11:52:13 +0100 Subject: x86: unify x86_32 and x86_64 play_dead into one function Add the new play_dead into smpboot.c, as it fits more cleanly in there alongside other CONFIG_HOTPLUG functions. Separate out the common code into its own function. Signed-off-by: Alex Nixon Acked-by: Jeremy Fitzhardinge Signed-off-by: Ingo Molnar --- arch/x86/kernel/process_32.c | 32 -------------------------------- arch/x86/kernel/process_64.c | 23 ----------------------- arch/x86/kernel/smpboot.c | 29 +++++++++++++++++++++++++++++ 3 files changed, 29 insertions(+), 55 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index d55eb49584b..633cf06578f 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -72,38 +72,6 @@ unsigned long thread_saved_pc(struct task_struct *tsk) return ((unsigned long *)tsk->thread.sp)[3]; } -#ifdef CONFIG_HOTPLUG_CPU -#include - -/* We don't actually take CPU down, just spin without interrupts. */ -void native_play_dead(void) -{ - int cpu = raw_smp_processor_id(); - - idle_task_exit(); - - reset_lazy_tlbstate(); - - irq_ctx_exit(cpu); - - mb(); - /* Ack it */ - __get_cpu_var(cpu_state) = CPU_DEAD; - - /* - * With physical CPU hotplug, we should halt the cpu - */ - local_irq_disable(); - /* mask all interrupts, flush any and all caches, and halt */ - wbinvd_halt(); -} -#else -void native_play_dead(void) -{ - BUG(); -} -#endif /* CONFIG_HOTPLUG_CPU */ - /* * The idle thread. There's no useful work to be * done, so just try to conserve power and have a diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index dfd3f575208..aa28ed01cdf 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -85,29 +85,6 @@ void exit_idle(void) __exit_idle(); } -#ifdef CONFIG_HOTPLUG_CPU -DECLARE_PER_CPU(int, cpu_state); - -#include -/* We halt the CPU with physical CPU hotplug */ -void native_play_dead(void) -{ - idle_task_exit(); - mb(); - /* Ack it */ - __get_cpu_var(cpu_state) = CPU_DEAD; - - local_irq_disable(); - /* mask all interrupts, flush any and all caches, and halt */ - wbinvd_halt(); -} -#else -void native_play_dead(void) -{ - BUG(); -} -#endif /* CONFIG_HOTPLUG_CPU */ - /* * The idle thread. There's no useful work to be * done, so just try to conserve power and have a diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index c414cee296b..28b4287296a 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -1402,6 +1402,29 @@ void native_cpu_die(unsigned int cpu) } printk(KERN_ERR "CPU %u didn't die...\n", cpu); } + +void play_dead_common(void) +{ + idle_task_exit(); + reset_lazy_tlbstate(); + irq_ctx_exit(raw_smp_processor_id()); + + mb(); + /* Ack it */ + __get_cpu_var(cpu_state) = CPU_DEAD; + + /* + * With physical CPU hotplug, we should halt the cpu + */ + local_irq_disable(); +} + +void native_play_dead(void) +{ + play_dead_common(); + wbinvd_halt(); +} + #else /* ... !CONFIG_HOTPLUG_CPU */ int native_cpu_disable(void) { @@ -1413,4 +1436,10 @@ void native_cpu_die(unsigned int cpu) /* We said "no" in __cpu_disable */ BUG(); } + +void native_play_dead(void) +{ + BUG(); +} + #endif -- cgit v1.2.3 From 8227dce7dc2cfdcc28ee0eadfb482a7ee77fba03 Mon Sep 17 00:00:00 2001 From: Alex Nixon Date: Fri, 22 Aug 2008 11:52:14 +0100 Subject: x86: separate generic cpu disabling code from APIC writes in cpu_disable It allows paravirt implementations of cpu_disable to share the cpu_disable_common code, without having to take on board APIC writes, which may not be appropriate. Signed-off-by: Alex Nixon Acked-by: Jeremy Fitzhardinge Signed-off-by: Ingo Molnar --- arch/x86/kernel/smpboot.c | 40 +++++++++++++++++++++++----------------- 1 file changed, 23 insertions(+), 17 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 28b4287296a..66b04e59881 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -1346,25 +1346,9 @@ static void __ref remove_cpu_from_maps(int cpu) numa_remove_cpu(cpu); } -int native_cpu_disable(void) +void cpu_disable_common(void) { int cpu = smp_processor_id(); - - /* - * Perhaps use cpufreq to drop frequency, but that could go - * into generic code. - * - * We won't take down the boot processor on i386 due to some - * interrupts only being able to be serviced by the BSP. - * Especially so if we're not using an IOAPIC -zwane - */ - if (cpu == 0) - return -EBUSY; - - if (nmi_watchdog == NMI_LOCAL_APIC) - stop_apic_nmi_watchdog(NULL); - clear_local_APIC(); - /* * HACK: * Allow any queued timer interrupts to get serviced @@ -1382,6 +1366,28 @@ int native_cpu_disable(void) remove_cpu_from_maps(cpu); unlock_vector_lock(); fixup_irqs(cpu_online_map); +} + +int native_cpu_disable(void) +{ + int cpu = smp_processor_id(); + + /* + * Perhaps use cpufreq to drop frequency, but that could go + * into generic code. + * + * We won't take down the boot processor on i386 due to some + * interrupts only being able to be serviced by the BSP. + * Especially so if we're not using an IOAPIC -zwane + */ + if (cpu == 0) + return -EBUSY; + + if (nmi_watchdog == NMI_LOCAL_APIC) + stop_apic_nmi_watchdog(NULL); + clear_local_APIC(); + + cpu_disable_common(); return 0; } -- cgit v1.2.3 From c7ffa6c26277b403920e2255d10df849bd613380 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Mon, 25 Aug 2008 13:11:27 +0300 Subject: x86: default to reboot via ACPI Triple-fault and keyboard reset may assert INIT instead of RESET; however INIT is blocked when Intel VT is enabled. This leads to a partially reset machine when invoking emergency_restart via sysrq-b: the processor is still working but other parts of the system are dead. Default to rebooting via ACPI, which correctly asserts RESET and reboots the machine. This is safe since we will fall back to keyboard reset and triple fault if acpi is not enabled or if the reset is not successful. Signed-off-by: Avi Kivity Signed-off-by: Ingo Molnar --- arch/x86/kernel/reboot.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index 724adfc63cb..f4c93f1cfc1 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -29,7 +29,11 @@ EXPORT_SYMBOL(pm_power_off); static const struct desc_ptr no_idt = {}; static int reboot_mode; -enum reboot_type reboot_type = BOOT_KBD; +/* + * Keyboard reset and triple fault may result in INIT, not RESET, which + * doesn't work when we're in vmx root mode. Try ACPI first. + */ +enum reboot_type reboot_type = BOOT_ACPI; int reboot_force; #if defined(CONFIG_X86_32) && defined(CONFIG_SMP) -- cgit v1.2.3 From a81726087428b541aa64604b8a94104a4d4aa8f9 Mon Sep 17 00:00:00 2001 From: Hiroshi Shimamoto Date: Tue, 26 Aug 2008 15:13:45 -0700 Subject: x86: acpi: move acpi_mcfg_64bit_base_addr into CONFIG_PCI_MMCONFIG acpi_mcfg_64bit_base_addr is used when CONFIG_PCI_MMCONFIG is enabled. Signed-off-by: Hiroshi Shimamoto Signed-off-by: Ingo Molnar --- arch/x86/kernel/acpi/boot.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index f20470823d3..e5032d7b391 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -96,8 +96,6 @@ static u64 acpi_lapic_addr __initdata = APIC_DEFAULT_PHYS_BASE; #warning ACPI uses CMPXCHG, i486 and later hardware #endif -static int acpi_mcfg_64bit_base_addr __initdata = FALSE; - /* -------------------------------------------------------------------------- Boot-time Configuration -------------------------------------------------------------------------- */ @@ -159,6 +157,8 @@ char *__init __acpi_map_table(unsigned long phys, unsigned long size) struct acpi_mcfg_allocation *pci_mmcfg_config; int pci_mmcfg_config_num; +static int acpi_mcfg_64bit_base_addr __initdata = FALSE; + static int __init acpi_mcfg_oem_check(struct acpi_table_mcfg *mcfg) { if (!strcmp(mcfg->header.oem_id, "SGI")) -- cgit v1.2.3 From 11c231a962c740b3216eb6565149ae5a7944cba7 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Sat, 23 Aug 2008 17:47:11 +0200 Subject: x86: use x2apic id reported by cpuid during topology discovery, fix v2: Fix for !SMP build Signed-off-by: Suresh Siddha Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/addon_cpuid_features.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/addon_cpuid_features.c b/arch/x86/kernel/cpu/addon_cpuid_features.c index aa9641ae670..a2d1a545767 100644 --- a/arch/x86/kernel/cpu/addon_cpuid_features.c +++ b/arch/x86/kernel/cpu/addon_cpuid_features.c @@ -69,6 +69,7 @@ void __cpuinit init_scattered_cpuid_features(struct cpuinfo_x86 *c) */ void __cpuinit detect_extended_topology(struct cpuinfo_x86 *c) { +#ifdef CONFIG_SMP unsigned int eax, ebx, ecx, edx, sub_index; unsigned int ht_mask_width, core_plus_mask_width; unsigned int core_select_mask, core_level_siblings; @@ -132,6 +133,7 @@ void __cpuinit detect_extended_topology(struct cpuinfo_x86 *c) printk(KERN_INFO "CPU: Processor Core ID: %d\n", c->cpu_core_id); return; +#endif } #ifdef CONFIG_X86_PAT -- cgit v1.2.3 From 7414aa41a63348c3bc72d8c37b716024c29b6d50 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Wed, 27 Aug 2008 17:56:44 -0700 Subject: x86: generate names for /proc/cpuinfo from We have had a number of cases where (and its predecessors) have diverged substantially from the names list in /proc/cpuinfo. This patch generates the latter from the former. It retains the option for explicitly overriding the strings, but by making that require a separate action it should at least be less likely to happen. It would be good to do a future pass and rename strings that are gratuituously different in the kernel (/proc/cpuinfo is a userspace interface and must remain constant.) Signed-off-by: H. Peter Anvin --- arch/x86/kernel/cpu/Makefile | 11 ++++- arch/x86/kernel/cpu/feature_names.c | 84 ------------------------------------- arch/x86/kernel/cpu/mkcapflags.pl | 32 ++++++++++++++ arch/x86/kernel/cpu/powerflags.c | 20 +++++++++ 4 files changed, 62 insertions(+), 85 deletions(-) delete mode 100644 arch/x86/kernel/cpu/feature_names.c create mode 100644 arch/x86/kernel/cpu/mkcapflags.pl create mode 100644 arch/x86/kernel/cpu/powerflags.c (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile index a0fc6c14438..3ede19a4e0b 100644 --- a/arch/x86/kernel/cpu/Makefile +++ b/arch/x86/kernel/cpu/Makefile @@ -3,7 +3,7 @@ # obj-y := intel_cacheinfo.o addon_cpuid_features.o -obj-y += proc.o feature_names.o +obj-y += proc.o capflags.o powerflags.o obj-$(CONFIG_X86_32) += common.o bugs.o cmpxchg.o obj-$(CONFIG_X86_64) += common_64.o bugs_64.o @@ -23,3 +23,12 @@ obj-$(CONFIG_MTRR) += mtrr/ obj-$(CONFIG_CPU_FREQ) += cpufreq/ obj-$(CONFIG_X86_LOCAL_APIC) += perfctr-watchdog.o + +quiet_cmd_mkcapflags = MKCAP $@ + cmd_mkcapflags = $(PERL) $(srctree)/$(src)/mkcapflags.pl $< $@ + +cpufeature = $(src)/../../../../include/asm-x86/cpufeature.h + +targets += capflags.c +$(obj)/capflags.c: $(cpufeature) $(src)/mkcapflags.pl FORCE + $(call if_changed,mkcapflags) diff --git a/arch/x86/kernel/cpu/feature_names.c b/arch/x86/kernel/cpu/feature_names.c deleted file mode 100644 index c9017799497..00000000000 --- a/arch/x86/kernel/cpu/feature_names.c +++ /dev/null @@ -1,84 +0,0 @@ -/* - * Strings for the various x86 capability flags. - * - * This file must not contain any executable code. - */ - -#include - -/* - * These flag bits must match the definitions in . - * NULL means this bit is undefined or reserved; either way it doesn't - * have meaning as far as Linux is concerned. Note that it's important - * to realize there is a difference between this table and CPUID -- if - * applications want to get the raw CPUID data, they should access - * /dev/cpu//cpuid instead. - */ -const char * const x86_cap_flags[NCAPINTS*32] = { - /* Intel-defined */ - "fpu", "vme", "de", "pse", "tsc", "msr", "pae", "mce", - "cx8", "apic", NULL, "sep", "mtrr", "pge", "mca", "cmov", - "pat", "pse36", "pn", "clflush", NULL, "dts", "acpi", "mmx", - "fxsr", "sse", "sse2", "ss", "ht", "tm", "ia64", "pbe", - - /* AMD-defined */ - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, - NULL, NULL, NULL, "syscall", NULL, NULL, NULL, NULL, - NULL, NULL, NULL, "mp", "nx", NULL, "mmxext", NULL, - NULL, "fxsr_opt", "pdpe1gb", "rdtscp", NULL, "lm", - "3dnowext", "3dnow", - - /* Transmeta-defined */ - "recovery", "longrun", NULL, "lrti", NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, - - /* Other (Linux-defined) */ - "cxmmx", "k6_mtrr", "cyrix_arr", "centaur_mcr", - NULL, NULL, NULL, NULL, - "constant_tsc", "up", NULL, "arch_perfmon", - "pebs", "bts", NULL, NULL, - "rep_good", NULL, NULL, NULL, - "nopl", NULL, NULL, NULL, - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, - - /* Intel-defined (#2) */ - "pni", NULL, NULL, "monitor", "ds_cpl", "vmx", "smx", "est", - "tm2", "ssse3", "cid", NULL, NULL, "cx16", "xtpr", NULL, - NULL, NULL, "dca", "sse4_1", "sse4_2", NULL, NULL, "popcnt", - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, - - /* VIA/Cyrix/Centaur-defined */ - NULL, NULL, "rng", "rng_en", NULL, NULL, "ace", "ace_en", - "ace2", "ace2_en", "phe", "phe_en", "pmm", "pmm_en", NULL, NULL, - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, - - /* AMD-defined (#2) */ - "lahf_lm", "cmp_legacy", "svm", "extapic", - "cr8_legacy", "abm", "sse4a", "misalignsse", - "3dnowprefetch", "osvw", "ibs", "sse5", - "skinit", "wdt", NULL, NULL, - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, - - /* Auxiliary (Linux-defined) */ - "ida", NULL, NULL, NULL, NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, -}; - -const char *const x86_power_flags[32] = { - "ts", /* temperature sensor */ - "fid", /* frequency id control */ - "vid", /* voltage id control */ - "ttp", /* thermal trip */ - "tm", - "stc", - "100mhzsteps", - "hwpstate", - "", /* tsc invariant mapped to constant_tsc */ - /* nothing */ -}; diff --git a/arch/x86/kernel/cpu/mkcapflags.pl b/arch/x86/kernel/cpu/mkcapflags.pl new file mode 100644 index 00000000000..dfea390e160 --- /dev/null +++ b/arch/x86/kernel/cpu/mkcapflags.pl @@ -0,0 +1,32 @@ +#!/usr/bin/perl +# +# Generate the x86_cap_flags[] array from include/asm-x86/cpufeature.h +# + +($in, $out) = @ARGV; + +open(IN, "< $in\0") or die "$0: cannot open: $in: $!\n"; +open(OUT, "> $out\0") or die "$0: cannot create: $out: $!\n"; + +print OUT "#include \n\n"; +print OUT "const char * const x86_cap_flags[NCAPINTS*32] = {\n"; + +while (defined($line = )) { + if ($line =~ /^\s*\#\s*define\s+(X86_FEATURE_(\S+))\s+(.*)$/) { + $macro = $1; + $feature = $2; + $tail = $3; + if ($tail =~ /\/\*\s*\"([^"]*)\".*\*\//) { + $feature = $1; + } + + if ($feature ne '') { + printf OUT "\t%-32s = \"%s\",\n", + "[$macro]", "\L$feature"; + } + } +} +print OUT "};\n"; + +close(IN); +close(OUT); diff --git a/arch/x86/kernel/cpu/powerflags.c b/arch/x86/kernel/cpu/powerflags.c new file mode 100644 index 00000000000..5abbea297e0 --- /dev/null +++ b/arch/x86/kernel/cpu/powerflags.c @@ -0,0 +1,20 @@ +/* + * Strings for the various x86 power flags + * + * This file must not contain any executable code. + */ + +#include + +const char *const x86_power_flags[32] = { + "ts", /* temperature sensor */ + "fid", /* frequency id control */ + "vid", /* voltage id control */ + "ttp", /* thermal trip */ + "tm", + "stc", + "100mhzsteps", + "hwpstate", + "", /* tsc invariant mapped to constant_tsc */ + /* nothing */ +}; -- cgit v1.2.3 From 2c7e9fd4c6cb7f4b0bc7162e9a30847e51a1ca1b Mon Sep 17 00:00:00 2001 From: Joe Korty Date: Wed, 27 Aug 2008 10:35:06 -0400 Subject: x86: make poll_idle behave more like the other idle methods Make poll_idle() behave more like the other idle methods. Currently, poll_idle() returns immediately. The other idle methods all wait indefinately for some condition to come true before returning. poll_idle should emulate these other methods and also wait for a return condition, in this case, for need_resched() to become 'true'. Without this delay the idle loop spends all of its time in the outer loop that calls poll_idle. This outer loop, these days, does real work, some of it under rcu locks. That work should only be done when idle is entered and when idle exits, not continuously while idle is spinning. Signed-off-by: Joe Korty Signed-off-by: Ingo Molnar --- arch/x86/kernel/process.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 7fc4d5b0a6a..4e09d26748c 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -185,7 +185,8 @@ static void mwait_idle(void) static void poll_idle(void) { local_irq_enable(); - cpu_relax(); + while (!need_resched()) + cpu_relax(); } /* -- cgit v1.2.3 From 1625324d22409e32e3f8eb86018cad72e1c09d61 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Wed, 27 Aug 2008 23:01:16 -0700 Subject: x86: move dir es7000 to es7000_32.c to be aligned with numaq, summit. Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/Makefile | 1 + arch/x86/kernel/es7000_32.c | 345 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 346 insertions(+) create mode 100644 arch/x86/kernel/es7000_32.c (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index a07ec14f331..dac24c4390d 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -69,6 +69,7 @@ obj-$(CONFIG_KEXEC) += machine_kexec_$(BITS).o obj-$(CONFIG_KEXEC) += relocate_kernel_$(BITS).o crash.o obj-$(CONFIG_CRASH_DUMP) += crash_dump_$(BITS).o obj-$(CONFIG_X86_NUMAQ) += numaq_32.o +obj-$(CONFIG_X86_ES7000) += es7000_32.o obj-$(CONFIG_X86_SUMMIT_NUMA) += summit_32.o obj-y += vsmp_64.o obj-$(CONFIG_KPROBES) += kprobes.o diff --git a/arch/x86/kernel/es7000_32.c b/arch/x86/kernel/es7000_32.c new file mode 100644 index 00000000000..849e5cd485b --- /dev/null +++ b/arch/x86/kernel/es7000_32.c @@ -0,0 +1,345 @@ +/* + * Written by: Garry Forsgren, Unisys Corporation + * Natalie Protasevich, Unisys Corporation + * This file contains the code to configure and interface + * with Unisys ES7000 series hardware system manager. + * + * Copyright (c) 2003 Unisys Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. + * + * Contact information: Unisys Corporation, Township Line & Union Meeting + * Roads-A, Unisys Way, Blue Bell, Pennsylvania, 19424, or: + * + * http://www.unisys.com + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* + * ES7000 chipsets + */ + +#define NON_UNISYS 0 +#define ES7000_CLASSIC 1 +#define ES7000_ZORRO 2 + + +#define MIP_REG 1 +#define MIP_PSAI_REG 4 + +#define MIP_BUSY 1 +#define MIP_SPIN 0xf0000 +#define MIP_VALID 0x0100000000000000ULL +#define MIP_PORT(VALUE) ((VALUE >> 32) & 0xffff) + +#define MIP_RD_LO(VALUE) (VALUE & 0xffffffff) + +struct mip_reg_info { + unsigned long long mip_info; + unsigned long long delivery_info; + unsigned long long host_reg; + unsigned long long mip_reg; +}; + +struct part_info { + unsigned char type; + unsigned char length; + unsigned char part_id; + unsigned char apic_mode; + unsigned long snum; + char ptype[16]; + char sname[64]; + char pname[64]; +}; + +struct psai { + unsigned long long entry_type; + unsigned long long addr; + unsigned long long bep_addr; +}; + +struct es7000_mem_info { + unsigned char type; + unsigned char length; + unsigned char resv[6]; + unsigned long long start; + unsigned long long size; +}; + +struct es7000_oem_table { + unsigned long long hdr; + struct mip_reg_info mip; + struct part_info pif; + struct es7000_mem_info shm; + struct psai psai; +}; + +#ifdef CONFIG_ACPI + +struct oem_table { + struct acpi_table_header Header; + u32 OEMTableAddr; + u32 OEMTableSize; +}; + +extern int find_unisys_acpi_oem_table(unsigned long *oem_addr); +#endif + +struct mip_reg { + unsigned long long off_0; + unsigned long long off_8; + unsigned long long off_10; + unsigned long long off_18; + unsigned long long off_20; + unsigned long long off_28; + unsigned long long off_30; + unsigned long long off_38; +}; + +#define MIP_SW_APIC 0x1020b +#define MIP_FUNC(VALUE) (VALUE & 0xff) + +/* + * ES7000 Globals + */ + +static volatile unsigned long *psai = NULL; +static struct mip_reg *mip_reg; +static struct mip_reg *host_reg; +static int mip_port; +static unsigned long mip_addr, host_addr; + +int es7000_plat; + +/* + * GSI override for ES7000 platforms. + */ + +static unsigned int base; + +static int +es7000_rename_gsi(int ioapic, int gsi) +{ + if (es7000_plat == ES7000_ZORRO) + return gsi; + + if (!base) { + int i; + for (i = 0; i < nr_ioapics; i++) + base += nr_ioapic_registers[i]; + } + + if (!ioapic && (gsi < 16)) + gsi += base; + return gsi; +} + +void __init +setup_unisys(void) +{ + /* + * Determine the generation of the ES7000 currently running. + * + * es7000_plat = 1 if the machine is a 5xx ES7000 box + * es7000_plat = 2 if the machine is a x86_64 ES7000 box + * + */ + if (!(boot_cpu_data.x86 <= 15 && boot_cpu_data.x86_model <= 2)) + es7000_plat = ES7000_ZORRO; + else + es7000_plat = ES7000_CLASSIC; + ioapic_renumber_irq = es7000_rename_gsi; +} + +/* + * Parse the OEM Table + */ + +int __init +parse_unisys_oem (char *oemptr) +{ + int i; + int success = 0; + unsigned char type, size; + unsigned long val; + char *tp = NULL; + struct psai *psaip = NULL; + struct mip_reg_info *mi; + struct mip_reg *host, *mip; + + tp = oemptr; + + tp += 8; + + for (i=0; i <= 6; i++) { + type = *tp++; + size = *tp++; + tp -= 2; + switch (type) { + case MIP_REG: + mi = (struct mip_reg_info *)tp; + val = MIP_RD_LO(mi->host_reg); + host_addr = val; + host = (struct mip_reg *)val; + host_reg = __va(host); + val = MIP_RD_LO(mi->mip_reg); + mip_port = MIP_PORT(mi->mip_info); + mip_addr = val; + mip = (struct mip_reg *)val; + mip_reg = __va(mip); + pr_debug("es7000_mipcfg: host_reg = 0x%lx \n", + (unsigned long)host_reg); + pr_debug("es7000_mipcfg: mip_reg = 0x%lx \n", + (unsigned long)mip_reg); + success++; + break; + case MIP_PSAI_REG: + psaip = (struct psai *)tp; + if (tp != NULL) { + if (psaip->addr) + psai = __va(psaip->addr); + else + psai = NULL; + success++; + } + break; + default: + break; + } + tp += size; + } + + if (success < 2) { + es7000_plat = NON_UNISYS; + } else + setup_unisys(); + return es7000_plat; +} + +#ifdef CONFIG_ACPI +int __init +find_unisys_acpi_oem_table(unsigned long *oem_addr) +{ + struct acpi_table_header *header = NULL; + int i = 0; + while (ACPI_SUCCESS(acpi_get_table("OEM1", i++, &header))) { + if (!memcmp((char *) &header->oem_id, "UNISYS", 6)) { + struct oem_table *t = (struct oem_table *)header; + *oem_addr = (unsigned long)__acpi_map_table(t->OEMTableAddr, + t->OEMTableSize); + return 0; + } + } + return -1; +} +#endif + +static void +es7000_spin(int n) +{ + int i = 0; + + while (i++ < n) + rep_nop(); +} + +static int __init +es7000_mip_write(struct mip_reg *mip_reg) +{ + int status = 0; + int spin; + + spin = MIP_SPIN; + while (((unsigned long long)host_reg->off_38 & + (unsigned long long)MIP_VALID) != 0) { + if (--spin <= 0) { + printk("es7000_mip_write: Timeout waiting for Host Valid Flag"); + return -1; + } + es7000_spin(MIP_SPIN); + } + + memcpy(host_reg, mip_reg, sizeof(struct mip_reg)); + outb(1, mip_port); + + spin = MIP_SPIN; + + while (((unsigned long long)mip_reg->off_38 & + (unsigned long long)MIP_VALID) == 0) { + if (--spin <= 0) { + printk("es7000_mip_write: Timeout waiting for MIP Valid Flag"); + return -1; + } + es7000_spin(MIP_SPIN); + } + + status = ((unsigned long long)mip_reg->off_0 & + (unsigned long long)0xffff0000000000ULL) >> 48; + mip_reg->off_38 = ((unsigned long long)mip_reg->off_38 & + (unsigned long long)~MIP_VALID); + return status; +} + +int +es7000_start_cpu(int cpu, unsigned long eip) +{ + unsigned long vect = 0, psaival = 0; + + if (psai == NULL) + return -1; + + vect = ((unsigned long)__pa(eip)/0x1000) << 16; + psaival = (0x1000000 | vect | cpu); + + while (*psai & 0x1000000) + ; + + *psai = psaival; + + return 0; + +} + +void __init +es7000_sw_apic(void) +{ + if (es7000_plat) { + int mip_status; + struct mip_reg es7000_mip_reg; + + printk("ES7000: Enabling APIC mode.\n"); + memset(&es7000_mip_reg, 0, sizeof(struct mip_reg)); + es7000_mip_reg.off_0 = MIP_SW_APIC; + es7000_mip_reg.off_38 = (MIP_VALID); + while ((mip_status = es7000_mip_write(&es7000_mip_reg)) != 0) + printk("es7000_sw_apic: command failed, status = %x\n", + mip_status); + return; + } +} -- cgit v1.2.3 From cce3e057242d3d46fea07b9eb3910b0076419be5 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 4 Sep 2008 15:18:44 +0000 Subject: x86: TSC: define the PIT latch value separate Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar --- arch/x86/kernel/tsc.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 346cae5ac42..aa11413e7c1 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -122,6 +122,10 @@ static u64 tsc_read_refs(u64 *pm, u64 *hpet) return ULLONG_MAX; } +#define CAL_MS 50 +#define CAL_LATCH (CLOCK_TICK_RATE / (1000 / CAL_MS)) +#define CAL_PIT_LOOPS 5000 + /* * Try to calibrate the TSC against the Programmable * Interrupt Timer and return the frequency of the TSC @@ -144,8 +148,8 @@ static unsigned long pit_calibrate_tsc(void) * (LSB then MSB) to begin countdown. */ outb(0xb0, 0x43); - outb((CLOCK_TICK_RATE / (1000 / 50)) & 0xff, 0x42); - outb((CLOCK_TICK_RATE / (1000 / 50)) >> 8, 0x42); + outb(CAL_LATCH & 0xff, 0x42); + outb(CAL_LATCH >> 8, 0x42); tsc = t1 = t2 = get_cycles(); @@ -166,18 +170,18 @@ static unsigned long pit_calibrate_tsc(void) /* * Sanity checks: * - * If we were not able to read the PIT more than 5000 + * If we were not able to read the PIT more than PIT_MIN_LOOPS * times, then we have been hit by a massive SMI * * If the maximum is 10 times larger than the minimum, * then we got hit by an SMI as well. */ - if (pitcnt < 5000 || tscmax > 10 * tscmin) + if (pitcnt < CAL_PIT_LOOPS || tscmax > 10 * tscmin) return ULONG_MAX; /* Calculate the PIT value */ delta = t2 - t1; - do_div(delta, 50); + do_div(delta, CAL_MS); return delta; } -- cgit v1.2.3 From d683ef7afe8b6dbac6a3c681cef8a908357793ca Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 4 Sep 2008 15:18:48 +0000 Subject: x86: TSC: separate hpet/pmtimer calculation out Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar --- arch/x86/kernel/tsc.c | 56 +++++++++++++++++++++++++++++++++++++-------------- 1 file changed, 41 insertions(+), 15 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index aa11413e7c1..ebb9bf824a0 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -122,6 +122,43 @@ static u64 tsc_read_refs(u64 *pm, u64 *hpet) return ULLONG_MAX; } +/* + * Calculate the TSC frequency from HPET reference + */ +static unsigned long calc_hpet_ref(u64 deltatsc, u64 hpet1, u64 hpet2) +{ + u64 tmp; + + if (hpet2 < hpet1) + hpet2 += 0x100000000ULL; + hpet2 -= hpet1; + tmp = ((u64)hpet2 * hpet_readl(HPET_PERIOD)); + do_div(tmp, 1000000); + do_div(deltatsc, tmp); + + return (unsigned long) deltatsc; +} + +/* + * Calculate the TSC frequency from PMTimer reference + */ +static unsigned long calc_pmtimer_ref(u64 deltatsc, u64 pm1, u64 pm2) +{ + u64 tmp; + + if (!pm1 && !pm2) + return ULONG_MAX; + + if (pm2 < pm1) + pm2 += (u64)ACPI_PM_OVRRUN; + pm2 -= pm1; + tmp = pm2 * 1000000000LL; + do_div(tmp, PMTMR_TICKS_PER_SEC); + do_div(deltatsc, tmp); + + return (unsigned long) deltatsc; +} + #define CAL_MS 50 #define CAL_LATCH (CLOCK_TICK_RATE / (1000 / CAL_MS)) #define CAL_PIT_LOOPS 5000 @@ -247,22 +284,11 @@ unsigned long native_calibrate_tsc(void) continue; tsc2 = (tsc2 - tsc1) * 1000000LL; + if (hpet) + tsc2 = calc_hpet_ref(tsc2, hpet1, hpet2); + else + tsc2 = calc_pmtimer_ref(tsc2, pm1, pm2); - if (hpet) { - if (hpet2 < hpet1) - hpet2 += 0x100000000ULL; - hpet2 -= hpet1; - tsc1 = ((u64)hpet2 * hpet_readl(HPET_PERIOD)); - do_div(tsc1, 1000000); - } else { - if (pm2 < pm1) - pm2 += (u64)ACPI_PM_OVRRUN; - pm2 -= pm1; - tsc1 = pm2 * 1000000000LL; - do_div(tsc1, PMTMR_TICKS_PER_SEC); - } - - do_div(tsc2, tsc1); tsc_ref_min = min(tsc_ref_min, (unsigned long) tsc2); } -- cgit v1.2.3 From 827014be05e4515fa0dfc32e3100c4dab2070a98 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 4 Sep 2008 15:18:53 +0000 Subject: x86: TSC: use one set of reference variables Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar --- arch/x86/kernel/tsc.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index ebb9bf824a0..52284d31fc9 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -104,7 +104,7 @@ __setup("notsc", notsc_setup); /* * Read TSC and the reference counters. Take care of SMI disturbance */ -static u64 tsc_read_refs(u64 *pm, u64 *hpet) +static u64 tsc_read_refs(u64 *p, int hpet) { u64 t1, t2; int i; @@ -112,9 +112,9 @@ static u64 tsc_read_refs(u64 *pm, u64 *hpet) for (i = 0; i < MAX_RETRIES; i++) { t1 = get_cycles(); if (hpet) - *hpet = hpet_readl(HPET_COUNTER) & 0xFFFFFFFF; + *p = hpet_readl(HPET_COUNTER) & 0xFFFFFFFF; else - *pm = acpi_pm_read_early(); + *p = acpi_pm_read_early(); t2 = get_cycles(); if ((t2 - t1) < SMI_TRESHOLD) return t2; @@ -228,7 +228,7 @@ static unsigned long pit_calibrate_tsc(void) */ unsigned long native_calibrate_tsc(void) { - u64 tsc1, tsc2, delta, pm1, pm2, hpet1, hpet2; + u64 tsc1, tsc2, delta, ref1, ref2; unsigned long tsc_pit_min = ULONG_MAX, tsc_ref_min = ULONG_MAX; unsigned long flags; int hpet = is_hpet_enabled(), i; @@ -267,16 +267,16 @@ unsigned long native_calibrate_tsc(void) * read the end value. */ local_irq_save(flags); - tsc1 = tsc_read_refs(&pm1, hpet ? &hpet1 : NULL); + tsc1 = tsc_read_refs(&ref1, hpet); tsc_pit_khz = pit_calibrate_tsc(); - tsc2 = tsc_read_refs(&pm2, hpet ? &hpet2 : NULL); + tsc2 = tsc_read_refs(&ref2, hpet); local_irq_restore(flags); /* Pick the lowest PIT TSC calibration so far */ tsc_pit_min = min(tsc_pit_min, tsc_pit_khz); /* hpet or pmtimer available ? */ - if (!hpet && !pm1 && !pm2) + if (!hpet && !ref1 && !ref2) continue; /* Check, whether the sampling was disturbed by an SMI */ @@ -285,9 +285,9 @@ unsigned long native_calibrate_tsc(void) tsc2 = (tsc2 - tsc1) * 1000000LL; if (hpet) - tsc2 = calc_hpet_ref(tsc2, hpet1, hpet2); + tsc2 = calc_hpet_ref(tsc2, ref1, ref2); else - tsc2 = calc_pmtimer_ref(tsc2, pm1, pm2); + tsc2 = calc_pmtimer_ref(tsc2, ref1, ref2); tsc_ref_min = min(tsc_ref_min, (unsigned long) tsc2); } @@ -301,7 +301,7 @@ unsigned long native_calibrate_tsc(void) "SMI disturbance.\n"); /* We don't have an alternative source, disable TSC */ - if (!hpet && !pm1 && !pm2) { + if (!hpet && !ref1 && !ref2) { printk("TSC: No reference (HPET/PMTIMER) available\n"); return 0; } @@ -321,7 +321,7 @@ unsigned long native_calibrate_tsc(void) } /* We don't have an alternative source, use the PIT calibration value */ - if (!hpet && !pm1 && !pm2) { + if (!hpet && !ref1 && !ref2) { printk(KERN_INFO "TSC: Using PIT calibration value\n"); return tsc_pit_min; } -- cgit v1.2.3 From a977c400957451f3bd92b9ed6022f5fe8a6cbbf5 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 4 Sep 2008 15:18:59 +0000 Subject: x86: TSC make the calibration loop smarter The last changes made the calibration loop 250ms long which is far too much. Try to do that more clever. Experiments have shown that using a 10ms delay for the PIT based calibration gives us a good enough value. If we have a reference (HPET/PMTIMER) and the result of the PIT and the reference is close enough, then we can break out of the calibration loop on a match right away and use the reference value. Otherwise we just loop 3 times and decide then, which value to take. One caveat is that for virtualized environments the PIT calibration often does not work at all and I found out that 10us is a bit too short as well for the reference to give a sane result. The solution here is to make the last loop longer when the first two PIT calibrations failed. Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar --- arch/x86/kernel/tsc.c | 95 +++++++++++++++++++++++++++++++-------------------- 1 file changed, 58 insertions(+), 37 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 52284d31fc9..da033b5b3e1 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -159,9 +159,14 @@ static unsigned long calc_pmtimer_ref(u64 deltatsc, u64 pm1, u64 pm2) return (unsigned long) deltatsc; } -#define CAL_MS 50 +#define CAL_MS 10 #define CAL_LATCH (CLOCK_TICK_RATE / (1000 / CAL_MS)) -#define CAL_PIT_LOOPS 5000 +#define CAL_PIT_LOOPS 1000 + +#define CAL2_MS 50 +#define CAL2_LATCH (CLOCK_TICK_RATE / (1000 / CAL2_MS)) +#define CAL2_PIT_LOOPS 5000 + /* * Try to calibrate the TSC against the Programmable @@ -170,7 +175,7 @@ static unsigned long calc_pmtimer_ref(u64 deltatsc, u64 pm1, u64 pm2) * * Return ULONG_MAX on failure to calibrate. */ -static unsigned long pit_calibrate_tsc(void) +static unsigned long pit_calibrate_tsc(u32 latch, unsigned long ms, int loopmin) { u64 tsc, t1, t2, delta; unsigned long tscmin, tscmax; @@ -185,8 +190,8 @@ static unsigned long pit_calibrate_tsc(void) * (LSB then MSB) to begin countdown. */ outb(0xb0, 0x43); - outb(CAL_LATCH & 0xff, 0x42); - outb(CAL_LATCH >> 8, 0x42); + outb(latch & 0xff, 0x42); + outb(latch >> 8, 0x42); tsc = t1 = t2 = get_cycles(); @@ -207,18 +212,18 @@ static unsigned long pit_calibrate_tsc(void) /* * Sanity checks: * - * If we were not able to read the PIT more than PIT_MIN_LOOPS + * If we were not able to read the PIT more than loopmin * times, then we have been hit by a massive SMI * * If the maximum is 10 times larger than the minimum, * then we got hit by an SMI as well. */ - if (pitcnt < CAL_PIT_LOOPS || tscmax > 10 * tscmin) + if (pitcnt < loopmin || tscmax > 10 * tscmin) return ULONG_MAX; /* Calculate the PIT value */ delta = t2 - t1; - do_div(delta, CAL_MS); + do_div(delta, ms); return delta; } @@ -230,8 +235,8 @@ unsigned long native_calibrate_tsc(void) { u64 tsc1, tsc2, delta, ref1, ref2; unsigned long tsc_pit_min = ULONG_MAX, tsc_ref_min = ULONG_MAX; - unsigned long flags; - int hpet = is_hpet_enabled(), i; + unsigned long flags, latch, ms; + int hpet = is_hpet_enabled(), i, loopmin; /* * Run 5 calibration loops to get the lowest frequency value @@ -257,7 +262,13 @@ unsigned long native_calibrate_tsc(void) * calibration delay loop as we have to wait for a certain * amount of time anyway. */ - for (i = 0; i < 5; i++) { + + /* Preset PIT loop values */ + latch = CAL_LATCH; + ms = CAL_MS; + loopmin = CAL_PIT_LOOPS; + + for (i = 0; i < 3; i++) { unsigned long tsc_pit_khz; /* @@ -268,7 +279,7 @@ unsigned long native_calibrate_tsc(void) */ local_irq_save(flags); tsc1 = tsc_read_refs(&ref1, hpet); - tsc_pit_khz = pit_calibrate_tsc(); + tsc_pit_khz = pit_calibrate_tsc(latch, ms, loopmin); tsc2 = tsc_read_refs(&ref2, hpet); local_irq_restore(flags); @@ -290,6 +301,35 @@ unsigned long native_calibrate_tsc(void) tsc2 = calc_pmtimer_ref(tsc2, ref1, ref2); tsc_ref_min = min(tsc_ref_min, (unsigned long) tsc2); + + /* Check the reference deviation */ + delta = ((u64) tsc_pit_min) * 100; + do_div(delta, tsc_ref_min); + + /* + * If both calibration results are inside a 10% window + * then we can be sure, that the calibration + * succeeded. We break out of the loop right away. We + * use the reference value, as it is more precise. + */ + if (delta >= 90 && delta <= 110) { + printk(KERN_INFO + "TSC: PIT calibration matches %s. %d loops\n", + hpet ? "HPET" : "PMTIMER", i + 1); + return tsc_ref_min; + } + + /* + * Check whether PIT failed more than once. This + * happens in virtualized environments. We need to + * give the virtual PC a slightly longer timeframe for + * the HPET/PMTIMER to make the result precise. + */ + if (i == 1 && tsc_pit_min == ULONG_MAX) { + latch = CAL2_LATCH; + ms = CAL2_MS; + loopmin = CAL2_PIT_LOOPS; + } } /* @@ -309,7 +349,7 @@ unsigned long native_calibrate_tsc(void) /* The alternative source failed as well, disable TSC */ if (tsc_ref_min == ULONG_MAX) { printk(KERN_WARNING "TSC: HPET/PMTIMER calibration " - "failed due to SMI disturbance.\n"); + "failed.\n"); return 0; } @@ -328,37 +368,18 @@ unsigned long native_calibrate_tsc(void) /* The alternative source failed, use the PIT calibration value */ if (tsc_ref_min == ULONG_MAX) { - printk(KERN_WARNING "TSC: HPET/PMTIMER calibration failed due " - "to SMI disturbance. Using PIT calibration\n"); + printk(KERN_WARNING "TSC: HPET/PMTIMER calibration failed. " + "Using PIT calibration\n"); return tsc_pit_min; } - /* Check the reference deviation */ - delta = ((u64) tsc_pit_min) * 100; - do_div(delta, tsc_ref_min); - - /* - * If both calibration results are inside a 5% window, the we - * use the lower frequency of those as it is probably the - * closest estimate. - */ - if (delta >= 95 && delta <= 105) { - printk(KERN_INFO "TSC: PIT calibration confirmed by %s.\n", - hpet ? "HPET" : "PMTIMER"); - printk(KERN_INFO "TSC: using %s calibration value\n", - tsc_pit_min <= tsc_ref_min ? "PIT" : - hpet ? "HPET" : "PMTIMER"); - return tsc_pit_min <= tsc_ref_min ? tsc_pit_min : tsc_ref_min; - } - - printk(KERN_WARNING "TSC: PIT calibration deviates from %s: %lu %lu.\n", - hpet ? "HPET" : "PMTIMER", tsc_pit_min, tsc_ref_min); - /* * The calibration values differ too much. In doubt, we use * the PIT value as we know that there are PMTIMERs around - * running at double speed. + * running at double speed. At least we let the user know: */ + printk(KERN_WARNING "TSC: PIT calibration deviates from %s: %lu %lu.\n", + hpet ? "HPET" : "PMTIMER", tsc_pit_min, tsc_ref_min); printk(KERN_INFO "TSC: Using PIT calibration value\n"); return tsc_pit_min; } -- cgit v1.2.3 From 58f7c98850a226d3fb05b1095af9f7c4ea3507ba Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 28 Aug 2008 13:52:25 -0700 Subject: x86: split e820 reserved entries record to late v2 so could let BAR res register at first, or even pnp. v2: insert e820 reserve resources before pnp_system_init Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/e820.c | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c index 291e6cd9f9c..523d6c5605d 100644 --- a/arch/x86/kernel/e820.c +++ b/arch/x86/kernel/e820.c @@ -1271,13 +1271,15 @@ static inline const char *e820_type_to_string(int e820_type) /* * Mark e820 reserved areas as busy for the resource manager. */ +struct resource __initdata *e820_res; void __init e820_reserve_resources(void) { int i; - struct resource *res; u64 end; + struct resource *res; res = alloc_bootmem_low(sizeof(struct resource) * e820.nr_map); + e820_res = res; for (i = 0; i < e820.nr_map; i++) { end = e820.map[i].addr + e820.map[i].size - 1; #ifndef CONFIG_RESOURCES_64BIT @@ -1291,7 +1293,8 @@ void __init e820_reserve_resources(void) res->end = end; res->flags = IORESOURCE_MEM | IORESOURCE_BUSY; - insert_resource(&iomem_resource, res); + if (e820.map[i].type != E820_RESERVED || res->start < (1ULL<<20)) + insert_resource(&iomem_resource, res); res++; } @@ -1303,6 +1306,19 @@ void __init e820_reserve_resources(void) } } +void __init e820_reserve_resources_late(void) +{ + int i; + struct resource *res; + + res = e820_res; + for (i = 0; i < e820.nr_map; i++) { + if (e820.map[i].type == E820_RESERVED && res->start >= (1ULL<<20)) + insert_resource(&iomem_resource, res); + res++; + } +} + char *__init default_machine_specific_memory_setup(void) { char *who = "BIOS-e820"; -- cgit v1.2.3 From a5444d15b611cf2ffe2bc52aaf11f2ac51882f89 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 29 Aug 2008 08:09:23 +0200 Subject: x86: split e820 reserved entries record to late v4 this one replaces: | commit a2bd7274b47124d2fc4dfdb8c0591f545ba749dd | Author: Yinghai Lu | Date: Mon Aug 25 00:56:08 2008 -0700 | | x86: fix HPET regression in 2.6.26 versus 2.6.25, check hpet against BAR, v3 v2: insert e820 reserve resources before pnp_system_init v3: fix merging problem in tip/x86/core v4: address Linus's review about comments and condition in _late() Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/e820.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c index 523d6c5605d..a7a71339bfb 100644 --- a/arch/x86/kernel/e820.c +++ b/arch/x86/kernel/e820.c @@ -1271,12 +1271,12 @@ static inline const char *e820_type_to_string(int e820_type) /* * Mark e820 reserved areas as busy for the resource manager. */ -struct resource __initdata *e820_res; +static struct resource __initdata *e820_res; void __init e820_reserve_resources(void) { int i; - u64 end; struct resource *res; + u64 end; res = alloc_bootmem_low(sizeof(struct resource) * e820.nr_map); e820_res = res; @@ -1293,6 +1293,12 @@ void __init e820_reserve_resources(void) res->end = end; res->flags = IORESOURCE_MEM | IORESOURCE_BUSY; + + /* + * don't register the region that could be conflicted with + * pci device BAR resource and insert them later in + * pcibios_resource_survey() + */ if (e820.map[i].type != E820_RESERVED || res->start < (1ULL<<20)) insert_resource(&iomem_resource, res); res++; @@ -1313,7 +1319,7 @@ void __init e820_reserve_resources_late(void) res = e820_res; for (i = 0; i < e820.nr_map; i++) { - if (e820.map[i].type == E820_RESERVED && res->start >= (1ULL<<20)) + if (!res->parent && res->end) insert_resource(&iomem_resource, res); res++; } -- cgit v1.2.3 From dc44e65943169de2d1a1b494876f48a65a9737f1 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Thu, 4 Sep 2008 13:47:38 +0200 Subject: x86: capitalize function call interrupts consistently Impact: aestetic Capitalize function call interrupts consistently. All other descriptions in /proc/interrupts are capitalized except for "function call interrupts". Capitalize it too for consistency. While that's technically a published ABI I think the risk of anyone relying on that text to stay the same is negligible. Signed-off-by: Andi Kleen Signed-off-by: H. Peter Anvin --- arch/x86/kernel/irq_32.c | 2 +- arch/x86/kernel/irq_64.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c index 1cf8c1fcc08..b71e02d42f4 100644 --- a/arch/x86/kernel/irq_32.c +++ b/arch/x86/kernel/irq_32.c @@ -325,7 +325,7 @@ skip: for_each_online_cpu(j) seq_printf(p, "%10u ", per_cpu(irq_stat,j).irq_call_count); - seq_printf(p, " function call interrupts\n"); + seq_printf(p, " Function call interrupts\n"); seq_printf(p, "TLB: "); for_each_online_cpu(j) seq_printf(p, "%10u ", diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c index 1f78b238d8d..f065fe9071b 100644 --- a/arch/x86/kernel/irq_64.c +++ b/arch/x86/kernel/irq_64.c @@ -129,7 +129,7 @@ skip: seq_printf(p, "CAL: "); for_each_online_cpu(j) seq_printf(p, "%10u ", cpu_pda(j)->irq_call_count); - seq_printf(p, " function call interrupts\n"); + seq_printf(p, " Function call interrupts\n"); seq_printf(p, "TLB: "); for_each_online_cpu(j) seq_printf(p, "%10u ", cpu_pda(j)->irq_tlb_count); -- cgit v1.2.3 From fac8f1e4f99dff7a0c3a929f327d66f46de6fa21 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 4 Sep 2008 20:59:22 +0200 Subject: x86: split e820 reserved entries record to late, v7 try to insert_resource second time, by expanding the resource... for case: e820 reserved entry is partially overlapped with bar res... hope it will never happen Signed-off-by: Ingo Molnar --- arch/x86/kernel/e820.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c index a7a71339bfb..e24d1bc47b4 100644 --- a/arch/x86/kernel/e820.c +++ b/arch/x86/kernel/e820.c @@ -1320,7 +1320,7 @@ void __init e820_reserve_resources_late(void) res = e820_res; for (i = 0; i < e820.nr_map; i++) { if (!res->parent && res->end) - insert_resource(&iomem_resource, res); + reserve_region_with_split(&iomem_resource, res->start, res->end, res->name); res++; } } -- cgit v1.2.3 From 5fef55fddb7317585cabc1eae38dbd57f1c59729 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 4 Sep 2008 21:09:43 +0200 Subject: x86: move mtrr cpu cap setting early in early_init_xxxx Krzysztof Helt found MTRR is not detected on k6-2 root cause: we moved mtrr_bp_init() early for mtrr trimming, and in early_detect we only read the CPU capability from cpuid, so some cpu doesn't have that bit in cpuid. So we need to add early_init_xxxx to preset those bit before mtrr_bp_init for those earlier cpus. this patch is for v2.6.27 Reported-by: Krzysztof Helt Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/amd.c | 9 +++++---- arch/x86/kernel/cpu/centaur.c | 11 +++++++++++ arch/x86/kernel/cpu/cyrix.c | 32 ++++++++++++++++++++++++++++---- 3 files changed, 44 insertions(+), 8 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index cae9cabc303..18514ed2610 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -31,6 +31,11 @@ static void __cpuinit early_init_amd(struct cpuinfo_x86 *c) if (c->x86_power & (1<<8)) set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); } + + /* Set MTRR capability flag if appropriate */ + if (c->x86_model == 13 || c->x86_model == 9 || + (c->x86_model == 8 && c->x86_mask >= 8)) + set_cpu_cap(c, X86_FEATURE_K6_MTRR); } static void __cpuinit init_amd(struct cpuinfo_x86 *c) @@ -166,10 +171,6 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c) mbytes); } - /* Set MTRR capability flag if appropriate */ - if (c->x86_model == 13 || c->x86_model == 9 || - (c->x86_model == 8 && c->x86_mask >= 8)) - set_cpu_cap(c, X86_FEATURE_K6_MTRR); break; } diff --git a/arch/x86/kernel/cpu/centaur.c b/arch/x86/kernel/cpu/centaur.c index e0f45edd6a5..a0534c04d38 100644 --- a/arch/x86/kernel/cpu/centaur.c +++ b/arch/x86/kernel/cpu/centaur.c @@ -314,6 +314,16 @@ enum { EAMD3D = 1<<20, }; +static void __cpuinit early_init_centaur(struct cpuinfo_x86 *c) +{ + switch (c->x86) { + case 5: + /* Emulate MTRRs using Centaur's MCR. */ + set_cpu_cap(c, X86_FEATURE_CENTAUR_MCR); + break; + } +} + static void __cpuinit init_centaur(struct cpuinfo_x86 *c) { @@ -462,6 +472,7 @@ centaur_size_cache(struct cpuinfo_x86 *c, unsigned int size) static struct cpu_dev centaur_cpu_dev __cpuinitdata = { .c_vendor = "Centaur", .c_ident = { "CentaurHauls" }, + .c_early_init = early_init_centaur, .c_init = init_centaur, .c_size_cache = centaur_size_cache, }; diff --git a/arch/x86/kernel/cpu/cyrix.c b/arch/x86/kernel/cpu/cyrix.c index ada50505a5c..13f8fa16b81 100644 --- a/arch/x86/kernel/cpu/cyrix.c +++ b/arch/x86/kernel/cpu/cyrix.c @@ -15,13 +15,11 @@ /* * Read NSC/Cyrix DEVID registers (DIR) to get more detailed info. about the CPU */ -static void __cpuinit do_cyrix_devid(unsigned char *dir0, unsigned char *dir1) +static void __cpuinit __do_cyrix_devid(unsigned char *dir0, unsigned char *dir1) { unsigned char ccr2, ccr3; - unsigned long flags; /* we test for DEVID by checking whether CCR3 is writable */ - local_irq_save(flags); ccr3 = getCx86(CX86_CCR3); setCx86(CX86_CCR3, ccr3 ^ 0x80); getCx86(0xc0); /* dummy to change bus */ @@ -44,9 +42,16 @@ static void __cpuinit do_cyrix_devid(unsigned char *dir0, unsigned char *dir1) *dir0 = getCx86(CX86_DIR0); *dir1 = getCx86(CX86_DIR1); } - local_irq_restore(flags); } +static void __cpuinit do_cyrix_devid(unsigned char *dir0, unsigned char *dir1) +{ + unsigned long flags; + + local_irq_save(flags); + __do_cyrix_devid(dir0, dir1); + local_irq_restore(flags); +} /* * Cx86_dir0_msb is a HACK needed by check_cx686_cpuid/slop in bugs.h in * order to identify the Cyrix CPU model after we're out of setup.c @@ -161,6 +166,24 @@ static void __cpuinit geode_configure(void) local_irq_restore(flags); } +static void __cpuinit early_init_cyrix(struct cpuinfo_x86 *c) +{ + unsigned char dir0, dir0_msn, dir1 = 0; + + __do_cyrix_devid(&dir0, &dir1); + dir0_msn = dir0 >> 4; /* identifies CPU "family" */ + + switch (dir0_msn) { + case 3: /* 6x86/6x86L */ + /* Emulate MTRRs using Cyrix's ARRs. */ + set_cpu_cap(c, X86_FEATURE_CYRIX_ARR); + break; + case 5: /* 6x86MX/M II */ + /* Emulate MTRRs using Cyrix's ARRs. */ + set_cpu_cap(c, X86_FEATURE_CYRIX_ARR); + break; + } +} static void __cpuinit init_cyrix(struct cpuinfo_x86 *c) { @@ -416,6 +439,7 @@ static void __cpuinit cyrix_identify(struct cpuinfo_x86 *c) static struct cpu_dev cyrix_cpu_dev __cpuinitdata = { .c_vendor = "Cyrix", .c_ident = { "CyrixInstead" }, + .c_early_init = early_init_cyrix, .c_init = init_cyrix, .c_identify = cyrix_identify, }; -- cgit v1.2.3 From 5031088dbc0cd30e893eb27d53f0e0eee6eb1c00 Mon Sep 17 00:00:00 2001 From: Krzysztof Helt Date: Thu, 4 Sep 2008 21:09:43 +0200 Subject: x86: delay early cpu initialization until cpuid is done Move early cpu initialization after cpu early get cap so the early cpu initialization can fix up cpu caps. Signed-off-by: Krzysztof Helt Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/common.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 0785b3c8d04..8aab8517642 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -335,11 +335,11 @@ static void __init early_cpu_detect(void) get_cpu_vendor(c, 1); + early_get_cap(c); + if (c->x86_vendor != X86_VENDOR_UNKNOWN && cpu_devs[c->x86_vendor]->c_early_init) cpu_devs[c->x86_vendor]->c_early_init(c); - - early_get_cap(c); } /* -- cgit v1.2.3 From 3da99c97763703b23cbf2bd6e96252256d4e4617 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 4 Sep 2008 21:09:44 +0200 Subject: x86: make (early)_identify_cpu more the same between 32bit and 64 bit 1. add extended_cpuid_level for 32bit 2. add generic_identify for 64bit 3. add early_identify_cpu for 32bit 4. early_identify_cpu not be called by identify_cpu 5. remove early in get_cpu_vendor for 32bit 6. add get_cpu_cap 7. add cpu_detect for 64bit Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/common.c | 138 +++++++++++++++------------------------ arch/x86/kernel/cpu/common_64.c | 139 +++++++++++++++++++++++++--------------- 2 files changed, 141 insertions(+), 136 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 8aab8517642..96e1b8698d3 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -96,7 +96,7 @@ int __cpuinit get_model_name(struct cpuinfo_x86 *c) unsigned int *v; char *p, *q; - if (cpuid_eax(0x80000000) < 0x80000004) + if (c->extended_cpuid_level < 0x80000004) return 0; v = (unsigned int *) c->x86_model_id; @@ -125,7 +125,7 @@ void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c) { unsigned int n, dummy, ecx, edx, l2size; - n = cpuid_eax(0x80000000); + n = c->extended_cpuid_level; if (n >= 0x80000005) { cpuid(0x80000005, &dummy, &dummy, &ecx, &edx); @@ -186,7 +186,7 @@ static char __cpuinit *table_lookup_model(struct cpuinfo_x86 *c) } -static void __cpuinit get_cpu_vendor(struct cpuinfo_x86 *c, int early) +static void __cpuinit get_cpu_vendor(struct cpuinfo_x86 *c) { char *v = c->x86_vendor_id; int i; @@ -198,8 +198,7 @@ static void __cpuinit get_cpu_vendor(struct cpuinfo_x86 *c, int early) (cpu_devs[i]->c_ident[1] && !strcmp(v, cpu_devs[i]->c_ident[1]))) { c->x86_vendor = i; - if (!early) - this_cpu = cpu_devs[i]; + this_cpu = cpu_devs[i]; return; } } @@ -284,34 +283,30 @@ void __init cpu_detect(struct cpuinfo_x86 *c) } } } -static void __cpuinit early_get_cap(struct cpuinfo_x86 *c) + +static void __cpuinit get_cpu_cap(struct cpuinfo_x86 *c) { u32 tfms, xlvl; - unsigned int ebx; + u32 ebx; - memset(&c->x86_capability, 0, sizeof c->x86_capability); - if (have_cpuid_p()) { - /* Intel-defined flags: level 0x00000001 */ - if (c->cpuid_level >= 0x00000001) { - u32 capability, excap; - cpuid(0x00000001, &tfms, &ebx, &excap, &capability); - c->x86_capability[0] = capability; - c->x86_capability[4] = excap; - } + /* Intel-defined flags: level 0x00000001 */ + if (c->cpuid_level >= 0x00000001) { + u32 capability, excap; + cpuid(0x00000001, &tfms, &ebx, &excap, &capability); + c->x86_capability[0] = capability; + c->x86_capability[4] = excap; + } - /* AMD-defined flags: level 0x80000001 */ - xlvl = cpuid_eax(0x80000000); - if ((xlvl & 0xffff0000) == 0x80000000) { - if (xlvl >= 0x80000001) { - c->x86_capability[1] = cpuid_edx(0x80000001); - c->x86_capability[6] = cpuid_ecx(0x80000001); - } + /* AMD-defined flags: level 0x80000001 */ + xlvl = cpuid_eax(0x80000000); + c->extended_cpuid_level = xlvl; + if ((xlvl & 0xffff0000) == 0x80000000) { + if (xlvl >= 0x80000001) { + c->x86_capability[1] = cpuid_edx(0x80000001); + c->x86_capability[6] = cpuid_ecx(0x80000001); } - } - } - /* * Do minimum CPU detection early. * Fields really needed: vendor, cpuid_level, family, model, mask, @@ -321,25 +316,29 @@ static void __cpuinit early_get_cap(struct cpuinfo_x86 *c) * WARNING: this function is only called on the BP. Don't add code here * that is supposed to run on all CPUs. */ -static void __init early_cpu_detect(void) +static void __init early_identify_cpu(struct cpuinfo_x86 *c) { - struct cpuinfo_x86 *c = &boot_cpu_data; - c->x86_cache_alignment = 32; c->x86_clflush_size = 32; if (!have_cpuid_p()) return; + c->extended_cpuid_level = 0; + + memset(&c->x86_capability, 0, sizeof c->x86_capability); + cpu_detect(c); - get_cpu_vendor(c, 1); + get_cpu_vendor(c); - early_get_cap(c); + get_cpu_cap(c); if (c->x86_vendor != X86_VENDOR_UNKNOWN && cpu_devs[c->x86_vendor]->c_early_init) cpu_devs[c->x86_vendor]->c_early_init(c); + + validate_pat_support(c); } /* @@ -373,60 +372,32 @@ static void __cpuinit detect_nopl(struct cpuinfo_x86 *c) static void __cpuinit generic_identify(struct cpuinfo_x86 *c) { - u32 tfms, xlvl; - unsigned int ebx; - - if (have_cpuid_p()) { - /* Get vendor name */ - cpuid(0x00000000, (unsigned int *)&c->cpuid_level, - (unsigned int *)&c->x86_vendor_id[0], - (unsigned int *)&c->x86_vendor_id[8], - (unsigned int *)&c->x86_vendor_id[4]); - - get_cpu_vendor(c, 0); - /* Initialize the standard set of capabilities */ - /* Note that the vendor-specific code below might override */ - /* Intel-defined flags: level 0x00000001 */ - if (c->cpuid_level >= 0x00000001) { - u32 capability, excap; - cpuid(0x00000001, &tfms, &ebx, &excap, &capability); - c->x86_capability[0] = capability; - c->x86_capability[4] = excap; - c->x86 = (tfms >> 8) & 15; - c->x86_model = (tfms >> 4) & 15; - if (c->x86 == 0xf) - c->x86 += (tfms >> 20) & 0xff; - if (c->x86 >= 0x6) - c->x86_model += ((tfms >> 16) & 0xF) << 4; - c->x86_mask = tfms & 15; - c->initial_apicid = (ebx >> 24) & 0xFF; + if (!have_cpuid_p()) + return; + + c->extended_cpuid_level = 0; + + cpu_detect(c); + + get_cpu_vendor(c); + + get_cpu_cap(c); + + if (c->cpuid_level >= 0x00000001) { + c->initial_apicid = (cpuid_ebx(1) >> 24) & 0xFF; #ifdef CONFIG_X86_HT - c->apicid = phys_pkg_id(c->initial_apicid, 0); - c->phys_proc_id = c->initial_apicid; + c->apicid = phys_pkg_id(c->initial_apicid, 0); + c->phys_proc_id = c->initial_apicid; #else - c->apicid = c->initial_apicid; + c->apicid = c->initial_apicid; #endif - if (test_cpu_cap(c, X86_FEATURE_CLFLSH)) - c->x86_clflush_size = ((ebx >> 8) & 0xff) * 8; - } else { - /* Have CPUID level 0 only - unheard of */ - c->x86 = 4; - } + } - /* AMD-defined flags: level 0x80000001 */ - xlvl = cpuid_eax(0x80000000); - if ((xlvl & 0xffff0000) == 0x80000000) { - if (xlvl >= 0x80000001) { - c->x86_capability[1] = cpuid_edx(0x80000001); - c->x86_capability[6] = cpuid_ecx(0x80000001); - } - if (xlvl >= 0x80000004) - get_model_name(c); /* Default name */ - } + if (c->extended_cpuid_level >= 0x80000004) + get_model_name(c); /* Default name */ - init_scattered_cpuid_features(c); - detect_nopl(c); - } + init_scattered_cpuid_features(c); + detect_nopl(c); } static void __cpuinit squash_the_stupid_serial_number(struct cpuinfo_x86 *c) @@ -651,13 +622,10 @@ void __init early_cpu_init(void) { struct cpu_vendor_dev *cvdev; - for (cvdev = __x86cpuvendor_start ; - cvdev < __x86cpuvendor_end ; - cvdev++) + for (cvdev = __x86cpuvendor_start; cvdev < __x86cpuvendor_end; cvdev++) cpu_devs[cvdev->vendor] = cvdev->cpu_dev; - early_cpu_detect(); - validate_pat_support(&boot_cpu_data); + early_identify_cpu(&boot_cpu_data); } /* Make sure %fs is initialized properly in idle threads */ diff --git a/arch/x86/kernel/cpu/common_64.c b/arch/x86/kernel/cpu/common_64.c index 699ecbfb63e..28719fe0794 100644 --- a/arch/x86/kernel/cpu/common_64.c +++ b/arch/x86/kernel/cpu/common_64.c @@ -195,6 +195,7 @@ static void __cpuinit get_cpu_vendor(struct cpuinfo_x86 *c) printk(KERN_ERR "CPU: Your system may be unstable.\n"); } c->x86_vendor = X86_VENDOR_UNKNOWN; + this_cpu = &default_cpu; } static void __init early_cpu_support_print(void) @@ -249,56 +250,18 @@ static void __cpuinit detect_nopl(struct cpuinfo_x86 *c) } } -static void __cpuinit early_identify_cpu(struct cpuinfo_x86 *c); - -void __init early_cpu_init(void) +void __cpuinit cpu_detect(struct cpuinfo_x86 *c) { - struct cpu_vendor_dev *cvdev; - - for (cvdev = __x86cpuvendor_start ; - cvdev < __x86cpuvendor_end ; - cvdev++) - cpu_devs[cvdev->vendor] = cvdev->cpu_dev; - early_cpu_support_print(); - early_identify_cpu(&boot_cpu_data); -} - -/* Do some early cpuid on the boot CPU to get some parameter that are - needed before check_bugs. Everything advanced is in identify_cpu - below. */ -static void __cpuinit early_identify_cpu(struct cpuinfo_x86 *c) -{ - u32 tfms, xlvl; - - c->loops_per_jiffy = loops_per_jiffy; - c->x86_cache_size = -1; - c->x86_vendor = X86_VENDOR_UNKNOWN; - c->x86_model = c->x86_mask = 0; /* So far unknown... */ - c->x86_vendor_id[0] = '\0'; /* Unset */ - c->x86_model_id[0] = '\0'; /* Unset */ - c->x86_clflush_size = 64; - c->x86_cache_alignment = c->x86_clflush_size; - c->x86_max_cores = 1; - c->x86_coreid_bits = 0; - c->extended_cpuid_level = 0; - memset(&c->x86_capability, 0, sizeof c->x86_capability); - /* Get vendor name */ cpuid(0x00000000, (unsigned int *)&c->cpuid_level, (unsigned int *)&c->x86_vendor_id[0], (unsigned int *)&c->x86_vendor_id[8], (unsigned int *)&c->x86_vendor_id[4]); - get_cpu_vendor(c); - - /* Initialize the standard set of capabilities */ - /* Note that the vendor-specific code below might override */ - /* Intel-defined flags: level 0x00000001 */ if (c->cpuid_level >= 0x00000001) { - __u32 misc; - cpuid(0x00000001, &tfms, &misc, &c->x86_capability[4], - &c->x86_capability[0]); + u32 junk, tfms, cap0, misc; + cpuid(0x00000001, &tfms, &misc, &junk, &cap0); c->x86 = (tfms >> 8) & 0xf; c->x86_model = (tfms >> 4) & 0xf; c->x86_mask = tfms & 0xf; @@ -306,17 +269,32 @@ static void __cpuinit early_identify_cpu(struct cpuinfo_x86 *c) c->x86 += (tfms >> 20) & 0xff; if (c->x86 >= 0x6) c->x86_model += ((tfms >> 16) & 0xF) << 4; - if (test_cpu_cap(c, X86_FEATURE_CLFLSH)) + if (cap0 & (1<<19)) c->x86_clflush_size = ((misc >> 8) & 0xff) * 8; } else { /* Have CPUID level 0 only - unheard of */ c->x86 = 4; } +} + + +static void __cpuinit get_cpu_cap(struct cpuinfo_x86 *c) +{ + u32 tfms, xlvl; + u32 ebx; + + /* Initialize the standard set of capabilities */ + /* Note that the vendor-specific code below might override */ + + /* Intel-defined flags: level 0x00000001 */ + if (c->cpuid_level >= 0x00000001) { + u32 capability, excap; + + cpuid(0x00000001, &tfms, &ebx, &excap, &capability); + c->x86_capability[0] = capability; + c->x86_capability[4] = excap; + } - c->initial_apicid = (cpuid_ebx(1) >> 24) & 0xff; -#ifdef CONFIG_SMP - c->phys_proc_id = c->initial_apicid; -#endif /* AMD-defined flags: level 0x80000001 */ xlvl = cpuid_eax(0x80000000); c->extended_cpuid_level = xlvl; @@ -325,8 +303,6 @@ static void __cpuinit early_identify_cpu(struct cpuinfo_x86 *c) c->x86_capability[1] = cpuid_edx(0x80000001); c->x86_capability[6] = cpuid_ecx(0x80000001); } - if (xlvl >= 0x80000004) - get_model_name(c); /* Default name */ } /* Transmeta-defined flags: level 0x80860001 */ @@ -346,8 +322,26 @@ static void __cpuinit early_identify_cpu(struct cpuinfo_x86 *c) c->x86_virt_bits = (eax >> 8) & 0xff; c->x86_phys_bits = eax & 0xff; } +} - detect_nopl(c); +/* Do some early cpuid on the boot CPU to get some parameter that are + needed before check_bugs. Everything advanced is in identify_cpu + below. */ +static void __init early_identify_cpu(struct cpuinfo_x86 *c) +{ + + c->x86_clflush_size = 64; + c->x86_cache_alignment = c->x86_clflush_size; + + memset(&c->x86_capability, 0, sizeof c->x86_capability); + + c->extended_cpuid_level = 0; + + cpu_detect(c); + + get_cpu_vendor(c); + + get_cpu_cap(c); if (c->x86_vendor != X86_VENDOR_UNKNOWN && cpu_devs[c->x86_vendor]->c_early_init) @@ -356,6 +350,39 @@ static void __cpuinit early_identify_cpu(struct cpuinfo_x86 *c) validate_pat_support(c); } +void __init early_cpu_init(void) +{ + struct cpu_vendor_dev *cvdev; + + for (cvdev = __x86cpuvendor_start; cvdev < __x86cpuvendor_end; cvdev++) + cpu_devs[cvdev->vendor] = cvdev->cpu_dev; + + early_cpu_support_print(); + early_identify_cpu(&boot_cpu_data); +} + +static void __cpuinit generic_identify(struct cpuinfo_x86 *c) +{ + c->extended_cpuid_level = 0; + + cpu_detect(c); + + get_cpu_vendor(c); + + get_cpu_cap(c); + + c->initial_apicid = (cpuid_ebx(1) >> 24) & 0xff; +#ifdef CONFIG_SMP + c->phys_proc_id = c->initial_apicid; +#endif + + if (c->extended_cpuid_level >= 0x80000004) + get_model_name(c); /* Default name */ + + init_scattered_cpuid_features(c); + detect_nopl(c); +} + /* * This does the hard work of actually picking apart the CPU stuff... */ @@ -363,9 +390,19 @@ static void __cpuinit identify_cpu(struct cpuinfo_x86 *c) { int i; - early_identify_cpu(c); + c->loops_per_jiffy = loops_per_jiffy; + c->x86_cache_size = -1; + c->x86_vendor = X86_VENDOR_UNKNOWN; + c->x86_model = c->x86_mask = 0; /* So far unknown... */ + c->x86_vendor_id[0] = '\0'; /* Unset */ + c->x86_model_id[0] = '\0'; /* Unset */ + c->x86_clflush_size = 64; + c->x86_cache_alignment = c->x86_clflush_size; + c->x86_max_cores = 1; + c->x86_coreid_bits = 0; + memset(&c->x86_capability, 0, sizeof c->x86_capability); - init_scattered_cpuid_features(c); + generic_identify(c); c->apicid = phys_pkg_id(0); -- cgit v1.2.3 From 9d31d35b5f9d619bb2482235cc889326de049e29 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 4 Sep 2008 21:09:44 +0200 Subject: x86: order functions in cpu/common.c and cpu/common_64.c v2 v2: make 64 bit get c->x86_cache_alignment = c->x86_clfush_size Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/common.c | 190 ++++++++++++++++++++++------------------ arch/x86/kernel/cpu/common_64.c | 106 +++++++++++----------- 2 files changed, 156 insertions(+), 140 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 96e1b8698d3..10e89ae5a60 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -60,6 +60,18 @@ EXPORT_PER_CPU_SYMBOL_GPL(gdt_page); __u32 cleared_cpu_caps[NCAPINTS] __cpuinitdata; +/* Current gdt points %fs at the "master" per-cpu area: after this, + * it's on the real one. */ +void switch_to_new_gdt(void) +{ + struct desc_ptr gdt_descr; + + gdt_descr.address = (long)get_cpu_gdt_table(smp_processor_id()); + gdt_descr.size = GDT_SIZE - 1; + load_gdt(&gdt_descr); + asm("mov %0, %%fs" : : "r" (__KERNEL_PERCPU) : "memory"); +} + static int cachesize_override __cpuinitdata = -1; static int disable_x86_serial_nr __cpuinitdata = 1; @@ -123,15 +135,15 @@ int __cpuinit get_model_name(struct cpuinfo_x86 *c) void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c) { - unsigned int n, dummy, ecx, edx, l2size; + unsigned int n, dummy, ebx, ecx, edx, l2size; n = c->extended_cpuid_level; if (n >= 0x80000005) { - cpuid(0x80000005, &dummy, &dummy, &ecx, &edx); + cpuid(0x80000005, &dummy, &ebx, &ecx, &edx); printk(KERN_INFO "CPU: L1 I Cache: %dK (%d bytes/line), D cache %dK (%d bytes/line)\n", - edx>>24, edx&0xFF, ecx>>24, ecx&0xFF); - c->x86_cache_size = (ecx>>24)+(edx>>24); + edx>>24, edx&0xFF, ecx>>24, ecx&0xFF); + c->x86_cache_size = (ecx>>24) + (edx>>24); } if (n < 0x80000006) /* Some chips just has a large L1. */ @@ -185,6 +197,51 @@ static char __cpuinit *table_lookup_model(struct cpuinfo_x86 *c) return NULL; /* Not found */ } +#ifdef CONFIG_X86_HT +void __cpuinit detect_ht(struct cpuinfo_x86 *c) +{ + u32 eax, ebx, ecx, edx; + int index_msb, core_bits; + + cpuid(1, &eax, &ebx, &ecx, &edx); + + if (!cpu_has(c, X86_FEATURE_HT) || cpu_has(c, X86_FEATURE_CMP_LEGACY)) + return; + + smp_num_siblings = (ebx & 0xff0000) >> 16; + + if (smp_num_siblings == 1) { + printk(KERN_INFO "CPU: Hyper-Threading is disabled\n"); + } else if (smp_num_siblings > 1) { + + if (smp_num_siblings > NR_CPUS) { + printk(KERN_WARNING "CPU: Unsupported number of siblings %d", + smp_num_siblings); + smp_num_siblings = 1; + return; + } + + index_msb = get_count_order(smp_num_siblings); + c->phys_proc_id = phys_pkg_id(c->initial_apicid, index_msb); + + printk(KERN_INFO "CPU: Physical Processor ID: %d\n", + c->phys_proc_id); + + smp_num_siblings = smp_num_siblings / c->x86_max_cores; + + index_msb = get_count_order(smp_num_siblings); + + core_bits = get_count_order(c->x86_max_cores); + + c->cpu_core_id = phys_pkg_id(c->initial_apicid, index_msb) & + ((1 << core_bits) - 1); + + if (c->x86_max_cores > 1) + printk(KERN_INFO "CPU: Processor Core ID: %d\n", + c->cpu_core_id); + } +} +#endif static void __cpuinit get_cpu_vendor(struct cpuinfo_x86 *c) { @@ -258,7 +315,26 @@ static int __cpuinit have_cpuid_p(void) return flag_is_changeable_p(X86_EFLAGS_ID); } -void __init cpu_detect(struct cpuinfo_x86 *c) +static void __init early_cpu_support_print(void) +{ + int i,j; + struct cpu_dev *cpu_devx; + + printk("KERNEL supported cpus:\n"); + for (i = 0; i < X86_VENDOR_NUM; i++) { + cpu_devx = cpu_devs[i]; + if (!cpu_devx) + continue; + for (j = 0; j < 2; j++) { + if (!cpu_devx->c_ident[j]) + continue; + printk(" %s %s\n", cpu_devx->c_vendor, + cpu_devx->c_ident[j]); + } + } +} + +void __cpuinit cpu_detect(struct cpuinfo_x86 *c) { /* Get vendor name */ cpuid(0x00000000, (unsigned int *)&c->cpuid_level, @@ -267,19 +343,20 @@ void __init cpu_detect(struct cpuinfo_x86 *c) (unsigned int *)&c->x86_vendor_id[4]); c->x86 = 4; + /* Intel-defined flags: level 0x00000001 */ if (c->cpuid_level >= 0x00000001) { u32 junk, tfms, cap0, misc; cpuid(0x00000001, &tfms, &misc, &junk, &cap0); - c->x86 = (tfms >> 8) & 15; - c->x86_model = (tfms >> 4) & 15; + c->x86 = (tfms >> 8) & 0xf; + c->x86_model = (tfms >> 4) & 0xf; + c->x86_mask = tfms & 0xf; if (c->x86 == 0xf) c->x86 += (tfms >> 20) & 0xff; if (c->x86 >= 0x6) - c->x86_model += ((tfms >> 16) & 0xF) << 4; - c->x86_mask = tfms & 15; + c->x86_model += ((tfms >> 16) & 0xf) << 4; if (cap0 & (1<<19)) { - c->x86_cache_alignment = ((misc >> 8) & 0xff) * 8; c->x86_clflush_size = ((misc >> 8) & 0xff) * 8; + c->x86_cache_alignment = c->x86_clflush_size; } } } @@ -341,6 +418,17 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c) validate_pat_support(c); } +void __init early_cpu_init(void) +{ + struct cpu_vendor_dev *cvdev; + + for (cvdev = __x86cpuvendor_start; cvdev < __x86cpuvendor_end; cvdev++) + cpu_devs[cvdev->vendor] = cvdev->cpu_dev; + + early_cpu_support_print(); + early_identify_cpu(&boot_cpu_data); +} + /* * The NOPL instruction is supposed to exist on all CPUs with * family >= 6, unfortunately, that's not true in practice because @@ -500,7 +588,7 @@ static void __cpuinit identify_cpu(struct cpuinfo_x86 *c) */ if (c != &boot_cpu_data) { /* AND the already accumulated flags with these */ - for (i = 0 ; i < NCAPINTS ; i++) + for (i = 0; i < NCAPINTS; i++) boot_cpu_data.x86_capability[i] &= c->x86_capability[i]; } @@ -529,52 +617,6 @@ void __cpuinit identify_secondary_cpu(struct cpuinfo_x86 *c) mtrr_ap_init(); } -#ifdef CONFIG_X86_HT -void __cpuinit detect_ht(struct cpuinfo_x86 *c) -{ - u32 eax, ebx, ecx, edx; - int index_msb, core_bits; - - cpuid(1, &eax, &ebx, &ecx, &edx); - - if (!cpu_has(c, X86_FEATURE_HT) || cpu_has(c, X86_FEATURE_CMP_LEGACY)) - return; - - smp_num_siblings = (ebx & 0xff0000) >> 16; - - if (smp_num_siblings == 1) { - printk(KERN_INFO "CPU: Hyper-Threading is disabled\n"); - } else if (smp_num_siblings > 1) { - - if (smp_num_siblings > NR_CPUS) { - printk(KERN_WARNING "CPU: Unsupported number of the " - "siblings %d", smp_num_siblings); - smp_num_siblings = 1; - return; - } - - index_msb = get_count_order(smp_num_siblings); - c->phys_proc_id = phys_pkg_id(c->initial_apicid, index_msb); - - printk(KERN_INFO "CPU: Physical Processor ID: %d\n", - c->phys_proc_id); - - smp_num_siblings = smp_num_siblings / c->x86_max_cores; - - index_msb = get_count_order(smp_num_siblings) ; - - core_bits = get_count_order(c->x86_max_cores); - - c->cpu_core_id = phys_pkg_id(c->initial_apicid, index_msb) & - ((1 << core_bits) - 1); - - if (c->x86_max_cores > 1) - printk(KERN_INFO "CPU: Processor Core ID: %d\n", - c->cpu_core_id); - } -} -#endif - static __init int setup_noclflush(char *arg) { setup_clear_cpu_cap(X86_FEATURE_CLFLSH); @@ -592,17 +634,17 @@ void __cpuinit print_cpu_info(struct cpuinfo_x86 *c) vendor = c->x86_vendor_id; if (vendor && strncmp(c->x86_model_id, vendor, strlen(vendor))) - printk("%s ", vendor); + printk(KERN_CONT "%s ", vendor); - if (!c->x86_model_id[0]) - printk("%d86", c->x86); + if (c->x86_model_id[0]) + printk(KERN_CONT "%s", c->x86_model_id); else - printk("%s", c->x86_model_id); + printk(KERN_CONT "%d86", c->x86); if (c->x86_mask || c->cpuid_level >= 0) - printk(" stepping %02x\n", c->x86_mask); + printk(KERN_CONT " stepping %02x\n", c->x86_mask); else - printk("\n"); + printk(KERN_CONT "\n"); } static __init int setup_disablecpuid(char *arg) @@ -618,16 +660,6 @@ __setup("clearcpuid=", setup_disablecpuid); cpumask_t cpu_initialized __cpuinitdata = CPU_MASK_NONE; -void __init early_cpu_init(void) -{ - struct cpu_vendor_dev *cvdev; - - for (cvdev = __x86cpuvendor_start; cvdev < __x86cpuvendor_end; cvdev++) - cpu_devs[cvdev->vendor] = cvdev->cpu_dev; - - early_identify_cpu(&boot_cpu_data); -} - /* Make sure %fs is initialized properly in idle threads */ struct pt_regs * __cpuinit idle_regs(struct pt_regs *regs) { @@ -636,18 +668,6 @@ struct pt_regs * __cpuinit idle_regs(struct pt_regs *regs) return regs; } -/* Current gdt points %fs at the "master" per-cpu area: after this, - * it's on the real one. */ -void switch_to_new_gdt(void) -{ - struct desc_ptr gdt_descr; - - gdt_descr.address = (long)get_cpu_gdt_table(smp_processor_id()); - gdt_descr.size = GDT_SIZE - 1; - load_gdt(&gdt_descr); - asm("mov %0, %%fs" : : "r" (__KERNEL_PERCPU) : "memory"); -} - /* * cpu_init() initializes state that is per-CPU. Some data is already * initialized (naturally) in the bootstrap process, such as the GDT diff --git a/arch/x86/kernel/cpu/common_64.c b/arch/x86/kernel/cpu/common_64.c index 28719fe0794..522a5f2e405 100644 --- a/arch/x86/kernel/cpu/common_64.c +++ b/arch/x86/kernel/cpu/common_64.c @@ -103,9 +103,8 @@ void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c) if (n >= 0x80000005) { cpuid(0x80000005, &dummy, &ebx, &ecx, &edx); - printk(KERN_INFO "CPU: L1 I Cache: %dK (%d bytes/line), " - "D cache %dK (%d bytes/line)\n", - edx>>24, edx&0xFF, ecx>>24, ecx&0xFF); + printk(KERN_INFO "CPU: L1 I Cache: %dK (%d bytes/line), D cache %dK (%d bytes/line)\n", + edx>>24, edx&0xFF, ecx>>24, ecx&0xFF); c->x86_cache_size = (ecx>>24) + (edx>>24); /* On K8 L1 TLB is inclusive, so don't count it */ c->x86_tlbsize = 0; @@ -143,8 +142,8 @@ void __cpuinit detect_ht(struct cpuinfo_x86 *c) } else if (smp_num_siblings > 1) { if (smp_num_siblings > NR_CPUS) { - printk(KERN_WARNING "CPU: Unsupported number of " - "siblings %d", smp_num_siblings); + printk(KERN_WARNING "CPU: Unsupported number of siblings %d", + smp_num_siblings); smp_num_siblings = 1; return; } @@ -182,7 +181,7 @@ static void __cpuinit get_cpu_vendor(struct cpuinfo_x86 *c) if (cpu_devs[i]) { if (!strcmp(v, cpu_devs[i]->c_ident[0]) || (cpu_devs[i]->c_ident[1] && - !strcmp(v, cpu_devs[i]->c_ident[1]))) { + !strcmp(v, cpu_devs[i]->c_ident[1]))) { c->x86_vendor = i; this_cpu = cpu_devs[i]; return; @@ -217,39 +216,6 @@ static void __init early_cpu_support_print(void) } } -/* - * The NOPL instruction is supposed to exist on all CPUs with - * family >= 6, unfortunately, that's not true in practice because - * of early VIA chips and (more importantly) broken virtualizers that - * are not easy to detect. Hence, probe for it based on first - * principles. - * - * Note: no 64-bit chip is known to lack these, but put the code here - * for consistency with 32 bits, and to make it utterly trivial to - * diagnose the problem should it ever surface. - */ -static void __cpuinit detect_nopl(struct cpuinfo_x86 *c) -{ - const u32 nopl_signature = 0x888c53b1; /* Random number */ - u32 has_nopl = nopl_signature; - - clear_cpu_cap(c, X86_FEATURE_NOPL); - if (c->x86 >= 6) { - asm volatile("\n" - "1: .byte 0x0f,0x1f,0xc0\n" /* nopl %eax */ - "2:\n" - " .section .fixup,\"ax\"\n" - "3: xor %0,%0\n" - " jmp 2b\n" - " .previous\n" - _ASM_EXTABLE(1b,3b) - : "+a" (has_nopl)); - - if (has_nopl == nopl_signature) - set_cpu_cap(c, X86_FEATURE_NOPL); - } -} - void __cpuinit cpu_detect(struct cpuinfo_x86 *c) { /* Get vendor name */ @@ -258,6 +224,7 @@ void __cpuinit cpu_detect(struct cpuinfo_x86 *c) (unsigned int *)&c->x86_vendor_id[8], (unsigned int *)&c->x86_vendor_id[4]); + c->x86 = 4; /* Intel-defined flags: level 0x00000001 */ if (c->cpuid_level >= 0x00000001) { u32 junk, tfms, cap0, misc; @@ -268,12 +235,11 @@ void __cpuinit cpu_detect(struct cpuinfo_x86 *c) if (c->x86 == 0xf) c->x86 += (tfms >> 20) & 0xff; if (c->x86 >= 0x6) - c->x86_model += ((tfms >> 16) & 0xF) << 4; - if (cap0 & (1<<19)) + c->x86_model += ((tfms >> 16) & 0xf) << 4; + if (cap0 & (1<<19)) { c->x86_clflush_size = ((misc >> 8) & 0xff) * 8; - } else { - /* Have CPUID level 0 only - unheard of */ - c->x86 = 4; + c->x86_cache_alignment = c->x86_clflush_size; + } } } @@ -283,9 +249,6 @@ static void __cpuinit get_cpu_cap(struct cpuinfo_x86 *c) u32 tfms, xlvl; u32 ebx; - /* Initialize the standard set of capabilities */ - /* Note that the vendor-specific code below might override */ - /* Intel-defined flags: level 0x00000001 */ if (c->cpuid_level >= 0x00000001) { u32 capability, excap; @@ -361,6 +324,39 @@ void __init early_cpu_init(void) early_identify_cpu(&boot_cpu_data); } +/* + * The NOPL instruction is supposed to exist on all CPUs with + * family >= 6, unfortunately, that's not true in practice because + * of early VIA chips and (more importantly) broken virtualizers that + * are not easy to detect. Hence, probe for it based on first + * principles. + * + * Note: no 64-bit chip is known to lack these, but put the code here + * for consistency with 32 bits, and to make it utterly trivial to + * diagnose the problem should it ever surface. + */ +static void __cpuinit detect_nopl(struct cpuinfo_x86 *c) +{ + const u32 nopl_signature = 0x888c53b1; /* Random number */ + u32 has_nopl = nopl_signature; + + clear_cpu_cap(c, X86_FEATURE_NOPL); + if (c->x86 >= 6) { + asm volatile("\n" + "1: .byte 0x0f,0x1f,0xc0\n" /* nopl %eax */ + "2:\n" + " .section .fixup,\"ax\"\n" + "3: xor %0,%0\n" + " jmp 2b\n" + " .previous\n" + _ASM_EXTABLE(1b,3b) + : "+a" (has_nopl)); + + if (has_nopl == nopl_signature) + set_cpu_cap(c, X86_FEATURE_NOPL); + } +} + static void __cpuinit generic_identify(struct cpuinfo_x86 *c) { c->extended_cpuid_level = 0; @@ -448,7 +444,7 @@ static void __cpuinit identify_cpu(struct cpuinfo_x86 *c) } -void __cpuinit identify_boot_cpu(void) +void __init identify_boot_cpu(void) { identify_cpu(&boot_cpu_data); } @@ -460,13 +456,6 @@ void __cpuinit identify_secondary_cpu(struct cpuinfo_x86 *c) mtrr_ap_init(); } -static __init int setup_noclflush(char *arg) -{ - setup_clear_cpu_cap(X86_FEATURE_CLFLSH); - return 1; -} -__setup("noclflush", setup_noclflush); - struct msr_range { unsigned min; unsigned max; @@ -510,6 +499,13 @@ static __init int setup_show_msr(char *arg) } __setup("show_msr=", setup_show_msr); +static __init int setup_noclflush(char *arg) +{ + setup_clear_cpu_cap(X86_FEATURE_CLFLSH); + return 1; +} +__setup("noclflush", setup_noclflush); + void __cpuinit print_cpu_info(struct cpuinfo_x86 *c) { if (c->x86_model_id[0]) -- cgit v1.2.3 From 10a434fcb23a57c385177a0086955fae01003f64 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 4 Sep 2008 21:09:45 +0200 Subject: x86: remove cpu_vendor_dev 1. add c_x86_vendor into cpu_dev 2. change cpu_devs to static 3. check c_x86_vendor before put that cpu_dev into array 4. remove alignment for 64bit 5. order the sequence in cpu_devs according to link sequence... so could put intel at first, then amd... Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/Makefile | 4 +-- arch/x86/kernel/cpu/amd.c | 3 +- arch/x86/kernel/cpu/amd_64.c | 4 +-- arch/x86/kernel/cpu/centaur.c | 3 +- arch/x86/kernel/cpu/centaur_64.c | 3 +- arch/x86/kernel/cpu/common.c | 71 ++++++++++++++++++++-------------------- arch/x86/kernel/cpu/common_64.c | 71 ++++++++++++++++++++-------------------- arch/x86/kernel/cpu/cpu.h | 18 ++++------ arch/x86/kernel/cpu/cyrix.c | 6 ++-- arch/x86/kernel/cpu/intel.c | 3 +- arch/x86/kernel/cpu/intel_64.c | 3 +- arch/x86/kernel/cpu/transmeta.c | 3 +- arch/x86/kernel/cpu/umc.c | 3 +- arch/x86/kernel/vmlinux_32.lds.S | 8 ++--- arch/x86/kernel/vmlinux_64.lds.S | 9 +++-- 15 files changed, 106 insertions(+), 106 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile index 3ede19a4e0b..403e689df0b 100644 --- a/arch/x86/kernel/cpu/Makefile +++ b/arch/x86/kernel/cpu/Makefile @@ -8,14 +8,14 @@ obj-y += proc.o capflags.o powerflags.o obj-$(CONFIG_X86_32) += common.o bugs.o cmpxchg.o obj-$(CONFIG_X86_64) += common_64.o bugs_64.o +obj-$(CONFIG_CPU_SUP_INTEL_32) += intel.o +obj-$(CONFIG_CPU_SUP_INTEL_64) += intel_64.o obj-$(CONFIG_CPU_SUP_AMD_32) += amd.o obj-$(CONFIG_CPU_SUP_AMD_64) += amd_64.o obj-$(CONFIG_CPU_SUP_CYRIX_32) += cyrix.o obj-$(CONFIG_CPU_SUP_CENTAUR_32) += centaur.o obj-$(CONFIG_CPU_SUP_CENTAUR_64) += centaur_64.o obj-$(CONFIG_CPU_SUP_TRANSMETA_32) += transmeta.o -obj-$(CONFIG_CPU_SUP_INTEL_32) += intel.o -obj-$(CONFIG_CPU_SUP_INTEL_64) += intel_64.o obj-$(CONFIG_CPU_SUP_UMC_32) += umc.o obj-$(CONFIG_X86_MCE) += mcheck/ diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 18514ed2610..d64ea6097ca 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -298,6 +298,7 @@ static struct cpu_dev amd_cpu_dev __cpuinitdata = { .c_early_init = early_init_amd, .c_init = init_amd, .c_size_cache = amd_size_cache, + .c_x86_vendor = X86_VENDOR_AMD, }; -cpu_vendor_dev_register(X86_VENDOR_AMD, &amd_cpu_dev); +cpu_dev_register(amd_cpu_dev); diff --git a/arch/x86/kernel/cpu/amd_64.c b/arch/x86/kernel/cpu/amd_64.c index d1692b2a41f..d1c721c0c49 100644 --- a/arch/x86/kernel/cpu/amd_64.c +++ b/arch/x86/kernel/cpu/amd_64.c @@ -218,7 +218,7 @@ static struct cpu_dev amd_cpu_dev __cpuinitdata = { .c_ident = { "AuthenticAMD" }, .c_early_init = early_init_amd, .c_init = init_amd, + .c_x86_vendor = X86_VENDOR_AMD, }; -cpu_vendor_dev_register(X86_VENDOR_AMD, &amd_cpu_dev); - +cpu_dev_register(amd_cpu_dev); diff --git a/arch/x86/kernel/cpu/centaur.c b/arch/x86/kernel/cpu/centaur.c index a0534c04d38..e5f6d89521b 100644 --- a/arch/x86/kernel/cpu/centaur.c +++ b/arch/x86/kernel/cpu/centaur.c @@ -475,6 +475,7 @@ static struct cpu_dev centaur_cpu_dev __cpuinitdata = { .c_early_init = early_init_centaur, .c_init = init_centaur, .c_size_cache = centaur_size_cache, + .c_x86_vendor = X86_VENDOR_CENTAUR, }; -cpu_vendor_dev_register(X86_VENDOR_CENTAUR, ¢aur_cpu_dev); +cpu_dev_register(centaur_cpu_dev); diff --git a/arch/x86/kernel/cpu/centaur_64.c b/arch/x86/kernel/cpu/centaur_64.c index 1d181c40e2e..49cfc6d2f2f 100644 --- a/arch/x86/kernel/cpu/centaur_64.c +++ b/arch/x86/kernel/cpu/centaur_64.c @@ -29,7 +29,8 @@ static struct cpu_dev centaur_cpu_dev __cpuinitdata = { .c_ident = { "CentaurHauls" }, .c_early_init = early_init_centaur, .c_init = init_centaur, + .c_x86_vendor = X86_VENDOR_CENTAUR, }; -cpu_vendor_dev_register(X86_VENDOR_CENTAUR, ¢aur_cpu_dev); +cpu_dev_register(centaur_cpu_dev); diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 10e89ae5a60..ee1044ca481 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -75,7 +75,7 @@ void switch_to_new_gdt(void) static int cachesize_override __cpuinitdata = -1; static int disable_x86_serial_nr __cpuinitdata = 1; -struct cpu_dev *cpu_devs[X86_VENDOR_NUM] = {}; +static struct cpu_dev *cpu_devs[X86_VENDOR_NUM] = {}; static void __cpuinit default_init(struct cpuinfo_x86 *c) { @@ -93,8 +93,9 @@ static void __cpuinit default_init(struct cpuinfo_x86 *c) static struct cpu_dev __cpuinitdata default_cpu = { .c_init = default_init, .c_vendor = "Unknown", + .c_x86_vendor = X86_VENDOR_UNKNOWN, }; -static struct cpu_dev *this_cpu __cpuinitdata = &default_cpu; +static struct cpu_dev *this_cpu __cpuinitdata; static int __init cachesize_setup(char *str) { @@ -250,21 +251,24 @@ static void __cpuinit get_cpu_vendor(struct cpuinfo_x86 *c) static int printed; for (i = 0; i < X86_VENDOR_NUM; i++) { - if (cpu_devs[i]) { - if (!strcmp(v, cpu_devs[i]->c_ident[0]) || - (cpu_devs[i]->c_ident[1] && - !strcmp(v, cpu_devs[i]->c_ident[1]))) { - c->x86_vendor = i; - this_cpu = cpu_devs[i]; - return; - } + if (!cpu_devs[i]) + break; + + if (!strcmp(v, cpu_devs[i]->c_ident[0]) || + (cpu_devs[i]->c_ident[1] && + !strcmp(v, cpu_devs[i]->c_ident[1]))) { + this_cpu = cpu_devs[i]; + c->x86_vendor = this_cpu->c_x86_vendor; + return; } } + if (!printed) { printed++; printk(KERN_ERR "CPU: Vendor unknown, using generic init.\n"); printk(KERN_ERR "CPU: Your system may be unstable.\n"); } + c->x86_vendor = X86_VENDOR_UNKNOWN; this_cpu = &default_cpu; } @@ -315,25 +319,6 @@ static int __cpuinit have_cpuid_p(void) return flag_is_changeable_p(X86_EFLAGS_ID); } -static void __init early_cpu_support_print(void) -{ - int i,j; - struct cpu_dev *cpu_devx; - - printk("KERNEL supported cpus:\n"); - for (i = 0; i < X86_VENDOR_NUM; i++) { - cpu_devx = cpu_devs[i]; - if (!cpu_devx) - continue; - for (j = 0; j < 2; j++) { - if (!cpu_devx->c_ident[j]) - continue; - printk(" %s %s\n", cpu_devx->c_vendor, - cpu_devx->c_ident[j]); - } - } -} - void __cpuinit cpu_detect(struct cpuinfo_x86 *c) { /* Get vendor name */ @@ -411,21 +396,35 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c) get_cpu_cap(c); - if (c->x86_vendor != X86_VENDOR_UNKNOWN && - cpu_devs[c->x86_vendor]->c_early_init) - cpu_devs[c->x86_vendor]->c_early_init(c); + if (this_cpu->c_early_init) + this_cpu->c_early_init(c); validate_pat_support(c); } void __init early_cpu_init(void) { - struct cpu_vendor_dev *cvdev; + struct cpu_dev **cdev; + int count = 0; + + printk("KERNEL supported cpus:\n"); + for (cdev = __x86_cpu_dev_start; cdev < __x86_cpu_dev_end; cdev++) { + struct cpu_dev *cpudev = *cdev; + unsigned int j; - for (cvdev = __x86cpuvendor_start; cvdev < __x86cpuvendor_end; cvdev++) - cpu_devs[cvdev->vendor] = cvdev->cpu_dev; + if (count >= X86_VENDOR_NUM) + break; + cpu_devs[count] = cpudev; + count++; + + for (j = 0; j < 2; j++) { + if (!cpudev->c_ident[j]) + continue; + printk(" %s %s\n", cpudev->c_vendor, + cpudev->c_ident[j]); + } + } - early_cpu_support_print(); early_identify_cpu(&boot_cpu_data); } diff --git a/arch/x86/kernel/cpu/common_64.c b/arch/x86/kernel/cpu/common_64.c index 522a5f2e405..b82479c1083 100644 --- a/arch/x86/kernel/cpu/common_64.c +++ b/arch/x86/kernel/cpu/common_64.c @@ -66,7 +66,7 @@ void switch_to_new_gdt(void) load_gdt(&gdt_descr); } -struct cpu_dev *cpu_devs[X86_VENDOR_NUM] = {}; +static struct cpu_dev *cpu_devs[X86_VENDOR_NUM] = {}; static void __cpuinit default_init(struct cpuinfo_x86 *c) { @@ -76,8 +76,9 @@ static void __cpuinit default_init(struct cpuinfo_x86 *c) static struct cpu_dev __cpuinitdata default_cpu = { .c_init = default_init, .c_vendor = "Unknown", + .c_x86_vendor = X86_VENDOR_UNKNOWN, }; -static struct cpu_dev *this_cpu __cpuinitdata = &default_cpu; +static struct cpu_dev *this_cpu __cpuinitdata; int __cpuinit get_model_name(struct cpuinfo_x86 *c) { @@ -178,44 +179,28 @@ static void __cpuinit get_cpu_vendor(struct cpuinfo_x86 *c) static int printed; for (i = 0; i < X86_VENDOR_NUM; i++) { - if (cpu_devs[i]) { - if (!strcmp(v, cpu_devs[i]->c_ident[0]) || - (cpu_devs[i]->c_ident[1] && - !strcmp(v, cpu_devs[i]->c_ident[1]))) { - c->x86_vendor = i; - this_cpu = cpu_devs[i]; - return; - } + if (!cpu_devs[i]) + break; + + if (!strcmp(v, cpu_devs[i]->c_ident[0]) || + (cpu_devs[i]->c_ident[1] && + !strcmp(v, cpu_devs[i]->c_ident[1]))) { + this_cpu = cpu_devs[i]; + c->x86_vendor = this_cpu->c_x86_vendor; + return; } } + if (!printed) { printed++; printk(KERN_ERR "CPU: Vendor unknown, using generic init.\n"); printk(KERN_ERR "CPU: Your system may be unstable.\n"); } + c->x86_vendor = X86_VENDOR_UNKNOWN; this_cpu = &default_cpu; } -static void __init early_cpu_support_print(void) -{ - int i,j; - struct cpu_dev *cpu_devx; - - printk("KERNEL supported cpus:\n"); - for (i = 0; i < X86_VENDOR_NUM; i++) { - cpu_devx = cpu_devs[i]; - if (!cpu_devx) - continue; - for (j = 0; j < 2; j++) { - if (!cpu_devx->c_ident[j]) - continue; - printk(" %s %s\n", cpu_devx->c_vendor, - cpu_devx->c_ident[j]); - } - } -} - void __cpuinit cpu_detect(struct cpuinfo_x86 *c) { /* Get vendor name */ @@ -306,21 +291,35 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c) get_cpu_cap(c); - if (c->x86_vendor != X86_VENDOR_UNKNOWN && - cpu_devs[c->x86_vendor]->c_early_init) - cpu_devs[c->x86_vendor]->c_early_init(c); + if (this_cpu->c_early_init) + this_cpu->c_early_init(c); validate_pat_support(c); } void __init early_cpu_init(void) { - struct cpu_vendor_dev *cvdev; + struct cpu_dev **cdev; + int count = 0; + + printk("KERNEL supported cpus:\n"); + for (cdev = __x86_cpu_dev_start; cdev < __x86_cpu_dev_end; cdev++) { + struct cpu_dev *cpudev = *cdev; + unsigned int j; - for (cvdev = __x86cpuvendor_start; cvdev < __x86cpuvendor_end; cvdev++) - cpu_devs[cvdev->vendor] = cvdev->cpu_dev; + if (count >= X86_VENDOR_NUM) + break; + cpu_devs[count] = cpudev; + count++; + + for (j = 0; j < 2; j++) { + if (!cpudev->c_ident[j]) + continue; + printk(" %s %s\n", cpudev->c_vendor, + cpudev->c_ident[j]); + } + } - early_cpu_support_print(); early_identify_cpu(&boot_cpu_data); } diff --git a/arch/x86/kernel/cpu/cpu.h b/arch/x86/kernel/cpu/cpu.h index 4d894e8565f..3cc9d92afd8 100644 --- a/arch/x86/kernel/cpu/cpu.h +++ b/arch/x86/kernel/cpu/cpu.h @@ -21,21 +21,15 @@ struct cpu_dev { void (*c_init)(struct cpuinfo_x86 * c); void (*c_identify)(struct cpuinfo_x86 * c); unsigned int (*c_size_cache)(struct cpuinfo_x86 * c, unsigned int size); + int c_x86_vendor; }; -extern struct cpu_dev * cpu_devs [X86_VENDOR_NUM]; +#define cpu_dev_register(cpu_devX) \ + static struct cpu_dev *__cpu_dev_##cpu_devX __used \ + __attribute__((__section__(".x86_cpu_dev.init"))) = \ + &cpu_devX; -struct cpu_vendor_dev { - int vendor; - struct cpu_dev *cpu_dev; -}; - -#define cpu_vendor_dev_register(cpu_vendor_id, cpu_dev) \ - static struct cpu_vendor_dev __cpu_vendor_dev_##cpu_vendor_id __used \ - __attribute__((__section__(".x86cpuvendor.init"))) = \ - { cpu_vendor_id, cpu_dev } - -extern struct cpu_vendor_dev __x86cpuvendor_start[], __x86cpuvendor_end[]; +extern struct cpu_dev *__x86_cpu_dev_start[], *__x86_cpu_dev_end[]; extern int get_model_name(struct cpuinfo_x86 *c); extern void display_cacheinfo(struct cpuinfo_x86 *c); diff --git a/arch/x86/kernel/cpu/cyrix.c b/arch/x86/kernel/cpu/cyrix.c index 13f8fa16b81..3f8c7283d81 100644 --- a/arch/x86/kernel/cpu/cyrix.c +++ b/arch/x86/kernel/cpu/cyrix.c @@ -442,14 +442,16 @@ static struct cpu_dev cyrix_cpu_dev __cpuinitdata = { .c_early_init = early_init_cyrix, .c_init = init_cyrix, .c_identify = cyrix_identify, + .c_x86_vendor = X86_VENDOR_CYRIX, }; -cpu_vendor_dev_register(X86_VENDOR_CYRIX, &cyrix_cpu_dev); +cpu_dev_register(cyrix_cpu_dev); static struct cpu_dev nsc_cpu_dev __cpuinitdata = { .c_vendor = "NSC", .c_ident = { "Geode by NSC" }, .c_init = init_nsc, + .c_x86_vendor = X86_VENDOR_NSC, }; -cpu_vendor_dev_register(X86_VENDOR_NSC, &nsc_cpu_dev); +cpu_dev_register(nsc_cpu_dev); diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 77618c717d7..c5ac08124ad 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -303,9 +303,10 @@ static struct cpu_dev intel_cpu_dev __cpuinitdata = { .c_early_init = early_init_intel, .c_init = init_intel, .c_size_cache = intel_size_cache, + .c_x86_vendor = X86_VENDOR_INTEL, }; -cpu_vendor_dev_register(X86_VENDOR_INTEL, &intel_cpu_dev); +cpu_dev_register(intel_cpu_dev); /* arch_initcall(intel_cpu_init); */ diff --git a/arch/x86/kernel/cpu/intel_64.c b/arch/x86/kernel/cpu/intel_64.c index 1019c58d39f..0a8128a240d 100644 --- a/arch/x86/kernel/cpu/intel_64.c +++ b/arch/x86/kernel/cpu/intel_64.c @@ -90,6 +90,7 @@ static struct cpu_dev intel_cpu_dev __cpuinitdata = { .c_ident = { "GenuineIntel" }, .c_early_init = early_init_intel, .c_init = init_intel, + .c_x86_vendor = X86_VENDOR_INTEL, }; -cpu_vendor_dev_register(X86_VENDOR_INTEL, &intel_cpu_dev); +cpu_dev_register(intel_cpu_dev); diff --git a/arch/x86/kernel/cpu/transmeta.c b/arch/x86/kernel/cpu/transmeta.c index b911a2c61b8..7c46e6ecedc 100644 --- a/arch/x86/kernel/cpu/transmeta.c +++ b/arch/x86/kernel/cpu/transmeta.c @@ -102,6 +102,7 @@ static struct cpu_dev transmeta_cpu_dev __cpuinitdata = { .c_ident = { "GenuineTMx86", "TransmetaCPU" }, .c_init = init_transmeta, .c_identify = transmeta_identify, + .c_x86_vendor = X86_VENDOR_TRANSMETA, }; -cpu_vendor_dev_register(X86_VENDOR_TRANSMETA, &transmeta_cpu_dev); +cpu_dev_register(transmeta_cpu_dev); diff --git a/arch/x86/kernel/cpu/umc.c b/arch/x86/kernel/cpu/umc.c index b1fc90989d7..e777f79e096 100644 --- a/arch/x86/kernel/cpu/umc.c +++ b/arch/x86/kernel/cpu/umc.c @@ -19,7 +19,8 @@ static struct cpu_dev umc_cpu_dev __cpuinitdata = { } }, }, + .c_x86_vendor = X86_VENDOR_UMC, }; -cpu_vendor_dev_register(X86_VENDOR_UMC, &umc_cpu_dev); +cpu_dev_register(umc_cpu_dev); diff --git a/arch/x86/kernel/vmlinux_32.lds.S b/arch/x86/kernel/vmlinux_32.lds.S index af5bdad8460..21b4f7eefaa 100644 --- a/arch/x86/kernel/vmlinux_32.lds.S +++ b/arch/x86/kernel/vmlinux_32.lds.S @@ -140,10 +140,10 @@ SECTIONS *(.con_initcall.init) __con_initcall_end = .; } - .x86cpuvendor.init : AT(ADDR(.x86cpuvendor.init) - LOAD_OFFSET) { - __x86cpuvendor_start = .; - *(.x86cpuvendor.init) - __x86cpuvendor_end = .; + .x86_cpu_dev.init : AT(ADDR(.x86_cpu_dev.init) - LOAD_OFFSET) { + __x86_cpu_dev_start = .; + *(.x86_cpu_dev.init) + __x86_cpu_dev_end = .; } SECURITY_INIT . = ALIGN(4); diff --git a/arch/x86/kernel/vmlinux_64.lds.S b/arch/x86/kernel/vmlinux_64.lds.S index 63e5c1a22e8..201e81a91a9 100644 --- a/arch/x86/kernel/vmlinux_64.lds.S +++ b/arch/x86/kernel/vmlinux_64.lds.S @@ -168,13 +168,12 @@ SECTIONS *(.con_initcall.init) } __con_initcall_end = .; - . = ALIGN(16); - __x86cpuvendor_start = .; - .x86cpuvendor.init : AT(ADDR(.x86cpuvendor.init) - LOAD_OFFSET) { - *(.x86cpuvendor.init) + __x86_cpu_dev_start = .; + .x86_cpu_dev.init : AT(ADDR(.x86_cpu_dev.init) - LOAD_OFFSET) { + *(.x86_cpu_dev.init) } - __x86cpuvendor_end = .; SECURITY_INIT + __x86_cpu_dev_end = .; . = ALIGN(8); .parainstructions : AT(ADDR(.parainstructions) - LOAD_OFFSET) { -- cgit v1.2.3 From a0854a46c5eb82588126421a9cbceb973384a644 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 4 Sep 2008 21:09:46 +0200 Subject: x86: make 32bit support show_msr like 64 bit Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/common.c | 51 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 51 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index ee1044ca481..a79cf5c52b6 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -616,6 +616,49 @@ void __cpuinit identify_secondary_cpu(struct cpuinfo_x86 *c) mtrr_ap_init(); } +struct msr_range { + unsigned min; + unsigned max; +}; + +static struct msr_range msr_range_array[] __cpuinitdata = { + { 0x00000000, 0x00000418}, + { 0xc0000000, 0xc000040b}, + { 0xc0010000, 0xc0010142}, + { 0xc0011000, 0xc001103b}, +}; + +static void __cpuinit print_cpu_msr(void) +{ + unsigned index; + u64 val; + int i; + unsigned index_min, index_max; + + for (i = 0; i < ARRAY_SIZE(msr_range_array); i++) { + index_min = msr_range_array[i].min; + index_max = msr_range_array[i].max; + for (index = index_min; index < index_max; index++) { + if (rdmsrl_amd_safe(index, &val)) + continue; + printk(KERN_INFO " MSR%08x: %016llx\n", index, val); + } + } +} + +static int show_msr __cpuinitdata; +static __init int setup_show_msr(char *arg) +{ + int num; + + get_option(&arg, &num); + + if (num > 0) + show_msr = num; + return 1; +} +__setup("show_msr=", setup_show_msr); + static __init int setup_noclflush(char *arg) { setup_clear_cpu_cap(X86_FEATURE_CLFLSH); @@ -644,6 +687,14 @@ void __cpuinit print_cpu_info(struct cpuinfo_x86 *c) printk(KERN_CONT " stepping %02x\n", c->x86_mask); else printk(KERN_CONT "\n"); + +#ifdef CONFIG_SMP + if (c->cpu_index < show_msr) + print_cpu_msr(); +#else + if (show_msr) + print_cpu_msr(); +#endif } static __init int setup_disablecpuid(char *arg) -- cgit v1.2.3 From 01b2e16a7a9be6573cba5d594d6659b3c6cb46a0 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 4 Sep 2008 21:09:46 +0200 Subject: x86: make get_mode_name of 64bit the same as 32bit Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/common_64.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/common_64.c b/arch/x86/kernel/cpu/common_64.c index b82479c1083..f1fb94e766b 100644 --- a/arch/x86/kernel/cpu/common_64.c +++ b/arch/x86/kernel/cpu/common_64.c @@ -83,6 +83,7 @@ static struct cpu_dev *this_cpu __cpuinitdata; int __cpuinit get_model_name(struct cpuinfo_x86 *c) { unsigned int *v; + char *p, *q; if (c->extended_cpuid_level < 0x80000004) return 0; @@ -92,6 +93,19 @@ int __cpuinit get_model_name(struct cpuinfo_x86 *c) cpuid(0x80000003, &v[4], &v[5], &v[6], &v[7]); cpuid(0x80000004, &v[8], &v[9], &v[10], &v[11]); c->x86_model_id[48] = 0; + + /* Intel chips right-justify this string for some dumb reason; + undo that brain damage */ + p = q = &c->x86_model_id[0]; + while (*p == ' ') + p++; + if (p != q) { + while (*p) + *q++ = *p++; + while (q <= &c->x86_model_id[48]) + *q++ = '\0'; /* Zero-pad the rest */ + } + return 1; } -- cgit v1.2.3 From 0a488a53d7ca46ac638c30079072c57e50cfcc7b Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 4 Sep 2008 21:09:47 +0200 Subject: x86: move 32bit related functions together Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/common.c | 251 ++++++++++++++++++++-------------------- arch/x86/kernel/cpu/common_64.c | 34 +++--- 2 files changed, 143 insertions(+), 142 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index a79cf5c52b6..2b2c170e8d6 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -22,6 +22,8 @@ #include "cpu.h" +static struct cpu_dev *this_cpu __cpuinitdata; + DEFINE_PER_CPU(struct gdt_page, gdt_page) = { .gdt = { [GDT_ENTRY_KERNEL_CS] = { { { 0x0000ffff, 0x00cf9a00 } } }, [GDT_ENTRY_KERNEL_DS] = { { { 0x0000ffff, 0x00cf9200 } } }, @@ -58,6 +60,109 @@ DEFINE_PER_CPU(struct gdt_page, gdt_page) = { .gdt = { } }; EXPORT_PER_CPU_SYMBOL_GPL(gdt_page); +static int cachesize_override __cpuinitdata = -1; +static int disable_x86_serial_nr __cpuinitdata = 1; + +static int __init cachesize_setup(char *str) +{ + get_option(&str, &cachesize_override); + return 1; +} +__setup("cachesize=", cachesize_setup); + +/* + * Naming convention should be: [()] + * This table only is used unless init_() below doesn't set it; + * in particular, if CPUID levels 0x80000002..4 are supported, this isn't used + * + */ + +/* Look up CPU names by table lookup. */ +static char __cpuinit *table_lookup_model(struct cpuinfo_x86 *c) +{ + struct cpu_model_info *info; + + if (c->x86_model >= 16) + return NULL; /* Range check */ + + if (!this_cpu) + return NULL; + + info = this_cpu->c_models; + + while (info && info->family) { + if (info->family == c->x86) + return info->model_names[c->x86_model]; + info++; + } + return NULL; /* Not found */ +} + +static int __init x86_fxsr_setup(char *s) +{ + setup_clear_cpu_cap(X86_FEATURE_FXSR); + setup_clear_cpu_cap(X86_FEATURE_XMM); + return 1; +} +__setup("nofxsr", x86_fxsr_setup); + +static int __init x86_sep_setup(char *s) +{ + setup_clear_cpu_cap(X86_FEATURE_SEP); + return 1; +} +__setup("nosep", x86_sep_setup); + +/* Standard macro to see if a specific flag is changeable */ +static inline int flag_is_changeable_p(u32 flag) +{ + u32 f1, f2; + + asm("pushfl\n\t" + "pushfl\n\t" + "popl %0\n\t" + "movl %0,%1\n\t" + "xorl %2,%0\n\t" + "pushl %0\n\t" + "popfl\n\t" + "pushfl\n\t" + "popl %0\n\t" + "popfl\n\t" + : "=&r" (f1), "=&r" (f2) + : "ir" (flag)); + + return ((f1^f2) & flag) != 0; +} + +/* Probe for the CPUID instruction */ +static int __cpuinit have_cpuid_p(void) +{ + return flag_is_changeable_p(X86_EFLAGS_ID); +} + +static void __cpuinit squash_the_stupid_serial_number(struct cpuinfo_x86 *c) +{ + if (cpu_has(c, X86_FEATURE_PN) && disable_x86_serial_nr) { + /* Disable processor serial number */ + unsigned long lo, hi; + rdmsr(MSR_IA32_BBL_CR_CTL, lo, hi); + lo |= 0x200000; + wrmsr(MSR_IA32_BBL_CR_CTL, lo, hi); + printk(KERN_NOTICE "CPU serial number disabled.\n"); + clear_cpu_cap(c, X86_FEATURE_PN); + + /* Disabling the serial number may affect the cpuid level */ + c->cpuid_level = cpuid_eax(0); + } +} + +static int __init x86_serial_nr_setup(char *s) +{ + disable_x86_serial_nr = 0; + return 1; +} +__setup("serialnumber", x86_serial_nr_setup); + __u32 cleared_cpu_caps[NCAPINTS] __cpuinitdata; /* Current gdt points %fs at the "master" per-cpu area: after this, @@ -72,9 +177,6 @@ void switch_to_new_gdt(void) asm("mov %0, %%fs" : : "r" (__KERNEL_PERCPU) : "memory"); } -static int cachesize_override __cpuinitdata = -1; -static int disable_x86_serial_nr __cpuinitdata = 1; - static struct cpu_dev *cpu_devs[X86_VENDOR_NUM] = {}; static void __cpuinit default_init(struct cpuinfo_x86 *c) @@ -95,14 +197,6 @@ static struct cpu_dev __cpuinitdata default_cpu = { .c_vendor = "Unknown", .c_x86_vendor = X86_VENDOR_UNKNOWN, }; -static struct cpu_dev *this_cpu __cpuinitdata; - -static int __init cachesize_setup(char *str) -{ - get_option(&str, &cachesize_override); - return 1; -} -__setup("cachesize=", cachesize_setup); int __cpuinit get_model_name(struct cpuinfo_x86 *c) { @@ -133,7 +227,6 @@ int __cpuinit get_model_name(struct cpuinfo_x86 *c) return 1; } - void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c) { unsigned int n, dummy, ebx, ecx, edx, l2size; @@ -150,7 +243,7 @@ void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c) if (n < 0x80000006) /* Some chips just has a large L1. */ return; - ecx = cpuid_ecx(0x80000006); + cpuid(0x80000006, &dummy, &ebx, &ecx, &edx); l2size = ecx >> 16; /* do processor-specific cache resizing */ @@ -167,48 +260,23 @@ void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c) c->x86_cache_size = l2size; printk(KERN_INFO "CPU: L2 Cache: %dK (%d bytes/line)\n", - l2size, ecx & 0xFF); -} - -/* - * Naming convention should be: [()] - * This table only is used unless init_() below doesn't set it; - * in particular, if CPUID levels 0x80000002..4 are supported, this isn't used - * - */ - -/* Look up CPU names by table lookup. */ -static char __cpuinit *table_lookup_model(struct cpuinfo_x86 *c) -{ - struct cpu_model_info *info; - - if (c->x86_model >= 16) - return NULL; /* Range check */ - - if (!this_cpu) - return NULL; - - info = this_cpu->c_models; - - while (info && info->family) { - if (info->family == c->x86) - return info->model_names[c->x86_model]; - info++; - } - return NULL; /* Not found */ + l2size, ecx & 0xFF); } #ifdef CONFIG_X86_HT void __cpuinit detect_ht(struct cpuinfo_x86 *c) { - u32 eax, ebx, ecx, edx; - int index_msb, core_bits; - - cpuid(1, &eax, &ebx, &ecx, &edx); + u32 eax, ebx, ecx, edx; + int index_msb, core_bits; - if (!cpu_has(c, X86_FEATURE_HT) || cpu_has(c, X86_FEATURE_CMP_LEGACY)) + if (!cpu_has(c, X86_FEATURE_HT)) return; + if (cpu_has(c, X86_FEATURE_CMP_LEGACY)) + goto out; + + cpuid(1, &eax, &ebx, &ecx, &edx); + smp_num_siblings = (ebx & 0xff0000) >> 16; if (smp_num_siblings == 1) { @@ -225,8 +293,6 @@ void __cpuinit detect_ht(struct cpuinfo_x86 *c) index_msb = get_count_order(smp_num_siblings); c->phys_proc_id = phys_pkg_id(c->initial_apicid, index_msb); - printk(KERN_INFO "CPU: Physical Processor ID: %d\n", - c->phys_proc_id); smp_num_siblings = smp_num_siblings / c->x86_max_cores; @@ -236,10 +302,14 @@ void __cpuinit detect_ht(struct cpuinfo_x86 *c) c->cpu_core_id = phys_pkg_id(c->initial_apicid, index_msb) & ((1 << core_bits) - 1); + } - if (c->x86_max_cores > 1) - printk(KERN_INFO "CPU: Processor Core ID: %d\n", - c->cpu_core_id); +out: + if ((c->x86_max_cores * smp_num_siblings) > 1) { + printk(KERN_INFO "CPU: Physical Processor ID: %d\n", + c->phys_proc_id); + printk(KERN_INFO "CPU: Processor Core ID: %d\n", + c->cpu_core_id); } } #endif @@ -273,52 +343,6 @@ static void __cpuinit get_cpu_vendor(struct cpuinfo_x86 *c) this_cpu = &default_cpu; } - -static int __init x86_fxsr_setup(char *s) -{ - setup_clear_cpu_cap(X86_FEATURE_FXSR); - setup_clear_cpu_cap(X86_FEATURE_XMM); - return 1; -} -__setup("nofxsr", x86_fxsr_setup); - - -static int __init x86_sep_setup(char *s) -{ - setup_clear_cpu_cap(X86_FEATURE_SEP); - return 1; -} -__setup("nosep", x86_sep_setup); - - -/* Standard macro to see if a specific flag is changeable */ -static inline int flag_is_changeable_p(u32 flag) -{ - u32 f1, f2; - - asm("pushfl\n\t" - "pushfl\n\t" - "popl %0\n\t" - "movl %0,%1\n\t" - "xorl %2,%0\n\t" - "pushl %0\n\t" - "popfl\n\t" - "pushfl\n\t" - "popl %0\n\t" - "popfl\n\t" - : "=&r" (f1), "=&r" (f2) - : "ir" (flag)); - - return ((f1^f2) & flag) != 0; -} - - -/* Probe for the CPUID instruction */ -static int __cpuinit have_cpuid_p(void) -{ - return flag_is_changeable_p(X86_EFLAGS_ID); -} - void __cpuinit cpu_detect(struct cpuinfo_x86 *c) { /* Get vendor name */ @@ -380,16 +404,16 @@ static void __cpuinit get_cpu_cap(struct cpuinfo_x86 *c) */ static void __init early_identify_cpu(struct cpuinfo_x86 *c) { - c->x86_cache_alignment = 32; c->x86_clflush_size = 32; + c->x86_cache_alignment = c->x86_clflush_size; if (!have_cpuid_p()) return; - c->extended_cpuid_level = 0; - memset(&c->x86_capability, 0, sizeof c->x86_capability); + c->extended_cpuid_level = 0; + cpu_detect(c); get_cpu_vendor(c); @@ -487,31 +511,6 @@ static void __cpuinit generic_identify(struct cpuinfo_x86 *c) detect_nopl(c); } -static void __cpuinit squash_the_stupid_serial_number(struct cpuinfo_x86 *c) -{ - if (cpu_has(c, X86_FEATURE_PN) && disable_x86_serial_nr) { - /* Disable processor serial number */ - unsigned long lo, hi; - rdmsr(MSR_IA32_BBL_CR_CTL, lo, hi); - lo |= 0x200000; - wrmsr(MSR_IA32_BBL_CR_CTL, lo, hi); - printk(KERN_NOTICE "CPU serial number disabled.\n"); - clear_cpu_cap(c, X86_FEATURE_PN); - - /* Disabling the serial number may affect the cpuid level */ - c->cpuid_level = cpuid_eax(0); - } -} - -static int __init x86_serial_nr_setup(char *s) -{ - disable_x86_serial_nr = 0; - return 1; -} -__setup("serialnumber", x86_serial_nr_setup); - - - /* * This does the hard work of actually picking apart the CPU stuff... */ diff --git a/arch/x86/kernel/cpu/common_64.c b/arch/x86/kernel/cpu/common_64.c index f1fb94e766b..b2da0413532 100644 --- a/arch/x86/kernel/cpu/common_64.c +++ b/arch/x86/kernel/cpu/common_64.c @@ -37,6 +37,8 @@ #include "cpu.h" +static struct cpu_dev *this_cpu __cpuinitdata; + /* We need valid kernel segments for data and code in long mode too * IRET will check the segment types kkeil 2000/10/28 * Also sysret mandates a special GDT layout @@ -78,7 +80,6 @@ static struct cpu_dev __cpuinitdata default_cpu = { .c_vendor = "Unknown", .c_x86_vendor = X86_VENDOR_UNKNOWN, }; -static struct cpu_dev *this_cpu __cpuinitdata; int __cpuinit get_model_name(struct cpuinfo_x86 *c) { @@ -112,7 +113,7 @@ int __cpuinit get_model_name(struct cpuinfo_x86 *c) void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c) { - unsigned int n, dummy, ebx, ecx, edx; + unsigned int n, dummy, ebx, ecx, edx, l2size; n = c->extended_cpuid_level; @@ -125,15 +126,17 @@ void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c) c->x86_tlbsize = 0; } - if (n >= 0x80000006) { - cpuid(0x80000006, &dummy, &ebx, &ecx, &edx); - ecx = cpuid_ecx(0x80000006); - c->x86_cache_size = ecx >> 16; - c->x86_tlbsize += ((ebx >> 16) & 0xfff) + (ebx & 0xfff); + if (n < 0x80000006) /* Some chips just has a large L1. */ + return; - printk(KERN_INFO "CPU: L2 Cache: %dK (%d bytes/line)\n", - c->x86_cache_size, ecx & 0xFF); - } + cpuid(0x80000006, &dummy, &ebx, &ecx, &edx); + l2size = ecx >> 16; + c->x86_tlbsize += ((ebx >> 16) & 0xfff) + (ebx & 0xfff); + + c->x86_cache_size = l2size; + + printk(KERN_INFO "CPU: L2 Cache: %dK (%d bytes/line)\n", + l2size, ecx & 0xFF); } void __cpuinit detect_ht(struct cpuinfo_x86 *c) @@ -142,14 +145,13 @@ void __cpuinit detect_ht(struct cpuinfo_x86 *c) u32 eax, ebx, ecx, edx; int index_msb, core_bits; - cpuid(1, &eax, &ebx, &ecx, &edx); - - if (!cpu_has(c, X86_FEATURE_HT)) return; if (cpu_has(c, X86_FEATURE_CMP_LEGACY)) goto out; + cpuid(1, &eax, &ebx, &ecx, &edx); + smp_num_siblings = (ebx & 0xff0000) >> 16; if (smp_num_siblings == 1) { @@ -175,6 +177,7 @@ void __cpuinit detect_ht(struct cpuinfo_x86 *c) c->cpu_core_id = phys_pkg_id(index_msb) & ((1 << core_bits) - 1); } + out: if ((c->x86_max_cores * smp_num_siblings) > 1) { printk(KERN_INFO "CPU: Physical Processor ID: %d\n", @@ -182,7 +185,6 @@ out: printk(KERN_INFO "CPU: Processor Core ID: %d\n", c->cpu_core_id); } - #endif } @@ -405,10 +407,10 @@ static void __cpuinit identify_cpu(struct cpuinfo_x86 *c) c->x86_model = c->x86_mask = 0; /* So far unknown... */ c->x86_vendor_id[0] = '\0'; /* Unset */ c->x86_model_id[0] = '\0'; /* Unset */ - c->x86_clflush_size = 64; - c->x86_cache_alignment = c->x86_clflush_size; c->x86_max_cores = 1; c->x86_coreid_bits = 0; + c->x86_clflush_size = 64; + c->x86_cache_alignment = c->x86_clflush_size; memset(&c->x86_capability, 0, sizeof c->x86_capability); generic_identify(c); -- cgit v1.2.3 From 6ac40ed0413ef4096720f966e11c7cdf259eee3f Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Thu, 4 Sep 2008 10:41:22 -0700 Subject: x86: quick TSC calibration Introduce a fast TSC-calibration method on sane hardware. It only uses 17920 PIT timer ticks to calibrate the TSC, plus 256 ticks on each side to make sure the TSC values were very close to the tick, so the whole calibration takes 15ms. Yet, despite only takign 15ms, we can actually give pretty stringent guarantees of accuracy: - the code requires that we hit each 256-counter block at least 50 times, so the TSC error is basically at *MOST* just a few PIT cycles off in any direction. In practice, it's going to be about one microseconds off (which is how long it takes to read the counter) - so over 17920 PIT cycles, we can pretty much guarantee that the calibration error is less than one half of a percent. My testing bears this out: on my machine, the quick-calibration reports 2934.085kHz, while the slow one reports 2933.415. Yes, the slower calibration is still more precise. For me, the slow calibration is stable to within about one hundreth of a percent, so it's (at a guess) roughly an order-and-a-half of magnitude more precise. The longer you wait, the more precise you can be. However, the nice thing about the fast TSC PIT synchronization is that it's pretty much _guaranteed_ to give that 0.5% precision, and fail gracefully (and very quickly) if it doesn't get it. And it really is fairly simple (even if there's a lot of _details_ there, and I didn't get all of those right ont he first try or even the second ;) The patch says "110 insertions", but 63 of those new lines are actually comments. Signed-off-by: Linus Torvalds Signed-off-by: Ingo Molnar --- arch/x86/kernel/tsc.c | 111 ++++++++++++++++++++++++++++++++++++++++++++++++- 1 files changed, 110 insertions(+), 1 deletions(-) --- arch/x86/kernel/tsc.c | 119 +++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 118 insertions(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index da033b5b3e1..839070ba846 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -227,6 +227,117 @@ static unsigned long pit_calibrate_tsc(u32 latch, unsigned long ms, int loopmin) return delta; } +/* + * This reads the current MSB of the PIT counter, and + * checks if we are running on sufficiently fast and + * non-virtualized hardware. + * + * Our expectations are: + * + * - the PIT is running at roughly 1.19MHz + * + * - each IO is going to take about 1us on real hardware, + * but we allow it to be much faster (by a factor of 10) or + * _slightly_ slower (ie we allow up to a 2us read+counter + * update - anything else implies a unacceptably slow CPU + * or PIT for the fast calibration to work. + * + * - with 256 PIT ticks to read the value, we have 214us to + * see the same MSB (and overhead like doing a single TSC + * read per MSB value etc). + * + * - We're doing 2 reads per loop (LSB, MSB), and we expect + * them each to take about a microsecond on real hardware. + * So we expect a count value of around 100. But we'll be + * generous, and accept anything over 50. + * + * - if the PIT is stuck, and we see *many* more reads, we + * return early (and the next caller of pit_expect_msb() + * then consider it a failure when they don't see the + * next expected value). + * + * These expectations mean that we know that we have seen the + * transition from one expected value to another with a fairly + * high accuracy, and we didn't miss any events. We can thus + * use the TSC value at the transitions to calculate a pretty + * good value for the TSC frequencty. + */ +static inline int pit_expect_msb(unsigned char val) +{ + int count = 0; + + for (count = 0; count < 50000; count++) { + /* Ignore LSB */ + inb(0x42); + if (inb(0x42) != val) + break; + } + return count > 50; +} + +/* + * How many MSB values do we want to see? We aim for a + * 15ms calibration, which assuming a 2us counter read + * error should give us roughly 150 ppm precision for + * the calibration. + */ +#define QUICK_PIT_MS 15 +#define QUICK_PIT_ITERATIONS (QUICK_PIT_MS * PIT_TICK_RATE / 1000 / 256) + +static unsigned long quick_pit_calibrate(void) +{ + /* Set the Gate high, disable speaker */ + outb((inb(0x61) & ~0x02) | 0x01, 0x61); + + /* + * Counter 2, mode 0 (one-shot), binary count + * + * NOTE! Mode 2 decrements by two (and then the + * output is flipped each time, giving the same + * final output frequency as a decrement-by-one), + * so mode 0 is much better when looking at the + * individual counts. + */ + outb(0xb0, 0x43); + + /* Start at 0xffff */ + outb(0xff, 0x42); + outb(0xff, 0x42); + + if (pit_expect_msb(0xff)) { + int i; + u64 t1, t2, delta; + unsigned char expect = 0xfe; + + t1 = get_cycles(); + for (i = 0; i < QUICK_PIT_ITERATIONS; i++, expect--) { + if (!pit_expect_msb(expect)) + goto failed; + } + t2 = get_cycles(); + + /* + * Ok, if we get here, then we've seen the + * MSB of the PIT decrement QUICK_PIT_ITERATIONS + * times, and each MSB had many hits, so we never + * had any sudden jumps. + * + * As a result, we can depend on there not being + * any odd delays anywhere, and the TSC reads are + * reliable. + * + * kHz = ticks / time-in-seconds / 1000; + * kHz = (t2 - t1) / (QPI * 256 / PIT_TICK_RATE) / 1000 + * kHz = ((t2 - t1) * PIT_TICK_RATE) / (QPI * 256 * 1000) + */ + delta = (t2 - t1)*PIT_TICK_RATE; + do_div(delta, QUICK_PIT_ITERATIONS*256*1000); + printk("Fast TSC calibration using PIT\n"); + return delta; + } +failed: + return 0; +} /** * native_calibrate_tsc - calibrate the tsc on boot @@ -235,9 +346,15 @@ unsigned long native_calibrate_tsc(void) { u64 tsc1, tsc2, delta, ref1, ref2; unsigned long tsc_pit_min = ULONG_MAX, tsc_ref_min = ULONG_MAX; - unsigned long flags, latch, ms; + unsigned long flags, latch, ms, fast_calibrate; int hpet = is_hpet_enabled(), i, loopmin; + local_irq_save(flags); + fast_calibrate = quick_pit_calibrate(); + local_irq_restore(flags); + if (fast_calibrate) + return fast_calibrate; + /* * Run 5 calibration loops to get the lowest frequency value * (the best estimate). We use two different calibration modes -- cgit v1.2.3 From 4156e9a8ef5b521185f451213d33fa661f38512e Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 4 Sep 2008 22:47:47 +0200 Subject: x86: quick TSC calibration, improve - make sure the final TSC timestamp is reliable too Signed-off-by: Ingo Molnar --- arch/x86/kernel/tsc.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 839070ba846..6dab90f6851 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -316,6 +316,12 @@ static unsigned long quick_pit_calibrate(void) } t2 = get_cycles(); + /* + * Make sure we can rely on the second TSC timestamp: + */ + if (!pit_expect_msb(--expect)) + goto failed; + /* * Ok, if we get here, then we've seen the * MSB of the PIT decrement QUICK_PIT_ITERATIONS -- cgit v1.2.3 From 97e4db7c8719f67c52793eeca5ec4df4c3407f2a Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 4 Sep 2008 20:08:59 -0700 Subject: x86: make detect_ht depend on CONFIG_X86_HT 64-bit has X86_HT set too, so use that instead of SMP. This also removes a include/asm-x86/processor.h ifdef. Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/common.c | 4 ++-- arch/x86/kernel/cpu/common_64.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 7d5a07f0fd2..1f80ce07daa 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -263,9 +263,9 @@ void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c) l2size, ecx & 0xFF); } -#ifdef CONFIG_X86_HT void __cpuinit detect_ht(struct cpuinfo_x86 *c) { +#ifdef CONFIG_X86_HT u32 eax, ebx, ecx, edx; int index_msb, core_bits; @@ -311,8 +311,8 @@ out: printk(KERN_INFO "CPU: Processor Core ID: %d\n", c->cpu_core_id); } -} #endif +} static void __cpuinit get_cpu_vendor(struct cpuinfo_x86 *c) { diff --git a/arch/x86/kernel/cpu/common_64.c b/arch/x86/kernel/cpu/common_64.c index bcb48ce05d2..8e630734e1f 100644 --- a/arch/x86/kernel/cpu/common_64.c +++ b/arch/x86/kernel/cpu/common_64.c @@ -141,7 +141,7 @@ void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c) void __cpuinit detect_ht(struct cpuinfo_x86 *c) { -#ifdef CONFIG_SMP +#ifdef CONFIG_X86_HT u32 eax, ebx, ecx, edx; int index_msb, core_bits; -- cgit v1.2.3 From f0fc4aff1fa4db1c201593422dcbf85c9897ec4f Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 4 Sep 2008 20:09:00 -0700 Subject: x86: make header file the same in arch/x86/kernel/cpu/common_xx.c Make the files more similar in preparation to unification, no code changed. Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/common.c | 22 +++++++++++++++++++--- arch/x86/kernel/cpu/common_64.c | 2 +- 2 files changed, 20 insertions(+), 4 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 1f80ce07daa..6bbdbc24f2b 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1,25 +1,41 @@ #include +#include +#include #include +#include +#include +#include +#include +#include #include #include -#include #include -#include -#include #include #include #include +#include #include #include #include #include #include +#include #ifdef CONFIG_X86_LOCAL_APIC #include #include #include +#include #endif +#include +#include +#include +#include +#include +#include +#include +#include + #include "cpu.h" static struct cpu_dev *this_cpu __cpuinitdata; diff --git a/arch/x86/kernel/cpu/common_64.c b/arch/x86/kernel/cpu/common_64.c index 8e630734e1f..5daec699acf 100644 --- a/arch/x86/kernel/cpu/common_64.c +++ b/arch/x86/kernel/cpu/common_64.c @@ -24,6 +24,7 @@ #include #include #include +#include #endif #include #include @@ -33,7 +34,6 @@ #include #include #include -#include #include "cpu.h" -- cgit v1.2.3 From 950ad7ff6ec17fc1b47abc95c5c74628eb1adf8b Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 4 Sep 2008 20:09:01 -0700 Subject: x86: same gdt_page with macro Move the 32-bit and 64-bit gdt_page definitions next to each other, separated with an #ifdef. Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/common.c | 17 +++++++++++++++++ arch/x86/kernel/cpu/common_64.c | 37 +++++++++++++++++++++++++++++++++++++ 2 files changed, 54 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 6bbdbc24f2b..e0ca51f4f2d 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -40,6 +40,22 @@ static struct cpu_dev *this_cpu __cpuinitdata; +#ifdef CONFIG_X86_64 +/* We need valid kernel segments for data and code in long mode too + * IRET will check the segment types kkeil 2000/10/28 + * Also sysret mandates a special GDT layout + */ +/* The TLS descriptors are currently at a different place compared to i386. + Hopefully nobody expects them at a fixed place (Wine?) */ +DEFINE_PER_CPU(struct gdt_page, gdt_page) = { .gdt = { + [GDT_ENTRY_KERNEL32_CS] = { { { 0x0000ffff, 0x00cf9b00 } } }, + [GDT_ENTRY_KERNEL_CS] = { { { 0x0000ffff, 0x00af9b00 } } }, + [GDT_ENTRY_KERNEL_DS] = { { { 0x0000ffff, 0x00cf9300 } } }, + [GDT_ENTRY_DEFAULT_USER32_CS] = { { { 0x0000ffff, 0x00cffb00 } } }, + [GDT_ENTRY_DEFAULT_USER_DS] = { { { 0x0000ffff, 0x00cff300 } } }, + [GDT_ENTRY_DEFAULT_USER_CS] = { { { 0x0000ffff, 0x00affb00 } } }, +} }; +#else DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = { [GDT_ENTRY_KERNEL_CS] = { { { 0x0000ffff, 0x00cf9a00 } } }, [GDT_ENTRY_KERNEL_DS] = { { { 0x0000ffff, 0x00cf9200 } } }, @@ -74,6 +90,7 @@ DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = { [GDT_ENTRY_ESPFIX_SS] = { { { 0x00000000, 0x00c09200 } } }, [GDT_ENTRY_PERCPU] = { { { 0x00000000, 0x00000000 } } }, } }; +#endif EXPORT_PER_CPU_SYMBOL_GPL(gdt_page); static int cachesize_override __cpuinitdata = -1; diff --git a/arch/x86/kernel/cpu/common_64.c b/arch/x86/kernel/cpu/common_64.c index 5daec699acf..b4890504284 100644 --- a/arch/x86/kernel/cpu/common_64.c +++ b/arch/x86/kernel/cpu/common_64.c @@ -39,6 +39,7 @@ static struct cpu_dev *this_cpu __cpuinitdata; +#ifdef CONFIG_X86_64 /* We need valid kernel segments for data and code in long mode too * IRET will check the segment types kkeil 2000/10/28 * Also sysret mandates a special GDT layout @@ -53,6 +54,42 @@ DEFINE_PER_CPU(struct gdt_page, gdt_page) = { .gdt = { [GDT_ENTRY_DEFAULT_USER_DS] = { { { 0x0000ffff, 0x00cff300 } } }, [GDT_ENTRY_DEFAULT_USER_CS] = { { { 0x0000ffff, 0x00affb00 } } }, } }; +#else +DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = { + [GDT_ENTRY_KERNEL_CS] = { { { 0x0000ffff, 0x00cf9a00 } } }, + [GDT_ENTRY_KERNEL_DS] = { { { 0x0000ffff, 0x00cf9200 } } }, + [GDT_ENTRY_DEFAULT_USER_CS] = { { { 0x0000ffff, 0x00cffa00 } } }, + [GDT_ENTRY_DEFAULT_USER_DS] = { { { 0x0000ffff, 0x00cff200 } } }, + /* + * Segments used for calling PnP BIOS have byte granularity. + * They code segments and data segments have fixed 64k limits, + * the transfer segment sizes are set at run time. + */ + /* 32-bit code */ + [GDT_ENTRY_PNPBIOS_CS32] = { { { 0x0000ffff, 0x00409a00 } } }, + /* 16-bit code */ + [GDT_ENTRY_PNPBIOS_CS16] = { { { 0x0000ffff, 0x00009a00 } } }, + /* 16-bit data */ + [GDT_ENTRY_PNPBIOS_DS] = { { { 0x0000ffff, 0x00009200 } } }, + /* 16-bit data */ + [GDT_ENTRY_PNPBIOS_TS1] = { { { 0x00000000, 0x00009200 } } }, + /* 16-bit data */ + [GDT_ENTRY_PNPBIOS_TS2] = { { { 0x00000000, 0x00009200 } } }, + /* + * The APM segments have byte granularity and their bases + * are set at run time. All have 64k limits. + */ + /* 32-bit code */ + [GDT_ENTRY_APMBIOS_BASE] = { { { 0x0000ffff, 0x00409a00 } } }, + /* 16-bit code */ + [GDT_ENTRY_APMBIOS_BASE+1] = { { { 0x0000ffff, 0x00009a00 } } }, + /* data */ + [GDT_ENTRY_APMBIOS_BASE+2] = { { { 0x0000ffff, 0x00409200 } } }, + + [GDT_ENTRY_ESPFIX_SS] = { { { 0x00000000, 0x00c09200 } } }, + [GDT_ENTRY_PERCPU] = { { { 0x00000000, 0x00000000 } } }, +} }; +#endif EXPORT_PER_CPU_SYMBOL_GPL(gdt_page); __u32 cleared_cpu_caps[NCAPINTS] __cpuinitdata; -- cgit v1.2.3 From ba51dced0b55eb62874e1646d5eeff344c3d4e67 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 4 Sep 2008 20:09:02 -0700 Subject: x86: cpu/common.c, let 64-bit code have 32-bit only functions No effect on 64-bit. Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/common.c | 8 +++ arch/x86/kernel/cpu/common_64.c | 111 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 119 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index e0ca51f4f2d..9128ba0c8d3 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -93,6 +93,7 @@ DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = { #endif EXPORT_PER_CPU_SYMBOL_GPL(gdt_page); +#ifdef CONFIG_X86_32 static int cachesize_override __cpuinitdata = -1; static int disable_x86_serial_nr __cpuinitdata = 1; @@ -195,6 +196,13 @@ static int __init x86_serial_nr_setup(char *s) return 1; } __setup("serialnumber", x86_serial_nr_setup); +#else +/* Probe for the CPUID instruction */ +static inline int have_cpuid_p(void) +{ + return 1; +} +#endif __u32 cleared_cpu_caps[NCAPINTS] __cpuinitdata; diff --git a/arch/x86/kernel/cpu/common_64.c b/arch/x86/kernel/cpu/common_64.c index b4890504284..40c9d89cc14 100644 --- a/arch/x86/kernel/cpu/common_64.c +++ b/arch/x86/kernel/cpu/common_64.c @@ -92,6 +92,117 @@ DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = { #endif EXPORT_PER_CPU_SYMBOL_GPL(gdt_page); +#ifdef CONFIG_X86_32 +static int cachesize_override __cpuinitdata = -1; +static int disable_x86_serial_nr __cpuinitdata = 1; + +static int __init cachesize_setup(char *str) +{ + get_option(&str, &cachesize_override); + return 1; +} +__setup("cachesize=", cachesize_setup); + +/* + * Naming convention should be: [()] + * This table only is used unless init_() below doesn't set it; + * in particular, if CPUID levels 0x80000002..4 are supported, this isn't used + * + */ + +/* Look up CPU names by table lookup. */ +static char __cpuinit *table_lookup_model(struct cpuinfo_x86 *c) +{ + struct cpu_model_info *info; + + if (c->x86_model >= 16) + return NULL; /* Range check */ + + if (!this_cpu) + return NULL; + + info = this_cpu->c_models; + + while (info && info->family) { + if (info->family == c->x86) + return info->model_names[c->x86_model]; + info++; + } + return NULL; /* Not found */ +} + +static int __init x86_fxsr_setup(char *s) +{ + setup_clear_cpu_cap(X86_FEATURE_FXSR); + setup_clear_cpu_cap(X86_FEATURE_XMM); + return 1; +} +__setup("nofxsr", x86_fxsr_setup); + +static int __init x86_sep_setup(char *s) +{ + setup_clear_cpu_cap(X86_FEATURE_SEP); + return 1; +} +__setup("nosep", x86_sep_setup); + +/* Standard macro to see if a specific flag is changeable */ +static inline int flag_is_changeable_p(u32 flag) +{ + u32 f1, f2; + + asm("pushfl\n\t" + "pushfl\n\t" + "popl %0\n\t" + "movl %0,%1\n\t" + "xorl %2,%0\n\t" + "pushl %0\n\t" + "popfl\n\t" + "pushfl\n\t" + "popl %0\n\t" + "popfl\n\t" + : "=&r" (f1), "=&r" (f2) + : "ir" (flag)); + + return ((f1^f2) & flag) != 0; +} + +/* Probe for the CPUID instruction */ +static int __cpuinit have_cpuid_p(void) +{ + return flag_is_changeable_p(X86_EFLAGS_ID); +} + +static void __cpuinit squash_the_stupid_serial_number(struct cpuinfo_x86 *c) +{ + if (cpu_has(c, X86_FEATURE_PN) && disable_x86_serial_nr) { + /* Disable processor serial number */ + unsigned long lo, hi; + rdmsr(MSR_IA32_BBL_CR_CTL, lo, hi); + lo |= 0x200000; + wrmsr(MSR_IA32_BBL_CR_CTL, lo, hi); + printk(KERN_NOTICE "CPU serial number disabled.\n"); + clear_cpu_cap(c, X86_FEATURE_PN); + + /* Disabling the serial number may affect the cpuid level */ + c->cpuid_level = cpuid_eax(0); + } +} + +static int __init x86_serial_nr_setup(char *s) +{ + disable_x86_serial_nr = 0; + return 1; +} +__setup("serialnumber", x86_serial_nr_setup); +#else +/* Probe for the CPUID instruction */ +static inline int have_cpuid_p(void) +{ + return 1; +} +#endif + __u32 cleared_cpu_caps[NCAPINTS] __cpuinitdata; /* Current gdt points %fs at the "master" per-cpu area: after this, -- cgit v1.2.3 From d5494d4f517158b1d2eea1ae33f6c264eb12675f Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 4 Sep 2008 20:09:03 -0700 Subject: x86: cpu/common*.c, make 32-bit have 64-bit only functions Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/common.c | 138 ++++++++++++++++++++++++++++++++++++++++ arch/x86/kernel/cpu/common_64.c | 12 ++++ 2 files changed, 150 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 9128ba0c8d3..dcd3ebd5ba6 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -750,6 +750,143 @@ __setup("clearcpuid=", setup_disablecpuid); cpumask_t cpu_initialized __cpuinitdata = CPU_MASK_NONE; +#ifdef CONFIG_X86_64 +struct x8664_pda **_cpu_pda __read_mostly; +EXPORT_SYMBOL(_cpu_pda); + +struct desc_ptr idt_descr = { 256 * 16 - 1, (unsigned long) idt_table }; + +char boot_cpu_stack[IRQSTACKSIZE] __page_aligned_bss; + +unsigned long __supported_pte_mask __read_mostly = ~0UL; +EXPORT_SYMBOL_GPL(__supported_pte_mask); + +static int do_not_nx __cpuinitdata; + +/* noexec=on|off +Control non executable mappings for 64bit processes. + +on Enable(default) +off Disable +*/ +static int __init nonx_setup(char *str) +{ + if (!str) + return -EINVAL; + if (!strncmp(str, "on", 2)) { + __supported_pte_mask |= _PAGE_NX; + do_not_nx = 0; + } else if (!strncmp(str, "off", 3)) { + do_not_nx = 1; + __supported_pte_mask &= ~_PAGE_NX; + } + return 0; +} +early_param("noexec", nonx_setup); + +int force_personality32; + +/* noexec32=on|off +Control non executable heap for 32bit processes. +To control the stack too use noexec=off + +on PROT_READ does not imply PROT_EXEC for 32bit processes (default) +off PROT_READ implies PROT_EXEC +*/ +static int __init nonx32_setup(char *str) +{ + if (!strcmp(str, "on")) + force_personality32 &= ~READ_IMPLIES_EXEC; + else if (!strcmp(str, "off")) + force_personality32 |= READ_IMPLIES_EXEC; + return 1; +} +__setup("noexec32=", nonx32_setup); + +void pda_init(int cpu) +{ + struct x8664_pda *pda = cpu_pda(cpu); + + /* Setup up data that may be needed in __get_free_pages early */ + loadsegment(fs, 0); + loadsegment(gs, 0); + /* Memory clobbers used to order PDA accessed */ + mb(); + wrmsrl(MSR_GS_BASE, pda); + mb(); + + pda->cpunumber = cpu; + pda->irqcount = -1; + pda->kernelstack = (unsigned long)stack_thread_info() - + PDA_STACKOFFSET + THREAD_SIZE; + pda->active_mm = &init_mm; + pda->mmu_state = 0; + + if (cpu == 0) { + /* others are initialized in smpboot.c */ + pda->pcurrent = &init_task; + pda->irqstackptr = boot_cpu_stack; + pda->irqstackptr += IRQSTACKSIZE - 64; + } else { + if (!pda->irqstackptr) { + pda->irqstackptr = (char *) + __get_free_pages(GFP_ATOMIC, IRQSTACK_ORDER); + if (!pda->irqstackptr) + panic("cannot allocate irqstack for cpu %d", + cpu); + pda->irqstackptr += IRQSTACKSIZE - 64; + } + + if (pda->nodenumber == 0 && cpu_to_node(cpu) != NUMA_NO_NODE) + pda->nodenumber = cpu_to_node(cpu); + } +} + +char boot_exception_stacks[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + + DEBUG_STKSZ] __page_aligned_bss; + +extern asmlinkage void ignore_sysret(void); + +/* May not be marked __init: used by software suspend */ +void syscall_init(void) +{ + /* + * LSTAR and STAR live in a bit strange symbiosis. + * They both write to the same internal register. STAR allows to + * set CS/DS but only a 32bit target. LSTAR sets the 64bit rip. + */ + wrmsrl(MSR_STAR, ((u64)__USER32_CS)<<48 | ((u64)__KERNEL_CS)<<32); + wrmsrl(MSR_LSTAR, system_call); + wrmsrl(MSR_CSTAR, ignore_sysret); + +#ifdef CONFIG_IA32_EMULATION + syscall32_cpu_init(); +#endif + + /* Flags to clear on syscall */ + wrmsrl(MSR_SYSCALL_MASK, + X86_EFLAGS_TF|X86_EFLAGS_DF|X86_EFLAGS_IF|X86_EFLAGS_IOPL); +} + +void __cpuinit check_efer(void) +{ + unsigned long efer; + + rdmsrl(MSR_EFER, efer); + if (!(efer & EFER_NX) || do_not_nx) + __supported_pte_mask &= ~_PAGE_NX; +} + +unsigned long kernel_eflags; + +/* + * Copies of the original ist values from the tss are only accessed during + * debugging, no special alignment required. + */ +DEFINE_PER_CPU(struct orig_ist, orig_ist); + +#else + /* Make sure %fs is initialized properly in idle threads */ struct pt_regs * __cpuinit idle_regs(struct pt_regs *regs) { @@ -757,6 +894,7 @@ struct pt_regs * __cpuinit idle_regs(struct pt_regs *regs) regs->fs = __KERNEL_PERCPU; return regs; } +#endif /* * cpu_init() initializes state that is per-CPU. Some data is already diff --git a/arch/x86/kernel/cpu/common_64.c b/arch/x86/kernel/cpu/common_64.c index 40c9d89cc14..9f2a6ece82d 100644 --- a/arch/x86/kernel/cpu/common_64.c +++ b/arch/x86/kernel/cpu/common_64.c @@ -704,6 +704,7 @@ __setup("clearcpuid=", setup_disablecpuid); cpumask_t cpu_initialized __cpuinitdata = CPU_MASK_NONE; +#ifdef CONFIG_X86_64 struct x8664_pda **_cpu_pda __read_mostly; EXPORT_SYMBOL(_cpu_pda); @@ -838,6 +839,17 @@ unsigned long kernel_eflags; */ DEFINE_PER_CPU(struct orig_ist, orig_ist); +#else + +/* Make sure %fs is initialized properly in idle threads */ +struct pt_regs * __cpuinit idle_regs(struct pt_regs *regs) +{ + memset(regs, 0, sizeof(struct pt_regs)); + regs->fs = __KERNEL_PERCPU; + return regs; +} +#endif + /* * cpu_init() initializes state that is per-CPU. Some data is already * initialized (naturally) in the bootstrap process, such as the GDT -- cgit v1.2.3 From 1ba76586f778be327e452180d8378e40ee70f066 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 4 Sep 2008 20:09:04 -0700 Subject: x86: cpu/common*.c have same cpu_init(), with copying and #ifdef hard to merge by lines... (as here we have material differences between 32-bit and 64-bit mode) - will try to do it later. Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/common.c | 124 ++++++++++++++++++++++++++++++++++++++++ arch/x86/kernel/cpu/common_64.c | 88 +++++++++++++++++++++++++++- 2 files changed, 211 insertions(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index dcd3ebd5ba6..f44678db161 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -901,7 +901,129 @@ struct pt_regs * __cpuinit idle_regs(struct pt_regs *regs) * initialized (naturally) in the bootstrap process, such as the GDT * and IDT. We reload them nevertheless, this function acts as a * 'CPU state barrier', nothing should get across. + * A lot of state is already set up in PDA init for 64 bit */ +#ifdef CONFIG_X86_64 +void __cpuinit cpu_init(void) +{ + int cpu = stack_smp_processor_id(); + struct tss_struct *t = &per_cpu(init_tss, cpu); + struct orig_ist *orig_ist = &per_cpu(orig_ist, cpu); + unsigned long v; + char *estacks = NULL; + struct task_struct *me; + int i; + + /* CPU 0 is initialised in head64.c */ + if (cpu != 0) + pda_init(cpu); + else + estacks = boot_exception_stacks; + + me = current; + + if (cpu_test_and_set(cpu, cpu_initialized)) + panic("CPU#%d already initialized!\n", cpu); + + printk(KERN_INFO "Initializing CPU#%d\n", cpu); + + clear_in_cr4(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE); + + /* + * Initialize the per-CPU GDT with the boot GDT, + * and set up the GDT descriptor: + */ + + switch_to_new_gdt(); + load_idt((const struct desc_ptr *)&idt_descr); + + memset(me->thread.tls_array, 0, GDT_ENTRY_TLS_ENTRIES * 8); + syscall_init(); + + wrmsrl(MSR_FS_BASE, 0); + wrmsrl(MSR_KERNEL_GS_BASE, 0); + barrier(); + + check_efer(); + if (cpu != 0 && x2apic) + enable_x2apic(); + + /* + * set up and load the per-CPU TSS + */ + if (!orig_ist->ist[0]) { + static const unsigned int order[N_EXCEPTION_STACKS] = { + [0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STACK_ORDER, + [DEBUG_STACK - 1] = DEBUG_STACK_ORDER + }; + for (v = 0; v < N_EXCEPTION_STACKS; v++) { + if (cpu) { + estacks = (char *)__get_free_pages(GFP_ATOMIC, order[v]); + if (!estacks) + panic("Cannot allocate exception " + "stack %ld %d\n", v, cpu); + } + estacks += PAGE_SIZE << order[v]; + orig_ist->ist[v] = t->x86_tss.ist[v] = + (unsigned long)estacks; + } + } + + t->x86_tss.io_bitmap_base = offsetof(struct tss_struct, io_bitmap); + /* + * <= is required because the CPU will access up to + * 8 bits beyond the end of the IO permission bitmap. + */ + for (i = 0; i <= IO_BITMAP_LONGS; i++) + t->io_bitmap[i] = ~0UL; + + atomic_inc(&init_mm.mm_count); + me->active_mm = &init_mm; + if (me->mm) + BUG(); + enter_lazy_tlb(&init_mm, me); + + load_sp0(t, ¤t->thread); + set_tss_desc(cpu, t); + load_TR_desc(); + load_LDT(&init_mm.context); + +#ifdef CONFIG_KGDB + /* + * If the kgdb is connected no debug regs should be altered. This + * is only applicable when KGDB and a KGDB I/O module are built + * into the kernel and you are using early debugging with + * kgdbwait. KGDB will control the kernel HW breakpoint registers. + */ + if (kgdb_connected && arch_kgdb_ops.correct_hw_break) + arch_kgdb_ops.correct_hw_break(); + else { +#endif + /* + * Clear all 6 debug registers: + */ + + set_debugreg(0UL, 0); + set_debugreg(0UL, 1); + set_debugreg(0UL, 2); + set_debugreg(0UL, 3); + set_debugreg(0UL, 6); + set_debugreg(0UL, 7); +#ifdef CONFIG_KGDB + /* If the kgdb is connected no debug regs should be altered. */ + } +#endif + + fpu_init(); + + raw_local_save_flags(kernel_eflags); + + if (is_uv_system()) + uv_cpu_init(); +} + +#else + void __cpuinit cpu_init(void) { int cpu = smp_processor_id(); @@ -982,3 +1104,5 @@ void __cpuinit cpu_uninit(void) per_cpu(cpu_tlbstate, cpu).active_mm = &init_mm; } #endif + +#endif diff --git a/arch/x86/kernel/cpu/common_64.c b/arch/x86/kernel/cpu/common_64.c index 9f2a6ece82d..2bd0ed5abb0 100644 --- a/arch/x86/kernel/cpu/common_64.c +++ b/arch/x86/kernel/cpu/common_64.c @@ -855,8 +855,9 @@ struct pt_regs * __cpuinit idle_regs(struct pt_regs *regs) * initialized (naturally) in the bootstrap process, such as the GDT * and IDT. We reload them nevertheless, this function acts as a * 'CPU state barrier', nothing should get across. - * A lot of state is already set up in PDA init. + * A lot of state is already set up in PDA init for 64 bit */ +#ifdef CONFIG_X86_64 void __cpuinit cpu_init(void) { int cpu = stack_smp_processor_id(); @@ -974,3 +975,88 @@ void __cpuinit cpu_init(void) if (is_uv_system()) uv_cpu_init(); } + +#else + +void __cpuinit cpu_init(void) +{ + int cpu = smp_processor_id(); + struct task_struct *curr = current; + struct tss_struct *t = &per_cpu(init_tss, cpu); + struct thread_struct *thread = &curr->thread; + + if (cpu_test_and_set(cpu, cpu_initialized)) { + printk(KERN_WARNING "CPU#%d already initialized!\n", cpu); + for (;;) local_irq_enable(); + } + + printk(KERN_INFO "Initializing CPU#%d\n", cpu); + + if (cpu_has_vme || cpu_has_tsc || cpu_has_de) + clear_in_cr4(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE); + + load_idt(&idt_descr); + switch_to_new_gdt(); + + /* + * Set up and load the per-CPU TSS and LDT + */ + atomic_inc(&init_mm.mm_count); + curr->active_mm = &init_mm; + if (curr->mm) + BUG(); + enter_lazy_tlb(&init_mm, curr); + + load_sp0(t, thread); + set_tss_desc(cpu, t); + load_TR_desc(); + load_LDT(&init_mm.context); + +#ifdef CONFIG_DOUBLEFAULT + /* Set up doublefault TSS pointer in the GDT */ + __set_tss_desc(cpu, GDT_ENTRY_DOUBLEFAULT_TSS, &doublefault_tss); +#endif + + /* Clear %gs. */ + asm volatile ("mov %0, %%gs" : : "r" (0)); + + /* Clear all 6 debug registers: */ + set_debugreg(0, 0); + set_debugreg(0, 1); + set_debugreg(0, 2); + set_debugreg(0, 3); + set_debugreg(0, 6); + set_debugreg(0, 7); + + /* + * Force FPU initialization: + */ + if (cpu_has_xsave) + current_thread_info()->status = TS_XSAVE; + else + current_thread_info()->status = 0; + clear_used_math(); + mxcsr_feature_mask_init(); + + /* + * Boot processor to setup the FP and extended state context info. + */ + if (!smp_processor_id()) + init_thread_xstate(); + + xsave_init(); +} + +#ifdef CONFIG_HOTPLUG_CPU +void __cpuinit cpu_uninit(void) +{ + int cpu = raw_smp_processor_id(); + cpu_clear(cpu, cpu_initialized); + + /* lazy TLB state */ + per_cpu(cpu_tlbstate, cpu).state = 0; + per_cpu(cpu_tlbstate, cpu).active_mm = &init_mm; +} +#endif + +#endif -- cgit v1.2.3 From fab334c1d5f24d23d12f98ad652d399279cd03ce Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 4 Sep 2008 20:09:05 -0700 Subject: x86: cpu/common*.c, merge switch_to_new_gdt() Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/common.c | 2 ++ arch/x86/kernel/cpu/common_64.c | 3 +++ 2 files changed, 5 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index f44678db161..43d5287bb2a 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -215,7 +215,9 @@ void switch_to_new_gdt(void) gdt_descr.address = (long)get_cpu_gdt_table(smp_processor_id()); gdt_descr.size = GDT_SIZE - 1; load_gdt(&gdt_descr); +#ifdef CONFIG_X86_32 asm("mov %0, %%fs" : : "r" (__KERNEL_PERCPU) : "memory"); +#endif } static struct cpu_dev *cpu_devs[X86_VENDOR_NUM] = {}; diff --git a/arch/x86/kernel/cpu/common_64.c b/arch/x86/kernel/cpu/common_64.c index 2bd0ed5abb0..d7b996518f8 100644 --- a/arch/x86/kernel/cpu/common_64.c +++ b/arch/x86/kernel/cpu/common_64.c @@ -214,6 +214,9 @@ void switch_to_new_gdt(void) gdt_descr.address = (long)get_cpu_gdt_table(smp_processor_id()); gdt_descr.size = GDT_SIZE - 1; load_gdt(&gdt_descr); +#ifdef CONFIG_X86_32 + asm("mov %0, %%fs" : : "r" (__KERNEL_PERCPU) : "memory"); +#endif } static struct cpu_dev *cpu_devs[X86_VENDOR_NUM] = {}; -- cgit v1.2.3 From b9e67f00424e164dcd29391eb48dc941db8691ad Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 4 Sep 2008 20:09:06 -0700 Subject: x86: cpu/common.c, merge default_init() Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/common.c | 4 ++++ arch/x86/kernel/cpu/common_64.c | 12 ++++++++++++ 2 files changed, 16 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 43d5287bb2a..2c4bfa2e56a 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -224,6 +224,9 @@ static struct cpu_dev *cpu_devs[X86_VENDOR_NUM] = {}; static void __cpuinit default_init(struct cpuinfo_x86 *c) { +#ifdef CONFIG_X86_64 + display_cacheinfo(c); +#else /* Not much we can do here... */ /* Check if at least it has cpuid */ if (c->cpuid_level == -1) { @@ -233,6 +236,7 @@ static void __cpuinit default_init(struct cpuinfo_x86 *c) else if (c->x86 == 3) strcpy(c->x86_model_id, "386"); } +#endif } static struct cpu_dev __cpuinitdata default_cpu = { diff --git a/arch/x86/kernel/cpu/common_64.c b/arch/x86/kernel/cpu/common_64.c index d7b996518f8..2fda1097481 100644 --- a/arch/x86/kernel/cpu/common_64.c +++ b/arch/x86/kernel/cpu/common_64.c @@ -223,7 +223,19 @@ static struct cpu_dev *cpu_devs[X86_VENDOR_NUM] = {}; static void __cpuinit default_init(struct cpuinfo_x86 *c) { +#ifdef CONFIG_X86_64 display_cacheinfo(c); +#else + /* Not much we can do here... */ + /* Check if at least it has cpuid */ + if (c->cpuid_level == -1) { + /* No cpuid. It must be an ancient CPU */ + if (c->x86 == 4) + strcpy(c->x86_model_id, "486"); + else if (c->x86 == 3) + strcpy(c->x86_model_id, "386"); + } +#endif } static struct cpu_dev __cpuinitdata default_cpu = { -- cgit v1.2.3 From 140fc72709278989f08eb756d16a70008bdcc409 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 4 Sep 2008 20:09:07 -0700 Subject: x86: cpu/common*.c, merge display_cacheinfo() Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/common.c | 8 ++++++++ arch/x86/kernel/cpu/common_64.c | 17 +++++++++++++++++ 2 files changed, 25 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 2c4bfa2e56a..f9191207718 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -285,6 +285,10 @@ void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c) printk(KERN_INFO "CPU: L1 I Cache: %dK (%d bytes/line), D cache %dK (%d bytes/line)\n", edx>>24, edx&0xFF, ecx>>24, ecx&0xFF); c->x86_cache_size = (ecx>>24) + (edx>>24); +#ifdef CONFIG_X86_64 + /* On K8 L1 TLB is inclusive, so don't count it */ + c->x86_tlbsize = 0; +#endif } if (n < 0x80000006) /* Some chips just has a large L1. */ @@ -293,6 +297,9 @@ void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c) cpuid(0x80000006, &dummy, &ebx, &ecx, &edx); l2size = ecx >> 16; +#ifdef CONFIG_X86_64 + c->x86_tlbsize += ((ebx >> 16) & 0xfff) + (ebx & 0xfff); +#else /* do processor-specific cache resizing */ if (this_cpu->c_size_cache) l2size = this_cpu->c_size_cache(c, l2size); @@ -303,6 +310,7 @@ void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c) if (l2size == 0) return; /* Again, no L2 cache is possible */ +#endif c->x86_cache_size = l2size; diff --git a/arch/x86/kernel/cpu/common_64.c b/arch/x86/kernel/cpu/common_64.c index 2fda1097481..f7a2d524b1e 100644 --- a/arch/x86/kernel/cpu/common_64.c +++ b/arch/x86/kernel/cpu/common_64.c @@ -285,8 +285,10 @@ void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c) printk(KERN_INFO "CPU: L1 I Cache: %dK (%d bytes/line), D cache %dK (%d bytes/line)\n", edx>>24, edx&0xFF, ecx>>24, ecx&0xFF); c->x86_cache_size = (ecx>>24) + (edx>>24); +#ifdef CONFIG_X86_64 /* On K8 L1 TLB is inclusive, so don't count it */ c->x86_tlbsize = 0; +#endif } if (n < 0x80000006) /* Some chips just has a large L1. */ @@ -294,7 +296,22 @@ void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c) cpuid(0x80000006, &dummy, &ebx, &ecx, &edx); l2size = ecx >> 16; + +#ifdef CONFIG_X86_64 c->x86_tlbsize += ((ebx >> 16) & 0xfff) + (ebx & 0xfff); +#else + + /* do processor-specific cache resizing */ + if (this_cpu->c_size_cache) + l2size = this_cpu->c_size_cache(c, l2size); + + /* Allow user to override all this if necessary. */ + if (cachesize_override != -1) + l2size = cachesize_override; + + if (l2size == 0) + return; /* Again, no L2 cache is possible */ +#endif c->x86_cache_size = l2size; -- cgit v1.2.3 From 1cd78776c7d5487e3000426d0ce1ff203c5d60cd Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 4 Sep 2008 20:09:08 -0700 Subject: x86: cpu/common*.c, merge detect_ht() Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/common.c | 13 ++++++++++++- arch/x86/kernel/cpu/common_64.c | 9 +++++++++ 2 files changed, 21 insertions(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index f9191207718..b2018f76c94 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -330,6 +330,9 @@ void __cpuinit detect_ht(struct cpuinfo_x86 *c) if (cpu_has(c, X86_FEATURE_CMP_LEGACY)) goto out; + if (cpu_has(c, X86_FEATURE_XTOPOLOGY)) + return; + cpuid(1, &eax, &ebx, &ecx, &edx); smp_num_siblings = (ebx & 0xff0000) >> 16; @@ -346,8 +349,11 @@ void __cpuinit detect_ht(struct cpuinfo_x86 *c) } index_msb = get_count_order(smp_num_siblings); +#ifdef CONFIG_X86_64 + c->phys_proc_id = phys_pkg_id(index_msb); +#else c->phys_proc_id = phys_pkg_id(c->initial_apicid, index_msb); - +#endif smp_num_siblings = smp_num_siblings / c->x86_max_cores; @@ -355,8 +361,13 @@ void __cpuinit detect_ht(struct cpuinfo_x86 *c) core_bits = get_count_order(c->x86_max_cores); +#ifdef CONFIG_X86_64 + c->cpu_core_id = phys_pkg_id(index_msb) & + ((1 << core_bits) - 1); +#else c->cpu_core_id = phys_pkg_id(c->initial_apicid, index_msb) & ((1 << core_bits) - 1); +#endif } out: diff --git a/arch/x86/kernel/cpu/common_64.c b/arch/x86/kernel/cpu/common_64.c index f7a2d524b1e..7ac0a00743c 100644 --- a/arch/x86/kernel/cpu/common_64.c +++ b/arch/x86/kernel/cpu/common_64.c @@ -349,7 +349,11 @@ void __cpuinit detect_ht(struct cpuinfo_x86 *c) } index_msb = get_count_order(smp_num_siblings); +#ifdef CONFIG_X86_64 c->phys_proc_id = phys_pkg_id(index_msb); +#else + c->phys_proc_id = phys_pkg_id(c->initial_apicid, index_msb); +#endif smp_num_siblings = smp_num_siblings / c->x86_max_cores; @@ -357,8 +361,13 @@ void __cpuinit detect_ht(struct cpuinfo_x86 *c) core_bits = get_count_order(c->x86_max_cores); +#ifdef CONFIG_X86_64 c->cpu_core_id = phys_pkg_id(index_msb) & ((1 << core_bits) - 1); +#else + c->cpu_core_id = phys_pkg_id(c->initial_apicid, index_msb) & + ((1 << core_bits) - 1); +#endif } out: -- cgit v1.2.3 From 5122c890ba969e6efeaba9ed45f5936e62d3c6d5 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 4 Sep 2008 20:09:09 -0700 Subject: x86: cpu/common.c: merge get_cpu_cap() Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/common.c | 20 ++++++++++++++++++++ arch/x86/kernel/cpu/common_64.c | 2 ++ 2 files changed, 22 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index b2018f76c94..ffc2f5ed09a 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -458,6 +458,26 @@ static void __cpuinit get_cpu_cap(struct cpuinfo_x86 *c) c->x86_capability[6] = cpuid_ecx(0x80000001); } } + +#ifdef CONFIG_X86_64 + /* Transmeta-defined flags: level 0x80860001 */ + xlvl = cpuid_eax(0x80860000); + if ((xlvl & 0xffff0000) == 0x80860000) { + /* Don't set x86_cpuid_level here for now to not confuse. */ + if (xlvl >= 0x80860001) + c->x86_capability[2] = cpuid_edx(0x80860001); + } + + if (c->extended_cpuid_level >= 0x80000007) + c->x86_power = cpuid_edx(0x80000007); + + if (c->extended_cpuid_level >= 0x80000008) { + u32 eax = cpuid_eax(0x80000008); + + c->x86_virt_bits = (eax >> 8) & 0xff; + c->x86_phys_bits = eax & 0xff; + } +#endif } /* * Do minimum CPU detection early. diff --git a/arch/x86/kernel/cpu/common_64.c b/arch/x86/kernel/cpu/common_64.c index 7ac0a00743c..75bb7ff99ee 100644 --- a/arch/x86/kernel/cpu/common_64.c +++ b/arch/x86/kernel/cpu/common_64.c @@ -461,6 +461,7 @@ static void __cpuinit get_cpu_cap(struct cpuinfo_x86 *c) } } +#ifdef CONFIG_X86_64 /* Transmeta-defined flags: level 0x80860001 */ xlvl = cpuid_eax(0x80860000); if ((xlvl & 0xffff0000) == 0x80860000) { @@ -478,6 +479,7 @@ static void __cpuinit get_cpu_cap(struct cpuinfo_x86 *c) c->x86_virt_bits = (eax >> 8) & 0xff; c->x86_phys_bits = eax & 0xff; } +#endif } /* Do some early cpuid on the boot CPU to get some parameter that are -- cgit v1.2.3 From 6627d2423067f2c6eedb422a59fba9270a3c5e36 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 4 Sep 2008 20:09:10 -0700 Subject: x86: cpu/common*.c, merge early_identify_cpu() Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/common.c | 4 ++++ arch/x86/kernel/cpu/common_64.c | 19 ++++++++++++++++--- 2 files changed, 20 insertions(+), 3 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index ffc2f5ed09a..61a70748213 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -490,7 +490,11 @@ static void __cpuinit get_cpu_cap(struct cpuinfo_x86 *c) */ static void __init early_identify_cpu(struct cpuinfo_x86 *c) { +#ifdef CONFIG_X86_64 + c->x86_clflush_size = 64; +#else c->x86_clflush_size = 32; +#endif c->x86_cache_alignment = c->x86_clflush_size; if (!have_cpuid_p()) diff --git a/arch/x86/kernel/cpu/common_64.c b/arch/x86/kernel/cpu/common_64.c index 75bb7ff99ee..6475548d64e 100644 --- a/arch/x86/kernel/cpu/common_64.c +++ b/arch/x86/kernel/cpu/common_64.c @@ -482,15 +482,28 @@ static void __cpuinit get_cpu_cap(struct cpuinfo_x86 *c) #endif } -/* Do some early cpuid on the boot CPU to get some parameter that are - needed before check_bugs. Everything advanced is in identify_cpu - below. */ +/* + * Do minimum CPU detection early. + * Fields really needed: vendor, cpuid_level, family, model, mask, + * cache alignment. + * The others are not touched to avoid unwanted side effects. + * + * WARNING: this function is only called on the BP. Don't add code here + * that is supposed to run on all CPUs. + */ static void __init early_identify_cpu(struct cpuinfo_x86 *c) { +#ifdef CONFIG_X86_64 c->x86_clflush_size = 64; +#else + c->x86_clflush_size = 32; +#endif c->x86_cache_alignment = c->x86_clflush_size; + if (!have_cpuid_p()) + return; + memset(&c->x86_capability, 0, sizeof c->x86_capability); c->extended_cpuid_level = 0; -- cgit v1.2.3 From 56f0d033be2ebb983993e5d7f24ae232c9a1e7f9 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 4 Sep 2008 20:09:11 -0700 Subject: x86: cpu/common*.c: merge print_cpu_info() Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/common_64.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/common_64.c b/arch/x86/kernel/cpu/common_64.c index 6475548d64e..68b06a39dca 100644 --- a/arch/x86/kernel/cpu/common_64.c +++ b/arch/x86/kernel/cpu/common_64.c @@ -730,8 +730,20 @@ __setup("noclflush", setup_noclflush); void __cpuinit print_cpu_info(struct cpuinfo_x86 *c) { + char *vendor = NULL; + + if (c->x86_vendor < X86_VENDOR_NUM) + vendor = this_cpu->c_vendor; + else if (c->cpuid_level >= 0) + vendor = c->x86_vendor_id; + + if (vendor && strncmp(c->x86_model_id, vendor, strlen(vendor))) + printk(KERN_CONT "%s ", vendor); + if (c->x86_model_id[0]) printk(KERN_CONT "%s", c->x86_model_id); + else + printk(KERN_CONT "%d86", c->x86); if (c->x86_mask || c->cpuid_level >= 0) printk(KERN_CONT " stepping %02x\n", c->x86_mask); -- cgit v1.2.3 From b89d3b3e2caeab3d895301d1aee3cd2a91eddb79 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 4 Sep 2008 20:09:12 -0700 Subject: x86: cpu/common*.c, merge generic_identify() Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/common.c | 15 ++++++++++++--- arch/x86/kernel/cpu/common_64.c | 19 ++++++++++++++++--- 2 files changed, 28 insertions(+), 6 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 61a70748213..f35baa7f303 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -548,6 +548,10 @@ void __init early_cpu_init(void) * of early VIA chips and (more importantly) broken virtualizers that * are not easy to detect. Hence, probe for it based on first * principles. + * + * Note: no 64-bit chip is known to lack these, but put the code here + * for consistency with 32 bits, and to make it utterly trivial to + * diagnose the problem should it ever surface. */ static void __cpuinit detect_nopl(struct cpuinfo_x86 *c) { @@ -586,11 +590,16 @@ static void __cpuinit generic_identify(struct cpuinfo_x86 *c) if (c->cpuid_level >= 0x00000001) { c->initial_apicid = (cpuid_ebx(1) >> 24) & 0xFF; -#ifdef CONFIG_X86_HT +#ifdef CONFIG_X86_32 +# ifdef CONFIG_X86_HT c->apicid = phys_pkg_id(c->initial_apicid, 0); - c->phys_proc_id = c->initial_apicid; -#else +# else c->apicid = c->initial_apicid; +# endif +#endif + +#ifdef CONFIG_X86_HT + c->phys_proc_id = c->initial_apicid; #endif } diff --git a/arch/x86/kernel/cpu/common_64.c b/arch/x86/kernel/cpu/common_64.c index 68b06a39dca..869a6ff9f7d 100644 --- a/arch/x86/kernel/cpu/common_64.c +++ b/arch/x86/kernel/cpu/common_64.c @@ -581,6 +581,9 @@ static void __cpuinit detect_nopl(struct cpuinfo_x86 *c) static void __cpuinit generic_identify(struct cpuinfo_x86 *c) { + if (!have_cpuid_p()) + return; + c->extended_cpuid_level = 0; cpu_detect(c); @@ -589,11 +592,21 @@ static void __cpuinit generic_identify(struct cpuinfo_x86 *c) get_cpu_cap(c); - c->initial_apicid = (cpuid_ebx(1) >> 24) & 0xff; -#ifdef CONFIG_SMP - c->phys_proc_id = c->initial_apicid; + if (c->cpuid_level >= 0x00000001) { + c->initial_apicid = (cpuid_ebx(1) >> 24) & 0xFF; +#ifdef CONFIG_X86_32 +# ifdef CONFIG_X86_HT + c->apicid = phys_pkg_id(c->initial_apicid, 0); +# else + c->apicid = c->initial_apicid; +# endif #endif +#ifdef CONFIG_X86_HT + c->phys_proc_id = c->initial_apicid; +#endif + } + if (c->extended_cpuid_level >= 0x80000004) get_model_name(c); /* Default name */ -- cgit v1.2.3 From 102bbe3ab83c7ac04461d277df23cd5acdb49892 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 4 Sep 2008 20:09:13 -0700 Subject: x86: cpu/common*.c, merge identify_cpu() Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/common.c | 89 ++++++++++++++++++++---------- arch/x86/kernel/cpu/common_64.c | 116 ++++++++++++++++++++++++++++++---------- 2 files changed, 147 insertions(+), 58 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index f35baa7f303..eef868c97b8 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -104,34 +104,6 @@ static int __init cachesize_setup(char *str) } __setup("cachesize=", cachesize_setup); -/* - * Naming convention should be: [()] - * This table only is used unless init_() below doesn't set it; - * in particular, if CPUID levels 0x80000002..4 are supported, this isn't used - * - */ - -/* Look up CPU names by table lookup. */ -static char __cpuinit *table_lookup_model(struct cpuinfo_x86 *c) -{ - struct cpu_model_info *info; - - if (c->x86_model >= 16) - return NULL; /* Range check */ - - if (!this_cpu) - return NULL; - - info = this_cpu->c_models; - - while (info && info->family) { - if (info->family == c->x86) - return info->model_names[c->x86_model]; - info++; - } - return NULL; /* Not found */ -} - static int __init x86_fxsr_setup(char *s) { setup_clear_cpu_cap(X86_FEATURE_FXSR); @@ -197,13 +169,48 @@ static int __init x86_serial_nr_setup(char *s) } __setup("serialnumber", x86_serial_nr_setup); #else +static inline int flag_is_changeable_p(u32 flag) +{ + return 1; +} /* Probe for the CPUID instruction */ static inline int have_cpuid_p(void) { return 1; } +static inline void squash_the_stupid_serial_number(struct cpuinfo_x86 *c) +{ +} #endif +/* + * Naming convention should be: [()] + * This table only is used unless init_() below doesn't set it; + * in particular, if CPUID levels 0x80000002..4 are supported, this isn't used + * + */ + +/* Look up CPU names by table lookup. */ +static char __cpuinit *table_lookup_model(struct cpuinfo_x86 *c) +{ + struct cpu_model_info *info; + + if (c->x86_model >= 16) + return NULL; /* Range check */ + + if (!this_cpu) + return NULL; + + info = this_cpu->c_models; + + while (info && info->family) { + if (info->family == c->x86) + return info->model_names[c->x86_model]; + info++; + } + return NULL; /* Not found */ +} + __u32 cleared_cpu_caps[NCAPINTS] __cpuinitdata; /* Current gdt points %fs at the "master" per-cpu area: after this, @@ -620,12 +627,18 @@ static void __cpuinit identify_cpu(struct cpuinfo_x86 *c) c->loops_per_jiffy = loops_per_jiffy; c->x86_cache_size = -1; c->x86_vendor = X86_VENDOR_UNKNOWN; - c->cpuid_level = -1; /* CPUID not detected */ c->x86_model = c->x86_mask = 0; /* So far unknown... */ c->x86_vendor_id[0] = '\0'; /* Unset */ c->x86_model_id[0] = '\0'; /* Unset */ c->x86_max_cores = 1; +#ifdef CONFIG_X86_64 + c->x86_coreid_bits = 0; + c->x86_clflush_size = 64; +#else + c->cpuid_level = -1; /* CPUID not detected */ c->x86_clflush_size = 32; +#endif + c->x86_cache_alignment = c->x86_clflush_size; memset(&c->x86_capability, 0, sizeof c->x86_capability); if (!have_cpuid_p()) { @@ -644,6 +657,10 @@ static void __cpuinit identify_cpu(struct cpuinfo_x86 *c) if (this_cpu->c_identify) this_cpu->c_identify(c); +#ifdef CONFIG_X86_64 + c->apicid = phys_pkg_id(0); +#endif + /* * Vendor-specific initialization. In this section we * canonicalize the feature flags, meaning if there are @@ -677,6 +694,10 @@ static void __cpuinit identify_cpu(struct cpuinfo_x86 *c) c->x86, c->x86_model); } +#ifdef CONFIG_X86_64 + detect_ht(c); +#endif + /* * On SMP, boot_cpu_data holds the common feature set between * all CPUs; so make sure that we indicate which features are @@ -693,24 +714,34 @@ static void __cpuinit identify_cpu(struct cpuinfo_x86 *c) for (i = 0; i < NCAPINTS; i++) c->x86_capability[i] &= ~cleared_cpu_caps[i]; +#ifdef CONFIG_X86_MCE /* Init Machine Check Exception if available. */ mcheck_init(c); +#endif select_idle_routine(c); + +#if defined(CONFIG_NUMA) && defined(CONFIG_X86_64) + numa_add_cpu(smp_processor_id()); +#endif } void __init identify_boot_cpu(void) { identify_cpu(&boot_cpu_data); +#ifdef CONFIG_X86_32 sysenter_setup(); enable_sep_cpu(); +#endif } void __cpuinit identify_secondary_cpu(struct cpuinfo_x86 *c) { BUG_ON(c == &boot_cpu_data); identify_cpu(c); +#ifdef CONFIG_X86_32 enable_sep_cpu(); +#endif mtrr_ap_init(); } diff --git a/arch/x86/kernel/cpu/common_64.c b/arch/x86/kernel/cpu/common_64.c index 869a6ff9f7d..6a42371a609 100644 --- a/arch/x86/kernel/cpu/common_64.c +++ b/arch/x86/kernel/cpu/common_64.c @@ -103,34 +103,6 @@ static int __init cachesize_setup(char *str) } __setup("cachesize=", cachesize_setup); -/* - * Naming convention should be: [()] - * This table only is used unless init_() below doesn't set it; - * in particular, if CPUID levels 0x80000002..4 are supported, this isn't used - * - */ - -/* Look up CPU names by table lookup. */ -static char __cpuinit *table_lookup_model(struct cpuinfo_x86 *c) -{ - struct cpu_model_info *info; - - if (c->x86_model >= 16) - return NULL; /* Range check */ - - if (!this_cpu) - return NULL; - - info = this_cpu->c_models; - - while (info && info->family) { - if (info->family == c->x86) - return info->model_names[c->x86_model]; - info++; - } - return NULL; /* Not found */ -} - static int __init x86_fxsr_setup(char *s) { setup_clear_cpu_cap(X86_FEATURE_FXSR); @@ -196,13 +168,48 @@ static int __init x86_serial_nr_setup(char *s) } __setup("serialnumber", x86_serial_nr_setup); #else +static inline int flag_is_changeable_p(u32 flag) +{ + return 1; +} /* Probe for the CPUID instruction */ static inline int have_cpuid_p(void) { return 1; } +static inline void squash_the_stupid_serial_number(struct cpuinfo_x86 *c) +{ +} #endif +/* + * Naming convention should be: [()] + * This table only is used unless init_() below doesn't set it; + * in particular, if CPUID levels 0x80000002..4 are supported, this isn't used + * + */ + +/* Look up CPU names by table lookup. */ +static char __cpuinit *table_lookup_model(struct cpuinfo_x86 *c) +{ + struct cpu_model_info *info; + + if (c->x86_model >= 16) + return NULL; /* Range check */ + + if (!this_cpu) + return NULL; + + info = this_cpu->c_models; + + while (info && info->family) { + if (info->family == c->x86) + return info->model_names[c->x86_model]; + info++; + } + return NULL; /* Not found */ +} + __u32 cleared_cpu_caps[NCAPINTS] __cpuinitdata; /* Current gdt points %fs at the "master" per-cpu area: after this, @@ -628,14 +635,35 @@ static void __cpuinit identify_cpu(struct cpuinfo_x86 *c) c->x86_vendor_id[0] = '\0'; /* Unset */ c->x86_model_id[0] = '\0'; /* Unset */ c->x86_max_cores = 1; +#ifdef CONFIG_X86_64 c->x86_coreid_bits = 0; c->x86_clflush_size = 64; +#else + c->cpuid_level = -1; /* CPUID not detected */ + c->x86_clflush_size = 32; +#endif c->x86_cache_alignment = c->x86_clflush_size; memset(&c->x86_capability, 0, sizeof c->x86_capability); + if (!have_cpuid_p()) { + /* + * First of all, decide if this is a 486 or higher + * It's a 486 if we can modify the AC flag + */ + if (flag_is_changeable_p(X86_EFLAGS_AC)) + c->x86 = 4; + else + c->x86 = 3; + } + generic_identify(c); + if (this_cpu->c_identify) + this_cpu->c_identify(c); + +#ifdef CONFIG_X86_64 c->apicid = phys_pkg_id(0); +#endif /* * Vendor-specific initialization. In this section we @@ -650,7 +678,29 @@ static void __cpuinit identify_cpu(struct cpuinfo_x86 *c) if (this_cpu->c_init) this_cpu->c_init(c); + /* Disable the PN if appropriate */ + squash_the_stupid_serial_number(c); + + /* + * The vendor-specific functions might have changed features. Now + * we do "generic changes." + */ + + /* If the model name is still unset, do table lookup. */ + if (!c->x86_model_id[0]) { + char *p; + p = table_lookup_model(c); + if (p) + strcpy(c->x86_model_id, p); + else + /* Last resort... */ + sprintf(c->x86_model_id, "%02x/%02x", + c->x86, c->x86_model); + } + +#ifdef CONFIG_X86_64 detect_ht(c); +#endif /* * On SMP, boot_cpu_data holds the common feature set between @@ -669,11 +719,12 @@ static void __cpuinit identify_cpu(struct cpuinfo_x86 *c) c->x86_capability[i] &= ~cleared_cpu_caps[i]; #ifdef CONFIG_X86_MCE + /* Init Machine Check Exception if available. */ mcheck_init(c); #endif select_idle_routine(c); -#ifdef CONFIG_NUMA +#if defined(CONFIG_NUMA) && defined(CONFIG_X86_64) numa_add_cpu(smp_processor_id()); #endif @@ -682,12 +733,19 @@ static void __cpuinit identify_cpu(struct cpuinfo_x86 *c) void __init identify_boot_cpu(void) { identify_cpu(&boot_cpu_data); +#ifdef CONFIG_X86_32 + sysenter_setup(); + enable_sep_cpu(); +#endif } void __cpuinit identify_secondary_cpu(struct cpuinfo_x86 *c) { BUG_ON(c == &boot_cpu_data); identify_cpu(c); +#ifdef CONFIG_X86_32 + enable_sep_cpu(); +#endif mtrr_ap_init(); } -- cgit v1.2.3 From 143b604a2d07ff69cc2bc02ecd9395b28e0b5292 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 5 Sep 2008 09:37:15 +0200 Subject: x86: cpu/common*.c, merge whitespaces Merge leftover whitespaces, to make arch/x86/kernel/cpu/common_64.c exactly identical to arch/x86/kernel/cpu/common.c. Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/common_64.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/common_64.c b/arch/x86/kernel/cpu/common_64.c index 6a42371a609..eef868c97b8 100644 --- a/arch/x86/kernel/cpu/common_64.c +++ b/arch/x86/kernel/cpu/common_64.c @@ -26,6 +26,7 @@ #include #include #endif + #include #include #include @@ -280,7 +281,6 @@ int __cpuinit get_model_name(struct cpuinfo_x86 *c) return 1; } - void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c) { unsigned int n, dummy, ebx, ecx, edx, l2size; @@ -307,7 +307,6 @@ void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c) #ifdef CONFIG_X86_64 c->x86_tlbsize += ((ebx >> 16) & 0xfff) + (ebx & 0xfff); #else - /* do processor-specific cache resizing */ if (this_cpu->c_size_cache) l2size = this_cpu->c_size_cache(c, l2size); @@ -334,6 +333,7 @@ void __cpuinit detect_ht(struct cpuinfo_x86 *c) if (!cpu_has(c, X86_FEATURE_HT)) return; + if (cpu_has(c, X86_FEATURE_CMP_LEGACY)) goto out; @@ -443,7 +443,6 @@ void __cpuinit cpu_detect(struct cpuinfo_x86 *c) } } - static void __cpuinit get_cpu_cap(struct cpuinfo_x86 *c) { u32 tfms, xlvl; @@ -452,7 +451,6 @@ static void __cpuinit get_cpu_cap(struct cpuinfo_x86 *c) /* Intel-defined flags: level 0x00000001 */ if (c->cpuid_level >= 0x00000001) { u32 capability, excap; - cpuid(0x00000001, &tfms, &ebx, &excap, &capability); c->x86_capability[0] = capability; c->x86_capability[4] = excap; @@ -488,7 +486,6 @@ static void __cpuinit get_cpu_cap(struct cpuinfo_x86 *c) } #endif } - /* * Do minimum CPU detection early. * Fields really needed: vendor, cpuid_level, family, model, mask, @@ -500,7 +497,6 @@ static void __cpuinit get_cpu_cap(struct cpuinfo_x86 *c) */ static void __init early_identify_cpu(struct cpuinfo_x86 *c) { - #ifdef CONFIG_X86_64 c->x86_clflush_size = 64; #else @@ -722,12 +718,12 @@ static void __cpuinit identify_cpu(struct cpuinfo_x86 *c) /* Init Machine Check Exception if available. */ mcheck_init(c); #endif + select_idle_routine(c); #if defined(CONFIG_NUMA) && defined(CONFIG_X86_64) numa_add_cpu(smp_processor_id()); #endif - } void __init identify_boot_cpu(void) -- cgit v1.2.3 From f5017cfa3591d8eaf5c792e40ff30f18e498b68c Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 4 Sep 2008 20:09:14 -0700 Subject: x86: use cpu/common.c on 64 bit Use cpu/common.c on both 64-bit and 32-bit and remove cpu/common_64.c. We started out with this linecount: 816 arch/x86/kernel/cpu/common_64.c 805 arch/x86/kernel/cpu/common.c and the resulting common.c is 1197 lines long, so there's already 424 lines of code eliminated in this phase of the unification. Signed-off-by: Yinghai Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/Makefile | 6 +- arch/x86/kernel/cpu/common_64.c | 1197 --------------------------------------- 2 files changed, 3 insertions(+), 1200 deletions(-) delete mode 100644 arch/x86/kernel/cpu/common_64.c (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile index 403e689df0b..d031f248dfc 100644 --- a/arch/x86/kernel/cpu/Makefile +++ b/arch/x86/kernel/cpu/Makefile @@ -3,10 +3,10 @@ # obj-y := intel_cacheinfo.o addon_cpuid_features.o -obj-y += proc.o capflags.o powerflags.o +obj-y += proc.o capflags.o powerflags.o common.o -obj-$(CONFIG_X86_32) += common.o bugs.o cmpxchg.o -obj-$(CONFIG_X86_64) += common_64.o bugs_64.o +obj-$(CONFIG_X86_32) += bugs.o cmpxchg.o +obj-$(CONFIG_X86_64) += bugs_64.o obj-$(CONFIG_CPU_SUP_INTEL_32) += intel.o obj-$(CONFIG_CPU_SUP_INTEL_64) += intel_64.o diff --git a/arch/x86/kernel/cpu/common_64.c b/arch/x86/kernel/cpu/common_64.c deleted file mode 100644 index eef868c97b8..00000000000 --- a/arch/x86/kernel/cpu/common_64.c +++ /dev/null @@ -1,1197 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#ifdef CONFIG_X86_LOCAL_APIC -#include -#include -#include -#include -#endif - -#include -#include -#include -#include -#include -#include -#include -#include - -#include "cpu.h" - -static struct cpu_dev *this_cpu __cpuinitdata; - -#ifdef CONFIG_X86_64 -/* We need valid kernel segments for data and code in long mode too - * IRET will check the segment types kkeil 2000/10/28 - * Also sysret mandates a special GDT layout - */ -/* The TLS descriptors are currently at a different place compared to i386. - Hopefully nobody expects them at a fixed place (Wine?) */ -DEFINE_PER_CPU(struct gdt_page, gdt_page) = { .gdt = { - [GDT_ENTRY_KERNEL32_CS] = { { { 0x0000ffff, 0x00cf9b00 } } }, - [GDT_ENTRY_KERNEL_CS] = { { { 0x0000ffff, 0x00af9b00 } } }, - [GDT_ENTRY_KERNEL_DS] = { { { 0x0000ffff, 0x00cf9300 } } }, - [GDT_ENTRY_DEFAULT_USER32_CS] = { { { 0x0000ffff, 0x00cffb00 } } }, - [GDT_ENTRY_DEFAULT_USER_DS] = { { { 0x0000ffff, 0x00cff300 } } }, - [GDT_ENTRY_DEFAULT_USER_CS] = { { { 0x0000ffff, 0x00affb00 } } }, -} }; -#else -DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = { - [GDT_ENTRY_KERNEL_CS] = { { { 0x0000ffff, 0x00cf9a00 } } }, - [GDT_ENTRY_KERNEL_DS] = { { { 0x0000ffff, 0x00cf9200 } } }, - [GDT_ENTRY_DEFAULT_USER_CS] = { { { 0x0000ffff, 0x00cffa00 } } }, - [GDT_ENTRY_DEFAULT_USER_DS] = { { { 0x0000ffff, 0x00cff200 } } }, - /* - * Segments used for calling PnP BIOS have byte granularity. - * They code segments and data segments have fixed 64k limits, - * the transfer segment sizes are set at run time. - */ - /* 32-bit code */ - [GDT_ENTRY_PNPBIOS_CS32] = { { { 0x0000ffff, 0x00409a00 } } }, - /* 16-bit code */ - [GDT_ENTRY_PNPBIOS_CS16] = { { { 0x0000ffff, 0x00009a00 } } }, - /* 16-bit data */ - [GDT_ENTRY_PNPBIOS_DS] = { { { 0x0000ffff, 0x00009200 } } }, - /* 16-bit data */ - [GDT_ENTRY_PNPBIOS_TS1] = { { { 0x00000000, 0x00009200 } } }, - /* 16-bit data */ - [GDT_ENTRY_PNPBIOS_TS2] = { { { 0x00000000, 0x00009200 } } }, - /* - * The APM segments have byte granularity and their bases - * are set at run time. All have 64k limits. - */ - /* 32-bit code */ - [GDT_ENTRY_APMBIOS_BASE] = { { { 0x0000ffff, 0x00409a00 } } }, - /* 16-bit code */ - [GDT_ENTRY_APMBIOS_BASE+1] = { { { 0x0000ffff, 0x00009a00 } } }, - /* data */ - [GDT_ENTRY_APMBIOS_BASE+2] = { { { 0x0000ffff, 0x00409200 } } }, - - [GDT_ENTRY_ESPFIX_SS] = { { { 0x00000000, 0x00c09200 } } }, - [GDT_ENTRY_PERCPU] = { { { 0x00000000, 0x00000000 } } }, -} }; -#endif -EXPORT_PER_CPU_SYMBOL_GPL(gdt_page); - -#ifdef CONFIG_X86_32 -static int cachesize_override __cpuinitdata = -1; -static int disable_x86_serial_nr __cpuinitdata = 1; - -static int __init cachesize_setup(char *str) -{ - get_option(&str, &cachesize_override); - return 1; -} -__setup("cachesize=", cachesize_setup); - -static int __init x86_fxsr_setup(char *s) -{ - setup_clear_cpu_cap(X86_FEATURE_FXSR); - setup_clear_cpu_cap(X86_FEATURE_XMM); - return 1; -} -__setup("nofxsr", x86_fxsr_setup); - -static int __init x86_sep_setup(char *s) -{ - setup_clear_cpu_cap(X86_FEATURE_SEP); - return 1; -} -__setup("nosep", x86_sep_setup); - -/* Standard macro to see if a specific flag is changeable */ -static inline int flag_is_changeable_p(u32 flag) -{ - u32 f1, f2; - - asm("pushfl\n\t" - "pushfl\n\t" - "popl %0\n\t" - "movl %0,%1\n\t" - "xorl %2,%0\n\t" - "pushl %0\n\t" - "popfl\n\t" - "pushfl\n\t" - "popl %0\n\t" - "popfl\n\t" - : "=&r" (f1), "=&r" (f2) - : "ir" (flag)); - - return ((f1^f2) & flag) != 0; -} - -/* Probe for the CPUID instruction */ -static int __cpuinit have_cpuid_p(void) -{ - return flag_is_changeable_p(X86_EFLAGS_ID); -} - -static void __cpuinit squash_the_stupid_serial_number(struct cpuinfo_x86 *c) -{ - if (cpu_has(c, X86_FEATURE_PN) && disable_x86_serial_nr) { - /* Disable processor serial number */ - unsigned long lo, hi; - rdmsr(MSR_IA32_BBL_CR_CTL, lo, hi); - lo |= 0x200000; - wrmsr(MSR_IA32_BBL_CR_CTL, lo, hi); - printk(KERN_NOTICE "CPU serial number disabled.\n"); - clear_cpu_cap(c, X86_FEATURE_PN); - - /* Disabling the serial number may affect the cpuid level */ - c->cpuid_level = cpuid_eax(0); - } -} - -static int __init x86_serial_nr_setup(char *s) -{ - disable_x86_serial_nr = 0; - return 1; -} -__setup("serialnumber", x86_serial_nr_setup); -#else -static inline int flag_is_changeable_p(u32 flag) -{ - return 1; -} -/* Probe for the CPUID instruction */ -static inline int have_cpuid_p(void) -{ - return 1; -} -static inline void squash_the_stupid_serial_number(struct cpuinfo_x86 *c) -{ -} -#endif - -/* - * Naming convention should be: [()] - * This table only is used unless init_() below doesn't set it; - * in particular, if CPUID levels 0x80000002..4 are supported, this isn't used - * - */ - -/* Look up CPU names by table lookup. */ -static char __cpuinit *table_lookup_model(struct cpuinfo_x86 *c) -{ - struct cpu_model_info *info; - - if (c->x86_model >= 16) - return NULL; /* Range check */ - - if (!this_cpu) - return NULL; - - info = this_cpu->c_models; - - while (info && info->family) { - if (info->family == c->x86) - return info->model_names[c->x86_model]; - info++; - } - return NULL; /* Not found */ -} - -__u32 cleared_cpu_caps[NCAPINTS] __cpuinitdata; - -/* Current gdt points %fs at the "master" per-cpu area: after this, - * it's on the real one. */ -void switch_to_new_gdt(void) -{ - struct desc_ptr gdt_descr; - - gdt_descr.address = (long)get_cpu_gdt_table(smp_processor_id()); - gdt_descr.size = GDT_SIZE - 1; - load_gdt(&gdt_descr); -#ifdef CONFIG_X86_32 - asm("mov %0, %%fs" : : "r" (__KERNEL_PERCPU) : "memory"); -#endif -} - -static struct cpu_dev *cpu_devs[X86_VENDOR_NUM] = {}; - -static void __cpuinit default_init(struct cpuinfo_x86 *c) -{ -#ifdef CONFIG_X86_64 - display_cacheinfo(c); -#else - /* Not much we can do here... */ - /* Check if at least it has cpuid */ - if (c->cpuid_level == -1) { - /* No cpuid. It must be an ancient CPU */ - if (c->x86 == 4) - strcpy(c->x86_model_id, "486"); - else if (c->x86 == 3) - strcpy(c->x86_model_id, "386"); - } -#endif -} - -static struct cpu_dev __cpuinitdata default_cpu = { - .c_init = default_init, - .c_vendor = "Unknown", - .c_x86_vendor = X86_VENDOR_UNKNOWN, -}; - -int __cpuinit get_model_name(struct cpuinfo_x86 *c) -{ - unsigned int *v; - char *p, *q; - - if (c->extended_cpuid_level < 0x80000004) - return 0; - - v = (unsigned int *) c->x86_model_id; - cpuid(0x80000002, &v[0], &v[1], &v[2], &v[3]); - cpuid(0x80000003, &v[4], &v[5], &v[6], &v[7]); - cpuid(0x80000004, &v[8], &v[9], &v[10], &v[11]); - c->x86_model_id[48] = 0; - - /* Intel chips right-justify this string for some dumb reason; - undo that brain damage */ - p = q = &c->x86_model_id[0]; - while (*p == ' ') - p++; - if (p != q) { - while (*p) - *q++ = *p++; - while (q <= &c->x86_model_id[48]) - *q++ = '\0'; /* Zero-pad the rest */ - } - - return 1; -} - -void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c) -{ - unsigned int n, dummy, ebx, ecx, edx, l2size; - - n = c->extended_cpuid_level; - - if (n >= 0x80000005) { - cpuid(0x80000005, &dummy, &ebx, &ecx, &edx); - printk(KERN_INFO "CPU: L1 I Cache: %dK (%d bytes/line), D cache %dK (%d bytes/line)\n", - edx>>24, edx&0xFF, ecx>>24, ecx&0xFF); - c->x86_cache_size = (ecx>>24) + (edx>>24); -#ifdef CONFIG_X86_64 - /* On K8 L1 TLB is inclusive, so don't count it */ - c->x86_tlbsize = 0; -#endif - } - - if (n < 0x80000006) /* Some chips just has a large L1. */ - return; - - cpuid(0x80000006, &dummy, &ebx, &ecx, &edx); - l2size = ecx >> 16; - -#ifdef CONFIG_X86_64 - c->x86_tlbsize += ((ebx >> 16) & 0xfff) + (ebx & 0xfff); -#else - /* do processor-specific cache resizing */ - if (this_cpu->c_size_cache) - l2size = this_cpu->c_size_cache(c, l2size); - - /* Allow user to override all this if necessary. */ - if (cachesize_override != -1) - l2size = cachesize_override; - - if (l2size == 0) - return; /* Again, no L2 cache is possible */ -#endif - - c->x86_cache_size = l2size; - - printk(KERN_INFO "CPU: L2 Cache: %dK (%d bytes/line)\n", - l2size, ecx & 0xFF); -} - -void __cpuinit detect_ht(struct cpuinfo_x86 *c) -{ -#ifdef CONFIG_X86_HT - u32 eax, ebx, ecx, edx; - int index_msb, core_bits; - - if (!cpu_has(c, X86_FEATURE_HT)) - return; - - if (cpu_has(c, X86_FEATURE_CMP_LEGACY)) - goto out; - - if (cpu_has(c, X86_FEATURE_XTOPOLOGY)) - return; - - cpuid(1, &eax, &ebx, &ecx, &edx); - - smp_num_siblings = (ebx & 0xff0000) >> 16; - - if (smp_num_siblings == 1) { - printk(KERN_INFO "CPU: Hyper-Threading is disabled\n"); - } else if (smp_num_siblings > 1) { - - if (smp_num_siblings > NR_CPUS) { - printk(KERN_WARNING "CPU: Unsupported number of siblings %d", - smp_num_siblings); - smp_num_siblings = 1; - return; - } - - index_msb = get_count_order(smp_num_siblings); -#ifdef CONFIG_X86_64 - c->phys_proc_id = phys_pkg_id(index_msb); -#else - c->phys_proc_id = phys_pkg_id(c->initial_apicid, index_msb); -#endif - - smp_num_siblings = smp_num_siblings / c->x86_max_cores; - - index_msb = get_count_order(smp_num_siblings); - - core_bits = get_count_order(c->x86_max_cores); - -#ifdef CONFIG_X86_64 - c->cpu_core_id = phys_pkg_id(index_msb) & - ((1 << core_bits) - 1); -#else - c->cpu_core_id = phys_pkg_id(c->initial_apicid, index_msb) & - ((1 << core_bits) - 1); -#endif - } - -out: - if ((c->x86_max_cores * smp_num_siblings) > 1) { - printk(KERN_INFO "CPU: Physical Processor ID: %d\n", - c->phys_proc_id); - printk(KERN_INFO "CPU: Processor Core ID: %d\n", - c->cpu_core_id); - } -#endif -} - -static void __cpuinit get_cpu_vendor(struct cpuinfo_x86 *c) -{ - char *v = c->x86_vendor_id; - int i; - static int printed; - - for (i = 0; i < X86_VENDOR_NUM; i++) { - if (!cpu_devs[i]) - break; - - if (!strcmp(v, cpu_devs[i]->c_ident[0]) || - (cpu_devs[i]->c_ident[1] && - !strcmp(v, cpu_devs[i]->c_ident[1]))) { - this_cpu = cpu_devs[i]; - c->x86_vendor = this_cpu->c_x86_vendor; - return; - } - } - - if (!printed) { - printed++; - printk(KERN_ERR "CPU: Vendor unknown, using generic init.\n"); - printk(KERN_ERR "CPU: Your system may be unstable.\n"); - } - - c->x86_vendor = X86_VENDOR_UNKNOWN; - this_cpu = &default_cpu; -} - -void __cpuinit cpu_detect(struct cpuinfo_x86 *c) -{ - /* Get vendor name */ - cpuid(0x00000000, (unsigned int *)&c->cpuid_level, - (unsigned int *)&c->x86_vendor_id[0], - (unsigned int *)&c->x86_vendor_id[8], - (unsigned int *)&c->x86_vendor_id[4]); - - c->x86 = 4; - /* Intel-defined flags: level 0x00000001 */ - if (c->cpuid_level >= 0x00000001) { - u32 junk, tfms, cap0, misc; - cpuid(0x00000001, &tfms, &misc, &junk, &cap0); - c->x86 = (tfms >> 8) & 0xf; - c->x86_model = (tfms >> 4) & 0xf; - c->x86_mask = tfms & 0xf; - if (c->x86 == 0xf) - c->x86 += (tfms >> 20) & 0xff; - if (c->x86 >= 0x6) - c->x86_model += ((tfms >> 16) & 0xf) << 4; - if (cap0 & (1<<19)) { - c->x86_clflush_size = ((misc >> 8) & 0xff) * 8; - c->x86_cache_alignment = c->x86_clflush_size; - } - } -} - -static void __cpuinit get_cpu_cap(struct cpuinfo_x86 *c) -{ - u32 tfms, xlvl; - u32 ebx; - - /* Intel-defined flags: level 0x00000001 */ - if (c->cpuid_level >= 0x00000001) { - u32 capability, excap; - cpuid(0x00000001, &tfms, &ebx, &excap, &capability); - c->x86_capability[0] = capability; - c->x86_capability[4] = excap; - } - - /* AMD-defined flags: level 0x80000001 */ - xlvl = cpuid_eax(0x80000000); - c->extended_cpuid_level = xlvl; - if ((xlvl & 0xffff0000) == 0x80000000) { - if (xlvl >= 0x80000001) { - c->x86_capability[1] = cpuid_edx(0x80000001); - c->x86_capability[6] = cpuid_ecx(0x80000001); - } - } - -#ifdef CONFIG_X86_64 - /* Transmeta-defined flags: level 0x80860001 */ - xlvl = cpuid_eax(0x80860000); - if ((xlvl & 0xffff0000) == 0x80860000) { - /* Don't set x86_cpuid_level here for now to not confuse. */ - if (xlvl >= 0x80860001) - c->x86_capability[2] = cpuid_edx(0x80860001); - } - - if (c->extended_cpuid_level >= 0x80000007) - c->x86_power = cpuid_edx(0x80000007); - - if (c->extended_cpuid_level >= 0x80000008) { - u32 eax = cpuid_eax(0x80000008); - - c->x86_virt_bits = (eax >> 8) & 0xff; - c->x86_phys_bits = eax & 0xff; - } -#endif -} -/* - * Do minimum CPU detection early. - * Fields really needed: vendor, cpuid_level, family, model, mask, - * cache alignment. - * The others are not touched to avoid unwanted side effects. - * - * WARNING: this function is only called on the BP. Don't add code here - * that is supposed to run on all CPUs. - */ -static void __init early_identify_cpu(struct cpuinfo_x86 *c) -{ -#ifdef CONFIG_X86_64 - c->x86_clflush_size = 64; -#else - c->x86_clflush_size = 32; -#endif - c->x86_cache_alignment = c->x86_clflush_size; - - if (!have_cpuid_p()) - return; - - memset(&c->x86_capability, 0, sizeof c->x86_capability); - - c->extended_cpuid_level = 0; - - cpu_detect(c); - - get_cpu_vendor(c); - - get_cpu_cap(c); - - if (this_cpu->c_early_init) - this_cpu->c_early_init(c); - - validate_pat_support(c); -} - -void __init early_cpu_init(void) -{ - struct cpu_dev **cdev; - int count = 0; - - printk("KERNEL supported cpus:\n"); - for (cdev = __x86_cpu_dev_start; cdev < __x86_cpu_dev_end; cdev++) { - struct cpu_dev *cpudev = *cdev; - unsigned int j; - - if (count >= X86_VENDOR_NUM) - break; - cpu_devs[count] = cpudev; - count++; - - for (j = 0; j < 2; j++) { - if (!cpudev->c_ident[j]) - continue; - printk(" %s %s\n", cpudev->c_vendor, - cpudev->c_ident[j]); - } - } - - early_identify_cpu(&boot_cpu_data); -} - -/* - * The NOPL instruction is supposed to exist on all CPUs with - * family >= 6, unfortunately, that's not true in practice because - * of early VIA chips and (more importantly) broken virtualizers that - * are not easy to detect. Hence, probe for it based on first - * principles. - * - * Note: no 64-bit chip is known to lack these, but put the code here - * for consistency with 32 bits, and to make it utterly trivial to - * diagnose the problem should it ever surface. - */ -static void __cpuinit detect_nopl(struct cpuinfo_x86 *c) -{ - const u32 nopl_signature = 0x888c53b1; /* Random number */ - u32 has_nopl = nopl_signature; - - clear_cpu_cap(c, X86_FEATURE_NOPL); - if (c->x86 >= 6) { - asm volatile("\n" - "1: .byte 0x0f,0x1f,0xc0\n" /* nopl %eax */ - "2:\n" - " .section .fixup,\"ax\"\n" - "3: xor %0,%0\n" - " jmp 2b\n" - " .previous\n" - _ASM_EXTABLE(1b,3b) - : "+a" (has_nopl)); - - if (has_nopl == nopl_signature) - set_cpu_cap(c, X86_FEATURE_NOPL); - } -} - -static void __cpuinit generic_identify(struct cpuinfo_x86 *c) -{ - if (!have_cpuid_p()) - return; - - c->extended_cpuid_level = 0; - - cpu_detect(c); - - get_cpu_vendor(c); - - get_cpu_cap(c); - - if (c->cpuid_level >= 0x00000001) { - c->initial_apicid = (cpuid_ebx(1) >> 24) & 0xFF; -#ifdef CONFIG_X86_32 -# ifdef CONFIG_X86_HT - c->apicid = phys_pkg_id(c->initial_apicid, 0); -# else - c->apicid = c->initial_apicid; -# endif -#endif - -#ifdef CONFIG_X86_HT - c->phys_proc_id = c->initial_apicid; -#endif - } - - if (c->extended_cpuid_level >= 0x80000004) - get_model_name(c); /* Default name */ - - init_scattered_cpuid_features(c); - detect_nopl(c); -} - -/* - * This does the hard work of actually picking apart the CPU stuff... - */ -static void __cpuinit identify_cpu(struct cpuinfo_x86 *c) -{ - int i; - - c->loops_per_jiffy = loops_per_jiffy; - c->x86_cache_size = -1; - c->x86_vendor = X86_VENDOR_UNKNOWN; - c->x86_model = c->x86_mask = 0; /* So far unknown... */ - c->x86_vendor_id[0] = '\0'; /* Unset */ - c->x86_model_id[0] = '\0'; /* Unset */ - c->x86_max_cores = 1; -#ifdef CONFIG_X86_64 - c->x86_coreid_bits = 0; - c->x86_clflush_size = 64; -#else - c->cpuid_level = -1; /* CPUID not detected */ - c->x86_clflush_size = 32; -#endif - c->x86_cache_alignment = c->x86_clflush_size; - memset(&c->x86_capability, 0, sizeof c->x86_capability); - - if (!have_cpuid_p()) { - /* - * First of all, decide if this is a 486 or higher - * It's a 486 if we can modify the AC flag - */ - if (flag_is_changeable_p(X86_EFLAGS_AC)) - c->x86 = 4; - else - c->x86 = 3; - } - - generic_identify(c); - - if (this_cpu->c_identify) - this_cpu->c_identify(c); - -#ifdef CONFIG_X86_64 - c->apicid = phys_pkg_id(0); -#endif - - /* - * Vendor-specific initialization. In this section we - * canonicalize the feature flags, meaning if there are - * features a certain CPU supports which CPUID doesn't - * tell us, CPUID claiming incorrect flags, or other bugs, - * we handle them here. - * - * At the end of this section, c->x86_capability better - * indicate the features this CPU genuinely supports! - */ - if (this_cpu->c_init) - this_cpu->c_init(c); - - /* Disable the PN if appropriate */ - squash_the_stupid_serial_number(c); - - /* - * The vendor-specific functions might have changed features. Now - * we do "generic changes." - */ - - /* If the model name is still unset, do table lookup. */ - if (!c->x86_model_id[0]) { - char *p; - p = table_lookup_model(c); - if (p) - strcpy(c->x86_model_id, p); - else - /* Last resort... */ - sprintf(c->x86_model_id, "%02x/%02x", - c->x86, c->x86_model); - } - -#ifdef CONFIG_X86_64 - detect_ht(c); -#endif - - /* - * On SMP, boot_cpu_data holds the common feature set between - * all CPUs; so make sure that we indicate which features are - * common between the CPUs. The first time this routine gets - * executed, c == &boot_cpu_data. - */ - if (c != &boot_cpu_data) { - /* AND the already accumulated flags with these */ - for (i = 0; i < NCAPINTS; i++) - boot_cpu_data.x86_capability[i] &= c->x86_capability[i]; - } - - /* Clear all flags overriden by options */ - for (i = 0; i < NCAPINTS; i++) - c->x86_capability[i] &= ~cleared_cpu_caps[i]; - -#ifdef CONFIG_X86_MCE - /* Init Machine Check Exception if available. */ - mcheck_init(c); -#endif - - select_idle_routine(c); - -#if defined(CONFIG_NUMA) && defined(CONFIG_X86_64) - numa_add_cpu(smp_processor_id()); -#endif -} - -void __init identify_boot_cpu(void) -{ - identify_cpu(&boot_cpu_data); -#ifdef CONFIG_X86_32 - sysenter_setup(); - enable_sep_cpu(); -#endif -} - -void __cpuinit identify_secondary_cpu(struct cpuinfo_x86 *c) -{ - BUG_ON(c == &boot_cpu_data); - identify_cpu(c); -#ifdef CONFIG_X86_32 - enable_sep_cpu(); -#endif - mtrr_ap_init(); -} - -struct msr_range { - unsigned min; - unsigned max; -}; - -static struct msr_range msr_range_array[] __cpuinitdata = { - { 0x00000000, 0x00000418}, - { 0xc0000000, 0xc000040b}, - { 0xc0010000, 0xc0010142}, - { 0xc0011000, 0xc001103b}, -}; - -static void __cpuinit print_cpu_msr(void) -{ - unsigned index; - u64 val; - int i; - unsigned index_min, index_max; - - for (i = 0; i < ARRAY_SIZE(msr_range_array); i++) { - index_min = msr_range_array[i].min; - index_max = msr_range_array[i].max; - for (index = index_min; index < index_max; index++) { - if (rdmsrl_amd_safe(index, &val)) - continue; - printk(KERN_INFO " MSR%08x: %016llx\n", index, val); - } - } -} - -static int show_msr __cpuinitdata; -static __init int setup_show_msr(char *arg) -{ - int num; - - get_option(&arg, &num); - - if (num > 0) - show_msr = num; - return 1; -} -__setup("show_msr=", setup_show_msr); - -static __init int setup_noclflush(char *arg) -{ - setup_clear_cpu_cap(X86_FEATURE_CLFLSH); - return 1; -} -__setup("noclflush", setup_noclflush); - -void __cpuinit print_cpu_info(struct cpuinfo_x86 *c) -{ - char *vendor = NULL; - - if (c->x86_vendor < X86_VENDOR_NUM) - vendor = this_cpu->c_vendor; - else if (c->cpuid_level >= 0) - vendor = c->x86_vendor_id; - - if (vendor && strncmp(c->x86_model_id, vendor, strlen(vendor))) - printk(KERN_CONT "%s ", vendor); - - if (c->x86_model_id[0]) - printk(KERN_CONT "%s", c->x86_model_id); - else - printk(KERN_CONT "%d86", c->x86); - - if (c->x86_mask || c->cpuid_level >= 0) - printk(KERN_CONT " stepping %02x\n", c->x86_mask); - else - printk(KERN_CONT "\n"); - -#ifdef CONFIG_SMP - if (c->cpu_index < show_msr) - print_cpu_msr(); -#else - if (show_msr) - print_cpu_msr(); -#endif -} - -static __init int setup_disablecpuid(char *arg) -{ - int bit; - if (get_option(&arg, &bit) && bit < NCAPINTS*32) - setup_clear_cpu_cap(bit); - else - return 0; - return 1; -} -__setup("clearcpuid=", setup_disablecpuid); - -cpumask_t cpu_initialized __cpuinitdata = CPU_MASK_NONE; - -#ifdef CONFIG_X86_64 -struct x8664_pda **_cpu_pda __read_mostly; -EXPORT_SYMBOL(_cpu_pda); - -struct desc_ptr idt_descr = { 256 * 16 - 1, (unsigned long) idt_table }; - -char boot_cpu_stack[IRQSTACKSIZE] __page_aligned_bss; - -unsigned long __supported_pte_mask __read_mostly = ~0UL; -EXPORT_SYMBOL_GPL(__supported_pte_mask); - -static int do_not_nx __cpuinitdata; - -/* noexec=on|off -Control non executable mappings for 64bit processes. - -on Enable(default) -off Disable -*/ -static int __init nonx_setup(char *str) -{ - if (!str) - return -EINVAL; - if (!strncmp(str, "on", 2)) { - __supported_pte_mask |= _PAGE_NX; - do_not_nx = 0; - } else if (!strncmp(str, "off", 3)) { - do_not_nx = 1; - __supported_pte_mask &= ~_PAGE_NX; - } - return 0; -} -early_param("noexec", nonx_setup); - -int force_personality32; - -/* noexec32=on|off -Control non executable heap for 32bit processes. -To control the stack too use noexec=off - -on PROT_READ does not imply PROT_EXEC for 32bit processes (default) -off PROT_READ implies PROT_EXEC -*/ -static int __init nonx32_setup(char *str) -{ - if (!strcmp(str, "on")) - force_personality32 &= ~READ_IMPLIES_EXEC; - else if (!strcmp(str, "off")) - force_personality32 |= READ_IMPLIES_EXEC; - return 1; -} -__setup("noexec32=", nonx32_setup); - -void pda_init(int cpu) -{ - struct x8664_pda *pda = cpu_pda(cpu); - - /* Setup up data that may be needed in __get_free_pages early */ - loadsegment(fs, 0); - loadsegment(gs, 0); - /* Memory clobbers used to order PDA accessed */ - mb(); - wrmsrl(MSR_GS_BASE, pda); - mb(); - - pda->cpunumber = cpu; - pda->irqcount = -1; - pda->kernelstack = (unsigned long)stack_thread_info() - - PDA_STACKOFFSET + THREAD_SIZE; - pda->active_mm = &init_mm; - pda->mmu_state = 0; - - if (cpu == 0) { - /* others are initialized in smpboot.c */ - pda->pcurrent = &init_task; - pda->irqstackptr = boot_cpu_stack; - pda->irqstackptr += IRQSTACKSIZE - 64; - } else { - if (!pda->irqstackptr) { - pda->irqstackptr = (char *) - __get_free_pages(GFP_ATOMIC, IRQSTACK_ORDER); - if (!pda->irqstackptr) - panic("cannot allocate irqstack for cpu %d", - cpu); - pda->irqstackptr += IRQSTACKSIZE - 64; - } - - if (pda->nodenumber == 0 && cpu_to_node(cpu) != NUMA_NO_NODE) - pda->nodenumber = cpu_to_node(cpu); - } -} - -char boot_exception_stacks[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + - DEBUG_STKSZ] __page_aligned_bss; - -extern asmlinkage void ignore_sysret(void); - -/* May not be marked __init: used by software suspend */ -void syscall_init(void) -{ - /* - * LSTAR and STAR live in a bit strange symbiosis. - * They both write to the same internal register. STAR allows to - * set CS/DS but only a 32bit target. LSTAR sets the 64bit rip. - */ - wrmsrl(MSR_STAR, ((u64)__USER32_CS)<<48 | ((u64)__KERNEL_CS)<<32); - wrmsrl(MSR_LSTAR, system_call); - wrmsrl(MSR_CSTAR, ignore_sysret); - -#ifdef CONFIG_IA32_EMULATION - syscall32_cpu_init(); -#endif - - /* Flags to clear on syscall */ - wrmsrl(MSR_SYSCALL_MASK, - X86_EFLAGS_TF|X86_EFLAGS_DF|X86_EFLAGS_IF|X86_EFLAGS_IOPL); -} - -void __cpuinit check_efer(void) -{ - unsigned long efer; - - rdmsrl(MSR_EFER, efer); - if (!(efer & EFER_NX) || do_not_nx) - __supported_pte_mask &= ~_PAGE_NX; -} - -unsigned long kernel_eflags; - -/* - * Copies of the original ist values from the tss are only accessed during - * debugging, no special alignment required. - */ -DEFINE_PER_CPU(struct orig_ist, orig_ist); - -#else - -/* Make sure %fs is initialized properly in idle threads */ -struct pt_regs * __cpuinit idle_regs(struct pt_regs *regs) -{ - memset(regs, 0, sizeof(struct pt_regs)); - regs->fs = __KERNEL_PERCPU; - return regs; -} -#endif - -/* - * cpu_init() initializes state that is per-CPU. Some data is already - * initialized (naturally) in the bootstrap process, such as the GDT - * and IDT. We reload them nevertheless, this function acts as a - * 'CPU state barrier', nothing should get across. - * A lot of state is already set up in PDA init for 64 bit - */ -#ifdef CONFIG_X86_64 -void __cpuinit cpu_init(void) -{ - int cpu = stack_smp_processor_id(); - struct tss_struct *t = &per_cpu(init_tss, cpu); - struct orig_ist *orig_ist = &per_cpu(orig_ist, cpu); - unsigned long v; - char *estacks = NULL; - struct task_struct *me; - int i; - - /* CPU 0 is initialised in head64.c */ - if (cpu != 0) - pda_init(cpu); - else - estacks = boot_exception_stacks; - - me = current; - - if (cpu_test_and_set(cpu, cpu_initialized)) - panic("CPU#%d already initialized!\n", cpu); - - printk(KERN_INFO "Initializing CPU#%d\n", cpu); - - clear_in_cr4(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE); - - /* - * Initialize the per-CPU GDT with the boot GDT, - * and set up the GDT descriptor: - */ - - switch_to_new_gdt(); - load_idt((const struct desc_ptr *)&idt_descr); - - memset(me->thread.tls_array, 0, GDT_ENTRY_TLS_ENTRIES * 8); - syscall_init(); - - wrmsrl(MSR_FS_BASE, 0); - wrmsrl(MSR_KERNEL_GS_BASE, 0); - barrier(); - - check_efer(); - if (cpu != 0 && x2apic) - enable_x2apic(); - - /* - * set up and load the per-CPU TSS - */ - if (!orig_ist->ist[0]) { - static const unsigned int order[N_EXCEPTION_STACKS] = { - [0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STACK_ORDER, - [DEBUG_STACK - 1] = DEBUG_STACK_ORDER - }; - for (v = 0; v < N_EXCEPTION_STACKS; v++) { - if (cpu) { - estacks = (char *)__get_free_pages(GFP_ATOMIC, order[v]); - if (!estacks) - panic("Cannot allocate exception " - "stack %ld %d\n", v, cpu); - } - estacks += PAGE_SIZE << order[v]; - orig_ist->ist[v] = t->x86_tss.ist[v] = - (unsigned long)estacks; - } - } - - t->x86_tss.io_bitmap_base = offsetof(struct tss_struct, io_bitmap); - /* - * <= is required because the CPU will access up to - * 8 bits beyond the end of the IO permission bitmap. - */ - for (i = 0; i <= IO_BITMAP_LONGS; i++) - t->io_bitmap[i] = ~0UL; - - atomic_inc(&init_mm.mm_count); - me->active_mm = &init_mm; - if (me->mm) - BUG(); - enter_lazy_tlb(&init_mm, me); - - load_sp0(t, ¤t->thread); - set_tss_desc(cpu, t); - load_TR_desc(); - load_LDT(&init_mm.context); - -#ifdef CONFIG_KGDB - /* - * If the kgdb is connected no debug regs should be altered. This - * is only applicable when KGDB and a KGDB I/O module are built - * into the kernel and you are using early debugging with - * kgdbwait. KGDB will control the kernel HW breakpoint registers. - */ - if (kgdb_connected && arch_kgdb_ops.correct_hw_break) - arch_kgdb_ops.correct_hw_break(); - else { -#endif - /* - * Clear all 6 debug registers: - */ - - set_debugreg(0UL, 0); - set_debugreg(0UL, 1); - set_debugreg(0UL, 2); - set_debugreg(0UL, 3); - set_debugreg(0UL, 6); - set_debugreg(0UL, 7); -#ifdef CONFIG_KGDB - /* If the kgdb is connected no debug regs should be altered. */ - } -#endif - - fpu_init(); - - raw_local_save_flags(kernel_eflags); - - if (is_uv_system()) - uv_cpu_init(); -} - -#else - -void __cpuinit cpu_init(void) -{ - int cpu = smp_processor_id(); - struct task_struct *curr = current; - struct tss_struct *t = &per_cpu(init_tss, cpu); - struct thread_struct *thread = &curr->thread; - - if (cpu_test_and_set(cpu, cpu_initialized)) { - printk(KERN_WARNING "CPU#%d already initialized!\n", cpu); - for (;;) local_irq_enable(); - } - - printk(KERN_INFO "Initializing CPU#%d\n", cpu); - - if (cpu_has_vme || cpu_has_tsc || cpu_has_de) - clear_in_cr4(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE); - - load_idt(&idt_descr); - switch_to_new_gdt(); - - /* - * Set up and load the per-CPU TSS and LDT - */ - atomic_inc(&init_mm.mm_count); - curr->active_mm = &init_mm; - if (curr->mm) - BUG(); - enter_lazy_tlb(&init_mm, curr); - - load_sp0(t, thread); - set_tss_desc(cpu, t); - load_TR_desc(); - load_LDT(&init_mm.context); - -#ifdef CONFIG_DOUBLEFAULT - /* Set up doublefault TSS pointer in the GDT */ - __set_tss_desc(cpu, GDT_ENTRY_DOUBLEFAULT_TSS, &doublefault_tss); -#endif - - /* Clear %gs. */ - asm volatile ("mov %0, %%gs" : : "r" (0)); - - /* Clear all 6 debug registers: */ - set_debugreg(0, 0); - set_debugreg(0, 1); - set_debugreg(0, 2); - set_debugreg(0, 3); - set_debugreg(0, 6); - set_debugreg(0, 7); - - /* - * Force FPU initialization: - */ - if (cpu_has_xsave) - current_thread_info()->status = TS_XSAVE; - else - current_thread_info()->status = 0; - clear_used_math(); - mxcsr_feature_mask_init(); - - /* - * Boot processor to setup the FP and extended state context info. - */ - if (!smp_processor_id()) - init_thread_xstate(); - - xsave_init(); -} - -#ifdef CONFIG_HOTPLUG_CPU -void __cpuinit cpu_uninit(void) -{ - int cpu = raw_smp_processor_id(); - cpu_clear(cpu, cpu_initialized); - - /* lazy TLB state */ - per_cpu(cpu_tlbstate, cpu).state = 0; - per_cpu(cpu_tlbstate, cpu).active_mm = &init_mm; -} -#endif - -#endif -- cgit v1.2.3 From bd220a24a9263eaa70d5e6821383085950b5a13c Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Fri, 5 Sep 2008 00:58:28 -0700 Subject: x86: move nonx_setup etc from common.c to init_64.c like 32 bit put it in init_32.c Signed-off-by: Yinghai Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/common.c | 54 -------------------------------------------- 1 file changed, 54 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index eef868c97b8..286c89a991b 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -847,51 +847,6 @@ struct desc_ptr idt_descr = { 256 * 16 - 1, (unsigned long) idt_table }; char boot_cpu_stack[IRQSTACKSIZE] __page_aligned_bss; -unsigned long __supported_pte_mask __read_mostly = ~0UL; -EXPORT_SYMBOL_GPL(__supported_pte_mask); - -static int do_not_nx __cpuinitdata; - -/* noexec=on|off -Control non executable mappings for 64bit processes. - -on Enable(default) -off Disable -*/ -static int __init nonx_setup(char *str) -{ - if (!str) - return -EINVAL; - if (!strncmp(str, "on", 2)) { - __supported_pte_mask |= _PAGE_NX; - do_not_nx = 0; - } else if (!strncmp(str, "off", 3)) { - do_not_nx = 1; - __supported_pte_mask &= ~_PAGE_NX; - } - return 0; -} -early_param("noexec", nonx_setup); - -int force_personality32; - -/* noexec32=on|off -Control non executable heap for 32bit processes. -To control the stack too use noexec=off - -on PROT_READ does not imply PROT_EXEC for 32bit processes (default) -off PROT_READ implies PROT_EXEC -*/ -static int __init nonx32_setup(char *str) -{ - if (!strcmp(str, "on")) - force_personality32 &= ~READ_IMPLIES_EXEC; - else if (!strcmp(str, "off")) - force_personality32 |= READ_IMPLIES_EXEC; - return 1; -} -__setup("noexec32=", nonx32_setup); - void pda_init(int cpu) { struct x8664_pda *pda = cpu_pda(cpu); @@ -957,15 +912,6 @@ void syscall_init(void) X86_EFLAGS_TF|X86_EFLAGS_DF|X86_EFLAGS_IF|X86_EFLAGS_IOPL); } -void __cpuinit check_efer(void) -{ - unsigned long efer; - - rdmsrl(MSR_EFER, efer); - if (!(efer & EFER_NX) || do_not_nx) - __supported_pte_mask &= ~_PAGE_NX; -} - unsigned long kernel_eflags; /* -- cgit v1.2.3 From 551b4545bf9658dc5ae5a1277dcd4d7bf0028b28 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Fri, 5 Sep 2008 17:58:49 +0900 Subject: x86: gart alloc_coherent doesn't need to check NULL device argument asm/dma-mapping.h guarantees that gart alloc_coherent doesn't get NULL device argument. Signed-off-by: FUJITA Tomonori Acked-by: Joerg Roedel Signed-off-by: Ingo Molnar --- arch/x86/kernel/pci-gart_64.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c index 4d0864900b8..0b99d4a06f7 100644 --- a/arch/x86/kernel/pci-gart_64.c +++ b/arch/x86/kernel/pci-gart_64.c @@ -506,9 +506,6 @@ gart_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_addr, align_mask = (1UL << get_order(size)) - 1; - if (!dev) - dev = &x86_dma_fallback_dev; - *dma_addr = dma_map_area(dev, __pa(vaddr), size, DMA_BIDIRECTIONAL, align_mask); flush_gart(); -- cgit v1.2.3 From 913da64b54b2b3bb212a59aba2e6f2b8294ca1fa Mon Sep 17 00:00:00 2001 From: Alex Nixon Date: Wed, 3 Sep 2008 14:30:23 +0100 Subject: x86: build fix for !CONFIG_SMP Move reset_lazy_tlbstate into tlb_32.c, and define noop versions of play_dead() in process_{32,64}.c when !CONFIG_SMP. Signed-off-by: Alex Nixon Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/common.c | 7 ------- arch/x86/kernel/process_32.c | 7 +++++++ arch/x86/kernel/process_64.c | 7 +++++++ arch/x86/kernel/tlb_32.c | 8 ++++++++ 4 files changed, 22 insertions(+), 7 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 18f5551b32d..76d10d5c2fa 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -714,10 +714,3 @@ void __cpuinit cpu_init(void) mxcsr_feature_mask_init(); } -void reset_lazy_tlbstate(void) -{ - int cpu = raw_smp_processor_id(); - - per_cpu(cpu_tlbstate, cpu).state = 0; - per_cpu(cpu_tlbstate, cpu).active_mm = &init_mm; -} diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 633cf06578f..b76b38ff962 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -72,6 +72,13 @@ unsigned long thread_saved_pc(struct task_struct *tsk) return ((unsigned long *)tsk->thread.sp)[3]; } +#ifndef CONFIG_SMP +static inline void play_dead(void) +{ + BUG(); +} +#endif + /* * The idle thread. There's no useful work to be * done, so just try to conserve power and have a diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index aa28ed01cdf..ec27afa43d7 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -85,6 +85,13 @@ void exit_idle(void) __exit_idle(); } +#ifndef CONFIG_SMP +static inline void play_dead(void) +{ + BUG(); +} +#endif + /* * The idle thread. There's no useful work to be * done, so just try to conserve power and have a diff --git a/arch/x86/kernel/tlb_32.c b/arch/x86/kernel/tlb_32.c index fec1ecedc9b..e00534b3353 100644 --- a/arch/x86/kernel/tlb_32.c +++ b/arch/x86/kernel/tlb_32.c @@ -241,3 +241,11 @@ void flush_tlb_all(void) on_each_cpu(do_flush_tlb_all, NULL, 1); } +void reset_lazy_tlbstate(void) +{ + int cpu = raw_smp_processor_id(); + + per_cpu(cpu_tlbstate, cpu).state = 0; + per_cpu(cpu_tlbstate, cpu).active_mm = &init_mm; +} + -- cgit v1.2.3 From e7250b8ae3870f37f660c2f65cafcaba85e3bfd3 Mon Sep 17 00:00:00 2001 From: Andreas Herrmann Date: Fri, 5 Sep 2008 18:33:26 +0200 Subject: x86: hpet: modify IXP400 quirk to enable interrupts The current quirk is incomplete. Some more chipset fiddling has to be done to enable HPET interrupts. This patch aims to do this. From my tests it seems to work faultlessly. But the official statement is that HPET is not supported on SB4X0. Users will still have to use hpet=force to enable it. Use it at your own risk. Signed-off-by: Andreas Herrmann Signed-off-by: Ingo Molnar --- arch/x86/kernel/quirks.c | 41 +++++++++++++++++++++++++++++++++++++++-- 1 file changed, 39 insertions(+), 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/quirks.c b/arch/x86/kernel/quirks.c index d1385881810..f6a11b9b1f9 100644 --- a/arch/x86/kernel/quirks.c +++ b/arch/x86/kernel/quirks.c @@ -354,9 +354,27 @@ static void ati_force_hpet_resume(void) printk(KERN_DEBUG "Force enabled HPET at resume\n"); } +static u32 ati_ixp4x0_rev(struct pci_dev *dev) +{ + u32 d; + u8 b; + + pci_read_config_byte(dev, 0xac, &b); + b &= ~(1<<5); + pci_write_config_byte(dev, 0xac, b); + pci_read_config_dword(dev, 0x70, &d); + d |= 1<<8; + pci_write_config_dword(dev, 0x70, d); + pci_read_config_dword(dev, 0x8, &d); + d &= 0xff; + dev_printk(KERN_DEBUG, &dev->dev, "SB4X0 revision 0x%x\n", d); + return d; +} + static void ati_force_enable_hpet(struct pci_dev *dev) { - u32 uninitialized_var(val); + u32 d, val; + u8 b; if (hpet_address || force_hpet_address) return; @@ -366,14 +384,33 @@ static void ati_force_enable_hpet(struct pci_dev *dev) return; } + d = ati_ixp4x0_rev(dev); + if (d < 0x82) + return; + + /* base address */ pci_write_config_dword(dev, 0x14, 0xfed00000); pci_read_config_dword(dev, 0x14, &val); + + /* enable interrupt */ + outb(0x72, 0xcd6); b = inb(0xcd7); + b |= 0x1; + outb(0x72, 0xcd6); outb(b, 0xcd7); + outb(0x72, 0xcd6); b = inb(0xcd7); + if (!(b & 0x1)) + return; + pci_read_config_dword(dev, 0x64, &d); + d |= (1<<10); + pci_write_config_dword(dev, 0x64, d); + pci_read_config_dword(dev, 0x64, &d); + if (!(d & (1<<10))) + return; + force_hpet_address = val; force_hpet_resume_type = ATI_FORCE_HPET_RESUME; dev_printk(KERN_DEBUG, &dev->dev, "Force enabled HPET at 0x%lx\n", force_hpet_address); cached_dev = dev; - return; } DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_ATI, PCI_DEVICE_ID_ATI_IXP400_SMBUS, ati_force_enable_hpet); -- cgit v1.2.3 From cf169702ba6928cee9d4f4adf3e932b643b8db7a Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Tue, 2 Sep 2008 13:13:40 +0200 Subject: x86, gart: add detection of AMD family 0x11 northbridges This patch adds the detection of the northbridges in the AMD family 0x11 processors. It also fixes the magic numbers there while changing this code. Signed-off-by: Joerg Roedel Signed-off-by: Ingo Molnar --- arch/x86/kernel/k8.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/k8.c b/arch/x86/kernel/k8.c index 7377ccb2133..304d8bad655 100644 --- a/arch/x86/kernel/k8.c +++ b/arch/x86/kernel/k8.c @@ -16,8 +16,9 @@ EXPORT_SYMBOL(num_k8_northbridges); static u32 *flush_words; struct pci_device_id k8_nb_ids[] = { - { PCI_DEVICE(PCI_VENDOR_ID_AMD, 0x1103) }, - { PCI_DEVICE(PCI_VENDOR_ID_AMD, 0x1203) }, + { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_K8_NB_MISC) }, + { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_10H_NB_MISC) }, + { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_11H_NB_MISC) }, {} }; EXPORT_SYMBOL(k8_nb_ids); -- cgit v1.2.3 From e51af6630848406fc97adbd71443818cdcda297b Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Thu, 4 Sep 2008 09:54:37 +0100 Subject: x86: blacklist DMAR on Intel G31/G33 chipsets Some BIOSes (the Intel DG33BU, for example) wrongly claim to have DMAR when they don't. Avoid the resulting crashes when it doesn't work as expected. I'd still be grateful if someone could test it on a DG33BU with the old BIOS though, since I've killed mine. I tested the DMI version, but not this one. Signed-off-by: David Woodhouse Signed-off-by: Ingo Molnar --- arch/x86/kernel/early-quirks.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/early-quirks.c b/arch/x86/kernel/early-quirks.c index 4353cf5e6fa..24bb5faf5ef 100644 --- a/arch/x86/kernel/early-quirks.c +++ b/arch/x86/kernel/early-quirks.c @@ -95,6 +95,20 @@ static void __init nvidia_bugs(int num, int slot, int func) } +#ifdef CONFIG_DMAR +static void __init intel_g33_dmar(int num, int slot, int func) +{ + struct acpi_table_header *dmar_tbl; + acpi_status status; + + status = acpi_get_table(ACPI_SIG_DMAR, 0, &dmar_tbl); + if (ACPI_SUCCESS(status)) { + printk(KERN_INFO "BIOS BUG: DMAR advertised on Intel G31/G33 chipset -- ignoring\n"); + dmar_disabled = 1; + } +} +#endif + #define QFLAG_APPLY_ONCE 0x1 #define QFLAG_APPLIED 0x2 #define QFLAG_DONE (QFLAG_APPLY_ONCE|QFLAG_APPLIED) @@ -114,6 +128,10 @@ static struct chipset early_qrk[] __initdata = { PCI_CLASS_BRIDGE_PCI, PCI_ANY_ID, QFLAG_APPLY_ONCE, via_bugs }, { PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_K8_NB, PCI_CLASS_BRIDGE_HOST, PCI_ANY_ID, 0, fix_hypertransport_config }, +#ifdef CONFIG_DMAR + { PCI_VENDOR_ID_INTEL, 0x29c0, + PCI_CLASS_BRIDGE_HOST, PCI_ANY_ID, 0, intel_g33_dmar }, +#endif {} }; -- cgit v1.2.3 From 1b05d60d60e81c6594da8298107a05b506f01797 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Sat, 6 Sep 2008 01:52:27 -0700 Subject: x86: remove duplicated get_model_name() calling Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/amd.c | 3 --- arch/x86/kernel/cpu/amd_64.c | 3 +-- arch/x86/kernel/cpu/centaur.c | 1 - arch/x86/kernel/cpu/common.c | 9 +++------ arch/x86/kernel/cpu/cpu.h | 1 - arch/x86/kernel/cpu/cyrix.c | 1 - arch/x86/kernel/cpu/transmeta.c | 1 - 7 files changed, 4 insertions(+), 15 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index d64ea6097ca..e0ba2c7c5a1 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -42,7 +42,6 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c) { u32 l, h; int mbytes = num_physpages >> (20-PAGE_SHIFT); - int r; #ifdef CONFIG_SMP unsigned long long value; @@ -75,8 +74,6 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c) */ clear_cpu_cap(c, 0*32+31); - r = get_model_name(c); - switch (c->x86) { case 4: /* diff --git a/arch/x86/kernel/cpu/amd_64.c b/arch/x86/kernel/cpu/amd_64.c index d1c721c0c49..c5fbf7477ea 100644 --- a/arch/x86/kernel/cpu/amd_64.c +++ b/arch/x86/kernel/cpu/amd_64.c @@ -157,8 +157,7 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c) if (c->x86 >= 6) set_cpu_cap(c, X86_FEATURE_FXSAVE_LEAK); - level = get_model_name(c); - if (!level) { + if (!c->x86_model_id[0]) { switch (c->x86) { case 0xf: /* Should distinguish Models here, but this is only diff --git a/arch/x86/kernel/cpu/centaur.c b/arch/x86/kernel/cpu/centaur.c index e5f6d89521b..89bfdd9cacc 100644 --- a/arch/x86/kernel/cpu/centaur.c +++ b/arch/x86/kernel/cpu/centaur.c @@ -289,7 +289,6 @@ static void __cpuinit init_c3(struct cpuinfo_x86 *c) if (c->x86_model >= 6 && c->x86_model < 9) set_cpu_cap(c, X86_FEATURE_3DNOW); - get_model_name(c); display_cacheinfo(c); } diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 286c89a991b..a8b9b724242 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -252,13 +252,13 @@ static struct cpu_dev __cpuinitdata default_cpu = { .c_x86_vendor = X86_VENDOR_UNKNOWN, }; -int __cpuinit get_model_name(struct cpuinfo_x86 *c) +static void __cpuinit get_model_name(struct cpuinfo_x86 *c) { unsigned int *v; char *p, *q; if (c->extended_cpuid_level < 0x80000004) - return 0; + return; v = (unsigned int *) c->x86_model_id; cpuid(0x80000002, &v[0], &v[1], &v[2], &v[3]); @@ -277,8 +277,6 @@ int __cpuinit get_model_name(struct cpuinfo_x86 *c) while (q <= &c->x86_model_id[48]) *q++ = '\0'; /* Zero-pad the rest */ } - - return 1; } void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c) @@ -610,8 +608,7 @@ static void __cpuinit generic_identify(struct cpuinfo_x86 *c) #endif } - if (c->extended_cpuid_level >= 0x80000004) - get_model_name(c); /* Default name */ + get_model_name(c); /* Default name */ init_scattered_cpuid_features(c); detect_nopl(c); diff --git a/arch/x86/kernel/cpu/cpu.h b/arch/x86/kernel/cpu/cpu.h index 3cc9d92afd8..de4094a3921 100644 --- a/arch/x86/kernel/cpu/cpu.h +++ b/arch/x86/kernel/cpu/cpu.h @@ -31,7 +31,6 @@ struct cpu_dev { extern struct cpu_dev *__x86_cpu_dev_start[], *__x86_cpu_dev_end[]; -extern int get_model_name(struct cpuinfo_x86 *c); extern void display_cacheinfo(struct cpuinfo_x86 *c); #endif diff --git a/arch/x86/kernel/cpu/cyrix.c b/arch/x86/kernel/cpu/cyrix.c index 3f8c7283d81..ffd0f5ed071 100644 --- a/arch/x86/kernel/cpu/cyrix.c +++ b/arch/x86/kernel/cpu/cyrix.c @@ -301,7 +301,6 @@ static void __cpuinit init_cyrix(struct cpuinfo_x86 *c) */ if ((0x30 <= dir1 && dir1 <= 0x6f) || (0x80 <= dir1 && dir1 <= 0x8f)) geode_configure(); - get_model_name(c); /* get CPU marketing name */ return; } else { /* MediaGX */ Cx86_cb[2] = (dir0_lsn & 1) ? '3' : '4'; diff --git a/arch/x86/kernel/cpu/transmeta.c b/arch/x86/kernel/cpu/transmeta.c index 7c46e6ecedc..738e03244f9 100644 --- a/arch/x86/kernel/cpu/transmeta.c +++ b/arch/x86/kernel/cpu/transmeta.c @@ -12,7 +12,6 @@ static void __cpuinit init_transmeta(struct cpuinfo_x86 *c) unsigned int cpu_rev, cpu_freq = 0, cpu_flags, new_cpu_rev; char cpu_info[65]; - get_model_name(c); /* Same as AMD/Cyrix */ display_cacheinfo(c); /* Print CMS and CPU revision */ -- cgit v1.2.3 From e3224234717b4228c235cee401af89212f17a3a4 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Sat, 6 Sep 2008 01:52:28 -0700 Subject: x86, cpu init: call early_init_xxx in init_xxx so we: 1. could set some cap to ap 2. restore some cap after memset in identify_cpu for boot cpu esp for CONSTANT_TSC this matters, as: before this patch: flags : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ht syscall nx mmxext fxsr_opt pdpe1gb rdtscp lm 3dnowext 3dnow rep_good nopl pni monitor cx16 lahf_lm cmp_legacy svm extapic cr8_legacy abm sse4a misalignsse 3dnowprefetch osvw ibs after this patch: flags : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ht syscall nx mmxext fxsr_opt pdpe1gb rdtscp lm 3dnowext 3dnow constant_tsc rep_good nopl pni monitor cx16 lahf_lm cmp_legacy svm extapic cr8_legacy abm sse4a misalignsse 3dnowprefetch osvw ibs so constant_tsc is back... Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/amd.c | 7 ++----- arch/x86/kernel/cpu/amd_64.c | 2 ++ arch/x86/kernel/cpu/centaur_64.c | 2 ++ arch/x86/kernel/cpu/common.c | 7 ++++--- arch/x86/kernel/cpu/intel_64.c | 2 ++ 5 files changed, 12 insertions(+), 8 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index e0ba2c7c5a1..c3175da7bc6 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -26,11 +26,8 @@ __asm__(".align 4\nvide: ret"); static void __cpuinit early_init_amd(struct cpuinfo_x86 *c) { - if (cpuid_eax(0x80000000) >= 0x80000007) { - c->x86_power = cpuid_edx(0x80000007); - if (c->x86_power & (1<<8)) - set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); - } + if (c->x86_power & (1<<8)) + set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); /* Set MTRR capability flag if appropriate */ if (c->x86_model == 13 || c->x86_model == 9 || diff --git a/arch/x86/kernel/cpu/amd_64.c b/arch/x86/kernel/cpu/amd_64.c index c5fbf7477ea..8c2d07f06f1 100644 --- a/arch/x86/kernel/cpu/amd_64.c +++ b/arch/x86/kernel/cpu/amd_64.c @@ -140,6 +140,8 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c) } #endif + early_init_amd(c); + /* Bit 31 in normal CPUID used for nonstandard 3DNow ID; 3DNow is IDd by bit 31 in extended CPUID (1*32+31) anyway */ clear_cpu_cap(c, 0*32+31); diff --git a/arch/x86/kernel/cpu/centaur_64.c b/arch/x86/kernel/cpu/centaur_64.c index 49cfc6d2f2f..0e5cf17a3c8 100644 --- a/arch/x86/kernel/cpu/centaur_64.c +++ b/arch/x86/kernel/cpu/centaur_64.c @@ -16,6 +16,8 @@ static void __cpuinit early_init_centaur(struct cpuinfo_x86 *c) static void __cpuinit init_centaur(struct cpuinfo_x86 *c) { + early_init_centaur(c); + if (c->x86 == 0x6 && c->x86_model >= 0xf) { c->x86_cache_alignment = c->x86_clflush_size * 2; set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index a8b9b724242..e8045c4ef1c 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -473,9 +473,6 @@ static void __cpuinit get_cpu_cap(struct cpuinfo_x86 *c) c->x86_capability[2] = cpuid_edx(0x80860001); } - if (c->extended_cpuid_level >= 0x80000007) - c->x86_power = cpuid_edx(0x80000007); - if (c->extended_cpuid_level >= 0x80000008) { u32 eax = cpuid_eax(0x80000008); @@ -483,6 +480,10 @@ static void __cpuinit get_cpu_cap(struct cpuinfo_x86 *c) c->x86_phys_bits = eax & 0xff; } #endif + + if (c->extended_cpuid_level >= 0x80000007) + c->x86_power = cpuid_edx(0x80000007); + } /* * Do minimum CPU detection early. diff --git a/arch/x86/kernel/cpu/intel_64.c b/arch/x86/kernel/cpu/intel_64.c index 0c0a58dfe09..14a2cd98d48 100644 --- a/arch/x86/kernel/cpu/intel_64.c +++ b/arch/x86/kernel/cpu/intel_64.c @@ -54,6 +54,8 @@ static void __cpuinit srat_detect_node(void) static void __cpuinit init_intel(struct cpuinfo_x86 *c) { + early_init_intel(c); + init_intel_cacheinfo(c); if (c->cpuid_level > 9) { unsigned eax = cpuid_eax(10); -- cgit v1.2.3 From bb57925f5057837c248b57a51181fd6b138a32f3 Mon Sep 17 00:00:00 2001 From: Hiroshi Shimamoto Date: Fri, 5 Sep 2008 16:26:55 -0700 Subject: x86_32: signal: use syscall_get_nr and error Use asm/syscall.h interfaces that do the same things. Signed-off-by: Hiroshi Shimamoto Signed-off-by: Ingo Molnar --- arch/x86/kernel/signal_32.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/signal_32.c b/arch/x86/kernel/signal_32.c index b21070ea33a..3e4a688bb84 100644 --- a/arch/x86/kernel/signal_32.c +++ b/arch/x86/kernel/signal_32.c @@ -27,6 +27,7 @@ #include #include #include +#include #include #include "sigframe.h" @@ -507,9 +508,9 @@ handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka, int ret; /* Are we from a system call? */ - if ((long)regs->orig_ax >= 0) { + if (syscall_get_nr(current, regs) >= 0) { /* If so, check system call restarting.. */ - switch (regs->ax) { + switch (syscall_get_error(current, regs)) { case -ERESTART_RESTARTBLOCK: case -ERESTARTNOHAND: regs->ax = -EINTR; @@ -623,9 +624,9 @@ static void do_signal(struct pt_regs *regs) } /* Did we come from a system call? */ - if ((long)regs->orig_ax >= 0) { + if (syscall_get_nr(current, regs) >= 0) { /* Restart the system call - no handlers present */ - switch (regs->ax) { + switch (syscall_get_error(current, regs)) { case -ERESTARTNOHAND: case -ERESTARTSYS: case -ERESTARTNOINTR: -- cgit v1.2.3 From 72fa50f4ef9014f4212945b766af84ea94308903 Mon Sep 17 00:00:00 2001 From: Hiroshi Shimamoto Date: Fri, 5 Sep 2008 16:27:11 -0700 Subject: x86_32: signal: introduce signal_fault() implement signal_fault() for 32bit. Signed-off-by: Hiroshi Shimamoto Signed-off-by: Ingo Molnar --- arch/x86/kernel/signal_32.c | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/signal_32.c b/arch/x86/kernel/signal_32.c index 3e4a688bb84..76d05d70384 100644 --- a/arch/x86/kernel/signal_32.c +++ b/arch/x86/kernel/signal_32.c @@ -243,7 +243,7 @@ asmlinkage int sys_rt_sigreturn(unsigned long __unused) return ax; badframe: - force_sig(SIGSEGV, current); + signal_fault(regs, frame, "rt sigreturn"); return 0; } @@ -669,3 +669,19 @@ do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags) clear_thread_flag(TIF_IRET); } + +void signal_fault(struct pt_regs *regs, void __user *frame, char *where) +{ + struct task_struct *me = current; + + if (show_unhandled_signals && printk_ratelimit()) { + printk(KERN_INFO + "%s[%d] bad frame in %s frame:%p ip:%lx sp:%lx orax:%lx", + me->comm, me->pid, where, frame, + regs->ip, regs->sp, regs->orig_ax); + print_vma_addr(" in ", regs->ip); + printk(KERN_CONT "\n"); + } + + force_sig(SIGSEGV, me); +} -- cgit v1.2.3 From 8fcd8e20f388d787f6abf701b11037b122029d5b Mon Sep 17 00:00:00 2001 From: Hiroshi Shimamoto Date: Fri, 5 Sep 2008 16:27:39 -0700 Subject: x86: signal: make NR_restart_syscall make NR_restart_syscall macro for cosmetic unification of handle_signal(). Signed-off-by: Hiroshi Shimamoto Signed-off-by: Ingo Molnar --- arch/x86/kernel/signal_32.c | 3 ++- arch/x86/kernel/signal_64.c | 6 +++--- 2 files changed, 5 insertions(+), 4 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/signal_32.c b/arch/x86/kernel/signal_32.c index 76d05d70384..bd9b65031a9 100644 --- a/arch/x86/kernel/signal_32.c +++ b/arch/x86/kernel/signal_32.c @@ -572,6 +572,7 @@ handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka, return 0; } +#define NR_restart_syscall __NR_restart_syscall /* * Note that 'init' is a special process: it doesn't get signals it doesn't * want to handle. Thus you cannot kill init even with a SIGKILL even by @@ -635,7 +636,7 @@ static void do_signal(struct pt_regs *regs) break; case -ERESTART_RESTARTBLOCK: - regs->ax = __NR_restart_syscall; + regs->ax = NR_restart_syscall; regs->ip -= 2; break; } diff --git a/arch/x86/kernel/signal_64.c b/arch/x86/kernel/signal_64.c index 823a55bf8c3..19e2b914320 100644 --- a/arch/x86/kernel/signal_64.c +++ b/arch/x86/kernel/signal_64.c @@ -362,6 +362,8 @@ handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka, return ret; } +#define NR_restart_syscall \ + test_thread_flag(TIF_IA32) ? __NR_ia32_restart_syscall : __NR_restart_syscall /* * Note that 'init' is a special process: it doesn't get signals it doesn't * want to handle. Thus you cannot kill init even with a SIGKILL even by @@ -423,9 +425,7 @@ static void do_signal(struct pt_regs *regs) regs->ip -= 2; break; case -ERESTART_RESTARTBLOCK: - regs->ax = test_thread_flag(TIF_IA32) ? - __NR_ia32_restart_syscall : - __NR_restart_syscall; + regs->ax = NR_restart_syscall; regs->ip -= 2; break; } -- cgit v1.2.3 From 1d13024e624d0e2e47f117b383917249a41ef5ce Mon Sep 17 00:00:00 2001 From: Hiroshi Shimamoto Date: Fri, 5 Sep 2008 16:28:06 -0700 Subject: x86: signal: split out frame setups Make setup_rt_frame() and split out frame setups from handle_signal(). This is for cosmetic unification of handle_signal(). Signed-off-by: Hiroshi Shimamoto Signed-off-by: Ingo Molnar --- arch/x86/kernel/signal_32.c | 29 ++++++++++++++++++++--------- arch/x86/kernel/signal_64.c | 30 ++++++++++++++++++++---------- 2 files changed, 40 insertions(+), 19 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/signal_32.c b/arch/x86/kernel/signal_32.c index bd9b65031a9..48982f7ce88 100644 --- a/arch/x86/kernel/signal_32.c +++ b/arch/x86/kernel/signal_32.c @@ -338,8 +338,8 @@ get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size, } static int -setup_frame(int sig, struct k_sigaction *ka, sigset_t *set, - struct pt_regs *regs) +__setup_frame(int sig, struct k_sigaction *ka, sigset_t *set, + struct pt_regs *regs) { struct sigframe __user *frame; void __user *restorer; @@ -416,8 +416,8 @@ give_sigsegv: return -EFAULT; } -static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, - sigset_t *set, struct pt_regs *regs) +static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, + sigset_t *set, struct pt_regs *regs) { struct rt_sigframe __user *frame; void __user *restorer; @@ -501,6 +501,21 @@ give_sigsegv: /* * OK, we're invoking a handler: */ +static int +setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, + sigset_t *set, struct pt_regs *regs) +{ + int ret; + + /* Set up the stack frame */ + if (ka->sa.sa_flags & SA_SIGINFO) + ret = __setup_rt_frame(sig, ka, info, set, regs); + else + ret = __setup_frame(sig, ka, set, regs); + + return ret; +} + static int handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka, sigset_t *oldset, struct pt_regs *regs) @@ -537,11 +552,7 @@ handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka, likely(test_and_clear_thread_flag(TIF_FORCED_TF))) regs->flags &= ~X86_EFLAGS_TF; - /* Set up the stack frame */ - if (ka->sa.sa_flags & SA_SIGINFO) - ret = setup_rt_frame(sig, ka, info, oldset, regs); - else - ret = setup_frame(sig, ka, oldset, regs); + ret = setup_rt_frame(sig, ka, info, oldset, regs); if (ret) return ret; diff --git a/arch/x86/kernel/signal_64.c b/arch/x86/kernel/signal_64.c index 19e2b914320..8fbdd23d5cc 100644 --- a/arch/x86/kernel/signal_64.c +++ b/arch/x86/kernel/signal_64.c @@ -195,8 +195,8 @@ get_stack(struct k_sigaction *ka, struct pt_regs *regs, unsigned long size) return (void __user *)round_down(sp - size, 64); } -static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, - sigset_t *set, struct pt_regs *regs) +static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, + sigset_t *set, struct pt_regs *regs) { struct rt_sigframe __user *frame; void __user *fp = NULL; @@ -280,6 +280,24 @@ give_sigsegv: /* * OK, we're invoking a handler */ +static int +setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, + sigset_t *set, struct pt_regs *regs) +{ + int ret; + +#ifdef CONFIG_IA32_EMULATION + if (test_thread_flag(TIF_IA32)) { + if (ka->sa.sa_flags & SA_SIGINFO) + ret = ia32_setup_rt_frame(sig, ka, info, set, regs); + else + ret = ia32_setup_frame(sig, ka, set, regs); + } else +#endif + ret = __setup_rt_frame(sig, ka, info, set, regs); + + return ret; +} static int handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka, @@ -317,14 +335,6 @@ handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka, likely(test_and_clear_thread_flag(TIF_FORCED_TF))) regs->flags &= ~X86_EFLAGS_TF; -#ifdef CONFIG_IA32_EMULATION - if (test_thread_flag(TIF_IA32)) { - if (ka->sa.sa_flags & SA_SIGINFO) - ret = ia32_setup_rt_frame(sig, ka, info, oldset, regs); - else - ret = ia32_setup_frame(sig, ka, oldset, regs); - } else -#endif ret = setup_rt_frame(sig, ka, info, oldset, regs); if (ret == 0) { -- cgit v1.2.3 From 13ad7725e9955ac68fff670a039da99ab33e86a0 Mon Sep 17 00:00:00 2001 From: Hiroshi Shimamoto Date: Fri, 5 Sep 2008 16:28:38 -0700 Subject: x86_32: signal: move signal number conversion to upper layer Bring signal number conversion in __setup_frame() and __setup_rt_frame() up into the common part setup_frame(). Signed-off-by: Hiroshi Shimamoto Signed-off-by: Ingo Molnar --- arch/x86/kernel/signal_32.c | 31 ++++++++++++------------------- 1 file changed, 12 insertions(+), 19 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/signal_32.c b/arch/x86/kernel/signal_32.c index 48982f7ce88..b668efc18ea 100644 --- a/arch/x86/kernel/signal_32.c +++ b/arch/x86/kernel/signal_32.c @@ -344,7 +344,6 @@ __setup_frame(int sig, struct k_sigaction *ka, sigset_t *set, struct sigframe __user *frame; void __user *restorer; int err = 0; - int usig; void __user *fpstate = NULL; frame = get_sigframe(ka, regs, sizeof(*frame), &fpstate); @@ -352,13 +351,7 @@ __setup_frame(int sig, struct k_sigaction *ka, sigset_t *set, if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame))) goto give_sigsegv; - usig = current_thread_info()->exec_domain - && current_thread_info()->exec_domain->signal_invmap - && sig < 32 - ? current_thread_info()->exec_domain->signal_invmap[sig] - : sig; - - err = __put_user(usig, &frame->sig); + err = __put_user(sig, &frame->sig); if (err) goto give_sigsegv; @@ -422,7 +415,6 @@ static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, struct rt_sigframe __user *frame; void __user *restorer; int err = 0; - int usig; void __user *fpstate = NULL; frame = get_sigframe(ka, regs, sizeof(*frame), &fpstate); @@ -430,13 +422,7 @@ static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame))) goto give_sigsegv; - usig = current_thread_info()->exec_domain - && current_thread_info()->exec_domain->signal_invmap - && sig < 32 - ? current_thread_info()->exec_domain->signal_invmap[sig] - : sig; - - err |= __put_user(usig, &frame->sig); + err |= __put_user(sig, &frame->sig); err |= __put_user(&frame->info, &frame->pinfo); err |= __put_user(&frame->uc, &frame->puc); err |= copy_siginfo_to_user(&frame->info, info); @@ -482,7 +468,7 @@ static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, /* Set up registers for signal handler */ regs->sp = (unsigned long)frame; regs->ip = (unsigned long)ka->sa.sa_handler; - regs->ax = (unsigned long)usig; + regs->ax = (unsigned long)sig; regs->dx = (unsigned long)&frame->info; regs->cx = (unsigned long)&frame->uc; @@ -506,12 +492,19 @@ setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, sigset_t *set, struct pt_regs *regs) { int ret; + int usig; + + usig = current_thread_info()->exec_domain + && current_thread_info()->exec_domain->signal_invmap + && sig < 32 + ? current_thread_info()->exec_domain->signal_invmap[sig] + : sig; /* Set up the stack frame */ if (ka->sa.sa_flags & SA_SIGINFO) - ret = __setup_rt_frame(sig, ka, info, set, regs); + ret = __setup_rt_frame(usig, ka, info, set, regs); else - ret = __setup_frame(sig, ka, set, regs); + ret = __setup_frame(usig, ka, set, regs); return ret; } -- cgit v1.2.3 From a19aac8548d85cf15d744335b8e82201da760d80 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Sat, 30 Aug 2008 06:03:34 +0400 Subject: x86: make setup_xstate_init() __init WARNING: vmlinux.o(.text+0x22453): Section mismatch in reference from the function setup_xstate_init() to the function .init.text:__alloc_bootmem() The function setup_xstate_init() references the function __init __alloc_bootmem(). This is often because setup_xstate_init lacks a __init annotation or the annotation of __alloc_bootmem is wrong. Signed-off-by: Alexey Dobriyan Signed-off-by: Ingo Molnar --- arch/x86/kernel/xsave.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/xsave.c b/arch/x86/kernel/xsave.c index 07713d64deb..ed5274a5bb0 100644 --- a/arch/x86/kernel/xsave.c +++ b/arch/x86/kernel/xsave.c @@ -272,7 +272,7 @@ void __cpuinit xsave_init(void) /* * setup the xstate image representing the init state */ -void setup_xstate_init(void) +static void __init setup_xstate_init(void) { init_xstate_buf = alloc_bootmem(xstate_size); init_xstate_buf->i387.mxcsr = MXCSR_DEFAULT; -- cgit v1.2.3 From 30cec97967beb5aa08e893e8ff69623d5fecd9b7 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Fri, 29 Aug 2008 12:49:55 +0100 Subject: x86: init annotations in early_printk() setup Signed-off-by: Jan Beulich Signed-off-by: Ingo Molnar --- arch/x86/kernel/early_printk.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/early_printk.c b/arch/x86/kernel/early_printk.c index 825e9136b02..02fc5b40c14 100644 --- a/arch/x86/kernel/early_printk.c +++ b/arch/x86/kernel/early_printk.c @@ -120,7 +120,7 @@ static __init void early_serial_init(char *s) if (!strncmp(s, "0x", 2)) { early_serial_base = simple_strtoul(s, &e, 16); } else { - static int bases[] = { 0x3f8, 0x2f8 }; + static const int __initconst bases[] = { 0x3f8, 0x2f8 }; if (!strncmp(s, "ttyS", 4)) s += 4; @@ -920,7 +920,7 @@ static struct console simnow_console = { /* Direct interface for emergencies */ static struct console *early_console = &early_vga_console; -static int early_console_initialized; +static int __initdata early_console_initialized; asmlinkage void early_printk(const char *fmt, ...) { -- cgit v1.2.3 From 2d9cd6c27f00958a31622b8e9909d5e35f069b28 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Fri, 29 Aug 2008 13:15:04 +0100 Subject: x86-64: add two __cpuinit annotations Signed-off-by: Jan Beulich Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/common.c | 2 +- arch/x86/kernel/cpu/mcheck/mce_64.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index e8045c4ef1c..5cde4b18448 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -845,7 +845,7 @@ struct desc_ptr idt_descr = { 256 * 16 - 1, (unsigned long) idt_table }; char boot_cpu_stack[IRQSTACKSIZE] __page_aligned_bss; -void pda_init(int cpu) +void __cpuinit pda_init(int cpu) { struct x8664_pda *pda = cpu_pda(cpu); diff --git a/arch/x86/kernel/cpu/mcheck/mce_64.c b/arch/x86/kernel/cpu/mcheck/mce_64.c index 726a5fcdf34..4b031a4ac85 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_64.c +++ b/arch/x86/kernel/cpu/mcheck/mce_64.c @@ -860,7 +860,7 @@ error: return err; } -static void mce_remove_device(unsigned int cpu) +static __cpuinit void mce_remove_device(unsigned int cpu) { int i; -- cgit v1.2.3 From 5df45515512436a808d3476a90e83f2efb022422 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sat, 6 Sep 2008 23:55:40 +0200 Subject: x86, tsc calibration: fix my brown paperbag day ... Signed-off-by: Ingo Molnar --- arch/x86/kernel/tsc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 6dab90f6851..4847a928050 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -319,7 +319,7 @@ static unsigned long quick_pit_calibrate(void) /* * Make sure we can rely on the second TSC timestamp: */ - if (!pit_expect_msb(--expect)) + if (!pit_expect_msb(expect)) goto failed; /* -- cgit v1.2.3 From 5394f80f92642c61fc2a95385be85f2fdcfb5adb Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Sun, 7 Sep 2008 01:51:32 -0700 Subject: x86: check for and defend against BIOS memory corruption Some BIOSes have been observed to corrupt memory in the low 64k. This change: - Reserves all memory which does not have to be in that area, to prevent it from being used as general memory by the kernel. Things like the SMP trampoline are still in the memory, however. - Clears the reserved memory so we can observe changes to it. - Adds a function check_for_bios_corruption() which checks and reports on memory becoming unexpectedly non-zero. Currently it's called in the x86 fault handler, and the powermanagement debug output. Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Ingo Molnar --- arch/x86/kernel/setup.c | 87 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 87 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 362d4e7f2d3..ee89ebc5aab 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -578,6 +578,89 @@ static struct x86_quirks default_x86_quirks __initdata; struct x86_quirks *x86_quirks __initdata = &default_x86_quirks; +/* + * Some BIOSes seem to corrupt the low 64k of memory during events + * like suspend/resume and unplugging an HDMI cable. Reserve all + * remaining free memory in that area and fill it with a distinct + * pattern. + */ +#ifdef CONFIG_X86_CHECK_BIOS_CORRUPTION +#define MAX_SCAN_AREAS 8 +static struct e820entry scan_areas[MAX_SCAN_AREAS]; +static int num_scan_areas; + +static void __init setup_bios_corruption_check(void) +{ + u64 addr = PAGE_SIZE; /* assume first page is reserved anyway */ + + while(addr < 0x10000 && num_scan_areas < MAX_SCAN_AREAS) { + u64 size; + addr = find_e820_area_size(addr, &size, PAGE_SIZE); + + if (addr == 0) + break; + + if ((addr + size) > 0x10000) + size = 0x10000 - addr; + + if (size == 0) + break; + + e820_update_range(addr, size, E820_RAM, E820_RESERVED); + scan_areas[num_scan_areas].addr = addr; + scan_areas[num_scan_areas].size = size; + num_scan_areas++; + + /* Assume we've already mapped this early memory */ + memset(__va(addr), 0, size); + + addr += size; + } + + printk(KERN_INFO "scanning %d areas for BIOS corruption\n", + num_scan_areas); + update_e820(); +} + +static int __read_mostly bios_corruption_check = 1; + +void check_for_bios_corruption(void) +{ + int i; + int corruption = 0; + + if (!bios_corruption_check) + return; + + for(i = 0; i < num_scan_areas; i++) { + unsigned long *addr = __va(scan_areas[i].addr); + unsigned long size = scan_areas[i].size; + + for(; size; addr++, size -= sizeof(unsigned long)) { + if (!*addr) + continue; + printk(KERN_ERR "Corrupted low memory at %p (%lx phys) = %08lx\n", + addr, __pa(addr), *addr); + corruption = 1; + *addr = 0; + } + } + + if (corruption) + dump_stack(); +} + +static int set_bios_corruption_check(char *arg) +{ + char *end; + + bios_corruption_check = simple_strtol(arg, &end, 10); + + return (*end == 0) ? 0 : -EINVAL; +} +early_param("bios_corruption_check", set_bios_corruption_check); +#endif + /* * Determine if we were loaded by an EFI loader. If so, then we have also been * passed the efi memmap, systab, etc., so we should use these data structures @@ -750,6 +833,10 @@ void __init setup_arch(char **cmdline_p) high_memory = (void *)__va(max_pfn * PAGE_SIZE - 1) + 1; #endif +#ifdef CONFIG_X86_CHECK_BIOS_CORRUPTION + setup_bios_corruption_check(); +#endif + /* max_pfn_mapped is updated here */ max_low_pfn_mapped = init_memory_mapping(0, max_low_pfn< Date: Sun, 7 Sep 2008 01:51:33 -0700 Subject: x86: add periodic corruption check Perodically check for corruption in low phusical memory. Don't bother checking at fault time, since it won't show anything useful. Signed-off-by: Hugh Dickins Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Ingo Molnar --- arch/x86/kernel/setup.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index ee89ebc5aab..c239b378097 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -623,6 +623,7 @@ static void __init setup_bios_corruption_check(void) } static int __read_mostly bios_corruption_check = 1; +static struct timer_list periodic_check_timer; void check_for_bios_corruption(void) { @@ -650,6 +651,22 @@ void check_for_bios_corruption(void) dump_stack(); } +static void periodic_check_for_corruption(unsigned long data) +{ + check_for_bios_corruption(); + mod_timer(&periodic_check_timer, jiffies + 60*HZ); +} + +void start_periodic_check_for_corruption(void) +{ + if (!bios_corruption_check) + return; + + init_timer(&periodic_check_timer); + periodic_check_timer.function = &periodic_check_for_corruption; + periodic_check_for_corruption(0); +} + static int set_bios_corruption_check(char *arg) { char *end; -- cgit v1.2.3 From 9f077871ce7237e2387fc76542b3b4033cb05e49 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Sun, 7 Sep 2008 01:51:34 -0700 Subject: x86: clean up memory corruption check and add more kernel parameters The corruption check is enabled in Kconfig by default, but disabled at runtime. This patch adds several kernel parameters to control the corruption check's behaviour; these are documented in kernel-parameters.txt. Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Ingo Molnar --- arch/x86/kernel/setup.c | 80 ++++++++++++++++++++++++++++++++++++------------- 1 file changed, 60 insertions(+), 20 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index c239b378097..27ae9128885 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -586,22 +586,71 @@ struct x86_quirks *x86_quirks __initdata = &default_x86_quirks; */ #ifdef CONFIG_X86_CHECK_BIOS_CORRUPTION #define MAX_SCAN_AREAS 8 + +static int __read_mostly memory_corruption_check = 0; +static unsigned __read_mostly corruption_check_size = 64*1024; +static unsigned __read_mostly corruption_check_period = 60; /* seconds */ + static struct e820entry scan_areas[MAX_SCAN_AREAS]; static int num_scan_areas; + +static int set_corruption_check(char *arg) +{ + char *end; + + memory_corruption_check = simple_strtol(arg, &end, 10); + + return (*end == 0) ? 0 : -EINVAL; +} +early_param("memory_corruption_check", set_corruption_check); + +static int set_corruption_check_period(char *arg) +{ + char *end; + + corruption_check_period = simple_strtoul(arg, &end, 10); + + return (*end == 0) ? 0 : -EINVAL; +} +early_param("memory_corruption_check_period", set_corruption_check_period); + +static int set_corruption_check_size(char *arg) +{ + char *end; + unsigned size; + + size = memparse(arg, &end); + + if (*end == '\0') + corruption_check_size = size; + + return (size == corruption_check_size) ? 0 : -EINVAL; +} +early_param("memory_corruption_check_size", set_corruption_check_size); + + static void __init setup_bios_corruption_check(void) { u64 addr = PAGE_SIZE; /* assume first page is reserved anyway */ - while(addr < 0x10000 && num_scan_areas < MAX_SCAN_AREAS) { + if (corruption_check_size == 0) + memory_corruption_check = 0; + + if (!memory_corruption_check) + return; + + corruption_check_size = round_up(corruption_check_size, PAGE_SIZE); + + while(addr < corruption_check_size && num_scan_areas < MAX_SCAN_AREAS) { u64 size; addr = find_e820_area_size(addr, &size, PAGE_SIZE); if (addr == 0) break; - if ((addr + size) > 0x10000) - size = 0x10000 - addr; + if ((addr + size) > corruption_check_size) + size = corruption_check_size - addr; if (size == 0) break; @@ -617,12 +666,11 @@ static void __init setup_bios_corruption_check(void) addr += size; } - printk(KERN_INFO "scanning %d areas for BIOS corruption\n", + printk(KERN_INFO "Scanning %d areas for low memory corruption\n", num_scan_areas); update_e820(); } -static int __read_mostly bios_corruption_check = 1; static struct timer_list periodic_check_timer; void check_for_bios_corruption(void) @@ -630,7 +678,7 @@ void check_for_bios_corruption(void) int i; int corruption = 0; - if (!bios_corruption_check) + if (!memory_corruption_check) return; for(i = 0; i < num_scan_areas; i++) { @@ -647,35 +695,27 @@ void check_for_bios_corruption(void) } } - if (corruption) - dump_stack(); + WARN(corruption, KERN_ERR "Memory corruption detected in low memory\n"); } static void periodic_check_for_corruption(unsigned long data) { check_for_bios_corruption(); - mod_timer(&periodic_check_timer, jiffies + 60*HZ); + mod_timer(&periodic_check_timer, jiffies + corruption_check_period*HZ); } void start_periodic_check_for_corruption(void) { - if (!bios_corruption_check) + if (!memory_corruption_check || corruption_check_period == 0) return; + printk(KERN_INFO "Scanning for low memory corruption every %d seconds\n", + corruption_check_period); + init_timer(&periodic_check_timer); periodic_check_timer.function = &periodic_check_for_corruption; periodic_check_for_corruption(0); } - -static int set_bios_corruption_check(char *arg) -{ - char *end; - - bios_corruption_check = simple_strtol(arg, &end, 10); - - return (*end == 0) ? 0 : -EINVAL; -} -early_param("bios_corruption_check", set_bios_corruption_check); #endif /* -- cgit v1.2.3 From c885df50f571faf9fd9f395361cfff1b3a16e06e Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Sun, 7 Sep 2008 02:37:32 -0700 Subject: x86: default corruption check to off, but put parameter default in Kconfig Default the low memory corruption check to off, but make the default setting of the memory_corruption_check kernel parameter a config parameter. Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Ingo Molnar --- arch/x86/kernel/setup.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 27ae9128885..ec7e56c1b98 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -587,7 +587,8 @@ struct x86_quirks *x86_quirks __initdata = &default_x86_quirks; #ifdef CONFIG_X86_CHECK_BIOS_CORRUPTION #define MAX_SCAN_AREAS 8 -static int __read_mostly memory_corruption_check = 0; +static int __read_mostly memory_corruption_check = -1; + static unsigned __read_mostly corruption_check_size = 64*1024; static unsigned __read_mostly corruption_check_period = 60; /* seconds */ @@ -634,6 +635,16 @@ static void __init setup_bios_corruption_check(void) { u64 addr = PAGE_SIZE; /* assume first page is reserved anyway */ + if (memory_corruption_check == -1) { + memory_corruption_check = +#ifdef CONFIG_X86_BOOTPARAM_MEMORY_CORRUPTION_CHECK + 1 +#else + 0 +#endif + ; + } + if (corruption_check_size == 0) memory_corruption_check = 0; -- cgit v1.2.3 From 11fdd252bb5b461289fc5c21dc8fc87dc66f3284 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Sun, 7 Sep 2008 17:58:50 -0700 Subject: x86: cpu make amd.c more like amd_64.c v2 1. make 32bit have early_init_amd_mc and amd_detect_cmp 2. seperate init_amd_k5/k6/k7 ... v2: fix compiling for !CONFIG_SMP Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/amd.c | 396 ++++++++++++++++++++++++------------------- arch/x86/kernel/cpu/amd_64.c | 17 +- arch/x86/kernel/cpu/common.c | 2 +- 3 files changed, 235 insertions(+), 180 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index c3175da7bc6..a3a9e3cbdea 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -24,8 +24,200 @@ extern void vide(void); __asm__(".align 4\nvide: ret"); +static void __cpuinit init_amd_k5(struct cpuinfo_x86 *c) +{ +/* + * General Systems BIOSen alias the cpu frequency registers + * of the Elan at 0x000df000. Unfortuantly, one of the Linux + * drivers subsequently pokes it, and changes the CPU speed. + * Workaround : Remove the unneeded alias. + */ +#define CBAR (0xfffc) /* Configuration Base Address (32-bit) */ +#define CBAR_ENB (0x80000000) +#define CBAR_KEY (0X000000CB) + if (c->x86_model == 9 || c->x86_model == 10) { + if (inl (CBAR) & CBAR_ENB) + outl (0 | CBAR_KEY, CBAR); + } +} + + +static void __cpuinit init_amd_k6(struct cpuinfo_x86 *c) +{ + u32 l, h; + int mbytes = num_physpages >> (20-PAGE_SHIFT); + + if (c->x86_model < 6) { + /* Based on AMD doc 20734R - June 2000 */ + if (c->x86_model == 0) { + clear_cpu_cap(c, X86_FEATURE_APIC); + set_cpu_cap(c, X86_FEATURE_PGE); + } + return; + } + + if (c->x86_model == 6 && c->x86_mask == 1) { + const int K6_BUG_LOOP = 1000000; + int n; + void (*f_vide)(void); + unsigned long d, d2; + + printk(KERN_INFO "AMD K6 stepping B detected - "); + + /* + * It looks like AMD fixed the 2.6.2 bug and improved indirect + * calls at the same time. + */ + + n = K6_BUG_LOOP; + f_vide = vide; + rdtscl(d); + while (n--) + f_vide(); + rdtscl(d2); + d = d2-d; + + if (d > 20*K6_BUG_LOOP) + printk("system stability may be impaired when more than 32 MB are used.\n"); + else + printk("probably OK (after B9730xxxx).\n"); + printk(KERN_INFO "Please see http://membres.lycos.fr/poulot/k6bug.html\n"); + } + + /* K6 with old style WHCR */ + if (c->x86_model < 8 || + (c->x86_model == 8 && c->x86_mask < 8)) { + /* We can only write allocate on the low 508Mb */ + if (mbytes > 508) + mbytes = 508; + + rdmsr(MSR_K6_WHCR, l, h); + if ((l&0x0000FFFF) == 0) { + unsigned long flags; + l = (1<<0)|((mbytes/4)<<1); + local_irq_save(flags); + wbinvd(); + wrmsr(MSR_K6_WHCR, l, h); + local_irq_restore(flags); + printk(KERN_INFO "Enabling old style K6 write allocation for %d Mb\n", + mbytes); + } + return; + } + + if ((c->x86_model == 8 && c->x86_mask > 7) || + c->x86_model == 9 || c->x86_model == 13) { + /* The more serious chips .. */ + + if (mbytes > 4092) + mbytes = 4092; + + rdmsr(MSR_K6_WHCR, l, h); + if ((l&0xFFFF0000) == 0) { + unsigned long flags; + l = ((mbytes>>2)<<22)|(1<<16); + local_irq_save(flags); + wbinvd(); + wrmsr(MSR_K6_WHCR, l, h); + local_irq_restore(flags); + printk(KERN_INFO "Enabling new style K6 write allocation for %d Mb\n", + mbytes); + } + + return; + } + + if (c->x86_model == 10) { + /* AMD Geode LX is model 10 */ + /* placeholder for any needed mods */ + return; + } +} + +static void __cpuinit init_amd_k7(struct cpuinfo_x86 *c) +{ + u32 l, h; + + /* + * Bit 15 of Athlon specific MSR 15, needs to be 0 + * to enable SSE on Palomino/Morgan/Barton CPU's. + * If the BIOS didn't enable it already, enable it here. + */ + if (c->x86_model >= 6 && c->x86_model <= 10) { + if (!cpu_has(c, X86_FEATURE_XMM)) { + printk(KERN_INFO "Enabling disabled K7/SSE Support.\n"); + rdmsr(MSR_K7_HWCR, l, h); + l &= ~0x00008000; + wrmsr(MSR_K7_HWCR, l, h); + set_cpu_cap(c, X86_FEATURE_XMM); + } + } + + /* + * It's been determined by AMD that Athlons since model 8 stepping 1 + * are more robust with CLK_CTL set to 200xxxxx instead of 600xxxxx + * As per AMD technical note 27212 0.2 + */ + if ((c->x86_model == 8 && c->x86_mask >= 1) || (c->x86_model > 8)) { + rdmsr(MSR_K7_CLK_CTL, l, h); + if ((l & 0xfff00000) != 0x20000000) { + printk ("CPU: CLK_CTL MSR was %x. Reprogramming to %x\n", l, + ((l & 0x000fffff)|0x20000000)); + wrmsr(MSR_K7_CLK_CTL, (l & 0x000fffff)|0x20000000, h); + } + } + + set_cpu_cap(c, X86_FEATURE_K7); +} + +/* + * On a AMD dual core setup the lower bits of the APIC id distingush the cores. + * Assumes number of cores is a power of two. + */ +static void __cpuinit amd_detect_cmp(struct cpuinfo_x86 *c) +{ +#ifdef CONFIG_X86_HT + unsigned bits; + + bits = c->x86_coreid_bits; + + /* Low order bits define the core id (index of core in socket) */ + c->cpu_core_id = c->initial_apicid & ((1 << bits)-1); + /* Convert the initial APIC ID into the socket ID */ + c->phys_proc_id = c->initial_apicid >> bits; +#endif +} + +static void __cpuinit early_init_amd_mc(struct cpuinfo_x86 *c) +{ +#ifdef CONFIG_X86_HT + unsigned bits, ecx; + + /* Multi core CPU? */ + if (c->extended_cpuid_level < 0x80000008) + return; + + ecx = cpuid_ecx(0x80000008); + + c->x86_max_cores = (ecx & 0xff) + 1; + + /* CPU telling us the core id bits shift? */ + bits = (ecx >> 12) & 0xF; + + /* Otherwise recompute */ + if (bits == 0) { + while ((1 << bits) < c->x86_max_cores) + bits++; + } + + c->x86_coreid_bits = bits; +#endif +} + static void __cpuinit early_init_amd(struct cpuinfo_x86 *c) { + early_init_amd_mc(c); + if (c->x86_power & (1<<8)) set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); @@ -37,9 +229,6 @@ static void __cpuinit early_init_amd(struct cpuinfo_x86 *c) static void __cpuinit init_amd(struct cpuinfo_x86 *c) { - u32 l, h; - int mbytes = num_physpages >> (20-PAGE_SHIFT); - #ifdef CONFIG_SMP unsigned long long value; @@ -50,7 +239,7 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c) * Errata 63 for SH-B3 steppings * Errata 122 for all steppings (F+ have it disabled by default) */ - if (c->x86 == 15) { + if (c->x86 == 0xf) { rdmsrl(MSR_K7_HWCR, value); value |= 1 << 6; wrmsrl(MSR_K7_HWCR, value); @@ -73,192 +262,55 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c) switch (c->x86) { case 4: - /* - * General Systems BIOSen alias the cpu frequency registers - * of the Elan at 0x000df000. Unfortuantly, one of the Linux - * drivers subsequently pokes it, and changes the CPU speed. - * Workaround : Remove the unneeded alias. - */ -#define CBAR (0xfffc) /* Configuration Base Address (32-bit) */ -#define CBAR_ENB (0x80000000) -#define CBAR_KEY (0X000000CB) - if (c->x86_model == 9 || c->x86_model == 10) { - if (inl (CBAR) & CBAR_ENB) - outl (0 | CBAR_KEY, CBAR); - } - break; + init_amd_k5(c); + break; case 5: - if (c->x86_model < 6) { - /* Based on AMD doc 20734R - June 2000 */ - if (c->x86_model == 0) { - clear_cpu_cap(c, X86_FEATURE_APIC); - set_cpu_cap(c, X86_FEATURE_PGE); - } - break; - } - - if (c->x86_model == 6 && c->x86_mask == 1) { - const int K6_BUG_LOOP = 1000000; - int n; - void (*f_vide)(void); - unsigned long d, d2; - - printk(KERN_INFO "AMD K6 stepping B detected - "); - - /* - * It looks like AMD fixed the 2.6.2 bug and improved indirect - * calls at the same time. - */ - - n = K6_BUG_LOOP; - f_vide = vide; - rdtscl(d); - while (n--) - f_vide(); - rdtscl(d2); - d = d2-d; - - if (d > 20*K6_BUG_LOOP) - printk("system stability may be impaired when more than 32 MB are used.\n"); - else - printk("probably OK (after B9730xxxx).\n"); - printk(KERN_INFO "Please see http://membres.lycos.fr/poulot/k6bug.html\n"); - } - - /* K6 with old style WHCR */ - if (c->x86_model < 8 || - (c->x86_model == 8 && c->x86_mask < 8)) { - /* We can only write allocate on the low 508Mb */ - if (mbytes > 508) - mbytes = 508; - - rdmsr(MSR_K6_WHCR, l, h); - if ((l&0x0000FFFF) == 0) { - unsigned long flags; - l = (1<<0)|((mbytes/4)<<1); - local_irq_save(flags); - wbinvd(); - wrmsr(MSR_K6_WHCR, l, h); - local_irq_restore(flags); - printk(KERN_INFO "Enabling old style K6 write allocation for %d Mb\n", - mbytes); - } - break; - } - - if ((c->x86_model == 8 && c->x86_mask > 7) || - c->x86_model == 9 || c->x86_model == 13) { - /* The more serious chips .. */ - - if (mbytes > 4092) - mbytes = 4092; - - rdmsr(MSR_K6_WHCR, l, h); - if ((l&0xFFFF0000) == 0) { - unsigned long flags; - l = ((mbytes>>2)<<22)|(1<<16); - local_irq_save(flags); - wbinvd(); - wrmsr(MSR_K6_WHCR, l, h); - local_irq_restore(flags); - printk(KERN_INFO "Enabling new style K6 write allocation for %d Mb\n", - mbytes); - } - - break; - } - - if (c->x86_model == 10) { - /* AMD Geode LX is model 10 */ - /* placeholder for any needed mods */ - break; - } - break; - case 6: /* An Athlon/Duron */ - - /* - * Bit 15 of Athlon specific MSR 15, needs to be 0 - * to enable SSE on Palomino/Morgan/Barton CPU's. - * If the BIOS didn't enable it already, enable it here. - */ - if (c->x86_model >= 6 && c->x86_model <= 10) { - if (!cpu_has(c, X86_FEATURE_XMM)) { - printk(KERN_INFO "Enabling disabled K7/SSE Support.\n"); - rdmsr(MSR_K7_HWCR, l, h); - l &= ~0x00008000; - wrmsr(MSR_K7_HWCR, l, h); - set_cpu_cap(c, X86_FEATURE_XMM); - } - } - - /* - * It's been determined by AMD that Athlons since model 8 stepping 1 - * are more robust with CLK_CTL set to 200xxxxx instead of 600xxxxx - * As per AMD technical note 27212 0.2 - */ - if ((c->x86_model == 8 && c->x86_mask >= 1) || (c->x86_model > 8)) { - rdmsr(MSR_K7_CLK_CTL, l, h); - if ((l & 0xfff00000) != 0x20000000) { - printk ("CPU: CLK_CTL MSR was %x. Reprogramming to %x\n", l, - ((l & 0x000fffff)|0x20000000)); - wrmsr(MSR_K7_CLK_CTL, (l & 0x000fffff)|0x20000000, h); - } - } - break; - } - - switch (c->x86) { - case 15: - /* Use K8 tuning for Fam10h and Fam11h */ - case 0x10: - case 0x11: - set_cpu_cap(c, X86_FEATURE_K8); + init_amd_k6(c); break; - case 6: - set_cpu_cap(c, X86_FEATURE_K7); + case 6: /* An Athlon/Duron */ + init_amd_k7(c); break; } + + /* K6s reports MCEs but don't actually have all the MSRs */ + if (c->x86 < 6) + clear_cpu_cap(c, X86_FEATURE_MCE); + if (c->x86 >= 6) set_cpu_cap(c, X86_FEATURE_FXSAVE_LEAK); - display_cacheinfo(c); + if (!c->x86_model_id[0]) { + switch (c->x86) { + case 0xf: + /* Should distinguish Models here, but this is only + a fallback anyways. */ + strcpy(c->x86_model_id, "Hammer"); + break; + } + } - if (cpuid_eax(0x80000000) >= 0x80000008) - c->x86_max_cores = (cpuid_ecx(0x80000008) & 0xff) + 1; + display_cacheinfo(c); -#ifdef CONFIG_X86_HT - /* - * On a AMD multi core setup the lower bits of the APIC id - * distinguish the cores. - */ - if (c->x86_max_cores > 1) { - int cpu = smp_processor_id(); - unsigned bits = (cpuid_ecx(0x80000008) >> 12) & 0xf; + /* Multi core CPU? */ + if (c->extended_cpuid_level >= 0x80000008) + amd_detect_cmp(c); - if (bits == 0) { - while ((1 << bits) < c->x86_max_cores) - bits++; - } - c->cpu_core_id = c->phys_proc_id & ((1<phys_proc_id >>= bits; - printk(KERN_INFO "CPU %d(%d) -> Core %d\n", - cpu, c->x86_max_cores, c->cpu_core_id); - } -#endif + detect_ht(c); - if (cpuid_eax(0x80000000) >= 0x80000006) { - if ((c->x86 == 0x10) && (cpuid_edx(0x80000006) & 0xf000)) + if (c->extended_cpuid_level >= 0x80000006) { + if ((c->x86 >= 0x0f) && (cpuid_edx(0x80000006) & 0xf000)) num_cache_leaves = 4; else num_cache_leaves = 3; } - /* K6s reports MCEs but don't actually have all the MSRs */ - if (c->x86 < 6) - clear_cpu_cap(c, X86_FEATURE_MCE); + if (c->x86 >= 0xf && c->x86 <= 0x11) + set_cpu_cap(c, X86_FEATURE_K8); - if (cpu_has_xmm2) + if (cpu_has_xmm2) { + /* MFENCE stops RDTSC speculation */ set_cpu_cap(c, X86_FEATURE_MFENCE_RDTSC); + } } static unsigned int __cpuinit amd_size_cache(struct cpuinfo_x86 *c, unsigned int size) diff --git a/arch/x86/kernel/cpu/amd_64.c b/arch/x86/kernel/cpu/amd_64.c index 8c2d07f06f1..7913e48f673 100644 --- a/arch/x86/kernel/cpu/amd_64.c +++ b/arch/x86/kernel/cpu/amd_64.c @@ -174,17 +174,20 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c) if (c->extended_cpuid_level >= 0x80000008) amd_detect_cmp(c); - if (c->extended_cpuid_level >= 0x80000006 && - (cpuid_edx(0x80000006) & 0xf000)) - num_cache_leaves = 4; - else - num_cache_leaves = 3; + if (c->extended_cpuid_level >= 0x80000006) { + if ((c->x86 >= 0x0f) && (cpuid_edx(0x80000006) & 0xf000)) + num_cache_leaves = 4; + else + num_cache_leaves = 3; + } if (c->x86 >= 0xf && c->x86 <= 0x11) set_cpu_cap(c, X86_FEATURE_K8); - /* MFENCE stops RDTSC speculation */ - set_cpu_cap(c, X86_FEATURE_MFENCE_RDTSC); + if (cpu_has_xmm2) { + /* MFENCE stops RDTSC speculation */ + set_cpu_cap(c, X86_FEATURE_MFENCE_RDTSC); + } if (c->x86 == 0x10) { /* do this for boot cpu */ diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 5cde4b18448..9a57106c199 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -629,8 +629,8 @@ static void __cpuinit identify_cpu(struct cpuinfo_x86 *c) c->x86_vendor_id[0] = '\0'; /* Unset */ c->x86_model_id[0] = '\0'; /* Unset */ c->x86_max_cores = 1; -#ifdef CONFIG_X86_64 c->x86_coreid_bits = 0; +#ifdef CONFIG_X86_64 c->x86_clflush_size = 64; #else c->cpuid_level = -1; /* CPUID not detected */ -- cgit v1.2.3 From c58606ad551e9a1c85a9bd4f1698ca41ec279b2c Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Sun, 7 Sep 2008 17:58:51 -0700 Subject: x86: remove duplicated force_mwait Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/amd_64.c | 2 -- arch/x86/kernel/process.c | 1 - 2 files changed, 3 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/amd_64.c b/arch/x86/kernel/cpu/amd_64.c index 7913e48f673..466a1eaa81e 100644 --- a/arch/x86/kernel/cpu/amd_64.c +++ b/arch/x86/kernel/cpu/amd_64.c @@ -9,8 +9,6 @@ #include "cpu.h" -int force_mwait __cpuinitdata; - #ifdef CONFIG_NUMA static int __cpuinit nearby_node(int apicid) { diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 7fc4d5b0a6a..9f94bb1c811 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -15,7 +15,6 @@ unsigned long idle_nomwait; EXPORT_SYMBOL(idle_nomwait); struct kmem_cache *task_xstate_cachep; -static int force_mwait __cpuinitdata; int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src) { -- cgit v1.2.3 From 2b86473604f6e9aef0b5e89c56798a90ccddcdbe Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Sun, 7 Sep 2008 17:58:52 -0700 Subject: x86: add srat_detect_node for amd64 separate that from amd_detect_cmp() Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/amd_64.c | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/amd_64.c b/arch/x86/kernel/cpu/amd_64.c index 466a1eaa81e..20c3f12dfe7 100644 --- a/arch/x86/kernel/cpu/amd_64.c +++ b/arch/x86/kernel/cpu/amd_64.c @@ -36,19 +36,23 @@ static void __cpuinit amd_detect_cmp(struct cpuinfo_x86 *c) { #ifdef CONFIG_SMP unsigned bits; -#ifdef CONFIG_NUMA - int cpu = smp_processor_id(); - int node = 0; - unsigned apicid = hard_smp_processor_id(); -#endif + bits = c->x86_coreid_bits; /* Low order bits define the core id (index of core in socket) */ c->cpu_core_id = c->initial_apicid & ((1 << bits)-1); /* Convert the initial APIC ID into the socket ID */ c->phys_proc_id = c->initial_apicid >> bits; +#endif +} +static void __cpuinit srat_detect_node(struct cpuinfo_x86 *c) +{ #ifdef CONFIG_NUMA + int cpu = smp_processor_id(); + int node; + unsigned apicid = hard_smp_processor_id(); + node = c->phys_proc_id; if (apicid_to_node[apicid] != NUMA_NO_NODE) node = apicid_to_node[apicid]; @@ -76,7 +80,6 @@ static void __cpuinit amd_detect_cmp(struct cpuinfo_x86 *c) printk(KERN_INFO "CPU %d/%x -> Node %d\n", cpu, apicid, node); #endif -#endif } static void __cpuinit early_init_amd_mc(struct cpuinfo_x86 *c) @@ -169,8 +172,10 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c) display_cacheinfo(c); /* Multi core CPU? */ - if (c->extended_cpuid_level >= 0x80000008) + if (c->extended_cpuid_level >= 0x80000008) { amd_detect_cmp(c); + srat_detect_node(c); + } if (c->extended_cpuid_level >= 0x80000006) { if ((c->x86 >= 0x0f) && (cpuid_edx(0x80000006) & 0xf000)) -- cgit v1.2.3 From 8d71a2ea0ad4ef9b9076ffd44726bad1f0ccf59b Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Sun, 7 Sep 2008 17:58:53 -0700 Subject: x86: merge header in amd_64.c Singed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/amd.c | 8 ++++++++ arch/x86/kernel/cpu/amd_64.c | 13 ++++++++++--- 2 files changed, 18 insertions(+), 3 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index a3a9e3cbdea..3c8090d1005 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -1,11 +1,19 @@ #include #include #include + #include #include #include +#ifdef CONFIG_X86_64 +# include +# include +# include +#endif + #include + #include "cpu.h" /* diff --git a/arch/x86/kernel/cpu/amd_64.c b/arch/x86/kernel/cpu/amd_64.c index 20c3f12dfe7..1d83601e85e 100644 --- a/arch/x86/kernel/cpu/amd_64.c +++ b/arch/x86/kernel/cpu/amd_64.c @@ -1,9 +1,16 @@ #include +#include #include -#include -#include -#include +#include +#include +#include + +#ifdef CONFIG_X86_64 +# include +# include +# include +#endif #include -- cgit v1.2.3 From 6c62aa4a3c12989a1f1fcbbe6f1ee5d4de4b2300 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Sun, 7 Sep 2008 17:58:54 -0700 Subject: x86: make amd.c have 64bit support code Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/amd.c | 137 ++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 126 insertions(+), 11 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 3c8090d1005..32e73520adf 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -16,6 +16,7 @@ #include "cpu.h" +#ifdef CONFIG_X86_32 /* * B step AMD K6 before B 9730xxxx have hardware bugs that can cause * misexecution of code under Linux. Owners of such processors should @@ -177,6 +178,26 @@ static void __cpuinit init_amd_k7(struct cpuinfo_x86 *c) set_cpu_cap(c, X86_FEATURE_K7); } +#endif + +#if defined(CONFIG_NUMA) && defined(CONFIG_X86_64) +static int __cpuinit nearby_node(int apicid) +{ + int i, node; + + for (i = apicid - 1; i >= 0; i--) { + node = apicid_to_node[i]; + if (node != NUMA_NO_NODE && node_online(node)) + return node; + } + for (i = apicid + 1; i < MAX_LOCAL_APIC; i++) { + node = apicid_to_node[i]; + if (node != NUMA_NO_NODE && node_online(node)) + return node; + } + return first_node(node_online_map); /* Shouldn't happen */ +} +#endif /* * On a AMD dual core setup the lower bits of the APIC id distingush the cores. @@ -196,6 +217,42 @@ static void __cpuinit amd_detect_cmp(struct cpuinfo_x86 *c) #endif } +static void __cpuinit srat_detect_node(struct cpuinfo_x86 *c) +{ +#if defined(CONFIG_NUMA) && defined(CONFIG_X86_64) + int cpu = smp_processor_id(); + int node; + unsigned apicid = hard_smp_processor_id(); + + node = c->phys_proc_id; + if (apicid_to_node[apicid] != NUMA_NO_NODE) + node = apicid_to_node[apicid]; + if (!node_online(node)) { + /* Two possibilities here: + - The CPU is missing memory and no node was created. + In that case try picking one from a nearby CPU + - The APIC IDs differ from the HyperTransport node IDs + which the K8 northbridge parsing fills in. + Assume they are all increased by a constant offset, + but in the same order as the HT nodeids. + If that doesn't result in a usable node fall back to the + path for the previous case. */ + + int ht_nodeid = c->initial_apicid; + + if (ht_nodeid >= 0 && + apicid_to_node[ht_nodeid] != NUMA_NO_NODE) + node = apicid_to_node[ht_nodeid]; + /* Pick a nearby node */ + if (!node_online(node)) + node = nearby_node(apicid); + } + numa_set_node(cpu, node); + + printk(KERN_INFO "CPU %d/%x -> Node %d\n", cpu, apicid, node); +#endif +} + static void __cpuinit early_init_amd_mc(struct cpuinfo_x86 *c) { #ifdef CONFIG_X86_HT @@ -226,13 +283,19 @@ static void __cpuinit early_init_amd(struct cpuinfo_x86 *c) { early_init_amd_mc(c); + /* c->x86_power is 8000_0007 edx. Bit 8 is constant TSC */ if (c->x86_power & (1<<8)) set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); +#ifdef CONFIG_X86_64 + set_cpu_cap(c, X86_FEATURE_SYSCALL32); +#else /* Set MTRR capability flag if appropriate */ - if (c->x86_model == 13 || c->x86_model == 9 || - (c->x86_model == 8 && c->x86_mask >= 8)) - set_cpu_cap(c, X86_FEATURE_K6_MTRR); + if (c->x86 == 5) + if (c->x86_model == 13 || c->x86_model == 9 || + (c->x86_model == 8 && c->x86_mask >= 8)) + set_cpu_cap(c, X86_FEATURE_K6_MTRR); +#endif } static void __cpuinit init_amd(struct cpuinfo_x86 *c) @@ -256,18 +319,31 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c) early_init_amd(c); - /* - * FIXME: We should handle the K5 here. Set up the write - * range and also turn on MSR 83 bits 4 and 31 (write alloc, - * no bus pipeline) - */ - /* * Bit 31 in normal CPUID used for nonstandard 3DNow ID; * 3DNow is IDd by bit 31 in extended CPUID (1*32+31) anyway */ clear_cpu_cap(c, 0*32+31); +#ifdef CONFIG_X86_64 + /* On C+ stepping K8 rep microcode works well for copy/memset */ + if (c->x86 == 0xf) { + u32 level; + + level = cpuid_eax(1); + if((level >= 0x0f48 && level < 0x0f50) || level >= 0x0f58) + set_cpu_cap(c, X86_FEATURE_REP_GOOD); + } + if (c->x86 == 0x10 || c->x86 == 0x11) + set_cpu_cap(c, X86_FEATURE_REP_GOOD); +#else + + /* + * FIXME: We should handle the K5 here. Set up the write + * range and also turn on MSR 83 bits 4 and 31 (write alloc, + * no bus pipeline) + */ + switch (c->x86) { case 4: init_amd_k5(c); @@ -283,7 +359,9 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c) /* K6s reports MCEs but don't actually have all the MSRs */ if (c->x86 < 6) clear_cpu_cap(c, X86_FEATURE_MCE); +#endif + /* Enable workaround for FXSAVE leak */ if (c->x86 >= 6) set_cpu_cap(c, X86_FEATURE_FXSAVE_LEAK); @@ -300,10 +378,14 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c) display_cacheinfo(c); /* Multi core CPU? */ - if (c->extended_cpuid_level >= 0x80000008) + if (c->extended_cpuid_level >= 0x80000008) { amd_detect_cmp(c); + srat_detect_node(c); + } +#ifdef CONFIG_X86_32 detect_ht(c); +#endif if (c->extended_cpuid_level >= 0x80000006) { if ((c->x86 >= 0x0f) && (cpuid_edx(0x80000006) & 0xf000)) @@ -319,8 +401,38 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c) /* MFENCE stops RDTSC speculation */ set_cpu_cap(c, X86_FEATURE_MFENCE_RDTSC); } + +#ifdef CONFIG_X86_64 + if (c->x86 == 0x10) { + /* do this for boot cpu */ + if (c == &boot_cpu_data) + check_enable_amd_mmconf_dmi(); + + fam10h_check_enable_mmcfg(); + } + + if (c == &boot_cpu_data && c->x86 >= 0xf && c->x86 <= 0x11) { + unsigned long long tseg; + + /* + * Split up direct mapping around the TSEG SMM area. + * Don't do it for gbpages because there seems very little + * benefit in doing so. + */ + if (!rdmsrl_safe(MSR_K8_TSEG_ADDR, &tseg)) { + printk(KERN_DEBUG "tseg: %010llx\n", tseg); + if ((tseg>>PMD_SHIFT) < + (max_low_pfn_mapped>>(PMD_SHIFT-PAGE_SHIFT)) || + ((tseg>>PMD_SHIFT) < + (max_pfn_mapped>>(PMD_SHIFT-PAGE_SHIFT)) && + (tseg>>PMD_SHIFT) >= (1ULL<<(32 - PMD_SHIFT)))) + set_memory_4k((unsigned long)__va(tseg), 1); + } + } +#endif } +#ifdef CONFIG_X86_32 static unsigned int __cpuinit amd_size_cache(struct cpuinfo_x86 *c, unsigned int size) { /* AMD errata T13 (order #21922) */ @@ -333,10 +445,12 @@ static unsigned int __cpuinit amd_size_cache(struct cpuinfo_x86 *c, unsigned int } return size; } +#endif static struct cpu_dev amd_cpu_dev __cpuinitdata = { .c_vendor = "AMD", .c_ident = { "AuthenticAMD" }, +#ifdef CONFIG_X86_32 .c_models = { { .vendor = X86_VENDOR_AMD, .family = 4, .model_names = { @@ -349,9 +463,10 @@ static struct cpu_dev amd_cpu_dev __cpuinitdata = { } }, }, + .c_size_cache = amd_size_cache, +#endif .c_early_init = early_init_amd, .c_init = init_amd, - .c_size_cache = amd_size_cache, .c_x86_vendor = X86_VENDOR_AMD, }; -- cgit v1.2.3 From 2a02505055fdd44958efd0e140dd87cb9fdecaf1 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Sun, 7 Sep 2008 17:58:55 -0700 Subject: x86: make amd_64 have 32 bit code Signed-off-by: Yinghai Lu Cc: Yinghai Lu Cc: Andrew Morton Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/amd_64.c | 259 +++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 247 insertions(+), 12 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/amd_64.c b/arch/x86/kernel/cpu/amd_64.c index 1d83601e85e..32e73520adf 100644 --- a/arch/x86/kernel/cpu/amd_64.c +++ b/arch/x86/kernel/cpu/amd_64.c @@ -16,7 +16,171 @@ #include "cpu.h" -#ifdef CONFIG_NUMA +#ifdef CONFIG_X86_32 +/* + * B step AMD K6 before B 9730xxxx have hardware bugs that can cause + * misexecution of code under Linux. Owners of such processors should + * contact AMD for precise details and a CPU swap. + * + * See http://www.multimania.com/poulot/k6bug.html + * http://www.amd.com/K6/k6docs/revgd.html + * + * The following test is erm.. interesting. AMD neglected to up + * the chip setting when fixing the bug but they also tweaked some + * performance at the same time.. + */ + +extern void vide(void); +__asm__(".align 4\nvide: ret"); + +static void __cpuinit init_amd_k5(struct cpuinfo_x86 *c) +{ +/* + * General Systems BIOSen alias the cpu frequency registers + * of the Elan at 0x000df000. Unfortuantly, one of the Linux + * drivers subsequently pokes it, and changes the CPU speed. + * Workaround : Remove the unneeded alias. + */ +#define CBAR (0xfffc) /* Configuration Base Address (32-bit) */ +#define CBAR_ENB (0x80000000) +#define CBAR_KEY (0X000000CB) + if (c->x86_model == 9 || c->x86_model == 10) { + if (inl (CBAR) & CBAR_ENB) + outl (0 | CBAR_KEY, CBAR); + } +} + + +static void __cpuinit init_amd_k6(struct cpuinfo_x86 *c) +{ + u32 l, h; + int mbytes = num_physpages >> (20-PAGE_SHIFT); + + if (c->x86_model < 6) { + /* Based on AMD doc 20734R - June 2000 */ + if (c->x86_model == 0) { + clear_cpu_cap(c, X86_FEATURE_APIC); + set_cpu_cap(c, X86_FEATURE_PGE); + } + return; + } + + if (c->x86_model == 6 && c->x86_mask == 1) { + const int K6_BUG_LOOP = 1000000; + int n; + void (*f_vide)(void); + unsigned long d, d2; + + printk(KERN_INFO "AMD K6 stepping B detected - "); + + /* + * It looks like AMD fixed the 2.6.2 bug and improved indirect + * calls at the same time. + */ + + n = K6_BUG_LOOP; + f_vide = vide; + rdtscl(d); + while (n--) + f_vide(); + rdtscl(d2); + d = d2-d; + + if (d > 20*K6_BUG_LOOP) + printk("system stability may be impaired when more than 32 MB are used.\n"); + else + printk("probably OK (after B9730xxxx).\n"); + printk(KERN_INFO "Please see http://membres.lycos.fr/poulot/k6bug.html\n"); + } + + /* K6 with old style WHCR */ + if (c->x86_model < 8 || + (c->x86_model == 8 && c->x86_mask < 8)) { + /* We can only write allocate on the low 508Mb */ + if (mbytes > 508) + mbytes = 508; + + rdmsr(MSR_K6_WHCR, l, h); + if ((l&0x0000FFFF) == 0) { + unsigned long flags; + l = (1<<0)|((mbytes/4)<<1); + local_irq_save(flags); + wbinvd(); + wrmsr(MSR_K6_WHCR, l, h); + local_irq_restore(flags); + printk(KERN_INFO "Enabling old style K6 write allocation for %d Mb\n", + mbytes); + } + return; + } + + if ((c->x86_model == 8 && c->x86_mask > 7) || + c->x86_model == 9 || c->x86_model == 13) { + /* The more serious chips .. */ + + if (mbytes > 4092) + mbytes = 4092; + + rdmsr(MSR_K6_WHCR, l, h); + if ((l&0xFFFF0000) == 0) { + unsigned long flags; + l = ((mbytes>>2)<<22)|(1<<16); + local_irq_save(flags); + wbinvd(); + wrmsr(MSR_K6_WHCR, l, h); + local_irq_restore(flags); + printk(KERN_INFO "Enabling new style K6 write allocation for %d Mb\n", + mbytes); + } + + return; + } + + if (c->x86_model == 10) { + /* AMD Geode LX is model 10 */ + /* placeholder for any needed mods */ + return; + } +} + +static void __cpuinit init_amd_k7(struct cpuinfo_x86 *c) +{ + u32 l, h; + + /* + * Bit 15 of Athlon specific MSR 15, needs to be 0 + * to enable SSE on Palomino/Morgan/Barton CPU's. + * If the BIOS didn't enable it already, enable it here. + */ + if (c->x86_model >= 6 && c->x86_model <= 10) { + if (!cpu_has(c, X86_FEATURE_XMM)) { + printk(KERN_INFO "Enabling disabled K7/SSE Support.\n"); + rdmsr(MSR_K7_HWCR, l, h); + l &= ~0x00008000; + wrmsr(MSR_K7_HWCR, l, h); + set_cpu_cap(c, X86_FEATURE_XMM); + } + } + + /* + * It's been determined by AMD that Athlons since model 8 stepping 1 + * are more robust with CLK_CTL set to 200xxxxx instead of 600xxxxx + * As per AMD technical note 27212 0.2 + */ + if ((c->x86_model == 8 && c->x86_mask >= 1) || (c->x86_model > 8)) { + rdmsr(MSR_K7_CLK_CTL, l, h); + if ((l & 0xfff00000) != 0x20000000) { + printk ("CPU: CLK_CTL MSR was %x. Reprogramming to %x\n", l, + ((l & 0x000fffff)|0x20000000)); + wrmsr(MSR_K7_CLK_CTL, (l & 0x000fffff)|0x20000000, h); + } + } + + set_cpu_cap(c, X86_FEATURE_K7); +} +#endif + +#if defined(CONFIG_NUMA) && defined(CONFIG_X86_64) static int __cpuinit nearby_node(int apicid) { int i, node; @@ -41,7 +205,7 @@ static int __cpuinit nearby_node(int apicid) */ static void __cpuinit amd_detect_cmp(struct cpuinfo_x86 *c) { -#ifdef CONFIG_SMP +#ifdef CONFIG_X86_HT unsigned bits; bits = c->x86_coreid_bits; @@ -55,7 +219,7 @@ static void __cpuinit amd_detect_cmp(struct cpuinfo_x86 *c) static void __cpuinit srat_detect_node(struct cpuinfo_x86 *c) { -#ifdef CONFIG_NUMA +#if defined(CONFIG_NUMA) && defined(CONFIG_X86_64) int cpu = smp_processor_id(); int node; unsigned apicid = hard_smp_processor_id(); @@ -91,7 +255,7 @@ static void __cpuinit srat_detect_node(struct cpuinfo_x86 *c) static void __cpuinit early_init_amd_mc(struct cpuinfo_x86 *c) { -#ifdef CONFIG_SMP +#ifdef CONFIG_X86_HT unsigned bits, ecx; /* Multi core CPU? */ @@ -112,7 +276,6 @@ static void __cpuinit early_init_amd_mc(struct cpuinfo_x86 *c) } c->x86_coreid_bits = bits; - #endif } @@ -124,15 +287,21 @@ static void __cpuinit early_init_amd(struct cpuinfo_x86 *c) if (c->x86_power & (1<<8)) set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); +#ifdef CONFIG_X86_64 set_cpu_cap(c, X86_FEATURE_SYSCALL32); +#else + /* Set MTRR capability flag if appropriate */ + if (c->x86 == 5) + if (c->x86_model == 13 || c->x86_model == 9 || + (c->x86_model == 8 && c->x86_mask >= 8)) + set_cpu_cap(c, X86_FEATURE_K6_MTRR); +#endif } static void __cpuinit init_amd(struct cpuinfo_x86 *c) { - unsigned level; - #ifdef CONFIG_SMP - unsigned long value; + unsigned long long value; /* * Disable TLB flush filter by setting HWCR.FFDIS on K8 @@ -142,26 +311,55 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c) * Errata 122 for all steppings (F+ have it disabled by default) */ if (c->x86 == 0xf) { - rdmsrl(MSR_K8_HWCR, value); + rdmsrl(MSR_K7_HWCR, value); value |= 1 << 6; - wrmsrl(MSR_K8_HWCR, value); + wrmsrl(MSR_K7_HWCR, value); } #endif early_init_amd(c); - /* Bit 31 in normal CPUID used for nonstandard 3DNow ID; - 3DNow is IDd by bit 31 in extended CPUID (1*32+31) anyway */ + /* + * Bit 31 in normal CPUID used for nonstandard 3DNow ID; + * 3DNow is IDd by bit 31 in extended CPUID (1*32+31) anyway + */ clear_cpu_cap(c, 0*32+31); +#ifdef CONFIG_X86_64 /* On C+ stepping K8 rep microcode works well for copy/memset */ if (c->x86 == 0xf) { + u32 level; + level = cpuid_eax(1); if((level >= 0x0f48 && level < 0x0f50) || level >= 0x0f58) set_cpu_cap(c, X86_FEATURE_REP_GOOD); } if (c->x86 == 0x10 || c->x86 == 0x11) set_cpu_cap(c, X86_FEATURE_REP_GOOD); +#else + + /* + * FIXME: We should handle the K5 here. Set up the write + * range and also turn on MSR 83 bits 4 and 31 (write alloc, + * no bus pipeline) + */ + + switch (c->x86) { + case 4: + init_amd_k5(c); + break; + case 5: + init_amd_k6(c); + break; + case 6: /* An Athlon/Duron */ + init_amd_k7(c); + break; + } + + /* K6s reports MCEs but don't actually have all the MSRs */ + if (c->x86 < 6) + clear_cpu_cap(c, X86_FEATURE_MCE); +#endif /* Enable workaround for FXSAVE leak */ if (c->x86 >= 6) @@ -176,6 +374,7 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c) break; } } + display_cacheinfo(c); /* Multi core CPU? */ @@ -184,6 +383,10 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c) srat_detect_node(c); } +#ifdef CONFIG_X86_32 + detect_ht(c); +#endif + if (c->extended_cpuid_level >= 0x80000006) { if ((c->x86 >= 0x0f) && (cpuid_edx(0x80000006) & 0xf000)) num_cache_leaves = 4; @@ -199,6 +402,7 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c) set_cpu_cap(c, X86_FEATURE_MFENCE_RDTSC); } +#ifdef CONFIG_X86_64 if (c->x86 == 0x10) { /* do this for boot cpu */ if (c == &boot_cpu_data) @@ -225,11 +429,42 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c) set_memory_4k((unsigned long)__va(tseg), 1); } } +#endif +} + +#ifdef CONFIG_X86_32 +static unsigned int __cpuinit amd_size_cache(struct cpuinfo_x86 *c, unsigned int size) +{ + /* AMD errata T13 (order #21922) */ + if ((c->x86 == 6)) { + if (c->x86_model == 3 && c->x86_mask == 0) /* Duron Rev A0 */ + size = 64; + if (c->x86_model == 4 && + (c->x86_mask == 0 || c->x86_mask == 1)) /* Tbird rev A1/A2 */ + size = 256; + } + return size; } +#endif static struct cpu_dev amd_cpu_dev __cpuinitdata = { .c_vendor = "AMD", .c_ident = { "AuthenticAMD" }, +#ifdef CONFIG_X86_32 + .c_models = { + { .vendor = X86_VENDOR_AMD, .family = 4, .model_names = + { + [3] = "486 DX/2", + [7] = "486 DX/2-WB", + [8] = "486 DX/4", + [9] = "486 DX/4-WB", + [14] = "Am5x86-WT", + [15] = "Am5x86-WB" + } + }, + }, + .c_size_cache = amd_size_cache, +#endif .c_early_init = early_init_amd, .c_init = init_amd, .c_x86_vendor = X86_VENDOR_AMD, -- cgit v1.2.3 From ff73152ced60871f7d5fb7dee52fa499902a3c6d Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Sun, 7 Sep 2008 17:58:56 -0700 Subject: x86: make 64 bit to use amd.c arch/x86/kernel/cpu/amd.c is now 100% identical to arch/x86/kernel/cpu/amd_64.c, so use amd.c on 64-bit too and fix up the namespace impact. Simplify the Kconfig glue as well. Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/Makefile | 3 +- arch/x86/kernel/cpu/amd_64.c | 473 ------------------------------------------- 2 files changed, 1 insertion(+), 475 deletions(-) delete mode 100644 arch/x86/kernel/cpu/amd_64.c (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile index d031f248dfc..510d1bcb058 100644 --- a/arch/x86/kernel/cpu/Makefile +++ b/arch/x86/kernel/cpu/Makefile @@ -10,8 +10,7 @@ obj-$(CONFIG_X86_64) += bugs_64.o obj-$(CONFIG_CPU_SUP_INTEL_32) += intel.o obj-$(CONFIG_CPU_SUP_INTEL_64) += intel_64.o -obj-$(CONFIG_CPU_SUP_AMD_32) += amd.o -obj-$(CONFIG_CPU_SUP_AMD_64) += amd_64.o +obj-$(CONFIG_CPU_SUP_AMD) += amd.o obj-$(CONFIG_CPU_SUP_CYRIX_32) += cyrix.o obj-$(CONFIG_CPU_SUP_CENTAUR_32) += centaur.o obj-$(CONFIG_CPU_SUP_CENTAUR_64) += centaur_64.o diff --git a/arch/x86/kernel/cpu/amd_64.c b/arch/x86/kernel/cpu/amd_64.c deleted file mode 100644 index 32e73520adf..00000000000 --- a/arch/x86/kernel/cpu/amd_64.c +++ /dev/null @@ -1,473 +0,0 @@ -#include -#include -#include - -#include -#include -#include - -#ifdef CONFIG_X86_64 -# include -# include -# include -#endif - -#include - -#include "cpu.h" - -#ifdef CONFIG_X86_32 -/* - * B step AMD K6 before B 9730xxxx have hardware bugs that can cause - * misexecution of code under Linux. Owners of such processors should - * contact AMD for precise details and a CPU swap. - * - * See http://www.multimania.com/poulot/k6bug.html - * http://www.amd.com/K6/k6docs/revgd.html - * - * The following test is erm.. interesting. AMD neglected to up - * the chip setting when fixing the bug but they also tweaked some - * performance at the same time.. - */ - -extern void vide(void); -__asm__(".align 4\nvide: ret"); - -static void __cpuinit init_amd_k5(struct cpuinfo_x86 *c) -{ -/* - * General Systems BIOSen alias the cpu frequency registers - * of the Elan at 0x000df000. Unfortuantly, one of the Linux - * drivers subsequently pokes it, and changes the CPU speed. - * Workaround : Remove the unneeded alias. - */ -#define CBAR (0xfffc) /* Configuration Base Address (32-bit) */ -#define CBAR_ENB (0x80000000) -#define CBAR_KEY (0X000000CB) - if (c->x86_model == 9 || c->x86_model == 10) { - if (inl (CBAR) & CBAR_ENB) - outl (0 | CBAR_KEY, CBAR); - } -} - - -static void __cpuinit init_amd_k6(struct cpuinfo_x86 *c) -{ - u32 l, h; - int mbytes = num_physpages >> (20-PAGE_SHIFT); - - if (c->x86_model < 6) { - /* Based on AMD doc 20734R - June 2000 */ - if (c->x86_model == 0) { - clear_cpu_cap(c, X86_FEATURE_APIC); - set_cpu_cap(c, X86_FEATURE_PGE); - } - return; - } - - if (c->x86_model == 6 && c->x86_mask == 1) { - const int K6_BUG_LOOP = 1000000; - int n; - void (*f_vide)(void); - unsigned long d, d2; - - printk(KERN_INFO "AMD K6 stepping B detected - "); - - /* - * It looks like AMD fixed the 2.6.2 bug and improved indirect - * calls at the same time. - */ - - n = K6_BUG_LOOP; - f_vide = vide; - rdtscl(d); - while (n--) - f_vide(); - rdtscl(d2); - d = d2-d; - - if (d > 20*K6_BUG_LOOP) - printk("system stability may be impaired when more than 32 MB are used.\n"); - else - printk("probably OK (after B9730xxxx).\n"); - printk(KERN_INFO "Please see http://membres.lycos.fr/poulot/k6bug.html\n"); - } - - /* K6 with old style WHCR */ - if (c->x86_model < 8 || - (c->x86_model == 8 && c->x86_mask < 8)) { - /* We can only write allocate on the low 508Mb */ - if (mbytes > 508) - mbytes = 508; - - rdmsr(MSR_K6_WHCR, l, h); - if ((l&0x0000FFFF) == 0) { - unsigned long flags; - l = (1<<0)|((mbytes/4)<<1); - local_irq_save(flags); - wbinvd(); - wrmsr(MSR_K6_WHCR, l, h); - local_irq_restore(flags); - printk(KERN_INFO "Enabling old style K6 write allocation for %d Mb\n", - mbytes); - } - return; - } - - if ((c->x86_model == 8 && c->x86_mask > 7) || - c->x86_model == 9 || c->x86_model == 13) { - /* The more serious chips .. */ - - if (mbytes > 4092) - mbytes = 4092; - - rdmsr(MSR_K6_WHCR, l, h); - if ((l&0xFFFF0000) == 0) { - unsigned long flags; - l = ((mbytes>>2)<<22)|(1<<16); - local_irq_save(flags); - wbinvd(); - wrmsr(MSR_K6_WHCR, l, h); - local_irq_restore(flags); - printk(KERN_INFO "Enabling new style K6 write allocation for %d Mb\n", - mbytes); - } - - return; - } - - if (c->x86_model == 10) { - /* AMD Geode LX is model 10 */ - /* placeholder for any needed mods */ - return; - } -} - -static void __cpuinit init_amd_k7(struct cpuinfo_x86 *c) -{ - u32 l, h; - - /* - * Bit 15 of Athlon specific MSR 15, needs to be 0 - * to enable SSE on Palomino/Morgan/Barton CPU's. - * If the BIOS didn't enable it already, enable it here. - */ - if (c->x86_model >= 6 && c->x86_model <= 10) { - if (!cpu_has(c, X86_FEATURE_XMM)) { - printk(KERN_INFO "Enabling disabled K7/SSE Support.\n"); - rdmsr(MSR_K7_HWCR, l, h); - l &= ~0x00008000; - wrmsr(MSR_K7_HWCR, l, h); - set_cpu_cap(c, X86_FEATURE_XMM); - } - } - - /* - * It's been determined by AMD that Athlons since model 8 stepping 1 - * are more robust with CLK_CTL set to 200xxxxx instead of 600xxxxx - * As per AMD technical note 27212 0.2 - */ - if ((c->x86_model == 8 && c->x86_mask >= 1) || (c->x86_model > 8)) { - rdmsr(MSR_K7_CLK_CTL, l, h); - if ((l & 0xfff00000) != 0x20000000) { - printk ("CPU: CLK_CTL MSR was %x. Reprogramming to %x\n", l, - ((l & 0x000fffff)|0x20000000)); - wrmsr(MSR_K7_CLK_CTL, (l & 0x000fffff)|0x20000000, h); - } - } - - set_cpu_cap(c, X86_FEATURE_K7); -} -#endif - -#if defined(CONFIG_NUMA) && defined(CONFIG_X86_64) -static int __cpuinit nearby_node(int apicid) -{ - int i, node; - - for (i = apicid - 1; i >= 0; i--) { - node = apicid_to_node[i]; - if (node != NUMA_NO_NODE && node_online(node)) - return node; - } - for (i = apicid + 1; i < MAX_LOCAL_APIC; i++) { - node = apicid_to_node[i]; - if (node != NUMA_NO_NODE && node_online(node)) - return node; - } - return first_node(node_online_map); /* Shouldn't happen */ -} -#endif - -/* - * On a AMD dual core setup the lower bits of the APIC id distingush the cores. - * Assumes number of cores is a power of two. - */ -static void __cpuinit amd_detect_cmp(struct cpuinfo_x86 *c) -{ -#ifdef CONFIG_X86_HT - unsigned bits; - - bits = c->x86_coreid_bits; - - /* Low order bits define the core id (index of core in socket) */ - c->cpu_core_id = c->initial_apicid & ((1 << bits)-1); - /* Convert the initial APIC ID into the socket ID */ - c->phys_proc_id = c->initial_apicid >> bits; -#endif -} - -static void __cpuinit srat_detect_node(struct cpuinfo_x86 *c) -{ -#if defined(CONFIG_NUMA) && defined(CONFIG_X86_64) - int cpu = smp_processor_id(); - int node; - unsigned apicid = hard_smp_processor_id(); - - node = c->phys_proc_id; - if (apicid_to_node[apicid] != NUMA_NO_NODE) - node = apicid_to_node[apicid]; - if (!node_online(node)) { - /* Two possibilities here: - - The CPU is missing memory and no node was created. - In that case try picking one from a nearby CPU - - The APIC IDs differ from the HyperTransport node IDs - which the K8 northbridge parsing fills in. - Assume they are all increased by a constant offset, - but in the same order as the HT nodeids. - If that doesn't result in a usable node fall back to the - path for the previous case. */ - - int ht_nodeid = c->initial_apicid; - - if (ht_nodeid >= 0 && - apicid_to_node[ht_nodeid] != NUMA_NO_NODE) - node = apicid_to_node[ht_nodeid]; - /* Pick a nearby node */ - if (!node_online(node)) - node = nearby_node(apicid); - } - numa_set_node(cpu, node); - - printk(KERN_INFO "CPU %d/%x -> Node %d\n", cpu, apicid, node); -#endif -} - -static void __cpuinit early_init_amd_mc(struct cpuinfo_x86 *c) -{ -#ifdef CONFIG_X86_HT - unsigned bits, ecx; - - /* Multi core CPU? */ - if (c->extended_cpuid_level < 0x80000008) - return; - - ecx = cpuid_ecx(0x80000008); - - c->x86_max_cores = (ecx & 0xff) + 1; - - /* CPU telling us the core id bits shift? */ - bits = (ecx >> 12) & 0xF; - - /* Otherwise recompute */ - if (bits == 0) { - while ((1 << bits) < c->x86_max_cores) - bits++; - } - - c->x86_coreid_bits = bits; -#endif -} - -static void __cpuinit early_init_amd(struct cpuinfo_x86 *c) -{ - early_init_amd_mc(c); - - /* c->x86_power is 8000_0007 edx. Bit 8 is constant TSC */ - if (c->x86_power & (1<<8)) - set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); - -#ifdef CONFIG_X86_64 - set_cpu_cap(c, X86_FEATURE_SYSCALL32); -#else - /* Set MTRR capability flag if appropriate */ - if (c->x86 == 5) - if (c->x86_model == 13 || c->x86_model == 9 || - (c->x86_model == 8 && c->x86_mask >= 8)) - set_cpu_cap(c, X86_FEATURE_K6_MTRR); -#endif -} - -static void __cpuinit init_amd(struct cpuinfo_x86 *c) -{ -#ifdef CONFIG_SMP - unsigned long long value; - - /* - * Disable TLB flush filter by setting HWCR.FFDIS on K8 - * bit 6 of msr C001_0015 - * - * Errata 63 for SH-B3 steppings - * Errata 122 for all steppings (F+ have it disabled by default) - */ - if (c->x86 == 0xf) { - rdmsrl(MSR_K7_HWCR, value); - value |= 1 << 6; - wrmsrl(MSR_K7_HWCR, value); - } -#endif - - early_init_amd(c); - - /* - * Bit 31 in normal CPUID used for nonstandard 3DNow ID; - * 3DNow is IDd by bit 31 in extended CPUID (1*32+31) anyway - */ - clear_cpu_cap(c, 0*32+31); - -#ifdef CONFIG_X86_64 - /* On C+ stepping K8 rep microcode works well for copy/memset */ - if (c->x86 == 0xf) { - u32 level; - - level = cpuid_eax(1); - if((level >= 0x0f48 && level < 0x0f50) || level >= 0x0f58) - set_cpu_cap(c, X86_FEATURE_REP_GOOD); - } - if (c->x86 == 0x10 || c->x86 == 0x11) - set_cpu_cap(c, X86_FEATURE_REP_GOOD); -#else - - /* - * FIXME: We should handle the K5 here. Set up the write - * range and also turn on MSR 83 bits 4 and 31 (write alloc, - * no bus pipeline) - */ - - switch (c->x86) { - case 4: - init_amd_k5(c); - break; - case 5: - init_amd_k6(c); - break; - case 6: /* An Athlon/Duron */ - init_amd_k7(c); - break; - } - - /* K6s reports MCEs but don't actually have all the MSRs */ - if (c->x86 < 6) - clear_cpu_cap(c, X86_FEATURE_MCE); -#endif - - /* Enable workaround for FXSAVE leak */ - if (c->x86 >= 6) - set_cpu_cap(c, X86_FEATURE_FXSAVE_LEAK); - - if (!c->x86_model_id[0]) { - switch (c->x86) { - case 0xf: - /* Should distinguish Models here, but this is only - a fallback anyways. */ - strcpy(c->x86_model_id, "Hammer"); - break; - } - } - - display_cacheinfo(c); - - /* Multi core CPU? */ - if (c->extended_cpuid_level >= 0x80000008) { - amd_detect_cmp(c); - srat_detect_node(c); - } - -#ifdef CONFIG_X86_32 - detect_ht(c); -#endif - - if (c->extended_cpuid_level >= 0x80000006) { - if ((c->x86 >= 0x0f) && (cpuid_edx(0x80000006) & 0xf000)) - num_cache_leaves = 4; - else - num_cache_leaves = 3; - } - - if (c->x86 >= 0xf && c->x86 <= 0x11) - set_cpu_cap(c, X86_FEATURE_K8); - - if (cpu_has_xmm2) { - /* MFENCE stops RDTSC speculation */ - set_cpu_cap(c, X86_FEATURE_MFENCE_RDTSC); - } - -#ifdef CONFIG_X86_64 - if (c->x86 == 0x10) { - /* do this for boot cpu */ - if (c == &boot_cpu_data) - check_enable_amd_mmconf_dmi(); - - fam10h_check_enable_mmcfg(); - } - - if (c == &boot_cpu_data && c->x86 >= 0xf && c->x86 <= 0x11) { - unsigned long long tseg; - - /* - * Split up direct mapping around the TSEG SMM area. - * Don't do it for gbpages because there seems very little - * benefit in doing so. - */ - if (!rdmsrl_safe(MSR_K8_TSEG_ADDR, &tseg)) { - printk(KERN_DEBUG "tseg: %010llx\n", tseg); - if ((tseg>>PMD_SHIFT) < - (max_low_pfn_mapped>>(PMD_SHIFT-PAGE_SHIFT)) || - ((tseg>>PMD_SHIFT) < - (max_pfn_mapped>>(PMD_SHIFT-PAGE_SHIFT)) && - (tseg>>PMD_SHIFT) >= (1ULL<<(32 - PMD_SHIFT)))) - set_memory_4k((unsigned long)__va(tseg), 1); - } - } -#endif -} - -#ifdef CONFIG_X86_32 -static unsigned int __cpuinit amd_size_cache(struct cpuinfo_x86 *c, unsigned int size) -{ - /* AMD errata T13 (order #21922) */ - if ((c->x86 == 6)) { - if (c->x86_model == 3 && c->x86_mask == 0) /* Duron Rev A0 */ - size = 64; - if (c->x86_model == 4 && - (c->x86_mask == 0 || c->x86_mask == 1)) /* Tbird rev A1/A2 */ - size = 256; - } - return size; -} -#endif - -static struct cpu_dev amd_cpu_dev __cpuinitdata = { - .c_vendor = "AMD", - .c_ident = { "AuthenticAMD" }, -#ifdef CONFIG_X86_32 - .c_models = { - { .vendor = X86_VENDOR_AMD, .family = 4, .model_names = - { - [3] = "486 DX/2", - [7] = "486 DX/2-WB", - [8] = "486 DX/4", - [9] = "486 DX/4-WB", - [14] = "Am5x86-WT", - [15] = "Am5x86-WB" - } - }, - }, - .c_size_cache = amd_size_cache, -#endif - .c_early_init = early_init_amd, - .c_init = init_amd, - .c_x86_vendor = X86_VENDOR_AMD, -}; - -cpu_dev_register(amd_cpu_dev); -- cgit v1.2.3 From f69feff720497237ae9dd2f4604921bd3080c421 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Sun, 7 Sep 2008 17:58:58 -0700 Subject: x86: little clean up of intel.c/intel_64.c Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/intel.c | 7 +++---- arch/x86/kernel/cpu/intel_64.c | 8 +++----- 2 files changed, 6 insertions(+), 9 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 959417b8cd6..4a8ac9d4ff5 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -76,7 +76,7 @@ static void __cpuinit Intel_errata_workarounds(struct cpuinfo_x86 *c) /* * find out the number of processor cores on the die */ -static int __cpuinit num_cpu_cores(struct cpuinfo_x86 *c) +static int __cpuinit intel_num_cpu_cores(struct cpuinfo_x86 *c) { unsigned int eax, ebx, ecx, edx; @@ -183,7 +183,7 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c) * let's use the legacy cpuid vector 0x1 and 0x4 for topology * detection. */ - c->x86_max_cores = num_cpu_cores(c); + c->x86_max_cores = intel_num_cpu_cores(c); detect_ht(c); } @@ -210,9 +210,8 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c) if (cpu_has_xmm2) set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC); - if (c->x86 == 15) { + if (c->x86 == 15) set_cpu_cap(c, X86_FEATURE_P4); - } if (c->x86 == 6) set_cpu_cap(c, X86_FEATURE_P3); if (cpu_has_ds) { diff --git a/arch/x86/kernel/cpu/intel_64.c b/arch/x86/kernel/cpu/intel_64.c index 14a2cd98d48..aef4f282631 100644 --- a/arch/x86/kernel/cpu/intel_64.c +++ b/arch/x86/kernel/cpu/intel_64.c @@ -71,17 +71,15 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c) set_cpu_cap(c, X86_FEATURE_BTS); if (!(l1 & (1<<12))) set_cpu_cap(c, X86_FEATURE_PEBS); - } - - - if (cpu_has_bts) ds_init_intel(c); + } if (c->x86 == 15) c->x86_cache_alignment = c->x86_clflush_size * 2; if (c->x86 == 6) set_cpu_cap(c, X86_FEATURE_REP_GOOD); - set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC); + if (cpu_has_xmm2) + set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC); detect_extended_topology(c); if (!cpu_has(c, X86_FEATURE_XTOPOLOGY)) -- cgit v1.2.3 From de9f521fb72dd091aa4989fe2e004ecf4785a850 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Mon, 8 Sep 2008 18:10:11 +0900 Subject: x86: move pci-nommu's dma_mask check to common code The check to see if dev->dma_mask is NULL in pci-nommu is more appropriate for dma_alloc_coherent(). Signed-off-by: FUJITA Tomonori Acked-by: Joerg Roedel Signed-off-by: Ingo Molnar --- arch/x86/kernel/pci-nommu.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/pci-nommu.c b/arch/x86/kernel/pci-nommu.c index 73853d3fdca..0f51883cc6a 100644 --- a/arch/x86/kernel/pci-nommu.c +++ b/arch/x86/kernel/pci-nommu.c @@ -80,9 +80,6 @@ nommu_alloc_coherent(struct device *hwdev, size_t size, int node; struct page *page; - if (hwdev->dma_mask == NULL) - return NULL; - gfp &= ~(__GFP_DMA | __GFP_HIGHMEM | __GFP_DMA32); gfp |= __GFP_ZERO; -- cgit v1.2.3 From 8a53ad675f86ee003482b557da944e070d3c4859 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Mon, 8 Sep 2008 18:10:12 +0900 Subject: x86: fix nommu_alloc_coherent allocation with NULL device argument We need to use __GFP_DMA for NULL device argument (fallback_dev) with pci-nommu. It's a hack for ISA (and some old code) so we need to use GFP_DMA. Signed-off-by: FUJITA Tomonori Acked-by: Joerg Roedel Signed-off-by: Ingo Molnar --- arch/x86/kernel/pci-nommu.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/pci-nommu.c b/arch/x86/kernel/pci-nommu.c index 0f51883cc6a..ada1c87cafc 100644 --- a/arch/x86/kernel/pci-nommu.c +++ b/arch/x86/kernel/pci-nommu.c @@ -80,7 +80,6 @@ nommu_alloc_coherent(struct device *hwdev, size_t size, int node; struct page *page; - gfp &= ~(__GFP_DMA | __GFP_HIGHMEM | __GFP_DMA32); gfp |= __GFP_ZERO; dma_mask = hwdev->coherent_dma_mask; @@ -93,7 +92,7 @@ nommu_alloc_coherent(struct device *hwdev, size_t size, node = dev_to_node(hwdev); #ifdef CONFIG_X86_64 - if (dma_mask <= DMA_32BIT_MASK) + if (dma_mask <= DMA_32BIT_MASK && !(gfp & GFP_DMA)) gfp |= GFP_DMA32; #endif -- cgit v1.2.3 From 823e7e8c6ef12cd1943dc42fe7595ca74e8cc3d7 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Mon, 8 Sep 2008 18:10:13 +0900 Subject: x86: dma_alloc_coherent sets gfp flags properly Non real IOMMU implemenations (which doesn't do virtual mappings, e.g. swiotlb, pci-nommu, etc) need to use proper gfp flags and dma_mask to allocate pages in their own dma_alloc_coherent() (allocated page need to be suitable for device's coherent_dma_mask). This patch makes dma_alloc_coherent do this job so that IOMMUs don't need to take care of it any more. Real IOMMU implemenataions can simply ignore the gfp flags. Signed-off-by: FUJITA Tomonori Acked-by: Joerg Roedel Signed-off-by: Ingo Molnar --- arch/x86/kernel/pci-nommu.c | 19 ++----------------- 1 file changed, 2 insertions(+), 17 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/pci-nommu.c b/arch/x86/kernel/pci-nommu.c index ada1c87cafc..8e398b56f50 100644 --- a/arch/x86/kernel/pci-nommu.c +++ b/arch/x86/kernel/pci-nommu.c @@ -80,26 +80,11 @@ nommu_alloc_coherent(struct device *hwdev, size_t size, int node; struct page *page; - gfp |= __GFP_ZERO; - - dma_mask = hwdev->coherent_dma_mask; - if (!dma_mask) - dma_mask = *(hwdev->dma_mask); + dma_mask = dma_alloc_coherent_mask(hwdev, gfp); - if (dma_mask < DMA_24BIT_MASK) - return NULL; + gfp |= __GFP_ZERO; node = dev_to_node(hwdev); - -#ifdef CONFIG_X86_64 - if (dma_mask <= DMA_32BIT_MASK && !(gfp & GFP_DMA)) - gfp |= GFP_DMA32; -#endif - - /* No alloc-free penalty for ISA devices */ - if (dma_mask == DMA_24BIT_MASK) - gfp |= GFP_DMA; - again: page = alloc_pages_node(node, gfp, get_order(size)); if (!page) -- cgit v1.2.3 From e545a6140b698b2494daf0b32107bdcc5e901390 Mon Sep 17 00:00:00 2001 From: Manfred Spraul Date: Sun, 7 Sep 2008 16:57:22 +0200 Subject: kernel/cpu.c: create a CPU_STARTING cpu_chain notifier Right now, there is no notifier that is called on a new cpu, before the new cpu begins processing interrupts/softirqs. Various kernel function would need that notification, e.g. kvm works around by calling smp_call_function_single(), rcu polls cpu_online_map. The patch adds a CPU_STARTING notification. It also adds a helper function that sends the message to all cpu_chain handlers. Tested on x86-64. All other archs are untested. Especially on sparc, I'm not sure if I got it right. Signed-off-by: Manfred Spraul Signed-off-by: Ingo Molnar --- arch/x86/kernel/smpboot.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 7985c5b3f91..0b8261c3cac 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -257,6 +257,7 @@ static void __cpuinit smp_callin(void) end_local_APIC_setup(); map_cpu_to_logical_apicid(); + notify_cpu_starting(cpuid); /* * Get our bogomips. * -- cgit v1.2.3 From d6be118a97ce51ca84035270f91c2bccecbfac5f Mon Sep 17 00:00:00 2001 From: Prarit Bhargava Date: Tue, 9 Sep 2008 09:56:08 -0400 Subject: x86: fix memmap=exactmap boot argument When using kdump modifying the e820 map is yielding strange results. For example starting with BIOS-provided physical RAM map: BIOS-e820: 0000000000000100 - 0000000000093400 (usable) BIOS-e820: 0000000000093400 - 00000000000a0000 (reserved) BIOS-e820: 0000000000100000 - 000000003fee0000 (usable) BIOS-e820: 000000003fee0000 - 000000003fef3000 (ACPI data) BIOS-e820: 000000003fef3000 - 000000003ff80000 (ACPI NVS) BIOS-e820: 000000003ff80000 - 0000000040000000 (reserved) BIOS-e820: 00000000e0000000 - 00000000f0000000 (reserved) BIOS-e820: 00000000fec00000 - 00000000fec10000 (reserved) BIOS-e820: 00000000fee00000 - 00000000fee01000 (reserved) BIOS-e820: 00000000ff000000 - 0000000100000000 (reserved) and booting with args memmap=exactmap memmap=640K@0K memmap=5228K@16384K memmap=125188K@22252K memmap=76K#1047424K memmap=564K#1047500K resulted in: user-defined physical RAM map: user: 0000000000000000 - 0000000000093400 (usable) user: 0000000000093400 - 00000000000a0000 (reserved) user: 0000000000100000 - 000000003fee0000 (usable) user: 000000003fee0000 - 000000003fef3000 (ACPI data) user: 000000003fef3000 - 000000003ff80000 (ACPI NVS) user: 000000003ff80000 - 0000000040000000 (reserved) user: 00000000e0000000 - 00000000f0000000 (reserved) user: 00000000fec00000 - 00000000fec10000 (reserved) user: 00000000fee00000 - 00000000fee01000 (reserved) user: 00000000ff000000 - 0000000100000000 (reserved) But should have resulted in: user-defined physical RAM map: user: 0000000000000000 - 00000000000a0000 (usable) user: 0000000001000000 - 000000000151b000 (usable) user: 00000000015bb000 - 0000000008ffc000 (usable) user: 000000003fee0000 - 000000003ff80000 (ACPI data) This is happening because of an improper usage of strcmp() in the e820 parsing code. The strcmp() always returns !0 and never resets the value for e820.nr_map and returns an incorrect user-defined map. This patch fixes the problem. Signed-off-by: Prarit Bhargava Signed-off-by: Ingo Molnar --- arch/x86/kernel/e820.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c index 9af89078f7b..66e48aa2dd1 100644 --- a/arch/x86/kernel/e820.c +++ b/arch/x86/kernel/e820.c @@ -1203,7 +1203,7 @@ static int __init parse_memmap_opt(char *p) if (!p) return -EINVAL; - if (!strcmp(p, "exactmap")) { + if (!strncmp(p, "exactmap", 8)) { #ifdef CONFIG_CRASH_DUMP /* * If we are doing a crash dump, we still need to know -- cgit v1.2.3 From 185f3b9da24c09c26b784591ed354fe57998a7b1 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Tue, 9 Sep 2008 16:40:35 -0700 Subject: x86: make intel.c have 64-bit support code prepare for unification. Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/intel.c | 119 ++++++++++++++++++++++++++++++-------------- 1 file changed, 83 insertions(+), 36 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index a66989586a8..365a008080c 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -15,6 +15,11 @@ #include #include +#ifdef CONFIG_X86_64 +#include +#include +#endif + #include "cpu.h" #ifdef CONFIG_X86_LOCAL_APIC @@ -25,14 +30,20 @@ static void __cpuinit early_init_intel(struct cpuinfo_x86 *c) { - /* Netburst reports 64 bytes clflush size, but does IO in 128 bytes */ - if (c->x86 == 15 && c->x86_cache_alignment == 64) - c->x86_cache_alignment = 128; if ((c->x86 == 0xf && c->x86_model >= 0x03) || (c->x86 == 0x6 && c->x86_model >= 0x0e)) set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); + +#ifdef CONFIG_X86_64 + set_cpu_cap(c, X86_FEATURE_SYSENTER32); +#else + /* Netburst reports 64 bytes clflush size, but does IO in 128 bytes */ + if (c->x86 == 15 && c->x86_cache_alignment == 64) + c->x86_cache_alignment = 128; +#endif } +#ifdef CONFIG_X86_32 /* * Early probe support logic for ppro memory erratum #50 * @@ -73,6 +84,40 @@ static void __cpuinit Intel_errata_workarounds(struct cpuinfo_x86 *c) } + +#ifdef CONFIG_X86_F00F_BUG +static void __cpuinit trap_init_f00f_bug(void) +{ + __set_fixmap(FIX_F00F_IDT, __pa(&idt_table), PAGE_KERNEL_RO); + + /* + * Update the IDT descriptor and reload the IDT so that + * it uses the read-only mapped virtual address. + */ + idt_descr.address = fix_to_virt(FIX_F00F_IDT); + load_idt(&idt_descr); +} +#endif +#endif + +static void __cpuinit srat_detect_node(void) +{ +#if defined(CONFIG_NUMA) && defined(CONFIG_X86_64) + unsigned node; + int cpu = smp_processor_id(); + int apicid = hard_smp_processor_id(); + + /* Don't do the funky fallback heuristics the AMD version employs + for now. */ + node = apicid_to_node[apicid]; + if (node == NUMA_NO_NODE || !node_online(node)) + node = first_node(node_online_map); + numa_set_node(cpu, node); + + printk(KERN_INFO "CPU %d/%x -> Node %d\n", cpu, apicid, node); +#endif +} + /* * find out the number of processor cores on the die */ @@ -91,20 +136,6 @@ static int __cpuinit intel_num_cpu_cores(struct cpuinfo_x86 *c) return 1; } -#ifdef CONFIG_X86_F00F_BUG -static void __cpuinit trap_init_f00f_bug(void) -{ - __set_fixmap(FIX_F00F_IDT, __pa(&idt_table), PAGE_KERNEL_RO); - - /* - * Update the IDT descriptor and reload the IDT so that - * it uses the read-only mapped virtual address. - */ - idt_descr.address = fix_to_virt(FIX_F00F_IDT); - load_idt(&idt_descr); -} -#endif - static void __cpuinit init_intel(struct cpuinfo_x86 *c) { unsigned int l2 = 0; @@ -139,6 +170,7 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c) set_cpu_cap(c, X86_FEATURE_ARCH_PERFMON); } +#ifdef CONFIG_X86_32 /* SEP CPUID bug: Pentium Pro reports SEP but doesn't have it until model 3 mask 3 */ if ((c->x86<<8 | c->x86_model<<4 | c->x86_mask) < 0x633) clear_cpu_cap(c, X86_FEATURE_SEP); @@ -176,18 +208,6 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c) if (p) strcpy(c->x86_model_id, p); - detect_extended_topology(c); - - if (!cpu_has(c, X86_FEATURE_XTOPOLOGY)) { - /* - * let's use the legacy cpuid vector 0x1 and 0x4 for topology - * detection. - */ - c->x86_max_cores = intel_num_cpu_cores(c); - detect_ht(c); - } - - /* Work around errata */ Intel_errata_workarounds(c); #ifdef CONFIG_X86_INTEL_USERCOPY @@ -206,14 +226,12 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c) movsl_mask.mask = 7; break; } +#endif + #endif if (cpu_has_xmm2) set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC); - if (c->x86 == 15) - set_cpu_cap(c, X86_FEATURE_P4); - if (c->x86 == 6) - set_cpu_cap(c, X86_FEATURE_P3); if (cpu_has_ds) { unsigned int l1; rdmsr(MSR_IA32_MISC_ENABLE, l1, l2); @@ -224,6 +242,17 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c) ds_init_intel(c); } +#ifdef CONFIG_X86_64 + if (c->x86 == 15) + c->x86_cache_alignment = c->x86_clflush_size * 2; + if (c->x86 == 6) + set_cpu_cap(c, X86_FEATURE_REP_GOOD); +#else + if (c->x86 == 15) + set_cpu_cap(c, X86_FEATURE_P4); + if (c->x86 == 6) + set_cpu_cap(c, X86_FEATURE_P3); + if (cpu_has_bts) ptrace_bts_init_intel(c); @@ -240,8 +269,25 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c) #ifdef CONFIG_X86_NUMAQ numaq_tsc_disable(); #endif +#endif + + detect_extended_topology(c); + if (!cpu_has(c, X86_FEATURE_XTOPOLOGY)) { + /* + * let's use the legacy cpuid vector 0x1 and 0x4 for topology + * detection. + */ + c->x86_max_cores = intel_num_cpu_cores(c); +#ifdef CONFIG_X86_32 + detect_ht(c); +#endif + } + + /* Work around errata */ + srat_detect_node(); } +#ifdef CONFIG_X86_32 static unsigned int __cpuinit intel_size_cache(struct cpuinfo_x86 *c, unsigned int size) { /* @@ -254,10 +300,12 @@ static unsigned int __cpuinit intel_size_cache(struct cpuinfo_x86 *c, unsigned i size = 256; return size; } +#endif static struct cpu_dev intel_cpu_dev __cpuinitdata = { .c_vendor = "Intel", .c_ident = { "GenuineIntel" }, +#ifdef CONFIG_X86_32 .c_models = { { .vendor = X86_VENDOR_INTEL, .family = 4, .model_names = { @@ -307,13 +355,12 @@ static struct cpu_dev intel_cpu_dev __cpuinitdata = { } }, }, + .c_size_cache = intel_size_cache, +#endif .c_early_init = early_init_intel, .c_init = init_intel, - .c_size_cache = intel_size_cache, .c_x86_vendor = X86_VENDOR_INTEL, }; cpu_dev_register(intel_cpu_dev); -/* arch_initcall(intel_cpu_init); */ - -- cgit v1.2.3 From 58602c1681bdfa1a0deaa5574b8a72d6e30c0e97 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Tue, 9 Sep 2008 16:40:36 -0700 Subject: x86: make intel_64.c the same as intel.c No change in functionality intended - this only adds the 32-bit side. Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/intel_64.c | 301 ++++++++++++++++++++++++++++++++++++++--- 1 file changed, 284 insertions(+), 17 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/intel_64.c b/arch/x86/kernel/cpu/intel_64.c index aef4f282631..365a008080c 100644 --- a/arch/x86/kernel/cpu/intel_64.c +++ b/arch/x86/kernel/cpu/intel_64.c @@ -1,42 +1,108 @@ #include +#include + +#include +#include #include +#include +#include + #include +#include +#include +#include #include +#include +#include + +#ifdef CONFIG_X86_64 #include #include +#endif #include "cpu.h" +#ifdef CONFIG_X86_LOCAL_APIC +#include +#include +#include +#endif + static void __cpuinit early_init_intel(struct cpuinfo_x86 *c) { if ((c->x86 == 0xf && c->x86_model >= 0x03) || - (c->x86 == 0x6 && c->x86_model >= 0x0e)) + (c->x86 == 0x6 && c->x86_model >= 0x0e)) set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); +#ifdef CONFIG_X86_64 set_cpu_cap(c, X86_FEATURE_SYSENTER32); +#else + /* Netburst reports 64 bytes clflush size, but does IO in 128 bytes */ + if (c->x86 == 15 && c->x86_cache_alignment == 64) + c->x86_cache_alignment = 128; +#endif } +#ifdef CONFIG_X86_32 /* - * find out the number of processor cores on the die + * Early probe support logic for ppro memory erratum #50 + * + * This is called before we do cpu ident work */ -static int __cpuinit intel_num_cpu_cores(struct cpuinfo_x86 *c) -{ - unsigned int eax, t; - if (c->cpuid_level < 4) +int __cpuinit ppro_with_ram_bug(void) +{ + /* Uses data from early_cpu_detect now */ + if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL && + boot_cpu_data.x86 == 6 && + boot_cpu_data.x86_model == 1 && + boot_cpu_data.x86_mask < 8) { + printk(KERN_INFO "Pentium Pro with Errata#50 detected. Taking evasive action.\n"); return 1; + } + return 0; +} - cpuid_count(4, 0, &eax, &t, &t, &t); - if (eax & 0x1f) - return ((eax >> 26) + 1); - else - return 1; +/* + * P4 Xeon errata 037 workaround. + * Hardware prefetcher may cause stale data to be loaded into the cache. + */ +static void __cpuinit Intel_errata_workarounds(struct cpuinfo_x86 *c) +{ + unsigned long lo, hi; + + if ((c->x86 == 15) && (c->x86_model == 1) && (c->x86_mask == 1)) { + rdmsr(MSR_IA32_MISC_ENABLE, lo, hi); + if ((lo & (1<<9)) == 0) { + printk (KERN_INFO "CPU: C0 stepping P4 Xeon detected.\n"); + printk (KERN_INFO "CPU: Disabling hardware prefetching (Errata 037)\n"); + lo |= (1<<9); /* Disable hw prefetching */ + wrmsr (MSR_IA32_MISC_ENABLE, lo, hi); + } + } +} + + + +#ifdef CONFIG_X86_F00F_BUG +static void __cpuinit trap_init_f00f_bug(void) +{ + __set_fixmap(FIX_F00F_IDT, __pa(&idt_table), PAGE_KERNEL_RO); + + /* + * Update the IDT descriptor and reload the IDT so that + * it uses the read-only mapped virtual address. + */ + idt_descr.address = fix_to_virt(FIX_F00F_IDT); + load_idt(&idt_descr); } +#endif +#endif static void __cpuinit srat_detect_node(void) { -#ifdef CONFIG_NUMA +#if defined(CONFIG_NUMA) && defined(CONFIG_X86_64) unsigned node; int cpu = smp_processor_id(); int apicid = hard_smp_processor_id(); @@ -52,11 +118,51 @@ static void __cpuinit srat_detect_node(void) #endif } +/* + * find out the number of processor cores on the die + */ +static int __cpuinit intel_num_cpu_cores(struct cpuinfo_x86 *c) +{ + unsigned int eax, ebx, ecx, edx; + + if (c->cpuid_level < 4) + return 1; + + /* Intel has a non-standard dependency on %ecx for this CPUID level. */ + cpuid_count(4, 0, &eax, &ebx, &ecx, &edx); + if (eax & 0x1f) + return ((eax >> 26) + 1); + else + return 1; +} + static void __cpuinit init_intel(struct cpuinfo_x86 *c) { + unsigned int l2 = 0; + char *p = NULL; + early_init_intel(c); - init_intel_cacheinfo(c); +#ifdef CONFIG_X86_F00F_BUG + /* + * All current models of Pentium and Pentium with MMX technology CPUs + * have the F0 0F bug, which lets nonprivileged users lock up the system. + * Note that the workaround only should be initialized once... + */ + c->f00f_bug = 0; + if (!paravirt_enabled() && c->x86 == 5) { + static int f00f_workaround_enabled; + + c->f00f_bug = 1; + if (!f00f_workaround_enabled) { + trap_init_f00f_bug(); + printk(KERN_NOTICE "Intel Pentium with F0 0F bug - workaround enabled.\n"); + f00f_workaround_enabled = 1; + } + } +#endif + + l2 = init_intel_cacheinfo(c); if (c->cpuid_level > 9) { unsigned eax = cpuid_eax(10); /* Check for version and the number of counters */ @@ -64,8 +170,70 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c) set_cpu_cap(c, X86_FEATURE_ARCH_PERFMON); } +#ifdef CONFIG_X86_32 + /* SEP CPUID bug: Pentium Pro reports SEP but doesn't have it until model 3 mask 3 */ + if ((c->x86<<8 | c->x86_model<<4 | c->x86_mask) < 0x633) + clear_cpu_cap(c, X86_FEATURE_SEP); + + /* + * Names for the Pentium II/Celeron processors + * detectable only by also checking the cache size. + * Dixon is NOT a Celeron. + */ + if (c->x86 == 6) { + switch (c->x86_model) { + case 5: + if (c->x86_mask == 0) { + if (l2 == 0) + p = "Celeron (Covington)"; + else if (l2 == 256) + p = "Mobile Pentium II (Dixon)"; + } + break; + + case 6: + if (l2 == 128) + p = "Celeron (Mendocino)"; + else if (c->x86_mask == 0 || c->x86_mask == 5) + p = "Celeron-A"; + break; + + case 8: + if (l2 == 128) + p = "Celeron (Coppermine)"; + break; + } + } + + if (p) + strcpy(c->x86_model_id, p); + + Intel_errata_workarounds(c); + +#ifdef CONFIG_X86_INTEL_USERCOPY + /* + * Set up the preferred alignment for movsl bulk memory moves + */ + switch (c->x86) { + case 4: /* 486: untested */ + break; + case 5: /* Old Pentia: untested */ + break; + case 6: /* PII/PIII only like movsl with 8-byte alignment */ + movsl_mask.mask = 7; + break; + case 15: /* P4 is OK down to 8-byte alignment */ + movsl_mask.mask = 7; + break; + } +#endif + +#endif + + if (cpu_has_xmm2) + set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC); if (cpu_has_ds) { - unsigned int l1, l2; + unsigned int l1; rdmsr(MSR_IA32_MISC_ENABLE, l1, l2); if (!(l1 & (1<<11))) set_cpu_cap(c, X86_FEATURE_BTS); @@ -74,26 +242,125 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c) ds_init_intel(c); } +#ifdef CONFIG_X86_64 if (c->x86 == 15) c->x86_cache_alignment = c->x86_clflush_size * 2; if (c->x86 == 6) set_cpu_cap(c, X86_FEATURE_REP_GOOD); - if (cpu_has_xmm2) - set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC); +#else + if (c->x86 == 15) + set_cpu_cap(c, X86_FEATURE_P4); + if (c->x86 == 6) + set_cpu_cap(c, X86_FEATURE_P3); + + if (cpu_has_bts) + ptrace_bts_init_intel(c); + + /* + * See if we have a good local APIC by checking for buggy Pentia, + * i.e. all B steppings and the C2 stepping of P54C when using their + * integrated APIC (see 11AP erratum in "Pentium Processor + * Specification Update"). + */ + if (cpu_has_apic && (c->x86<<8 | c->x86_model<<4) == 0x520 && + (c->x86_mask < 0x6 || c->x86_mask == 0xb)) + set_cpu_cap(c, X86_FEATURE_11AP); + +#ifdef CONFIG_X86_NUMAQ + numaq_tsc_disable(); +#endif +#endif detect_extended_topology(c); - if (!cpu_has(c, X86_FEATURE_XTOPOLOGY)) + if (!cpu_has(c, X86_FEATURE_XTOPOLOGY)) { + /* + * let's use the legacy cpuid vector 0x1 and 0x4 for topology + * detection. + */ c->x86_max_cores = intel_num_cpu_cores(c); +#ifdef CONFIG_X86_32 + detect_ht(c); +#endif + } + /* Work around errata */ srat_detect_node(); } +#ifdef CONFIG_X86_32 +static unsigned int __cpuinit intel_size_cache(struct cpuinfo_x86 *c, unsigned int size) +{ + /* + * Intel PIII Tualatin. This comes in two flavours. + * One has 256kb of cache, the other 512. We have no way + * to determine which, so we use a boottime override + * for the 512kb model, and assume 256 otherwise. + */ + if ((c->x86 == 6) && (c->x86_model == 11) && (size == 0)) + size = 256; + return size; +} +#endif + static struct cpu_dev intel_cpu_dev __cpuinitdata = { .c_vendor = "Intel", .c_ident = { "GenuineIntel" }, +#ifdef CONFIG_X86_32 + .c_models = { + { .vendor = X86_VENDOR_INTEL, .family = 4, .model_names = + { + [0] = "486 DX-25/33", + [1] = "486 DX-50", + [2] = "486 SX", + [3] = "486 DX/2", + [4] = "486 SL", + [5] = "486 SX/2", + [7] = "486 DX/2-WB", + [8] = "486 DX/4", + [9] = "486 DX/4-WB" + } + }, + { .vendor = X86_VENDOR_INTEL, .family = 5, .model_names = + { + [0] = "Pentium 60/66 A-step", + [1] = "Pentium 60/66", + [2] = "Pentium 75 - 200", + [3] = "OverDrive PODP5V83", + [4] = "Pentium MMX", + [7] = "Mobile Pentium 75 - 200", + [8] = "Mobile Pentium MMX" + } + }, + { .vendor = X86_VENDOR_INTEL, .family = 6, .model_names = + { + [0] = "Pentium Pro A-step", + [1] = "Pentium Pro", + [3] = "Pentium II (Klamath)", + [4] = "Pentium II (Deschutes)", + [5] = "Pentium II (Deschutes)", + [6] = "Mobile Pentium II", + [7] = "Pentium III (Katmai)", + [8] = "Pentium III (Coppermine)", + [10] = "Pentium III (Cascades)", + [11] = "Pentium III (Tualatin)", + } + }, + { .vendor = X86_VENDOR_INTEL, .family = 15, .model_names = + { + [0] = "Pentium 4 (Unknown)", + [1] = "Pentium 4 (Willamette)", + [2] = "Pentium 4 (Northwood)", + [4] = "Pentium 4 (Foster)", + [5] = "Pentium 4 (Foster)", + } + }, + }, + .c_size_cache = intel_size_cache, +#endif .c_early_init = early_init_intel, .c_init = init_intel, .c_x86_vendor = X86_VENDOR_INTEL, }; cpu_dev_register(intel_cpu_dev); + -- cgit v1.2.3 From 879d792b66d633bbe466974f61d1acc9aa8c78eb Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Tue, 9 Sep 2008 16:40:37 -0700 Subject: x86: let intel 64-bit use intel.c now that arch/x86/kernel/cpu/intel_64.c and arch/x86/kernel/cpu/intel.c are equal, drop arch/x86/kernel/cpu/intel_64.c and fix up the glue. Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/Makefile | 3 +- arch/x86/kernel/cpu/intel_64.c | 366 ----------------------------------------- 2 files changed, 1 insertion(+), 368 deletions(-) delete mode 100644 arch/x86/kernel/cpu/intel_64.c (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile index 510d1bcb058..7f0b45a5d78 100644 --- a/arch/x86/kernel/cpu/Makefile +++ b/arch/x86/kernel/cpu/Makefile @@ -8,8 +8,7 @@ obj-y += proc.o capflags.o powerflags.o common.o obj-$(CONFIG_X86_32) += bugs.o cmpxchg.o obj-$(CONFIG_X86_64) += bugs_64.o -obj-$(CONFIG_CPU_SUP_INTEL_32) += intel.o -obj-$(CONFIG_CPU_SUP_INTEL_64) += intel_64.o +obj-$(CONFIG_CPU_SUP_INTEL) += intel.o obj-$(CONFIG_CPU_SUP_AMD) += amd.o obj-$(CONFIG_CPU_SUP_CYRIX_32) += cyrix.o obj-$(CONFIG_CPU_SUP_CENTAUR_32) += centaur.o diff --git a/arch/x86/kernel/cpu/intel_64.c b/arch/x86/kernel/cpu/intel_64.c deleted file mode 100644 index 365a008080c..00000000000 --- a/arch/x86/kernel/cpu/intel_64.c +++ /dev/null @@ -1,366 +0,0 @@ -#include -#include - -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include - -#ifdef CONFIG_X86_64 -#include -#include -#endif - -#include "cpu.h" - -#ifdef CONFIG_X86_LOCAL_APIC -#include -#include -#include -#endif - -static void __cpuinit early_init_intel(struct cpuinfo_x86 *c) -{ - if ((c->x86 == 0xf && c->x86_model >= 0x03) || - (c->x86 == 0x6 && c->x86_model >= 0x0e)) - set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); - -#ifdef CONFIG_X86_64 - set_cpu_cap(c, X86_FEATURE_SYSENTER32); -#else - /* Netburst reports 64 bytes clflush size, but does IO in 128 bytes */ - if (c->x86 == 15 && c->x86_cache_alignment == 64) - c->x86_cache_alignment = 128; -#endif -} - -#ifdef CONFIG_X86_32 -/* - * Early probe support logic for ppro memory erratum #50 - * - * This is called before we do cpu ident work - */ - -int __cpuinit ppro_with_ram_bug(void) -{ - /* Uses data from early_cpu_detect now */ - if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL && - boot_cpu_data.x86 == 6 && - boot_cpu_data.x86_model == 1 && - boot_cpu_data.x86_mask < 8) { - printk(KERN_INFO "Pentium Pro with Errata#50 detected. Taking evasive action.\n"); - return 1; - } - return 0; -} - - -/* - * P4 Xeon errata 037 workaround. - * Hardware prefetcher may cause stale data to be loaded into the cache. - */ -static void __cpuinit Intel_errata_workarounds(struct cpuinfo_x86 *c) -{ - unsigned long lo, hi; - - if ((c->x86 == 15) && (c->x86_model == 1) && (c->x86_mask == 1)) { - rdmsr(MSR_IA32_MISC_ENABLE, lo, hi); - if ((lo & (1<<9)) == 0) { - printk (KERN_INFO "CPU: C0 stepping P4 Xeon detected.\n"); - printk (KERN_INFO "CPU: Disabling hardware prefetching (Errata 037)\n"); - lo |= (1<<9); /* Disable hw prefetching */ - wrmsr (MSR_IA32_MISC_ENABLE, lo, hi); - } - } -} - - - -#ifdef CONFIG_X86_F00F_BUG -static void __cpuinit trap_init_f00f_bug(void) -{ - __set_fixmap(FIX_F00F_IDT, __pa(&idt_table), PAGE_KERNEL_RO); - - /* - * Update the IDT descriptor and reload the IDT so that - * it uses the read-only mapped virtual address. - */ - idt_descr.address = fix_to_virt(FIX_F00F_IDT); - load_idt(&idt_descr); -} -#endif -#endif - -static void __cpuinit srat_detect_node(void) -{ -#if defined(CONFIG_NUMA) && defined(CONFIG_X86_64) - unsigned node; - int cpu = smp_processor_id(); - int apicid = hard_smp_processor_id(); - - /* Don't do the funky fallback heuristics the AMD version employs - for now. */ - node = apicid_to_node[apicid]; - if (node == NUMA_NO_NODE || !node_online(node)) - node = first_node(node_online_map); - numa_set_node(cpu, node); - - printk(KERN_INFO "CPU %d/%x -> Node %d\n", cpu, apicid, node); -#endif -} - -/* - * find out the number of processor cores on the die - */ -static int __cpuinit intel_num_cpu_cores(struct cpuinfo_x86 *c) -{ - unsigned int eax, ebx, ecx, edx; - - if (c->cpuid_level < 4) - return 1; - - /* Intel has a non-standard dependency on %ecx for this CPUID level. */ - cpuid_count(4, 0, &eax, &ebx, &ecx, &edx); - if (eax & 0x1f) - return ((eax >> 26) + 1); - else - return 1; -} - -static void __cpuinit init_intel(struct cpuinfo_x86 *c) -{ - unsigned int l2 = 0; - char *p = NULL; - - early_init_intel(c); - -#ifdef CONFIG_X86_F00F_BUG - /* - * All current models of Pentium and Pentium with MMX technology CPUs - * have the F0 0F bug, which lets nonprivileged users lock up the system. - * Note that the workaround only should be initialized once... - */ - c->f00f_bug = 0; - if (!paravirt_enabled() && c->x86 == 5) { - static int f00f_workaround_enabled; - - c->f00f_bug = 1; - if (!f00f_workaround_enabled) { - trap_init_f00f_bug(); - printk(KERN_NOTICE "Intel Pentium with F0 0F bug - workaround enabled.\n"); - f00f_workaround_enabled = 1; - } - } -#endif - - l2 = init_intel_cacheinfo(c); - if (c->cpuid_level > 9) { - unsigned eax = cpuid_eax(10); - /* Check for version and the number of counters */ - if ((eax & 0xff) && (((eax>>8) & 0xff) > 1)) - set_cpu_cap(c, X86_FEATURE_ARCH_PERFMON); - } - -#ifdef CONFIG_X86_32 - /* SEP CPUID bug: Pentium Pro reports SEP but doesn't have it until model 3 mask 3 */ - if ((c->x86<<8 | c->x86_model<<4 | c->x86_mask) < 0x633) - clear_cpu_cap(c, X86_FEATURE_SEP); - - /* - * Names for the Pentium II/Celeron processors - * detectable only by also checking the cache size. - * Dixon is NOT a Celeron. - */ - if (c->x86 == 6) { - switch (c->x86_model) { - case 5: - if (c->x86_mask == 0) { - if (l2 == 0) - p = "Celeron (Covington)"; - else if (l2 == 256) - p = "Mobile Pentium II (Dixon)"; - } - break; - - case 6: - if (l2 == 128) - p = "Celeron (Mendocino)"; - else if (c->x86_mask == 0 || c->x86_mask == 5) - p = "Celeron-A"; - break; - - case 8: - if (l2 == 128) - p = "Celeron (Coppermine)"; - break; - } - } - - if (p) - strcpy(c->x86_model_id, p); - - Intel_errata_workarounds(c); - -#ifdef CONFIG_X86_INTEL_USERCOPY - /* - * Set up the preferred alignment for movsl bulk memory moves - */ - switch (c->x86) { - case 4: /* 486: untested */ - break; - case 5: /* Old Pentia: untested */ - break; - case 6: /* PII/PIII only like movsl with 8-byte alignment */ - movsl_mask.mask = 7; - break; - case 15: /* P4 is OK down to 8-byte alignment */ - movsl_mask.mask = 7; - break; - } -#endif - -#endif - - if (cpu_has_xmm2) - set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC); - if (cpu_has_ds) { - unsigned int l1; - rdmsr(MSR_IA32_MISC_ENABLE, l1, l2); - if (!(l1 & (1<<11))) - set_cpu_cap(c, X86_FEATURE_BTS); - if (!(l1 & (1<<12))) - set_cpu_cap(c, X86_FEATURE_PEBS); - ds_init_intel(c); - } - -#ifdef CONFIG_X86_64 - if (c->x86 == 15) - c->x86_cache_alignment = c->x86_clflush_size * 2; - if (c->x86 == 6) - set_cpu_cap(c, X86_FEATURE_REP_GOOD); -#else - if (c->x86 == 15) - set_cpu_cap(c, X86_FEATURE_P4); - if (c->x86 == 6) - set_cpu_cap(c, X86_FEATURE_P3); - - if (cpu_has_bts) - ptrace_bts_init_intel(c); - - /* - * See if we have a good local APIC by checking for buggy Pentia, - * i.e. all B steppings and the C2 stepping of P54C when using their - * integrated APIC (see 11AP erratum in "Pentium Processor - * Specification Update"). - */ - if (cpu_has_apic && (c->x86<<8 | c->x86_model<<4) == 0x520 && - (c->x86_mask < 0x6 || c->x86_mask == 0xb)) - set_cpu_cap(c, X86_FEATURE_11AP); - -#ifdef CONFIG_X86_NUMAQ - numaq_tsc_disable(); -#endif -#endif - - detect_extended_topology(c); - if (!cpu_has(c, X86_FEATURE_XTOPOLOGY)) { - /* - * let's use the legacy cpuid vector 0x1 and 0x4 for topology - * detection. - */ - c->x86_max_cores = intel_num_cpu_cores(c); -#ifdef CONFIG_X86_32 - detect_ht(c); -#endif - } - - /* Work around errata */ - srat_detect_node(); -} - -#ifdef CONFIG_X86_32 -static unsigned int __cpuinit intel_size_cache(struct cpuinfo_x86 *c, unsigned int size) -{ - /* - * Intel PIII Tualatin. This comes in two flavours. - * One has 256kb of cache, the other 512. We have no way - * to determine which, so we use a boottime override - * for the 512kb model, and assume 256 otherwise. - */ - if ((c->x86 == 6) && (c->x86_model == 11) && (size == 0)) - size = 256; - return size; -} -#endif - -static struct cpu_dev intel_cpu_dev __cpuinitdata = { - .c_vendor = "Intel", - .c_ident = { "GenuineIntel" }, -#ifdef CONFIG_X86_32 - .c_models = { - { .vendor = X86_VENDOR_INTEL, .family = 4, .model_names = - { - [0] = "486 DX-25/33", - [1] = "486 DX-50", - [2] = "486 SX", - [3] = "486 DX/2", - [4] = "486 SL", - [5] = "486 SX/2", - [7] = "486 DX/2-WB", - [8] = "486 DX/4", - [9] = "486 DX/4-WB" - } - }, - { .vendor = X86_VENDOR_INTEL, .family = 5, .model_names = - { - [0] = "Pentium 60/66 A-step", - [1] = "Pentium 60/66", - [2] = "Pentium 75 - 200", - [3] = "OverDrive PODP5V83", - [4] = "Pentium MMX", - [7] = "Mobile Pentium 75 - 200", - [8] = "Mobile Pentium MMX" - } - }, - { .vendor = X86_VENDOR_INTEL, .family = 6, .model_names = - { - [0] = "Pentium Pro A-step", - [1] = "Pentium Pro", - [3] = "Pentium II (Klamath)", - [4] = "Pentium II (Deschutes)", - [5] = "Pentium II (Deschutes)", - [6] = "Mobile Pentium II", - [7] = "Pentium III (Katmai)", - [8] = "Pentium III (Coppermine)", - [10] = "Pentium III (Cascades)", - [11] = "Pentium III (Tualatin)", - } - }, - { .vendor = X86_VENDOR_INTEL, .family = 15, .model_names = - { - [0] = "Pentium 4 (Unknown)", - [1] = "Pentium 4 (Willamette)", - [2] = "Pentium 4 (Northwood)", - [4] = "Pentium 4 (Foster)", - [5] = "Pentium 4 (Foster)", - } - }, - }, - .c_size_cache = intel_size_cache, -#endif - .c_early_init = early_init_intel, - .c_init = init_intel, - .c_x86_vendor = X86_VENDOR_INTEL, -}; - -cpu_dev_register(intel_cpu_dev); - -- cgit v1.2.3 From 4052704d92c3172ebc13cc0cc8df8ba2bd446a5c Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Tue, 9 Sep 2008 16:40:38 -0700 Subject: x86: intel.c put workaround for old cpus together consolidate the code some more. No change in functionality intended. Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/intel.c | 192 ++++++++++++++++++++++---------------------- 1 file changed, 98 insertions(+), 94 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 365a008080c..5f76bf139fd 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -63,15 +63,54 @@ int __cpuinit ppro_with_ram_bug(void) return 0; } +#ifdef CONFIG_X86_F00F_BUG +static void __cpuinit trap_init_f00f_bug(void) +{ + __set_fixmap(FIX_F00F_IDT, __pa(&idt_table), PAGE_KERNEL_RO); -/* - * P4 Xeon errata 037 workaround. - * Hardware prefetcher may cause stale data to be loaded into the cache. - */ -static void __cpuinit Intel_errata_workarounds(struct cpuinfo_x86 *c) + /* + * Update the IDT descriptor and reload the IDT so that + * it uses the read-only mapped virtual address. + */ + idt_descr.address = fix_to_virt(FIX_F00F_IDT); + load_idt(&idt_descr); +} +#endif + +static void __cpuinit intel_workarounds(struct cpuinfo_x86 *c) { unsigned long lo, hi; +#ifdef CONFIG_X86_F00F_BUG + /* + * All current models of Pentium and Pentium with MMX technology CPUs + * have the F0 0F bug, which lets nonprivileged users lock up the system. + * Note that the workaround only should be initialized once... + */ + c->f00f_bug = 0; + if (!paravirt_enabled() && c->x86 == 5) { + static int f00f_workaround_enabled; + + c->f00f_bug = 1; + if (!f00f_workaround_enabled) { + trap_init_f00f_bug(); + printk(KERN_NOTICE "Intel Pentium with F0 0F bug - workaround enabled.\n"); + f00f_workaround_enabled = 1; + } + } +#endif + + /* + * SEP CPUID bug: Pentium Pro reports SEP but doesn't have it until + * model 3 mask 3 + */ + if ((c->x86<<8 | c->x86_model<<4 | c->x86_mask) < 0x633) + clear_cpu_cap(c, X86_FEATURE_SEP); + + /* + * P4 Xeon errata 037 workaround. + * Hardware prefetcher may cause stale data to be loaded into the cache. + */ if ((c->x86 == 15) && (c->x86_model == 1) && (c->x86_mask == 1)) { rdmsr(MSR_IA32_MISC_ENABLE, lo, hi); if ((lo & (1<<9)) == 0) { @@ -81,23 +120,44 @@ static void __cpuinit Intel_errata_workarounds(struct cpuinfo_x86 *c) wrmsr (MSR_IA32_MISC_ENABLE, lo, hi); } } -} - + /* + * See if we have a good local APIC by checking for buggy Pentia, + * i.e. all B steppings and the C2 stepping of P54C when using their + * integrated APIC (see 11AP erratum in "Pentium Processor + * Specification Update"). + */ + if (cpu_has_apic && (c->x86<<8 | c->x86_model<<4) == 0x520 && + (c->x86_mask < 0x6 || c->x86_mask == 0xb)) + set_cpu_cap(c, X86_FEATURE_11AP); -#ifdef CONFIG_X86_F00F_BUG -static void __cpuinit trap_init_f00f_bug(void) -{ - __set_fixmap(FIX_F00F_IDT, __pa(&idt_table), PAGE_KERNEL_RO); +#ifdef CONFIG_X86_INTEL_USERCOPY /* - * Update the IDT descriptor and reload the IDT so that - * it uses the read-only mapped virtual address. + * Set up the preferred alignment for movsl bulk memory moves */ - idt_descr.address = fix_to_virt(FIX_F00F_IDT); - load_idt(&idt_descr); -} + switch (c->x86) { + case 4: /* 486: untested */ + break; + case 5: /* Old Pentia: untested */ + break; + case 6: /* PII/PIII only like movsl with 8-byte alignment */ + movsl_mask.mask = 7; + break; + case 15: /* P4 is OK down to 8-byte alignment */ + movsl_mask.mask = 7; + break; + } #endif + +#ifdef CONFIG_X86_NUMAQ + numaq_tsc_disable(); +#endif +} +#else +static void __cpuinit intel_workarounds(struct cpuinfo_x86 *c) +{ +} #endif static void __cpuinit srat_detect_node(void) @@ -139,28 +199,10 @@ static int __cpuinit intel_num_cpu_cores(struct cpuinfo_x86 *c) static void __cpuinit init_intel(struct cpuinfo_x86 *c) { unsigned int l2 = 0; - char *p = NULL; early_init_intel(c); -#ifdef CONFIG_X86_F00F_BUG - /* - * All current models of Pentium and Pentium with MMX technology CPUs - * have the F0 0F bug, which lets nonprivileged users lock up the system. - * Note that the workaround only should be initialized once... - */ - c->f00f_bug = 0; - if (!paravirt_enabled() && c->x86 == 5) { - static int f00f_workaround_enabled; - - c->f00f_bug = 1; - if (!f00f_workaround_enabled) { - trap_init_f00f_bug(); - printk(KERN_NOTICE "Intel Pentium with F0 0F bug - workaround enabled.\n"); - f00f_workaround_enabled = 1; - } - } -#endif + intel_workarounds(c); l2 = init_intel_cacheinfo(c); if (c->cpuid_level > 9) { @@ -170,17 +212,32 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c) set_cpu_cap(c, X86_FEATURE_ARCH_PERFMON); } -#ifdef CONFIG_X86_32 - /* SEP CPUID bug: Pentium Pro reports SEP but doesn't have it until model 3 mask 3 */ - if ((c->x86<<8 | c->x86_model<<4 | c->x86_mask) < 0x633) - clear_cpu_cap(c, X86_FEATURE_SEP); + if (cpu_has_xmm2) + set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC); + if (cpu_has_ds) { + unsigned int l1; + rdmsr(MSR_IA32_MISC_ENABLE, l1, l2); + if (!(l1 & (1<<11))) + set_cpu_cap(c, X86_FEATURE_BTS); + if (!(l1 & (1<<12))) + set_cpu_cap(c, X86_FEATURE_PEBS); + ds_init_intel(c); + } +#ifdef CONFIG_X86_64 + if (c->x86 == 15) + c->x86_cache_alignment = c->x86_clflush_size * 2; + if (c->x86 == 6) + set_cpu_cap(c, X86_FEATURE_REP_GOOD); +#else /* * Names for the Pentium II/Celeron processors * detectable only by also checking the cache size. * Dixon is NOT a Celeron. */ if (c->x86 == 6) { + char *p = NULL; + switch (c->x86_model) { case 5: if (c->x86_mask == 0) { @@ -203,51 +260,11 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c) p = "Celeron (Coppermine)"; break; } - } - if (p) - strcpy(c->x86_model_id, p); - - Intel_errata_workarounds(c); - -#ifdef CONFIG_X86_INTEL_USERCOPY - /* - * Set up the preferred alignment for movsl bulk memory moves - */ - switch (c->x86) { - case 4: /* 486: untested */ - break; - case 5: /* Old Pentia: untested */ - break; - case 6: /* PII/PIII only like movsl with 8-byte alignment */ - movsl_mask.mask = 7; - break; - case 15: /* P4 is OK down to 8-byte alignment */ - movsl_mask.mask = 7; - break; + if (p) + strcpy(c->x86_model_id, p); } -#endif - -#endif - if (cpu_has_xmm2) - set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC); - if (cpu_has_ds) { - unsigned int l1; - rdmsr(MSR_IA32_MISC_ENABLE, l1, l2); - if (!(l1 & (1<<11))) - set_cpu_cap(c, X86_FEATURE_BTS); - if (!(l1 & (1<<12))) - set_cpu_cap(c, X86_FEATURE_PEBS); - ds_init_intel(c); - } - -#ifdef CONFIG_X86_64 - if (c->x86 == 15) - c->x86_cache_alignment = c->x86_clflush_size * 2; - if (c->x86 == 6) - set_cpu_cap(c, X86_FEATURE_REP_GOOD); -#else if (c->x86 == 15) set_cpu_cap(c, X86_FEATURE_P4); if (c->x86 == 6) @@ -256,19 +273,6 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c) if (cpu_has_bts) ptrace_bts_init_intel(c); - /* - * See if we have a good local APIC by checking for buggy Pentia, - * i.e. all B steppings and the C2 stepping of P54C when using their - * integrated APIC (see 11AP erratum in "Pentium Processor - * Specification Update"). - */ - if (cpu_has_apic && (c->x86<<8 | c->x86_model<<4) == 0x520 && - (c->x86_mask < 0x6 || c->x86_mask == 0xb)) - set_cpu_cap(c, X86_FEATURE_11AP); - -#ifdef CONFIG_X86_NUMAQ - numaq_tsc_disable(); -#endif #endif detect_extended_topology(c); -- cgit v1.2.3 From ec70cae8698632917f06110fdb70c6364281ecc6 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Tue, 9 Sep 2008 16:40:39 -0700 Subject: x86: centaur_64.c remove duplicated setting of CONSTANT_TSC Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/centaur_64.c | 1 - 1 file changed, 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/centaur_64.c b/arch/x86/kernel/cpu/centaur_64.c index 0e5cf17a3c8..a1625f5a1e7 100644 --- a/arch/x86/kernel/cpu/centaur_64.c +++ b/arch/x86/kernel/cpu/centaur_64.c @@ -20,7 +20,6 @@ static void __cpuinit init_centaur(struct cpuinfo_x86 *c) if (c->x86 == 0x6 && c->x86_model >= 0xf) { c->x86_cache_alignment = c->x86_clflush_size * 2; - set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); set_cpu_cap(c, X86_FEATURE_REP_GOOD); } set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC); -- cgit v1.2.3 From b2994ef0de252d41f1511e5f87704bedda12dabc Mon Sep 17 00:00:00 2001 From: Hiroshi Shimamoto Date: Tue, 9 Sep 2008 17:18:50 -0700 Subject: x86: signal_64.c: clean up signal_fault() clean up and make signal_fault() same as 32bit. Signed-off-by: Hiroshi Shimamoto Signed-off-by: Ingo Molnar --- arch/x86/kernel/signal_64.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/signal_64.c b/arch/x86/kernel/signal_64.c index 8fbdd23d5cc..552a331313f 100644 --- a/arch/x86/kernel/signal_64.c +++ b/arch/x86/kernel/signal_64.c @@ -473,12 +473,14 @@ void do_notify_resume(struct pt_regs *regs, void *unused, void signal_fault(struct pt_regs *regs, void __user *frame, char *where) { struct task_struct *me = current; + if (show_unhandled_signals && printk_ratelimit()) { - printk("%s[%d] bad frame in %s frame:%p ip:%lx sp:%lx orax:%lx", - me->comm, me->pid, where, frame, regs->ip, - regs->sp, regs->orig_ax); + printk(KERN_INFO + "%s[%d] bad frame in %s frame:%p ip:%lx sp:%lx orax:%lx", + me->comm, me->pid, where, frame, + regs->ip, regs->sp, regs->orig_ax); print_vma_addr(" in ", regs->ip); - printk("\n"); + printk(KERN_CONT "\n"); } force_sig(SIGSEGV, me); -- cgit v1.2.3 From 0c40ed71736c20f447843b3c96b715bb9c4904d7 Mon Sep 17 00:00:00 2001 From: Hiroshi Shimamoto Date: Tue, 9 Sep 2008 17:21:17 -0700 Subject: x86: signal_64.c: arg for restore_i387_xstate() is void __user * restore_i387_xstate() is declared as: int restore_i387_xstate(void __user *buf); so, make the variable buf void __user *. Signed-off-by: Hiroshi Shimamoto Signed-off-by: Ingo Molnar --- arch/x86/kernel/signal_64.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/signal_64.c b/arch/x86/kernel/signal_64.c index 552a331313f..321da93f2fb 100644 --- a/arch/x86/kernel/signal_64.c +++ b/arch/x86/kernel/signal_64.c @@ -94,7 +94,8 @@ restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc, } { - struct _fpstate __user *buf; + void __user *buf; + err |= __get_user(buf, &sc->fpstate); err |= restore_i387_xstate(buf); } -- cgit v1.2.3 From 764e8d128f9343027cf09afe8a145e8ff186e129 Mon Sep 17 00:00:00 2001 From: Hiroshi Shimamoto Date: Tue, 9 Sep 2008 17:22:12 -0700 Subject: x86: signal_64.c: make handle_signal() similar Make handle_signal() same as 32bit. No change in functionality intended. Signed-off-by: Hiroshi Shimamoto Signed-off-by: Ingo Molnar --- arch/x86/kernel/signal_64.c | 57 +++++++++++++++++++++++---------------------- 1 file changed, 29 insertions(+), 28 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/signal_64.c b/arch/x86/kernel/signal_64.c index 321da93f2fb..43e6862fc7a 100644 --- a/arch/x86/kernel/signal_64.c +++ b/arch/x86/kernel/signal_64.c @@ -338,39 +338,40 @@ handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka, ret = setup_rt_frame(sig, ka, info, oldset, regs); - if (ret == 0) { - /* - * This has nothing to do with segment registers, - * despite the name. This magic affects uaccess.h - * macros' behavior. Reset it to the normal setting. - */ - set_fs(USER_DS); + if (ret) + return ret; - /* - * Clear the direction flag as per the ABI for function entry. - */ - regs->flags &= ~X86_EFLAGS_DF; + /* + * This has nothing to do with segment registers, + * despite the name. This magic affects uaccess.h + * macros' behavior. Reset it to the normal setting. + */ + set_fs(USER_DS); - /* - * Clear TF when entering the signal handler, but - * notify any tracer that was single-stepping it. - * The tracer may want to single-step inside the - * handler too. - */ - regs->flags &= ~X86_EFLAGS_TF; + /* + * Clear the direction flag as per the ABI for function entry. + */ + regs->flags &= ~X86_EFLAGS_DF; - spin_lock_irq(¤t->sighand->siglock); - sigorsets(¤t->blocked, ¤t->blocked, &ka->sa.sa_mask); - if (!(ka->sa.sa_flags & SA_NODEFER)) - sigaddset(¤t->blocked, sig); - recalc_sigpending(); - spin_unlock_irq(¤t->sighand->siglock); + /* + * Clear TF when entering the signal handler, but + * notify any tracer that was single-stepping it. + * The tracer may want to single-step inside the + * handler too. + */ + regs->flags &= ~X86_EFLAGS_TF; - tracehook_signal_handler(sig, info, ka, regs, - test_thread_flag(TIF_SINGLESTEP)); - } + spin_lock_irq(¤t->sighand->siglock); + sigorsets(¤t->blocked, ¤t->blocked, &ka->sa.sa_mask); + if (!(ka->sa.sa_flags & SA_NODEFER)) + sigaddset(¤t->blocked, sig); + recalc_sigpending(); + spin_unlock_irq(¤t->sighand->siglock); - return ret; + tracehook_signal_handler(sig, info, ka, regs, + test_thread_flag(TIF_SINGLESTEP)); + + return 0; } #define NR_restart_syscall \ -- cgit v1.2.3 From ac4ff656c07ada78316307b0c0ce8a8eb48aa6dd Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Wed, 10 Sep 2008 01:06:47 +0900 Subject: x86: convert gart to use is_buffer_dma_capable helper function Signed-off-by: FUJITA Tomonori Acked-by: Joerg Roedel Signed-off-by: Ingo Molnar --- arch/x86/kernel/pci-gart_64.c | 16 +++------------- 1 file changed, 3 insertions(+), 13 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c index 0b99d4a06f7..1b0c412566e 100644 --- a/arch/x86/kernel/pci-gart_64.c +++ b/arch/x86/kernel/pci-gart_64.c @@ -214,24 +214,14 @@ static void iommu_full(struct device *dev, size_t size, int dir) static inline int need_iommu(struct device *dev, unsigned long addr, size_t size) { - u64 mask = *dev->dma_mask; - int high = addr + size > mask; - int mmu = high; - - if (force_iommu) - mmu = 1; - - return mmu; + return force_iommu || + !is_buffer_dma_capable(*dev->dma_mask, addr, size); } static inline int nonforced_iommu(struct device *dev, unsigned long addr, size_t size) { - u64 mask = *dev->dma_mask; - int high = addr + size > mask; - int mmu = high; - - return mmu; + return !is_buffer_dma_capable(*dev->dma_mask, addr, size); } /* Map a single continuous physical area into the IOMMU. -- cgit v1.2.3 From 49fbf4e9f982c704dc365698c5b5efa780aadcb5 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Wed, 10 Sep 2008 01:06:48 +0900 Subject: x86: convert pci-nommu to use is_buffer_dma_capable helper function Signed-off-by: FUJITA Tomonori Acked-by: Joerg Roedel Signed-off-by: Ingo Molnar --- arch/x86/kernel/pci-nommu.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/pci-nommu.c b/arch/x86/kernel/pci-nommu.c index 8e398b56f50..1c1c98a31d5 100644 --- a/arch/x86/kernel/pci-nommu.c +++ b/arch/x86/kernel/pci-nommu.c @@ -14,7 +14,7 @@ static int check_addr(char *name, struct device *hwdev, dma_addr_t bus, size_t size) { - if (hwdev && bus + size > *hwdev->dma_mask) { + if (hwdev && !is_buffer_dma_capable(*hwdev->dma_mask, bus, size)) { if (*hwdev->dma_mask >= DMA_32BIT_MASK) printk(KERN_ERR "nommu_%s: overflow %Lx+%zu of device mask %Lx\n", @@ -79,6 +79,7 @@ nommu_alloc_coherent(struct device *hwdev, size_t size, unsigned long dma_mask; int node; struct page *page; + dma_addr_t addr; dma_mask = dma_alloc_coherent_mask(hwdev, gfp); @@ -90,14 +91,15 @@ again: if (!page) return NULL; - if ((page_to_phys(page) + size > dma_mask) && !(gfp & GFP_DMA)) { + addr = page_to_phys(page); + if (!is_buffer_dma_capable(dma_mask, addr, size) && !(gfp & GFP_DMA)) { free_pages((unsigned long)page_address(page), get_order(size)); gfp |= GFP_DMA; goto again; } - *dma_addr = page_to_phys(page); - if (check_addr("alloc_coherent", hwdev, *dma_addr, size)) { + if (check_addr("alloc_coherent", hwdev, addr, size)) { + *dma_addr = addr; flush_write_buffers(); return page_address(page); } -- cgit v1.2.3 From e38e05a85828dac23540cd007df5f20985388afc Mon Sep 17 00:00:00 2001 From: Sheng Yang Date: Wed, 10 Sep 2008 18:53:34 +0800 Subject: x86: extended "flags" to show virtualization HW feature in /proc/cpuinfo The hardware virtualization technology evolves very fast. But currently it's hard to tell if your CPU support a certain kind of HW technology without digging into the source code. The patch add a new catagory in "flags" under /proc/cpuinfo. Now "flags" can indicate the (important) HW virtulization features the CPU supported as well. Current implementation just cover Intel VMX side. Signed-off-by: Sheng Yang Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/intel.c | 41 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 5f76bf139fd..99468dbd08d 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -196,6 +196,44 @@ static int __cpuinit intel_num_cpu_cores(struct cpuinfo_x86 *c) return 1; } +static void __cpuinit detect_vmx_virtcap(struct cpuinfo_x86 *c) +{ + /* Intel VMX MSR indicated features */ +#define X86_VMX_FEATURE_PROC_CTLS_TPR_SHADOW 0x00200000 +#define X86_VMX_FEATURE_PROC_CTLS_VNMI 0x00400000 +#define X86_VMX_FEATURE_PROC_CTLS_2ND_CTLS 0x80000000 +#define X86_VMX_FEATURE_PROC_CTLS2_VIRT_APIC 0x00000001 +#define X86_VMX_FEATURE_PROC_CTLS2_EPT 0x00000002 +#define X86_VMX_FEATURE_PROC_CTLS2_VPID 0x00000020 + + u32 vmx_msr_low, vmx_msr_high, msr_ctl, msr_ctl2; + + clear_cpu_cap(c, X86_FEATURE_TPR_SHADOW); + clear_cpu_cap(c, X86_FEATURE_VNMI); + clear_cpu_cap(c, X86_FEATURE_FLEXPRIORITY); + clear_cpu_cap(c, X86_FEATURE_EPT); + clear_cpu_cap(c, X86_FEATURE_VPID); + + rdmsr(MSR_IA32_VMX_PROCBASED_CTLS, vmx_msr_low, vmx_msr_high); + msr_ctl = vmx_msr_high | vmx_msr_low; + if (msr_ctl & X86_VMX_FEATURE_PROC_CTLS_TPR_SHADOW) + set_cpu_cap(c, X86_FEATURE_TPR_SHADOW); + if (msr_ctl & X86_VMX_FEATURE_PROC_CTLS_VNMI) + set_cpu_cap(c, X86_FEATURE_VNMI); + if (msr_ctl & X86_VMX_FEATURE_PROC_CTLS_2ND_CTLS) { + rdmsr(MSR_IA32_VMX_PROCBASED_CTLS2, + vmx_msr_low, vmx_msr_high); + msr_ctl2 = vmx_msr_high | vmx_msr_low; + if ((msr_ctl2 & X86_VMX_FEATURE_PROC_CTLS2_VIRT_APIC) && + (msr_ctl & X86_VMX_FEATURE_PROC_CTLS_TPR_SHADOW)) + set_cpu_cap(c, X86_FEATURE_FLEXPRIORITY); + if (msr_ctl2 & X86_VMX_FEATURE_PROC_CTLS2_EPT) + set_cpu_cap(c, X86_FEATURE_EPT); + if (msr_ctl2 & X86_VMX_FEATURE_PROC_CTLS2_VPID) + set_cpu_cap(c, X86_FEATURE_VPID); + } +} + static void __cpuinit init_intel(struct cpuinfo_x86 *c) { unsigned int l2 = 0; @@ -289,6 +327,9 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c) /* Work around errata */ srat_detect_node(); + + if (cpu_has(c, X86_FEATURE_VMX)) + detect_vmx_virtcap(c); } #ifdef CONFIG_X86_32 -- cgit v1.2.3 From f461a1d80c865e5ec4d24107adbab8b010b60e32 Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Wed, 10 Sep 2008 13:57:43 +0200 Subject: arch/x86/kernel/kdebugfs.c: introduce missing kfree Error handling code following a kmalloc should free the allocated data. Note that at the point of the change, node has not yet been stored in d, so it is not affected by the existing cleanup code. The semantic match that finds the problem is as follows: (http://www.emn.fr/x-info/coccinelle/) // @r exists@ local idexpression x; statement S; expression E; identifier f,l; position p1,p2; expression *ptr != NULL; @@ ( if ((x@p1 = \(kmalloc\|kzalloc\|kcalloc\)(...)) == NULL) S | x@p1 = \(kmalloc\|kzalloc\|kcalloc\)(...); ... if (x == NULL) S ) <... when != x when != if (...) { <+...x...+> } x->f = E ...> ( return \(0\|<+...x...+>\|ptr\); | return@p2 ...; ) @script:python@ p1 << r.p1; p2 << r.p2; @@ print "* file: %s kmalloc %s return %s" % (p1[0].file,p1[0].line,p2[0].line) // Signed-off-by: Julia Lawall Signed-off-by: Ingo Molnar --- arch/x86/kernel/kdebugfs.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/kdebugfs.c b/arch/x86/kernel/kdebugfs.c index f2d43bc7551..ff7d3b0124f 100644 --- a/arch/x86/kernel/kdebugfs.c +++ b/arch/x86/kernel/kdebugfs.c @@ -139,6 +139,7 @@ static int __init create_setup_data_nodes(struct dentry *parent) if (PageHighMem(pg)) { data = ioremap_cache(pa_data, sizeof(*data)); if (!data) { + kfree(node); error = -ENXIO; goto err_dir; } -- cgit v1.2.3 From 0ad5bce7409d681a5655c66e64bb0eb740b89c1f Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Thu, 11 Sep 2008 16:42:00 -0700 Subject: x86: fix possible x86_64 and EFI regression Russ Anderson reported a boot crash with EFI and latest mainline: BIOS-e820: 00000000fffa0000 - 00000000fffac000 (reserved) Pid: 0, comm: swapper Not tainted 2.6.27-rc5-00100-gec0c15a-dirty #5 Call Trace: [] early_idt_handler+0x55/0x69 [] __memcpy+0x12/0xa4 [] efi_init+0xce/0x932 [] setup_early_serial8250_console+0x2d/0x36a [] __insert_resource+0x18/0xc8 [] setup_arch+0x3a7/0x632 [] start_kernel+0x91/0x367 [] x86_64_start_kernel+0xe3/0xe7 [] x86_64_start_kernel+0x0/0xe7 RIP 0x10 Such a crash is possible if the CPU in this system is a 64-bit processor which doesn't support NX (ie, old Intel P4 -based64-bit processors). Certainly, if we support such processors, then we should start with _PAGE_NX initially clear in __supported_pte_flags, and then set it once we've established that the processor does indeed support NX. That will prevent early_ioremap - or anything else - from trying to set it. The simple fix is to simply call check_efer() earlier. Reported-by: Russ Anderson Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Ingo Molnar --- arch/x86/kernel/setup.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 362d4e7f2d3..9838f2539df 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -670,6 +670,10 @@ void __init setup_arch(char **cmdline_p) parse_early_param(); +#ifdef CONFIG_X86_64 + check_efer(); +#endif + #if defined(CONFIG_VMI) && defined(CONFIG_X86_32) /* * Must be before kernel pagetables are setup @@ -738,7 +742,6 @@ void __init setup_arch(char **cmdline_p) #else num_physpages = max_pfn; - check_efer(); /* How many end-of-memory variables you have, grandma! */ /* need this before calling reserve_initrd */ -- cgit v1.2.3 From a0a29b62a9cac6b7d83b7514679f2ed8d33d4372 Mon Sep 17 00:00:00 2001 From: Dmitry Adamushko Date: Thu, 11 Sep 2008 23:27:52 +0200 Subject: x86, microcode rework, v2 this is a rework of the microcode splitup in tip/x86/microcode (1) I think this new interface is cleaner (look at the changes in 'struct microcode_ops' in microcode.h); (2) it's -64 lines of code; Signed-off-by: Ingo Molnar --- arch/x86/kernel/microcode.c | 84 ++++------ arch/x86/kernel/microcode_amd.c | 329 ++++++++++++++++++-------------------- arch/x86/kernel/microcode_intel.c | 220 +++++++++++-------------- 3 files changed, 285 insertions(+), 348 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/microcode.c b/arch/x86/kernel/microcode.c index b2f84ce5eed..902dada2eb6 100644 --- a/arch/x86/kernel/microcode.c +++ b/arch/x86/kernel/microcode.c @@ -110,50 +110,28 @@ struct ucode_cpu_info ucode_cpu_info[NR_CPUS]; EXPORT_SYMBOL_GPL(ucode_cpu_info); #ifdef CONFIG_MICROCODE_OLD_INTERFACE -void __user *user_buffer; /* user area microcode data buffer */ -EXPORT_SYMBOL_GPL(user_buffer); -unsigned int user_buffer_size; /* it's size */ -EXPORT_SYMBOL_GPL(user_buffer_size); - -static int do_microcode_update(void) +static int do_microcode_update(const void __user *buf, size_t size) { - long cursor = 0; + cpumask_t old; int error = 0; - void *new_mc = NULL; int cpu; - cpumask_t old; old = current->cpus_allowed; - while ((cursor = microcode_ops->get_next_ucode(&new_mc, cursor)) > 0) { - if (microcode_ops->microcode_sanity_check != NULL) - error = microcode_ops->microcode_sanity_check(new_mc); - if (error) + for_each_online_cpu(cpu) { + struct ucode_cpu_info *uci = ucode_cpu_info + cpu; + + if (!uci->valid) + continue; + + set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu)); + error = microcode_ops->request_microcode_user(cpu, buf, size); + if (error < 0) goto out; - /* - * It's possible the data file has multiple matching ucode, - * lets keep searching till the latest version - */ - for_each_online_cpu(cpu) { - struct ucode_cpu_info *uci = ucode_cpu_info + cpu; - - if (!uci->valid) - continue; - set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu)); - error = microcode_ops->get_matching_microcode(new_mc, - cpu); - if (error < 0) - goto out; - if (error == 1) - microcode_ops->apply_microcode(cpu); - } - vfree(new_mc); + if (!error) + microcode_ops->apply_microcode(cpu); } out: - if (cursor > 0) - vfree(new_mc); - if (cursor < 0) - error = cursor; set_cpus_allowed_ptr(current, &old); return error; } @@ -178,10 +156,7 @@ static ssize_t microcode_write(struct file *file, const char __user *buf, get_online_cpus(); mutex_lock(µcode_mutex); - user_buffer = (void __user *) buf; - user_buffer_size = (int) len; - - ret = do_microcode_update(); + ret = do_microcode_update(buf, len); if (!ret) ret = (ssize_t)len; @@ -231,7 +206,6 @@ MODULE_ALIAS_MISCDEV(MICROCODE_MINOR); /* fake device for request_firmware */ struct platform_device *microcode_pdev; -EXPORT_SYMBOL_GPL(microcode_pdev); static ssize_t reload_store(struct sys_device *dev, struct sysdev_attribute *attr, @@ -252,8 +226,12 @@ static ssize_t reload_store(struct sys_device *dev, if (cpu_online(cpu)) { set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu)); mutex_lock(µcode_mutex); - if (uci->valid) - err = microcode_ops->cpu_request_microcode(cpu); + if (uci->valid) { + err = microcode_ops->request_microcode_fw(cpu, + µcode_pdev->dev); + if (!err) + microcode_ops->apply_microcode(cpu); + } mutex_unlock(µcode_mutex); set_cpus_allowed_ptr(current, &old); } @@ -315,7 +293,7 @@ static void collect_cpu_info(int cpu) uci->valid = 1; } -static void microcode_resume_cpu(int cpu) +static int microcode_resume_cpu(int cpu) { struct ucode_cpu_info *uci = ucode_cpu_info + cpu; struct cpu_signature nsig; @@ -323,7 +301,7 @@ static void microcode_resume_cpu(int cpu) pr_debug("microcode: CPU%d resumed\n", cpu); if (!uci->mc.valid_mc) - return; + return 1; /* * Let's verify that the 'cached' ucode does belong @@ -331,21 +309,22 @@ static void microcode_resume_cpu(int cpu) */ if (microcode_ops->collect_cpu_info(cpu, &nsig)) { microcode_fini_cpu(cpu); - return; + return -1; } if (memcmp(&nsig, &uci->cpu_sig, sizeof(nsig))) { microcode_fini_cpu(cpu); /* Should we look for a new ucode here? */ - return; + return 1; } - microcode_ops->apply_microcode(cpu); + return 0; } void microcode_update_cpu(int cpu) { struct ucode_cpu_info *uci = ucode_cpu_info + cpu; + int err = 0; /* We should bind the task to the CPU */ BUG_ON(raw_smp_processor_id() != cpu); @@ -356,12 +335,17 @@ void microcode_update_cpu(int cpu) * otherwise just request a firmware: */ if (uci->valid) { - microcode_resume_cpu(cpu); + err = microcode_resume_cpu(cpu); } else { collect_cpu_info(cpu); if (uci->valid && system_state == SYSTEM_RUNNING) - microcode_ops->cpu_request_microcode(cpu); + err = microcode_ops->request_microcode_fw(cpu, + µcode_pdev->dev); } + + if (!err) + microcode_ops->apply_microcode(cpu); + mutex_unlock(µcode_mutex); } @@ -414,7 +398,7 @@ static int mc_sysdev_resume(struct sys_device *dev) return 0; pr_debug("microcode: CPU%d resumed\n", cpu); /* only CPU 0 will apply ucode here */ - microcode_ops->apply_microcode(0); + microcode_update_cpu(0); return 0; } diff --git a/arch/x86/kernel/microcode_amd.c b/arch/x86/kernel/microcode_amd.c index d606a05545c..6815837a775 100644 --- a/arch/x86/kernel/microcode_amd.c +++ b/arch/x86/kernel/microcode_amd.c @@ -59,7 +59,7 @@ MODULE_LICENSE("GPL v2"); /* serialize access to the physical write */ static DEFINE_SPINLOCK(microcode_update_lock); -struct equiv_cpu_entry *equiv_cpu_table; +static struct equiv_cpu_entry *equiv_cpu_table; static int collect_cpu_info_amd(int cpu, struct cpu_signature *csig) { @@ -83,36 +83,37 @@ static int collect_cpu_info_amd(int cpu, struct cpu_signature *csig) return 0; } -static int get_matching_microcode_amd(void *mc, int cpu) +static int get_matching_microcode(int cpu, void *mc, int rev) { - struct ucode_cpu_info *uci = ucode_cpu_info + cpu; struct microcode_header_amd *mc_header = mc; - unsigned long total_size = get_totalsize(mc_header); - void *new_mc; struct pci_dev *nb_pci_dev, *sb_pci_dev; unsigned int current_cpu_id; unsigned int equiv_cpu_id = 0x00; unsigned int i = 0; - /* We should bind the task to the CPU */ - BUG_ON(cpu != raw_smp_processor_id()); - - /* This is a tricky part. We might be called from a write operation */ - /* to the device file instead of the usual process of firmware */ - /* loading. This routine needs to be able to distinguish both */ -/* cases. This is done by checking if there alread is a equivalent */ - /* CPU table installed. If not, we're written through */ - /* /dev/cpu/microcode. */ -/* Since we ignore all checks. The error case in which going through */ -/* firmware loading and that table is not loaded has already been */ - /* checked earlier. */ + /* + * dimm: do we need this? Why an update via /dev/... is different + * from the one via firmware? + * + * This is a tricky part. We might be called from a write operation + * to the device file instead of the usual process of firmware + * loading. This routine needs to be able to distinguish both + * cases. This is done by checking if there alread is a equivalent + * CPU table installed. If not, we're written through + * /dev/cpu/microcode. + * Since we ignore all checks. The error case in which going through + * firmware loading and that table is not loaded has already been + * checked earlier. + */ + BUG_ON(equiv_cpu_table == NULL); +#if 0 if (equiv_cpu_table == NULL) { printk(KERN_INFO "microcode: CPU%d microcode update with " "version 0x%x (current=0x%x)\n", cpu, mc_header->patch_id, uci->cpu_sig.rev); goto out; } - +#endif current_cpu_id = cpuid_eax(0x00000001); while (equiv_cpu_table[i].installed_cpu != 0) { @@ -175,27 +176,9 @@ static int get_matching_microcode_amd(void *mc, int cpu) pci_dev_put(sb_pci_dev); } - if (mc_header->patch_id <= uci->cpu_sig.rev) + if (mc_header->patch_id <= rev) return 0; - printk(KERN_INFO "microcode: CPU%d found a matching microcode " - "update with version 0x%x (current=0x%x)\n", - cpu, mc_header->patch_id, uci->cpu_sig.rev); - -out: - new_mc = vmalloc(UCODE_MAX_SIZE); - if (!new_mc) { - printk(KERN_ERR "microcode: error, can't allocate memory\n"); - return -ENOMEM; - } - memset(new_mc, 0, UCODE_MAX_SIZE); - - /* free previous update file */ - vfree(uci->mc.mc_amd); - - memcpy(new_mc, mc, total_size); - - uci->mc.mc_amd = new_mc; return 1; } @@ -245,104 +228,65 @@ static void apply_microcode_amd(int cpu) uci->cpu_sig.rev = rev; } -#ifdef CONFIG_MICROCODE_OLD_INTERFACE -extern void __user *user_buffer; /* user area microcode data buffer */ -extern unsigned int user_buffer_size; /* it's size */ - -static long get_next_ucode_amd(void **mc, long offset) -{ - struct microcode_header_amd mc_header; - unsigned long total_size; - - /* No more data */ - if (offset >= user_buffer_size) - return 0; - if (copy_from_user(&mc_header, user_buffer + offset, MC_HEADER_SIZE)) { - printk(KERN_ERR "microcode: error! Can not read user data\n"); - return -EFAULT; - } - total_size = get_totalsize(&mc_header); - if (offset + total_size > user_buffer_size) { - printk(KERN_ERR "microcode: error! Bad total size in microcode " - "data file\n"); - return -EINVAL; - } - *mc = vmalloc(UCODE_MAX_SIZE); - if (!*mc) - return -ENOMEM; - memset(*mc, 0, UCODE_MAX_SIZE); - - if (copy_from_user(*mc, user_buffer + offset, total_size)) { - printk(KERN_ERR "microcode: error! Can not read user data\n"); - vfree(*mc); - return -EFAULT; - } - return offset + total_size; -} -#else -#define get_next_ucode_amd() NULL -#endif - -static long get_next_ucode_from_buffer_amd(void **mc, void *buf, - unsigned long size, long offset) +static void * get_next_ucode(u8 *buf, unsigned int size, + int (*get_ucode_data)(void *, const void *, size_t), + unsigned int *mc_size) { - struct microcode_header_amd *mc_header; - unsigned long total_size; - unsigned char *buf_pos = buf; + unsigned int total_size; +#define UCODE_UNKNOWN_HDR 8 + u8 hdr[UCODE_UNKNOWN_HDR]; + void *mc; - /* No more data */ - if (offset >= size) - return 0; + if (get_ucode_data(hdr, buf, UCODE_UNKNOWN_HDR)) + return NULL; - if (buf_pos[offset] != UCODE_UCODE_TYPE) { + if (hdr[0] != UCODE_UCODE_TYPE) { printk(KERN_ERR "microcode: error! " "Wrong microcode payload type field\n"); - return -EINVAL; + return NULL; } - mc_header = (struct microcode_header_amd *)(&buf_pos[offset+8]); + /* Why not by means of get_totalsize(hdr)? */ + total_size = (unsigned long) (hdr[4] + (hdr[5] << 8)); - total_size = (unsigned long) (buf_pos[offset+4] + - (buf_pos[offset+5] << 8)); + printk(KERN_INFO "microcode: size %u, total_size %u\n", + size, total_size); - printk(KERN_INFO "microcode: size %lu, total_size %lu, offset %ld\n", - size, total_size, offset); - - if (offset + total_size > size) { + if (total_size > size || total_size > UCODE_MAX_SIZE) { printk(KERN_ERR "microcode: error! Bad data in microcode data file\n"); - return -EINVAL; + return NULL; } - *mc = vmalloc(UCODE_MAX_SIZE); - if (!*mc) { - printk(KERN_ERR "microcode: error! " - "Can not allocate memory for microcode patch\n"); - return -ENOMEM; + mc = vmalloc(UCODE_MAX_SIZE); + if (mc) { + memset(mc, 0, UCODE_MAX_SIZE); + if (get_ucode_data(mc, buf + UCODE_UNKNOWN_HDR, total_size)) { + vfree(mc); + mc = NULL; + } else + *mc_size = total_size + UCODE_UNKNOWN_HDR; } - - memset(*mc, 0, UCODE_MAX_SIZE); - memcpy(*mc, buf + offset + 8, total_size); - - return offset + total_size + 8; +#undef UCODE_UNKNOWN_HDR + return mc; } -static long install_equiv_cpu_table(void *buf, unsigned long size, long offset) + +static int install_equiv_cpu_table(u8 *buf, + int (*get_ucode_data)(void *, const void *, size_t)) { - unsigned int *buf_pos = buf; +#define UCODE_HEADER_SIZE 12 + u8 *hdr[UCODE_HEADER_SIZE]; + unsigned int *buf_pos = (unsigned int *)hdr; + unsigned long size; - /* No more data */ - if (offset >= size) + if (get_ucode_data(&hdr, buf, UCODE_HEADER_SIZE)) return 0; - if (buf_pos[1] != UCODE_EQUIV_CPU_TABLE_TYPE) { - printk(KERN_ERR "microcode: error! " - "Wrong microcode equivalnet cpu table type field\n"); - return 0; - } + size = buf_pos[2]; - if (size == 0) { + if (buf_pos[1] != UCODE_EQUIV_CPU_TABLE_TYPE || !size) { printk(KERN_ERR "microcode: error! " - "Wrong microcode equivalnet cpu table length\n"); + "Wrong microcode equivalnet cpu table\n"); return 0; } @@ -352,79 +296,118 @@ static long install_equiv_cpu_table(void *buf, unsigned long size, long offset) return 0; } - memset(equiv_cpu_table, 0, size); - memcpy(equiv_cpu_table, &buf_pos[3], size); + buf += UCODE_HEADER_SIZE; + if (get_ucode_data(equiv_cpu_table, buf, size)) { + vfree(equiv_cpu_table); + return 0; + } - return size + 12; /* add header length */ + return size + UCODE_HEADER_SIZE; /* add header length */ +#undef UCODE_HEADER_SIZE } -/* fake device for request_firmware */ -extern struct platform_device *microcode_pdev; - -static int cpu_request_microcode_amd(int cpu) +static void free_equiv_cpu_table(void) { - char name[30]; - const struct firmware *firmware; - void *buf; - unsigned int *buf_pos; - unsigned long size; - long offset = 0; - int error; - void *mc; - - /* We should bind the task to the CPU */ - BUG_ON(cpu != raw_smp_processor_id()); - - sprintf(name, "amd-ucode/microcode_amd.bin"); - error = request_firmware(&firmware, "amd-ucode/microcode_amd.bin", - µcode_pdev->dev); - if (error) { - printk(KERN_ERR "microcode: ucode data file %s load failed\n", - name); - return error; - } - - buf_pos = (unsigned int *)firmware->data; - buf = (void *)firmware->data; - size = firmware->size; - - if (buf_pos[0] != UCODE_MAGIC) { - printk(KERN_ERR "microcode: error! Wrong microcode patch file magic\n"); - return -EINVAL; + if (equiv_cpu_table) { + vfree(equiv_cpu_table); + equiv_cpu_table = NULL; } +} - offset = install_equiv_cpu_table(buf, buf_pos[2], offset); +static int generic_load_microcode(int cpu, void *data, size_t size, + int (*get_ucode_data)(void *, const void *, size_t)) +{ + struct ucode_cpu_info *uci = ucode_cpu_info + cpu; + u8 *ucode_ptr = data, *new_mc = NULL, *mc; + int new_rev = uci->cpu_sig.rev; + unsigned int leftover; + unsigned long offset; + offset = install_equiv_cpu_table(ucode_ptr, get_ucode_data); if (!offset) { printk(KERN_ERR "microcode: installing equivalent cpu table failed\n"); return -EINVAL; } - while ((offset = - get_next_ucode_from_buffer_amd(&mc, buf, size, offset)) > 0) { - error = get_matching_microcode_amd(mc, cpu); - if (error < 0) + ucode_ptr += offset; + leftover = size - offset; + + while (leftover) { + unsigned int mc_size; + struct microcode_header_amd *mc_header; + + mc = get_next_ucode(ucode_ptr, leftover, get_ucode_data, &mc_size); + if (!mc) break; - /* - * It's possible the data file has multiple matching ucode, - * lets keep searching till the latest version - */ - if (error == 1) { - apply_microcode_amd(cpu); - error = 0; - } - vfree(mc); + + mc_header = (struct microcode_header_amd *)mc; + if (get_matching_microcode(cpu, mc, new_rev)) { + new_rev = mc_header->patch_id; + new_mc = mc; + } else + vfree(mc); + + ucode_ptr += mc_size; + leftover -= mc_size; } - if (offset > 0) { - vfree(mc); - vfree(equiv_cpu_table); - equiv_cpu_table = NULL; + + if (new_mc) { + if (!leftover) { + if (uci->mc.mc_amd) + vfree(uci->mc.mc_amd); + uci->mc.mc_amd = (struct microcode_amd *)new_mc; + pr_debug("microcode: CPU%d found a matching microcode update with" + " version 0x%x (current=0x%x)\n", + cpu, uci->mc.mc_amd->hdr.patch_id, uci->cpu_sig.rev); + } else + vfree(new_mc); } - if (offset < 0) - error = offset; + + free_equiv_cpu_table(); + + return (int)leftover; +} + +static int get_ucode_fw(void *to, const void *from, size_t n) +{ + memcpy(to, from, n); + return 0; +} + +static int request_microcode_fw(int cpu, struct device *device) +{ + const char *fw_name = "amd-ucode/microcode_amd.bin"; + const struct firmware *firmware; + int ret; + + /* We should bind the task to the CPU */ + BUG_ON(cpu != raw_smp_processor_id()); + + ret = request_firmware(&firmware, fw_name, device); + if (ret) { + printk(KERN_ERR "microcode: ucode data file %s load failed\n", fw_name); + return ret; + } + + ret = generic_load_microcode(cpu, (void*)firmware->data, firmware->size, + &get_ucode_fw); + release_firmware(firmware); - return error; + return ret; +} + +static int get_ucode_user(void *to, const void *from, size_t n) +{ + return copy_from_user(to, from, n); +} + +static int request_microcode_user(int cpu, const void __user *buf, size_t size) +{ + /* We should bind the task to the CPU */ + BUG_ON(cpu != raw_smp_processor_id()); + + return generic_load_microcode(cpu, (void*)buf, size, &get_ucode_user); } static void microcode_fini_cpu_amd(int cpu) @@ -436,10 +419,8 @@ static void microcode_fini_cpu_amd(int cpu) } static struct microcode_ops microcode_amd_ops = { - .get_next_ucode = get_next_ucode_amd, - .get_matching_microcode = get_matching_microcode_amd, - .microcode_sanity_check = NULL, - .cpu_request_microcode = cpu_request_microcode_amd, + .request_microcode_user = request_microcode_user, + .request_microcode_fw = request_microcode_fw, .collect_cpu_info = collect_cpu_info_amd, .apply_microcode = apply_microcode_amd, .microcode_fini_cpu = microcode_fini_cpu_amd, diff --git a/arch/x86/kernel/microcode_intel.c b/arch/x86/kernel/microcode_intel.c index c9b53202ba3..f4930b55c6a 100644 --- a/arch/x86/kernel/microcode_intel.c +++ b/arch/x86/kernel/microcode_intel.c @@ -155,15 +155,15 @@ static int collect_cpu_info(int cpu_num, struct cpu_signature *csig) return 0; } -static inline int microcode_update_match(int cpu_num, - struct microcode_header_intel *mc_header, int sig, int pf) +static inline int update_match_cpu(struct cpu_signature *csig, int sig, int pf) { - struct ucode_cpu_info *uci = ucode_cpu_info + cpu_num; + return (!sigmatch(sig, csig->sig, pf, csig->pf)) ? 0 : 1; +} - if (!sigmatch(sig, uci->cpu_sig.sig, pf, uci->cpu_sig.pf) - || mc_header->rev <= uci->cpu_sig.rev) - return 0; - return 1; +static inline int +update_match_revision(struct microcode_header_intel *mc_header, int rev) +{ + return (mc_header->rev <= rev) ? 0 : 1; } static int microcode_sanity_check(void *mc) @@ -248,51 +248,36 @@ static int microcode_sanity_check(void *mc) /* * return 0 - no update found * return 1 - found update - * return < 0 - error */ -static int get_matching_microcode(void *mc, int cpu) +static int +get_matching_microcode(struct cpu_signature *cpu_sig, void *mc, int rev) { - struct ucode_cpu_info *uci = ucode_cpu_info + cpu; struct microcode_header_intel *mc_header = mc; struct extended_sigtable *ext_header; unsigned long total_size = get_totalsize(mc_header); int ext_sigcount, i; struct extended_signature *ext_sig; - void *new_mc; - if (microcode_update_match(cpu, mc_header, - mc_header->sig, mc_header->pf)) - goto find; + if (!update_match_revision(mc_header, rev)) + return 0; + + if (update_match_cpu(cpu_sig, mc_header->sig, mc_header->pf)) + return 1; + /* Look for ext. headers: */ if (total_size <= get_datasize(mc_header) + MC_HEADER_SIZE) return 0; ext_header = mc + get_datasize(mc_header) + MC_HEADER_SIZE; ext_sigcount = ext_header->count; ext_sig = (void *)ext_header + EXT_HEADER_SIZE; + for (i = 0; i < ext_sigcount; i++) { - if (microcode_update_match(cpu, mc_header, - ext_sig->sig, ext_sig->pf)) - goto find; + if (update_match_cpu(cpu_sig, ext_sig->sig, ext_sig->pf)) + return 1; ext_sig++; } return 0; -find: - pr_debug("microcode: CPU%d found a matching microcode update with" - " version 0x%x (current=0x%x)\n", - cpu, mc_header->rev, uci->cpu_sig.rev); - new_mc = vmalloc(total_size); - if (!new_mc) { - printk(KERN_ERR "microcode: error! Can not allocate memory\n"); - return -ENOMEM; - } - - /* free previous update file */ - vfree(uci->mc.mc_intel); - - memcpy(new_mc, mc, total_size); - uci->mc.mc_intel = new_mc; - return 1; } static void apply_microcode(int cpu) @@ -300,7 +285,7 @@ static void apply_microcode(int cpu) unsigned long flags; unsigned int val[2]; int cpu_num = raw_smp_processor_id(); - struct ucode_cpu_info *uci = ucode_cpu_info + cpu_num; + struct ucode_cpu_info *uci = ucode_cpu_info + cpu; /* We should bind the task to the CPU */ BUG_ON(cpu_num != cpu); @@ -338,116 +323,105 @@ static void apply_microcode(int cpu) uci->cpu_sig.rev = val[1]; } -#ifdef CONFIG_MICROCODE_OLD_INTERFACE -extern void __user *user_buffer; /* user area microcode data buffer */ -extern unsigned int user_buffer_size; /* it's size */ - -static long get_next_ucode(void **mc, long offset) +static int generic_load_microcode(int cpu, void *data, size_t size, + int (*get_ucode_data)(void *, const void *, size_t)) { - struct microcode_header_intel mc_header; - unsigned long total_size; + struct ucode_cpu_info *uci = ucode_cpu_info + cpu; + u8 *ucode_ptr = data, *new_mc = NULL, *mc; + int new_rev = uci->cpu_sig.rev; + unsigned int leftover = size; - /* No more data */ - if (offset >= user_buffer_size) - return 0; - if (copy_from_user(&mc_header, user_buffer + offset, MC_HEADER_SIZE)) { - printk(KERN_ERR "microcode: error! Can not read user data\n"); - return -EFAULT; - } - total_size = get_totalsize(&mc_header); - if (offset + total_size > user_buffer_size) { - printk(KERN_ERR "microcode: error! Bad total size in microcode " - "data file\n"); - return -EINVAL; - } - *mc = vmalloc(total_size); - if (!*mc) - return -ENOMEM; - if (copy_from_user(*mc, user_buffer + offset, total_size)) { - printk(KERN_ERR "microcode: error! Can not read user data\n"); - vfree(*mc); - return -EFAULT; - } - return offset + total_size; -} -#endif + while (leftover) { + struct microcode_header_intel mc_header; + unsigned int mc_size; -static long get_next_ucode_from_buffer(void **mc, const u8 *buf, - unsigned long size, long offset) -{ - struct microcode_header_intel *mc_header; - unsigned long total_size; + if (get_ucode_data(&mc_header, ucode_ptr, sizeof(mc_header))) + break; - /* No more data */ - if (offset >= size) - return 0; - mc_header = (struct microcode_header_intel *)(buf + offset); - total_size = get_totalsize(mc_header); + mc_size = get_totalsize(&mc_header); + if (!mc_size || mc_size > leftover) { + printk(KERN_ERR "microcode: error!" + "Bad data in microcode data file\n"); + break; + } - if (offset + total_size > size) { - printk(KERN_ERR "microcode: error! Bad data in microcode data file\n"); - return -EINVAL; + mc = vmalloc(mc_size); + if (!mc) + break; + + if (get_ucode_data(mc, ucode_ptr, mc_size) || + microcode_sanity_check(mc) < 0) { + vfree(mc); + break; + } + + if (get_matching_microcode(&uci->cpu_sig, mc, new_rev)) { + new_rev = mc_header.rev; + new_mc = mc; + } else + vfree(mc); + + ucode_ptr += mc_size; + leftover -= mc_size; } - *mc = vmalloc(total_size); - if (!*mc) { - printk(KERN_ERR "microcode: error! Can not allocate memory\n"); - return -ENOMEM; + if (new_mc) { + if (!leftover) { + if (uci->mc.mc_intel) + vfree(uci->mc.mc_intel); + uci->mc.mc_intel = (struct microcode_intel *)new_mc; + pr_debug("microcode: CPU%d found a matching microcode update with" + " version 0x%x (current=0x%x)\n", + cpu, uci->mc.mc_intel->hdr.rev, uci->cpu_sig.rev); + } else + vfree(new_mc); } - memcpy(*mc, buf + offset, total_size); - return offset + total_size; + + return (int)leftover; } -/* fake device for request_firmware */ -extern struct platform_device *microcode_pdev; +static int get_ucode_fw(void *to, const void *from, size_t n) +{ + memcpy(to, from, n); + return 0; +} -static int cpu_request_microcode(int cpu) +static int request_microcode_fw(int cpu, struct device *device) { char name[30]; struct cpuinfo_x86 *c = &cpu_data(cpu); const struct firmware *firmware; - const u8 *buf; - unsigned long size; - long offset = 0; - int error; - void *mc; + int ret; /* We should bind the task to the CPU */ BUG_ON(cpu != raw_smp_processor_id()); sprintf(name, "intel-ucode/%02x-%02x-%02x", c->x86, c->x86_model, c->x86_mask); - error = request_firmware(&firmware, name, µcode_pdev->dev); - if (error) { + ret = request_firmware(&firmware, name, device); + if (ret) { pr_debug("microcode: data file %s load failed\n", name); - return error; - } - buf = firmware->data; - size = firmware->size; - while ((offset = get_next_ucode_from_buffer(&mc, buf, size, offset)) - > 0) { - error = microcode_sanity_check(mc); - if (error) - break; - error = get_matching_microcode(mc, cpu); - if (error < 0) - break; - /* - * It's possible the data file has multiple matching ucode, - * lets keep searching till the latest version - */ - if (error == 1) { - apply_microcode(cpu); - error = 0; - } - vfree(mc); + return ret; } - if (offset > 0) - vfree(mc); - if (offset < 0) - error = offset; + + ret = generic_load_microcode(cpu, (void*)firmware->data, firmware->size, + &get_ucode_fw); + release_firmware(firmware); - return error; + return ret; +} + +static int get_ucode_user(void *to, const void *from, size_t n) +{ + return copy_from_user(to, from, n); +} + +static int request_microcode_user(int cpu, const void __user *buf, size_t size) +{ + /* We should bind the task to the CPU */ + BUG_ON(cpu != raw_smp_processor_id()); + + return generic_load_microcode(cpu, (void*)buf, size, &get_ucode_user); } static void microcode_fini_cpu(int cpu) @@ -459,10 +433,8 @@ static void microcode_fini_cpu(int cpu) } static struct microcode_ops microcode_intel_ops = { - .get_next_ucode = get_next_ucode, - .get_matching_microcode = get_matching_microcode, - .microcode_sanity_check = microcode_sanity_check, - .cpu_request_microcode = cpu_request_microcode, + .request_microcode_user = request_microcode_user, + .request_microcode_fw = request_microcode_fw, .collect_cpu_info = collect_cpu_info, .apply_microcode = apply_microcode, .microcode_fini_cpu = microcode_fini_cpu, -- cgit v1.2.3 From aef93c8bd506a78983d91cd576a97fee6e934ac1 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Sun, 14 Sep 2008 02:33:15 -0700 Subject: x86: identify_cpu_without_cpuid v2 Krzysztof found some old cyrix cpu where an mtrr-alike cpu feature was not detected properly. this one is based on Krzysztof' patch, and we call ->c_identify() in early_identify_cpu. need to call c_identify() for cpus without cpuid even earlier ... v2: Krzysztof point out need to give cyrix another chance about cpuid checking again, after ->c_identify() enables cpuid for it Signed-off-by: Yinghai Lu Cc: Krzysztof Helt Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/common.c | 57 +++++++++++++++++++++++++++++++------------- 1 file changed, 40 insertions(+), 17 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 9a57106c199..f5f25b85be9 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -485,6 +485,33 @@ static void __cpuinit get_cpu_cap(struct cpuinfo_x86 *c) c->x86_power = cpuid_edx(0x80000007); } + +static void __cpuinit identify_cpu_without_cpuid(struct cpuinfo_x86 *c) +{ +#ifdef CONFIG_X86_32 + int i; + + /* + * First of all, decide if this is a 486 or higher + * It's a 486 if we can modify the AC flag + */ + if (flag_is_changeable_p(X86_EFLAGS_AC)) + c->x86 = 4; + else + c->x86 = 3; + + for (i = 0; i < X86_VENDOR_NUM; i++) + if (cpu_devs[i] && cpu_devs[i]->c_identify) { + c->x86_vendor_id[0] = 0; + cpu_devs[i]->c_identify(c); + if (c->x86_vendor_id[0]) { + get_cpu_vendor(c); + break; + } + } +#endif +} + /* * Do minimum CPU detection early. * Fields really needed: vendor, cpuid_level, family, model, mask, @@ -503,13 +530,16 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c) #endif c->x86_cache_alignment = c->x86_clflush_size; - if (!have_cpuid_p()) - return; - memset(&c->x86_capability, 0, sizeof c->x86_capability); - c->extended_cpuid_level = 0; + if (!have_cpuid_p()) + identify_cpu_without_cpuid(c); + + /* cyrix could have cpuid enabled via c_identify()*/ + if (!have_cpuid()) + return; + cpu_detect(c); get_cpu_vendor(c); @@ -583,10 +613,14 @@ static void __cpuinit detect_nopl(struct cpuinfo_x86 *c) static void __cpuinit generic_identify(struct cpuinfo_x86 *c) { + c->extended_cpuid_level = 0; + if (!have_cpuid_p()) - return; + identify_cpu_without_cpuid(c); - c->extended_cpuid_level = 0; + /* cyrix could have cpuid enabled via c_identify()*/ + if (!have_cpuid()) + return; cpu_detect(c); @@ -639,17 +673,6 @@ static void __cpuinit identify_cpu(struct cpuinfo_x86 *c) c->x86_cache_alignment = c->x86_clflush_size; memset(&c->x86_capability, 0, sizeof c->x86_capability); - if (!have_cpuid_p()) { - /* - * First of all, decide if this is a 486 or higher - * It's a 486 if we can modify the AC flag - */ - if (flag_is_changeable_p(X86_EFLAGS_AC)) - c->x86 = 4; - else - c->x86 = 3; - } - generic_identify(c); if (this_cpu->c_identify) -- cgit v1.2.3 From afae865613c8292e8bdcce4f0b161988d761f033 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Sun, 14 Sep 2008 02:33:16 -0700 Subject: x86: move transmeta cap read to early_init_transmeta() Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/common.c | 8 -------- arch/x86/kernel/cpu/transmeta.c | 28 +++++++++++++++------------- 2 files changed, 15 insertions(+), 21 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index f5f25b85be9..3e2ce4d33d3 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -465,14 +465,6 @@ static void __cpuinit get_cpu_cap(struct cpuinfo_x86 *c) } #ifdef CONFIG_X86_64 - /* Transmeta-defined flags: level 0x80860001 */ - xlvl = cpuid_eax(0x80860000); - if ((xlvl & 0xffff0000) == 0x80860000) { - /* Don't set x86_cpuid_level here for now to not confuse. */ - if (xlvl >= 0x80860001) - c->x86_capability[2] = cpuid_edx(0x80860001); - } - if (c->extended_cpuid_level >= 0x80000008) { u32 eax = cpuid_eax(0x80000008); diff --git a/arch/x86/kernel/cpu/transmeta.c b/arch/x86/kernel/cpu/transmeta.c index 738e03244f9..52b3fefbd5a 100644 --- a/arch/x86/kernel/cpu/transmeta.c +++ b/arch/x86/kernel/cpu/transmeta.c @@ -5,6 +5,18 @@ #include #include "cpu.h" +static void __cpuinit early_init_transmeta(struct cpuinfo_x86 *c) +{ + u32 xlvl; + + /* Transmeta-defined flags: level 0x80860001 */ + xlvl = cpuid_eax(0x80860000); + if ((xlvl & 0xffff0000) == 0x80860000) { + if (xlvl >= 0x80860001) + c->x86_capability[2] = cpuid_edx(0x80860001); + } +} + static void __cpuinit init_transmeta(struct cpuinfo_x86 *c) { unsigned int cap_mask, uk, max, dummy; @@ -12,6 +24,8 @@ static void __cpuinit init_transmeta(struct cpuinfo_x86 *c) unsigned int cpu_rev, cpu_freq = 0, cpu_flags, new_cpu_rev; char cpu_info[65]; + early_init_transmeta(c); + display_cacheinfo(c); /* Print CMS and CPU revision */ @@ -84,23 +98,11 @@ static void __cpuinit init_transmeta(struct cpuinfo_x86 *c) #endif } -static void __cpuinit transmeta_identify(struct cpuinfo_x86 *c) -{ - u32 xlvl; - - /* Transmeta-defined flags: level 0x80860001 */ - xlvl = cpuid_eax(0x80860000); - if ((xlvl & 0xffff0000) == 0x80860000) { - if (xlvl >= 0x80860001) - c->x86_capability[2] = cpuid_edx(0x80860001); - } -} - static struct cpu_dev transmeta_cpu_dev __cpuinitdata = { .c_vendor = "Transmeta", .c_ident = { "GenuineTMx86", "TransmetaCPU" }, + .c_early_init = early_init_transmeta, .c_init = init_transmeta, - .c_identify = transmeta_identify, .c_x86_vendor = X86_VENDOR_TRANSMETA, }; -- cgit v1.2.3 From a9853dd6d285c30a3ddeb3cce8c05e1678400bef Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sun, 14 Sep 2008 14:46:58 +0200 Subject: x86: cpuid, fix typo Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/common.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 3e2ce4d33d3..cef3519b3bb 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -529,7 +529,7 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c) identify_cpu_without_cpuid(c); /* cyrix could have cpuid enabled via c_identify()*/ - if (!have_cpuid()) + if (!have_cpuid_p()) return; cpu_detect(c); @@ -611,7 +611,7 @@ static void __cpuinit generic_identify(struct cpuinfo_x86 *c) identify_cpu_without_cpuid(c); /* cyrix could have cpuid enabled via c_identify()*/ - if (!have_cpuid()) + if (!have_cpuid_p()) return; cpu_detect(c); -- cgit v1.2.3 From a1c75cc5018f17ff6d80ce45a13435b1536f76db Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sun, 14 Sep 2008 14:50:26 +0200 Subject: x86, microcode rework, v2, fix based on patch from Dmitry Adamushko. - add missing vfree() - update debug printks Signed-off-by: Ingo Molnar --- arch/x86/kernel/microcode.c | 4 +--- arch/x86/kernel/microcode_amd.c | 6 ++++-- arch/x86/kernel/microcode_intel.c | 4 ++-- 3 files changed, 7 insertions(+), 7 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/microcode.c b/arch/x86/kernel/microcode.c index 902dada2eb6..0c2634f4fd7 100644 --- a/arch/x86/kernel/microcode.c +++ b/arch/x86/kernel/microcode.c @@ -70,8 +70,6 @@ * Fix sigmatch() macro to handle old CPUs with pf == 0. * Thanks to Stuart Swales for pointing out this bug. */ - -/* #define DEBUG pr_debug */ #include #include #include @@ -396,7 +394,7 @@ static int mc_sysdev_resume(struct sys_device *dev) if (!cpu_online(cpu)) return 0; - pr_debug("microcode: CPU%d resumed\n", cpu); + /* only CPU 0 will apply ucode here */ microcode_update_cpu(0); return 0; diff --git a/arch/x86/kernel/microcode_amd.c b/arch/x86/kernel/microcode_amd.c index 6815837a775..48aec9f48e4 100644 --- a/arch/x86/kernel/microcode_amd.c +++ b/arch/x86/kernel/microcode_amd.c @@ -92,7 +92,7 @@ static int get_matching_microcode(int cpu, void *mc, int rev) unsigned int i = 0; /* - * dimm: do we need this? Why an update via /dev/... is different + * FIXME! dimm: do we need this? Why an update via /dev/... is different * from the one via firmware? * * This is a tricky part. We might be called from a write operation @@ -246,7 +246,7 @@ static void * get_next_ucode(u8 *buf, unsigned int size, return NULL; } - /* Why not by means of get_totalsize(hdr)? */ + /* FIXME! dimm: Why not by means of get_totalsize(hdr)? */ total_size = (unsigned long) (hdr[4] + (hdr[5] << 8)); printk(KERN_INFO "microcode: size %u, total_size %u\n", @@ -342,6 +342,8 @@ static int generic_load_microcode(int cpu, void *data, size_t size, mc_header = (struct microcode_header_amd *)mc; if (get_matching_microcode(cpu, mc, new_rev)) { + if (new_mc) + vfree(new_mc); new_rev = mc_header->patch_id; new_mc = mc; } else diff --git a/arch/x86/kernel/microcode_intel.c b/arch/x86/kernel/microcode_intel.c index f4930b55c6a..48ed3cef58c 100644 --- a/arch/x86/kernel/microcode_intel.c +++ b/arch/x86/kernel/microcode_intel.c @@ -70,8 +70,6 @@ * Fix sigmatch() macro to handle old CPUs with pf == 0. * Thanks to Stuart Swales for pointing out this bug. */ - -/* #define DEBUG */ /* pr_debug */ #include #include #include @@ -356,6 +354,8 @@ static int generic_load_microcode(int cpu, void *data, size_t size, } if (get_matching_microcode(&uci->cpu_sig, mc, new_rev)) { + if (new_mc) + vfree(new_mc); new_rev = mc_header.rev; new_mc = mc; } else -- cgit v1.2.3 From 3d0aedd9538e6be8afec1a9d8b084bf90bc91495 Mon Sep 17 00:00:00 2001 From: Hiroshi Shimamoto Date: Fri, 12 Sep 2008 17:01:09 -0700 Subject: x86: signal: put give_sigsegv of setup frames together When setup frame fails, force_sigsegv is called and returns -EFAULT. There is similar code in ia32_setup_frame(), ia32_setup_rt_frame(), __setup_frame() and __setup_rt_frame(). Make them identical. No change in functionality intended. Signed-off-by: Hiroshi Shimamoto Signed-off-by: Ingo Molnar --- arch/x86/kernel/signal_32.c | 31 ++++++++++++++----------------- arch/x86/kernel/signal_64.c | 17 +++++++++-------- 2 files changed, 23 insertions(+), 25 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/signal_32.c b/arch/x86/kernel/signal_32.c index b668efc18ea..1c22e0067fe 100644 --- a/arch/x86/kernel/signal_32.c +++ b/arch/x86/kernel/signal_32.c @@ -349,21 +349,21 @@ __setup_frame(int sig, struct k_sigaction *ka, sigset_t *set, frame = get_sigframe(ka, regs, sizeof(*frame), &fpstate); if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame))) - goto give_sigsegv; + return -EFAULT; err = __put_user(sig, &frame->sig); if (err) - goto give_sigsegv; + return -EFAULT; err = setup_sigcontext(&frame->sc, fpstate, regs, set->sig[0]); if (err) - goto give_sigsegv; + return -EFAULT; if (_NSIG_WORDS > 1) { err = __copy_to_user(&frame->extramask, &set->sig[1], sizeof(frame->extramask)); if (err) - goto give_sigsegv; + return -EFAULT; } if (current->mm->context.vdso) @@ -388,7 +388,7 @@ __setup_frame(int sig, struct k_sigaction *ka, sigset_t *set, err |= __put_user(0x80cd, (short __user *)(frame->retcode+6)); if (err) - goto give_sigsegv; + return -EFAULT; /* Set up registers for signal handler */ regs->sp = (unsigned long)frame; @@ -403,10 +403,6 @@ __setup_frame(int sig, struct k_sigaction *ka, sigset_t *set, regs->cs = __USER_CS; return 0; - -give_sigsegv: - force_sigsegv(sig, current); - return -EFAULT; } static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, @@ -420,14 +416,14 @@ static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, frame = get_sigframe(ka, regs, sizeof(*frame), &fpstate); if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame))) - goto give_sigsegv; + return -EFAULT; err |= __put_user(sig, &frame->sig); err |= __put_user(&frame->info, &frame->pinfo); err |= __put_user(&frame->uc, &frame->puc); err |= copy_siginfo_to_user(&frame->info, info); if (err) - goto give_sigsegv; + return -EFAULT; /* Create the ucontext. */ if (cpu_has_xsave) @@ -443,7 +439,7 @@ static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, regs, set->sig[0]); err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set)); if (err) - goto give_sigsegv; + return -EFAULT; /* Set up to return from userspace. */ restorer = VDSO32_SYMBOL(current->mm->context.vdso, rt_sigreturn); @@ -463,7 +459,7 @@ static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, err |= __put_user(0x80cd, (short __user *)(frame->retcode+5)); if (err) - goto give_sigsegv; + return -EFAULT; /* Set up registers for signal handler */ regs->sp = (unsigned long)frame; @@ -478,10 +474,6 @@ static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, regs->cs = __USER_CS; return 0; - -give_sigsegv: - force_sigsegv(sig, current); - return -EFAULT; } /* @@ -506,6 +498,11 @@ setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, else ret = __setup_frame(usig, ka, set, regs); + if (ret) { + force_sigsegv(sig, current); + return -EFAULT; + } + return ret; } diff --git a/arch/x86/kernel/signal_64.c b/arch/x86/kernel/signal_64.c index 43e6862fc7a..3b79e179ba3 100644 --- a/arch/x86/kernel/signal_64.c +++ b/arch/x86/kernel/signal_64.c @@ -215,12 +215,12 @@ static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, frame = get_stack(ka, regs, sizeof(struct rt_sigframe)) - 8; if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame))) - goto give_sigsegv; + return -EFAULT; if (ka->sa.sa_flags & SA_SIGINFO) { err |= copy_siginfo_to_user(&frame->info, info); if (err) - goto give_sigsegv; + return -EFAULT; } /* Create the ucontext. */ @@ -248,11 +248,11 @@ static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, err |= __put_user(ka->sa.sa_restorer, &frame->pretcode); } else { /* could use a vstub here */ - goto give_sigsegv; + return -EFAULT; } if (err) - goto give_sigsegv; + return -EFAULT; /* Set up registers for signal handler */ regs->di = sig; @@ -272,10 +272,6 @@ static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, regs->cs = __USER_CS; return 0; - -give_sigsegv: - force_sigsegv(sig, current); - return -EFAULT; } /* @@ -297,6 +293,11 @@ setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, #endif ret = __setup_rt_frame(sig, ka, info, set, regs); + if (ret) { + force_sigsegv(sig, current); + return -EFAULT; + } + return ret; } -- cgit v1.2.3 From 2ba48e16e78216bb5b9fd08a088bfefda478df25 Mon Sep 17 00:00:00 2001 From: Hiroshi Shimamoto Date: Fri, 12 Sep 2008 17:02:53 -0700 Subject: x86: signal: remove unneeded err handling This patch eliminates unused or unneeded variable handling. Signed-off-by: Hiroshi Shimamoto Signed-off-by: Ingo Molnar --- arch/x86/kernel/signal_32.c | 11 ++++------- arch/x86/kernel/signal_64.c | 5 ++--- 2 files changed, 6 insertions(+), 10 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/signal_32.c b/arch/x86/kernel/signal_32.c index 1c22e0067fe..d433861a659 100644 --- a/arch/x86/kernel/signal_32.c +++ b/arch/x86/kernel/signal_32.c @@ -351,18 +351,15 @@ __setup_frame(int sig, struct k_sigaction *ka, sigset_t *set, if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame))) return -EFAULT; - err = __put_user(sig, &frame->sig); - if (err) + if (__put_user(sig, &frame->sig)) return -EFAULT; - err = setup_sigcontext(&frame->sc, fpstate, regs, set->sig[0]); - if (err) + if (setup_sigcontext(&frame->sc, fpstate, regs, set->sig[0])) return -EFAULT; if (_NSIG_WORDS > 1) { - err = __copy_to_user(&frame->extramask, &set->sig[1], - sizeof(frame->extramask)); - if (err) + if (__copy_to_user(&frame->extramask, &set->sig[1], + sizeof(frame->extramask))) return -EFAULT; } diff --git a/arch/x86/kernel/signal_64.c b/arch/x86/kernel/signal_64.c index 3b79e179ba3..a21c8519729 100644 --- a/arch/x86/kernel/signal_64.c +++ b/arch/x86/kernel/signal_64.c @@ -210,7 +210,7 @@ static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, (unsigned long)fp - sizeof(struct rt_sigframe), 16) - 8; if (save_i387_xstate(fp) < 0) - err |= -1; + return -EFAULT; } else frame = get_stack(ka, regs, sizeof(struct rt_sigframe)) - 8; @@ -218,8 +218,7 @@ static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, return -EFAULT; if (ka->sa.sa_flags & SA_SIGINFO) { - err |= copy_siginfo_to_user(&frame->info, info); - if (err) + if (copy_siginfo_to_user(&frame->info, info)) return -EFAULT; } -- cgit v1.2.3 From e6babb6b7fed93c93f8fc5ef8ebd3a474fc2df3e Mon Sep 17 00:00:00 2001 From: Hiroshi Shimamoto Date: Fri, 12 Sep 2008 17:03:31 -0700 Subject: x86: signal: introduce do_rt_sigreturn() introduce do_rt_sigreturn(), to collect common part of sys_rt_sigreturn(). No change in functionality intended. Signed-off-by: Hiroshi Shimamoto Signed-off-by: Ingo Molnar --- arch/x86/kernel/signal_32.c | 12 +++++++++--- arch/x86/kernel/signal_64.c | 11 ++++++++--- 2 files changed, 17 insertions(+), 6 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/signal_32.c b/arch/x86/kernel/signal_32.c index d433861a659..da3cf3270f8 100644 --- a/arch/x86/kernel/signal_32.c +++ b/arch/x86/kernel/signal_32.c @@ -215,9 +215,8 @@ badframe: return 0; } -asmlinkage int sys_rt_sigreturn(unsigned long __unused) +static long do_rt_sigreturn(struct pt_regs *regs) { - struct pt_regs *regs = (struct pt_regs *)&__unused; struct rt_sigframe __user *frame; unsigned long ax; sigset_t set; @@ -243,10 +242,17 @@ asmlinkage int sys_rt_sigreturn(unsigned long __unused) return ax; badframe: - signal_fault(regs, frame, "rt sigreturn"); + signal_fault(regs, frame, "rt_sigreturn"); return 0; } +asmlinkage int sys_rt_sigreturn(unsigned long __unused) +{ + struct pt_regs *regs = (struct pt_regs *)&__unused; + + return do_rt_sigreturn(regs); +} + /* * Set up a signal frame. */ diff --git a/arch/x86/kernel/signal_64.c b/arch/x86/kernel/signal_64.c index a21c8519729..bf77d4789a2 100644 --- a/arch/x86/kernel/signal_64.c +++ b/arch/x86/kernel/signal_64.c @@ -104,11 +104,11 @@ restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc, return err; } -asmlinkage long sys_rt_sigreturn(struct pt_regs *regs) +static long do_rt_sigreturn(struct pt_regs *regs) { struct rt_sigframe __user *frame; - sigset_t set; unsigned long ax; + sigset_t set; frame = (struct rt_sigframe __user *)(regs->sp - sizeof(long)); if (!access_ok(VERIFY_READ, frame, sizeof(*frame))) @@ -131,10 +131,15 @@ asmlinkage long sys_rt_sigreturn(struct pt_regs *regs) return ax; badframe: - signal_fault(regs, frame, "sigreturn"); + signal_fault(regs, frame, "rt_sigreturn"); return 0; } +asmlinkage long sys_rt_sigreturn(struct pt_regs *regs) +{ + return do_rt_sigreturn(regs); +} + /* * Set up a signal frame. */ -- cgit v1.2.3 From bee44f294efd8417f5e68553778a6cc957af1547 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Fri, 12 Sep 2008 19:42:35 +0900 Subject: x86: make GART to respect device's dma_mask about virtual mappings Currently, GART IOMMU ingores device's dma_mask when it does virtual mappings. So it could give a device a virtual address that the device can't access to. This patch fixes the above problem. Signed-off-by: FUJITA Tomonori Signed-off-by: Ingo Molnar --- arch/x86/kernel/pci-gart_64.c | 39 ++++++++++++++++++++++++++++----------- 1 file changed, 28 insertions(+), 11 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c index 1b0c412566e..9972c42ac92 100644 --- a/arch/x86/kernel/pci-gart_64.c +++ b/arch/x86/kernel/pci-gart_64.c @@ -83,23 +83,34 @@ static unsigned long next_bit; /* protected by iommu_bitmap_lock */ static int need_flush; /* global flush state. set for each gart wrap */ static unsigned long alloc_iommu(struct device *dev, int size, - unsigned long align_mask) + unsigned long align_mask, u64 dma_mask) { unsigned long offset, flags; unsigned long boundary_size; unsigned long base_index; + unsigned long limit; base_index = ALIGN(iommu_bus_base & dma_get_seg_boundary(dev), PAGE_SIZE) >> PAGE_SHIFT; boundary_size = ALIGN((unsigned long long)dma_get_seg_boundary(dev) + 1, PAGE_SIZE) >> PAGE_SHIFT; + limit = iommu_device_max_index(iommu_pages, + DIV_ROUND_UP(iommu_bus_base, PAGE_SIZE), + dma_mask >> PAGE_SHIFT); + spin_lock_irqsave(&iommu_bitmap_lock, flags); - offset = iommu_area_alloc(iommu_gart_bitmap, iommu_pages, next_bit, + + if (limit <= next_bit) { + need_flush = 1; + next_bit = 0; + } + + offset = iommu_area_alloc(iommu_gart_bitmap, limit, next_bit, size, base_index, boundary_size, align_mask); - if (offset == -1) { + if (offset == -1 && next_bit) { need_flush = 1; - offset = iommu_area_alloc(iommu_gart_bitmap, iommu_pages, 0, + offset = iommu_area_alloc(iommu_gart_bitmap, limit, 0, size, base_index, boundary_size, align_mask); } @@ -228,12 +239,14 @@ nonforced_iommu(struct device *dev, unsigned long addr, size_t size) * Caller needs to check if the iommu is needed and flush. */ static dma_addr_t dma_map_area(struct device *dev, dma_addr_t phys_mem, - size_t size, int dir, unsigned long align_mask) + size_t size, int dir, unsigned long align_mask, + u64 dma_mask) { unsigned long npages = iommu_num_pages(phys_mem, size); - unsigned long iommu_page = alloc_iommu(dev, npages, align_mask); + unsigned long iommu_page; int i; + iommu_page = alloc_iommu(dev, npages, align_mask, dma_mask); if (iommu_page == -1) { if (!nonforced_iommu(dev, phys_mem, size)) return phys_mem; @@ -263,7 +276,7 @@ gart_map_single(struct device *dev, phys_addr_t paddr, size_t size, int dir) if (!need_iommu(dev, paddr, size)) return paddr; - bus = dma_map_area(dev, paddr, size, dir, 0); + bus = dma_map_area(dev, paddr, size, dir, 0, dma_get_mask(dev)); flush_gart(); return bus; @@ -314,6 +327,7 @@ static int dma_map_sg_nonforce(struct device *dev, struct scatterlist *sg, { struct scatterlist *s; int i; + u64 dma_mask = dma_get_mask(dev); #ifdef CONFIG_IOMMU_DEBUG printk(KERN_DEBUG "dma_map_sg overflow\n"); @@ -323,7 +337,8 @@ static int dma_map_sg_nonforce(struct device *dev, struct scatterlist *sg, unsigned long addr = sg_phys(s); if (nonforced_iommu(dev, addr, s->length)) { - addr = dma_map_area(dev, addr, s->length, dir, 0); + addr = dma_map_area(dev, addr, s->length, dir, 0, + dma_mask); if (addr == bad_dma_address) { if (i > 0) gart_unmap_sg(dev, sg, i, dir); @@ -345,14 +360,16 @@ static int __dma_map_cont(struct device *dev, struct scatterlist *start, int nelems, struct scatterlist *sout, unsigned long pages) { - unsigned long iommu_start = alloc_iommu(dev, pages, 0); - unsigned long iommu_page = iommu_start; + unsigned long iommu_start; + unsigned long iommu_page; struct scatterlist *s; int i; + iommu_start = alloc_iommu(dev, pages, 0, dma_get_mask(dev)); if (iommu_start == -1) return -1; + iommu_page = iommu_start; for_each_sg(start, s, nelems, i) { unsigned long pages, addr; unsigned long phys_addr = s->dma_address; @@ -497,7 +514,7 @@ gart_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_addr, align_mask = (1UL << get_order(size)) - 1; *dma_addr = dma_map_area(dev, __pa(vaddr), size, DMA_BIDIRECTIONAL, - align_mask); + align_mask, dma_mask); flush_gart(); if (*dma_addr != bad_dma_address) -- cgit v1.2.3 From f10ac8a232496bf9271cfc67c6eea432891f04a6 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Thu, 11 Sep 2008 23:08:47 +0900 Subject: x86: avoid unnecessary low zone allocation in Calgary's alloc_coherent x86's common alloc_coherent (dma_alloc_coherent in dma-mapping.h) sets up the gfp flag according to the device dma_mask but Calgary doesn't need it because of virtual mappings. This patch avoids unnecessary low zone allocation. Signed-off-by: FUJITA Tomonori Acked-by: Muli Ben-Yehuda Signed-off-by: Ingo Molnar --- arch/x86/kernel/pci-calgary_64.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/pci-calgary_64.c b/arch/x86/kernel/pci-calgary_64.c index 8415d92853c..fe7695e4caa 100644 --- a/arch/x86/kernel/pci-calgary_64.c +++ b/arch/x86/kernel/pci-calgary_64.c @@ -491,6 +491,8 @@ static void* calgary_alloc_coherent(struct device *dev, size_t size, npages = size >> PAGE_SHIFT; order = get_order(size); + flag &= ~(__GFP_DMA | __GFP_HIGHMEM | __GFP_DMA32); + /* alloc enough pages (and possibly more) */ ret = (void *)__get_free_pages(flag, order); if (!ret) -- cgit v1.2.3 From f6a32a36ab96016675cd414802904feb288d7899 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Thu, 11 Sep 2008 23:08:48 +0900 Subject: x86: gart alloc_coherent does virtual mapppings only when necessary gart alloc_coherent need to do virtual mapppings only when an allocated buffer is not DMA-capable for a device. Signed-off-by: FUJITA Tomonori Acked-by: Joerg Roedel Signed-off-by: Ingo Molnar --- arch/x86/kernel/pci-gart_64.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c index 9972c42ac92..9739d568209 100644 --- a/arch/x86/kernel/pci-gart_64.c +++ b/arch/x86/kernel/pci-gart_64.c @@ -505,15 +505,23 @@ gart_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_addr, gfp_t flag) { void *vaddr; + dma_addr_t paddr; unsigned long align_mask; + u64 dma_mask = dma_alloc_coherent_mask(dev, flag); vaddr = (void *)__get_free_pages(flag | __GFP_ZERO, get_order(size)); if (!vaddr) return NULL; + paddr = virt_to_phys(vaddr); + if (is_buffer_dma_capable(dma_mask, paddr, size)) { + *dma_addr = paddr; + return vaddr; + } + align_mask = (1UL << get_order(size)) - 1; - *dma_addr = dma_map_area(dev, __pa(vaddr), size, DMA_BIDIRECTIONAL, + *dma_addr = dma_map_area(dev, paddr, size, DMA_BIDIRECTIONAL, align_mask, dma_mask); flush_gart(); -- cgit v1.2.3 From 8308c54d7e312f7a03e2ce2057d0837e6fe3843f Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Thu, 11 Sep 2008 01:31:50 -0700 Subject: generic: redefine resource_size_t as phys_addr_t There's no good reason why a resource_size_t shouldn't just be a physical address, so simply redefine it in terms of phys_addr_t. Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Ingo Molnar --- arch/x86/kernel/e820.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c index 66e48aa2dd1..477f4bb7e55 100644 --- a/arch/x86/kernel/e820.c +++ b/arch/x86/kernel/e820.c @@ -1276,12 +1276,10 @@ void __init e820_reserve_resources(void) res = alloc_bootmem_low(sizeof(struct resource) * e820.nr_map); for (i = 0; i < e820.nr_map; i++) { end = e820.map[i].addr + e820.map[i].size - 1; -#ifndef CONFIG_RESOURCES_64BIT - if (end > 0x100000000ULL) { + if (end != (resource_size_t)end) { res++; continue; } -#endif res->name = e820_type_to_string(e820.map[i].type); res->start = e820.map[i].addr; res->end = end; -- cgit v1.2.3 From 5649b7c30316a51792808422ac03ee825d26aa5e Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 16 Sep 2008 09:29:09 +0200 Subject: x86: add DMI quirk for AMI BIOS which corrupts address 0xc000 during resume Alan Jenkins and Andy Wettstein reported a suspend/resume memory corruption bug and extensively documented it here: http://bugzilla.kernel.org/show_bug.cgi?id=11237 The bug is that the BIOS overwrites 1K of memory at 0xc000 physical, without registering it in e820 as reserved or giving the kernel any idea about this. Detect AMI BIOSen and reserve that 1K. We paint this bug around with a very broad brush (reserving that 1K on all AMI BIOS systems), as the bug was extremely hard to find and needed several weeks and lots of debugging and patching. The bug was found via the CONFIG_X86_CHECK_BIOS_CORRUPTION=y debug feature, if similar bugs are suspected then this feature can be enabled on other systems as well to scan low memory for corrupted memory. Reported-by: Alan Jenkins Reported-by: Andy Wettstein Signed-off-by: Ingo Molnar --- arch/x86/kernel/setup.c | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index ec7e56c1b98..3109ca37a67 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -729,6 +729,29 @@ void start_periodic_check_for_corruption(void) } #endif +static int __init dmi_low_memory_corruption(const struct dmi_system_id *d) +{ + printk(KERN_NOTICE + "%s detected: BIOS corrupts 0xc000, working it around.\n", + d->ident); + + reserve_early(0xc000, 0xc400, "BIOS quirk"); + + return 0; +} + +/* List of systems that have known low memory corruption BIOS problems */ +static struct dmi_system_id __initdata bad_bios_dmi_table[] = { + { + .callback = dmi_low_memory_corruption, + .ident = "AMI BIOS", + .matches = { + DMI_MATCH(DMI_BIOS_VENDOR, "American Megatrends Inc."), + }, + }, + {} +}; + /* * Determine if we were loaded by an EFI loader. If so, then we have also been * passed the efi memmap, systab, etc., so we should use these data structures @@ -752,6 +775,8 @@ void __init setup_arch(char **cmdline_p) printk(KERN_INFO "Command line: %s\n", boot_command_line); #endif + dmi_check_system(bad_bios_dmi_table); + early_cpu_init(); early_ioremap_init(); @@ -1037,3 +1062,5 @@ void __init setup_arch(char **cmdline_p) #endif #endif } + + -- cgit v1.2.3 From 1e22436eba84edfec9c25e5a25d09062c4f91ca9 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 16 Sep 2008 09:58:02 +0200 Subject: x86: reserve low 64K on AMI and Phoenix BIOS boxen there's multiple reports about suspend/resume related low memory corruption in this bugzilla: http://bugzilla.kernel.org/show_bug.cgi?id=11237 the common pattern is that the corruption is caused by the BIOS, and that it affects some portion of the first 64K of physical RAM. So add a DMI quirk This will waste 64K RAM on 'good' systems too, but without knowing the exact nature of this BIOS memory corruption this is the safest approach. This might as well solve a wide range of suspend/resume breakages under Linux. Signed-off-by: Ingo Molnar --- arch/x86/kernel/setup.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 3109ca37a67..33719544a22 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -732,10 +732,10 @@ void start_periodic_check_for_corruption(void) static int __init dmi_low_memory_corruption(const struct dmi_system_id *d) { printk(KERN_NOTICE - "%s detected: BIOS corrupts 0xc000, working it around.\n", + "%s detected: BIOS may corrupt low RAM, working it around.\n", d->ident); - reserve_early(0xc000, 0xc400, "BIOS quirk"); + reserve_early(0x0, 0x10000, "BIOS quirk"); return 0; } @@ -749,6 +749,13 @@ static struct dmi_system_id __initdata bad_bios_dmi_table[] = { DMI_MATCH(DMI_BIOS_VENDOR, "American Megatrends Inc."), }, }, + { + .callback = dmi_low_memory_corruption, + .ident = "Phoenix BIOS", + .matches = { + DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies, LTD"), + }, + }, {} }; -- cgit v1.2.3 From fc38151947477596aa27df6c4306ad6008dc6711 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 16 Sep 2008 10:07:34 +0200 Subject: x86: add X86_RESERVE_LOW_64K This bugzilla: http://bugzilla.kernel.org/show_bug.cgi?id=11237 Documents a wide range of systems where the BIOS utilizes the first 64K of physical memory during suspend/resume and other hardware events. Currently we reserve this memory on all AMI and Phoenix BIOS systems. Life is too short to hunt subtle memory corruption problems like this, so we try to be robust by default. Still, allow this to be overriden: allow users who want that first 64K of memory to be available to the kernel disable the quirk, via CONFIG_X86_RESERVE_LOW_64K=n. Also, allow the early reservation to overlap with other early reservations. Signed-off-by: Ingo Molnar --- arch/x86/kernel/setup.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 33719544a22..786c1886ae5 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -735,13 +735,14 @@ static int __init dmi_low_memory_corruption(const struct dmi_system_id *d) "%s detected: BIOS may corrupt low RAM, working it around.\n", d->ident); - reserve_early(0x0, 0x10000, "BIOS quirk"); + reserve_early_overlap_ok(0x0, 0x10000, "BIOS quirk"); return 0; } /* List of systems that have known low memory corruption BIOS problems */ static struct dmi_system_id __initdata bad_bios_dmi_table[] = { +#ifdef CONFIG_X86_RESERVE_LOW_64K { .callback = dmi_low_memory_corruption, .ident = "AMI BIOS", @@ -757,6 +758,7 @@ static struct dmi_system_id __initdata bad_bios_dmi_table[] = { }, }, {} +#endif }; /* -- cgit v1.2.3 From ba0593bf553c450a03dbc5f8c1f0ff58b778a0c8 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Tue, 16 Sep 2008 09:29:40 -0700 Subject: x86: completely disable NOPL on 32 bits Completely disable NOPL on 32 bits. It turns out that Microsoft Virtual PC is so broken it can't even reliably *fail* in the presence of NOPL. This leaves the infrastructure in place but disables it unconditionally. Signed-off-by: H. Peter Anvin --- arch/x86/kernel/cpu/common.c | 24 ++++-------------------- 1 file changed, 4 insertions(+), 20 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 8aab8517642..4e456bd955b 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -344,31 +344,15 @@ static void __init early_cpu_detect(void) /* * The NOPL instruction is supposed to exist on all CPUs with - * family >= 6, unfortunately, that's not true in practice because + * family >= 6; unfortunately, that's not true in practice because * of early VIA chips and (more importantly) broken virtualizers that - * are not easy to detect. Hence, probe for it based on first - * principles. + * are not easy to detect. In the latter case it doesn't even *fail* + * reliably, so probing for it doesn't even work. Disable it completely + * unless we can find a reliable way to detect all the broken cases. */ static void __cpuinit detect_nopl(struct cpuinfo_x86 *c) { - const u32 nopl_signature = 0x888c53b1; /* Random number */ - u32 has_nopl = nopl_signature; - clear_cpu_cap(c, X86_FEATURE_NOPL); - if (c->x86 >= 6) { - asm volatile("\n" - "1: .byte 0x0f,0x1f,0xc0\n" /* nopl %eax */ - "2:\n" - " .section .fixup,\"ax\"\n" - "3: xor %0,%0\n" - " jmp 2b\n" - " .previous\n" - _ASM_EXTABLE(1b,3b) - : "+a" (has_nopl)); - - if (has_nopl == nopl_signature) - set_cpu_cap(c, X86_FEATURE_NOPL); - } } static void __cpuinit generic_identify(struct cpuinfo_x86 *c) -- cgit v1.2.3 From 90f7d25c6b672137344f447a30a9159945ffea72 Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Tue, 16 Sep 2008 11:27:30 -0700 Subject: x86: print DMI information in the oops trace in order to diagnose hard system specific issues, it's useful to have the system name in the oops (as provided by DMI) Signed-off-by: Arjan van de Ven Signed-off-by: Ingo Molnar --- arch/x86/kernel/process_32.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 0c3927accb0..7b9ee9f0963 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -37,6 +37,7 @@ #include #include #include +#include #include #include @@ -160,6 +161,7 @@ void __show_registers(struct pt_regs *regs, int all) unsigned long d0, d1, d2, d3, d6, d7; unsigned long sp; unsigned short ss, gs; + const char *board; if (user_mode_vm(regs)) { sp = regs->sp; @@ -172,11 +174,15 @@ void __show_registers(struct pt_regs *regs, int all) } printk("\n"); - printk("Pid: %d, comm: %s %s (%s %.*s)\n", + + board = dmi_get_system_info(DMI_PRODUCT_NAME); + if (!board) + board = ""; + printk("Pid: %d, comm: %s %s (%s %.*s) %s\n", task_pid_nr(current), current->comm, print_tainted(), init_utsname()->release, (int)strcspn(init_utsname()->version, " "), - init_utsname()->version); + init_utsname()->version, board); printk("EIP: %04x:[<%08lx>] EFLAGS: %08lx CPU: %d\n", (u16)regs->cs, regs->ip, regs->flags, -- cgit v1.2.3 From ee2fa7435b6dddf1ca119f298ad0100cf50c0397 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Wed, 17 Sep 2008 13:47:25 +0200 Subject: AMD IOMMU: set iommu sunc flag after command queuing The iommu->need_sync flag must be set after the command is queued to avoid race conditions. Signed-off-by: Joerg Roedel Signed-off-by: Ingo Molnar --- arch/x86/kernel/amd_iommu.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index 69b4d060b21..a96d8c049a8 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -140,6 +140,7 @@ static int iommu_completion_wait(struct amd_iommu *iommu) static int iommu_queue_inv_dev_entry(struct amd_iommu *iommu, u16 devid) { struct iommu_cmd cmd; + int ret; BUG_ON(iommu == NULL); @@ -147,9 +148,11 @@ static int iommu_queue_inv_dev_entry(struct amd_iommu *iommu, u16 devid) CMD_SET_TYPE(&cmd, CMD_INV_DEV_ENTRY); cmd.data[0] = devid; + ret = iommu_queue_command(iommu, &cmd); + iommu->need_sync = 1; - return iommu_queue_command(iommu, &cmd); + return ret; } /* @@ -159,6 +162,7 @@ static int iommu_queue_inv_iommu_pages(struct amd_iommu *iommu, u64 address, u16 domid, int pde, int s) { struct iommu_cmd cmd; + int ret; memset(&cmd, 0, sizeof(cmd)); address &= PAGE_MASK; @@ -171,9 +175,11 @@ static int iommu_queue_inv_iommu_pages(struct amd_iommu *iommu, if (pde) /* PDE bit - we wan't flush everything not only the PTEs */ cmd.data[2] |= CMD_INV_IOMMU_PAGES_PDE_MASK; + ret = iommu_queue_command(iommu, &cmd); + iommu->need_sync = 1; - return iommu_queue_command(iommu, &cmd); + return ret; } /* -- cgit v1.2.3 From 7e4f88da7bf1887563f70bd5edbbd0479e31dc12 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Wed, 17 Sep 2008 14:19:15 +0200 Subject: AMD IOMMU: protect completion wait loop with iommu lock The unlocked polling of the ComWaitInt bit in the IOMMU completion wait path is racy. Protect it with the iommu lock. Signed-off-by: Joerg Roedel Signed-off-by: Ingo Molnar --- arch/x86/kernel/amd_iommu.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index a96d8c049a8..042fdc27bc9 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -101,10 +101,10 @@ static int iommu_queue_command(struct amd_iommu *iommu, struct iommu_cmd *cmd) */ static int iommu_completion_wait(struct amd_iommu *iommu) { - int ret, ready = 0; + int ret = 0, ready = 0; unsigned status = 0; struct iommu_cmd cmd; - unsigned long i = 0; + unsigned long flags, i = 0; memset(&cmd, 0, sizeof(cmd)); cmd.data[0] = CMD_COMPL_WAIT_INT_MASK; @@ -112,10 +112,12 @@ static int iommu_completion_wait(struct amd_iommu *iommu) iommu->need_sync = 0; - ret = iommu_queue_command(iommu, &cmd); + spin_lock_irqsave(&iommu->lock, flags); + + ret = __iommu_queue_command(iommu, &cmd); if (ret) - return ret; + goto out; while (!ready && (i < EXIT_LOOP_COUNT)) { ++i; @@ -130,6 +132,8 @@ static int iommu_completion_wait(struct amd_iommu *iommu) if (unlikely((i == EXIT_LOOP_COUNT) && printk_ratelimit())) printk(KERN_WARNING "AMD IOMMU: Completion wait loop failed\n"); +out: + spin_unlock_irqrestore(&iommu->lock, flags); return 0; } -- cgit v1.2.3 From 279b0bbba2bb647348ad90e183b3960aa99eccfd Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 18 Sep 2008 23:55:27 -0700 Subject: x86: fix arch/x86/kernel/cpu/mtrr/main.c warning fix this warning reported by Andrew Morton: > arch/x86/kernel/cpu/mtrr/main.c: In function 'mtrr_bp_init': > arch/x86/kernel/cpu/mtrr/main.c:1170: warning: 'extra_remove_base' may be used uninitialized in this function the warning is bogus but the logic that prevents uninitialized use is a bit convoluted so simplify it all. Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/mtrr/main.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c index b117d7f8a56..8a7c79234be 100644 --- a/arch/x86/kernel/cpu/mtrr/main.c +++ b/arch/x86/kernel/cpu/mtrr/main.c @@ -1218,11 +1218,10 @@ static int __init mtrr_cleanup(unsigned address_bits) memset(range, 0, sizeof(range)); extra_remove_size = 0; - if (mtrr_tom2) { - extra_remove_base = 1 << (32 - PAGE_SHIFT); + extra_remove_base = 1 << (32 - PAGE_SHIFT); + if (mtrr_tom2) extra_remove_size = (mtrr_tom2 >> PAGE_SHIFT) - extra_remove_base; - } nr_range = x86_get_mtrr_mem_range(range, 0, extra_remove_base, extra_remove_size); range_sums = sum_ranges(range, nr_range); -- cgit v1.2.3 From dbcc112e3b5367e81a845b082933506b0ff1d1e2 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Thu, 4 Sep 2008 15:04:26 +0200 Subject: AMD IOMMU: check for invalid device pointers Currently AMD IOMMU code triggers a BUG_ON if NULL is passed as the device. This is inconsistent with other IOMMU implementations. Signed-off-by: Joerg Roedel Signed-off-by: Ingo Molnar --- arch/x86/kernel/amd_iommu.c | 43 +++++++++++++++++++++++++++++++++++-------- 1 file changed, 35 insertions(+), 8 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index 01c68c38840..695e0fc41b1 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -645,6 +645,18 @@ static void set_device_domain(struct amd_iommu *iommu, * *****************************************************************************/ +/* + * This function checks if the driver got a valid device from the caller to + * avoid dereferencing invalid pointers. + */ +static bool check_device(struct device *dev) +{ + if (!dev || !dev->dma_mask) + return false; + + return true; +} + /* * In the dma_ops path we only have the struct device. This function * finds the corresponding IOMMU, the protection domain and the @@ -661,18 +673,19 @@ static int get_device_resources(struct device *dev, struct pci_dev *pcidev; u16 _bdf; - BUG_ON(!dev || dev->bus != &pci_bus_type || !dev->dma_mask); + *iommu = NULL; + *domain = NULL; + *bdf = 0xffff; + + if (dev->bus != &pci_bus_type) + return 0; pcidev = to_pci_dev(dev); _bdf = calc_devid(pcidev->bus->number, pcidev->devfn); /* device not translated by any IOMMU in the system? */ - if (_bdf > amd_iommu_last_bdf) { - *iommu = NULL; - *domain = NULL; - *bdf = 0xffff; + if (_bdf > amd_iommu_last_bdf) return 0; - } *bdf = amd_iommu_alias_table[_bdf]; @@ -826,6 +839,9 @@ static dma_addr_t map_single(struct device *dev, phys_addr_t paddr, u16 devid; dma_addr_t addr; + if (!check_device(dev)) + return bad_dma_address; + get_device_resources(dev, &iommu, &domain, &devid); if (iommu == NULL || domain == NULL) @@ -860,7 +876,8 @@ static void unmap_single(struct device *dev, dma_addr_t dma_addr, struct protection_domain *domain; u16 devid; - if (!get_device_resources(dev, &iommu, &domain, &devid)) + if (!check_device(dev) || + !get_device_resources(dev, &iommu, &domain, &devid)) /* device not handled by any AMD IOMMU */ return; @@ -910,6 +927,9 @@ static int map_sg(struct device *dev, struct scatterlist *sglist, phys_addr_t paddr; int mapped_elems = 0; + if (!check_device(dev)) + return 0; + get_device_resources(dev, &iommu, &domain, &devid); if (!iommu || !domain) @@ -967,7 +987,8 @@ static void unmap_sg(struct device *dev, struct scatterlist *sglist, u16 devid; int i; - if (!get_device_resources(dev, &iommu, &domain, &devid)) + if (!check_device(dev) || + !get_device_resources(dev, &iommu, &domain, &devid)) return; spin_lock_irqsave(&domain->lock, flags); @@ -999,6 +1020,9 @@ static void *alloc_coherent(struct device *dev, size_t size, u16 devid; phys_addr_t paddr; + if (!check_device(dev)) + return NULL; + virt_addr = (void *)__get_free_pages(flag, get_order(size)); if (!virt_addr) return 0; @@ -1047,6 +1071,9 @@ static void free_coherent(struct device *dev, size_t size, struct protection_domain *domain; u16 devid; + if (!check_device(dev)) + return; + get_device_resources(dev, &iommu, &domain, &devid); if (!iommu || !domain) -- cgit v1.2.3 From 270cab2426cdc6307725e4f1f46ecf8ab8e69193 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Thu, 4 Sep 2008 15:49:46 +0200 Subject: AMD IOMMU: move TLB flushing to the map/unmap helper functions This patch moves the invocation of the flushing functions to the map/unmap helpers because its common code in all dma_ops relevant mapping/unmapping code. Signed-off-by: Joerg Roedel Signed-off-by: Ingo Molnar --- arch/x86/kernel/amd_iommu.c | 19 +++++-------------- 1 file changed, 5 insertions(+), 14 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index 695e0fc41b1..691e023695a 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -795,6 +795,9 @@ static dma_addr_t __map_single(struct device *dev, } address += offset; + if (unlikely(iommu_has_npcache(iommu))) + iommu_flush_pages(iommu, dma_dom->domain.id, address, size); + out: return address; } @@ -825,6 +828,8 @@ static void __unmap_single(struct amd_iommu *iommu, } dma_ops_free_addresses(dma_dom, dma_addr, pages); + + iommu_flush_pages(iommu, dma_dom->domain.id, dma_addr, size); } /* @@ -853,9 +858,6 @@ static dma_addr_t map_single(struct device *dev, phys_addr_t paddr, if (addr == bad_dma_address) goto out; - if (iommu_has_npcache(iommu)) - iommu_flush_pages(iommu, domain->id, addr, size); - if (iommu->need_sync) iommu_completion_wait(iommu); @@ -885,8 +887,6 @@ static void unmap_single(struct device *dev, dma_addr_t dma_addr, __unmap_single(iommu, domain->priv, dma_addr, size, dir); - iommu_flush_pages(iommu, domain->id, dma_addr, size); - if (iommu->need_sync) iommu_completion_wait(iommu); @@ -948,9 +948,6 @@ static int map_sg(struct device *dev, struct scatterlist *sglist, mapped_elems++; } else goto unmap; - if (iommu_has_npcache(iommu)) - iommu_flush_pages(iommu, domain->id, s->dma_address, - s->dma_length); } if (iommu->need_sync) @@ -996,8 +993,6 @@ static void unmap_sg(struct device *dev, struct scatterlist *sglist, for_each_sg(sglist, s, nelems, i) { __unmap_single(iommu, domain->priv, s->dma_address, s->dma_length, dir); - iommu_flush_pages(iommu, domain->id, s->dma_address, - s->dma_length); s->dma_address = s->dma_length = 0; } @@ -1048,9 +1043,6 @@ static void *alloc_coherent(struct device *dev, size_t size, goto out; } - if (iommu_has_npcache(iommu)) - iommu_flush_pages(iommu, domain->id, *dma_addr, size); - if (iommu->need_sync) iommu_completion_wait(iommu); @@ -1082,7 +1074,6 @@ static void free_coherent(struct device *dev, size_t size, spin_lock_irqsave(&domain->lock, flags); __unmap_single(iommu, domain->priv, dma_addr, size, DMA_BIDIRECTIONAL); - iommu_flush_pages(iommu, domain->id, dma_addr, size); if (iommu->need_sync) iommu_completion_wait(iommu); -- cgit v1.2.3 From 2842e5bf3115193f05dc9dac20f940e7abf44c1a Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Thu, 18 Sep 2008 15:23:43 +0200 Subject: x86: move GART TLB flushing options to generic code The GART currently implements the iommu=[no]fullflush command line parameters which influence its IO/TLB flushing strategy. This patch makes these parameters generic so that they can be used by the AMD IOMMU too. Signed-off-by: Joerg Roedel Signed-off-by: Ingo Molnar --- arch/x86/kernel/pci-dma.c | 13 +++++++++++++ arch/x86/kernel/pci-gart_64.c | 13 ------------- 2 files changed, 13 insertions(+), 13 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index 0a1408abcc6..d2f2c0158dc 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c @@ -16,6 +16,15 @@ EXPORT_SYMBOL(dma_ops); static int iommu_sac_force __read_mostly; +/* + * If this is disabled the IOMMU will use an optimized flushing strategy + * of only flushing when an mapping is reused. With it true the GART is + * flushed for every mapping. Problem is that doing the lazy flush seems + * to trigger bugs with some popular PCI cards, in particular 3ware (but + * has been also also seen with Qlogic at least). + */ +int iommu_fullflush; + #ifdef CONFIG_IOMMU_DEBUG int panic_on_overflow __read_mostly = 1; int force_iommu __read_mostly = 1; @@ -171,6 +180,10 @@ static __init int iommu_setup(char *p) } if (!strncmp(p, "nomerge", 7)) iommu_merge = 0; + if (!strncmp(p, "fullflush", 8)) + iommu_fullflush = 1; + if (!strncmp(p, "nofullflush", 11)) + iommu_fullflush = 0; if (!strncmp(p, "forcesac", 8)) iommu_sac_force = 1; if (!strncmp(p, "allowdac", 8)) diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c index 9739d568209..508ef470b27 100644 --- a/arch/x86/kernel/pci-gart_64.c +++ b/arch/x86/kernel/pci-gart_64.c @@ -45,15 +45,6 @@ static unsigned long iommu_pages; /* .. and in pages */ static u32 *iommu_gatt_base; /* Remapping table */ -/* - * If this is disabled the IOMMU will use an optimized flushing strategy - * of only flushing when an mapping is reused. With it true the GART is - * flushed for every mapping. Problem is that doing the lazy flush seems - * to trigger bugs with some popular PCI cards, in particular 3ware (but - * has been also also seen with Qlogic at least). - */ -int iommu_fullflush = 1; - /* Allocation bitmap for the remapping area: */ static DEFINE_SPINLOCK(iommu_bitmap_lock); /* Guarded by iommu_bitmap_lock: */ @@ -901,10 +892,6 @@ void __init gart_parse_options(char *p) #endif if (isdigit(*p) && get_option(&p, &arg)) iommu_size = arg; - if (!strncmp(p, "fullflush", 8)) - iommu_fullflush = 1; - if (!strncmp(p, "nofullflush", 11)) - iommu_fullflush = 0; if (!strncmp(p, "noagp", 5)) no_agp = 1; if (!strncmp(p, "noaperture", 10)) -- cgit v1.2.3 From 1c65577398589bb44ab0980f9b9d30804b48a5db Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Thu, 4 Sep 2008 18:40:05 +0200 Subject: AMD IOMMU: implement lazy IO/TLB flushing The IO/TLB flushing on every unmaping operation is the most expensive part in AMD IOMMU code and not strictly necessary. It is sufficient to do the flush before any entries are reused. This is patch implements lazy IO/TLB flushing which does exactly this. Signed-off-by: Joerg Roedel Signed-off-by: Ingo Molnar --- arch/x86/kernel/amd_iommu.c | 26 ++++++++++++++++++++++---- arch/x86/kernel/amd_iommu_init.c | 7 ++++++- 2 files changed, 28 insertions(+), 5 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index 691e023695a..679f2a8e22e 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -203,6 +203,14 @@ static int iommu_flush_pages(struct amd_iommu *iommu, u16 domid, return 0; } +/* Flush the whole IO/TLB for a given protection domain */ +static void iommu_flush_tlb(struct amd_iommu *iommu, u16 domid) +{ + u64 address = CMD_INV_IOMMU_ALL_PAGES_ADDRESS; + + iommu_queue_inv_iommu_pages(iommu, address, domid, 0, 1); +} + /**************************************************************************** * * The functions below are used the create the page table mappings for @@ -386,14 +394,18 @@ static unsigned long dma_ops_alloc_addresses(struct device *dev, PAGE_SIZE) >> PAGE_SHIFT; limit = limit < size ? limit : size; - if (dom->next_bit >= limit) + if (dom->next_bit >= limit) { dom->next_bit = 0; + dom->need_flush = true; + } address = iommu_area_alloc(dom->bitmap, limit, dom->next_bit, pages, 0 , boundary_size, 0); - if (address == -1) + if (address == -1) { address = iommu_area_alloc(dom->bitmap, limit, 0, pages, 0, boundary_size, 0); + dom->need_flush = true; + } if (likely(address != -1)) { dom->next_bit = address + pages; @@ -553,6 +565,8 @@ static struct dma_ops_domain *dma_ops_domain_alloc(struct amd_iommu *iommu, dma_dom->bitmap[0] = 1; dma_dom->next_bit = 0; + dma_dom->need_flush = false; + /* Intialize the exclusion range if necessary */ if (iommu->exclusion_start && iommu->exclusion_start < dma_dom->aperture_size) { @@ -795,7 +809,10 @@ static dma_addr_t __map_single(struct device *dev, } address += offset; - if (unlikely(iommu_has_npcache(iommu))) + if (unlikely(dma_dom->need_flush && !iommu_fullflush)) { + iommu_flush_tlb(iommu, dma_dom->domain.id); + dma_dom->need_flush = false; + } else if (unlikely(iommu_has_npcache(iommu))) iommu_flush_pages(iommu, dma_dom->domain.id, address, size); out: @@ -829,7 +846,8 @@ static void __unmap_single(struct amd_iommu *iommu, dma_ops_free_addresses(dma_dom, dma_addr, pages); - iommu_flush_pages(iommu, dma_dom->domain.id, dma_addr, size); + if (iommu_fullflush) + iommu_flush_pages(iommu, dma_dom->domain.id, dma_addr, size); } /* diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c index a69cc0f5204..f2fa8dc81be 100644 --- a/arch/x86/kernel/amd_iommu_init.c +++ b/arch/x86/kernel/amd_iommu_init.c @@ -995,6 +995,11 @@ int __init amd_iommu_init(void) else printk("disabled\n"); + if (iommu_fullflush) + printk(KERN_INFO "AMD IOMMU: IO/TLB flush on unmap enabled\n"); + else + printk(KERN_INFO "AMD IOMMU: Lazy IO/TLB flushing enabled\n"); + out: return ret; @@ -1057,7 +1062,7 @@ void __init amd_iommu_detect(void) static int __init parse_amd_iommu_options(char *str) { for (; *str; ++str) { - if (strcmp(str, "isolate") == 0) + if (strncmp(str, "isolate", 7) == 0) amd_iommu_isolate = 1; } -- cgit v1.2.3 From 5507eef835c9c941e69d6d96e4b43af23eeb4ac9 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Thu, 4 Sep 2008 19:01:02 +0200 Subject: AMD IOMMU: add branch hints to completion wait checks This patch adds branch hints to the cecks if a completion_wait is necessary. The completion_waits in the mapping paths are unlikly because they will only happen on software implementations of AMD IOMMU which don't exists today or with lazy IO/TLB flushing when the allocator wraps around the address space. With lazy IO/TLB flushing the completion_wait in the unmapping path is unlikely too. Signed-off-by: Joerg Roedel Signed-off-by: Ingo Molnar --- arch/x86/kernel/amd_iommu.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index 679f2a8e22e..d743aa0adcc 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -876,7 +876,7 @@ static dma_addr_t map_single(struct device *dev, phys_addr_t paddr, if (addr == bad_dma_address) goto out; - if (iommu->need_sync) + if (unlikely(iommu->need_sync)) iommu_completion_wait(iommu); out: @@ -905,7 +905,7 @@ static void unmap_single(struct device *dev, dma_addr_t dma_addr, __unmap_single(iommu, domain->priv, dma_addr, size, dir); - if (iommu->need_sync) + if (unlikely(iommu->need_sync)) iommu_completion_wait(iommu); spin_unlock_irqrestore(&domain->lock, flags); @@ -968,7 +968,7 @@ static int map_sg(struct device *dev, struct scatterlist *sglist, goto unmap; } - if (iommu->need_sync) + if (unlikely(iommu->need_sync)) iommu_completion_wait(iommu); out: @@ -1014,7 +1014,7 @@ static void unmap_sg(struct device *dev, struct scatterlist *sglist, s->dma_address = s->dma_length = 0; } - if (iommu->need_sync) + if (unlikely(iommu->need_sync)) iommu_completion_wait(iommu); spin_unlock_irqrestore(&domain->lock, flags); @@ -1061,7 +1061,7 @@ static void *alloc_coherent(struct device *dev, size_t size, goto out; } - if (iommu->need_sync) + if (unlikely(iommu->need_sync)) iommu_completion_wait(iommu); out: @@ -1093,7 +1093,7 @@ static void free_coherent(struct device *dev, size_t size, __unmap_single(iommu, domain->priv, dma_addr, size, DMA_BIDIRECTIONAL); - if (iommu->need_sync) + if (unlikely(iommu->need_sync)) iommu_completion_wait(iommu); spin_unlock_irqrestore(&domain->lock, flags); -- cgit v1.2.3 From 6d4f343f84993eb0d5864c0823dc9babd171a33a Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Thu, 4 Sep 2008 19:18:02 +0200 Subject: AMD IOMMU: align alloc_coherent addresses properly The API definition for dma_alloc_coherent states that the bus address has to be aligned to the next power of 2 boundary greater than the allocation size. This is violated by AMD IOMMU so far and this patch fixes it. Signed-off-by: Joerg Roedel Signed-off-by: Ingo Molnar --- arch/x86/kernel/amd_iommu.c | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index d743aa0adcc..15792ed082e 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -383,7 +383,8 @@ static unsigned long dma_mask_to_pages(unsigned long mask) */ static unsigned long dma_ops_alloc_addresses(struct device *dev, struct dma_ops_domain *dom, - unsigned int pages) + unsigned int pages, + unsigned long align_mask) { unsigned long limit = dma_mask_to_pages(*dev->dma_mask); unsigned long address; @@ -400,10 +401,10 @@ static unsigned long dma_ops_alloc_addresses(struct device *dev, } address = iommu_area_alloc(dom->bitmap, limit, dom->next_bit, pages, - 0 , boundary_size, 0); + 0 , boundary_size, align_mask); if (address == -1) { address = iommu_area_alloc(dom->bitmap, limit, 0, pages, - 0, boundary_size, 0); + 0, boundary_size, align_mask); dom->need_flush = true; } @@ -787,17 +788,22 @@ static dma_addr_t __map_single(struct device *dev, struct dma_ops_domain *dma_dom, phys_addr_t paddr, size_t size, - int dir) + int dir, + bool align) { dma_addr_t offset = paddr & ~PAGE_MASK; dma_addr_t address, start; unsigned int pages; + unsigned long align_mask = 0; int i; pages = iommu_num_pages(paddr, size); paddr &= PAGE_MASK; - address = dma_ops_alloc_addresses(dev, dma_dom, pages); + if (align) + align_mask = (1UL << get_order(size)) - 1; + + address = dma_ops_alloc_addresses(dev, dma_dom, pages, align_mask); if (unlikely(address == bad_dma_address)) goto out; @@ -872,7 +878,7 @@ static dma_addr_t map_single(struct device *dev, phys_addr_t paddr, return (dma_addr_t)paddr; spin_lock_irqsave(&domain->lock, flags); - addr = __map_single(dev, iommu, domain->priv, paddr, size, dir); + addr = __map_single(dev, iommu, domain->priv, paddr, size, dir, false); if (addr == bad_dma_address) goto out; @@ -959,7 +965,7 @@ static int map_sg(struct device *dev, struct scatterlist *sglist, paddr = sg_phys(s); s->dma_address = __map_single(dev, iommu, domain->priv, - paddr, s->length, dir); + paddr, s->length, dir, false); if (s->dma_address) { s->dma_length = s->length; @@ -1053,7 +1059,7 @@ static void *alloc_coherent(struct device *dev, size_t size, spin_lock_irqsave(&domain->lock, flags); *dma_addr = __map_single(dev, iommu, domain->priv, paddr, - size, DMA_BIDIRECTIONAL); + size, DMA_BIDIRECTIONAL, true); if (*dma_addr == bad_dma_address) { free_pages((unsigned long)virt_addr, get_order(size)); -- cgit v1.2.3 From 335503e57b6b8de04cec5d27eb2c3d09ff98905b Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Fri, 5 Sep 2008 14:29:07 +0200 Subject: AMD IOMMU: add event buffer allocation This patch adds the allocation of a event buffer for each AMD IOMMU in the system. The hardware will log events like device page faults or other errors to this buffer once this is enabled. Signed-off-by: Joerg Roedel Signed-off-by: Ingo Molnar --- arch/x86/kernel/amd_iommu_init.c | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c index f2fa8dc81be..41ce8d5d626 100644 --- a/arch/x86/kernel/amd_iommu_init.c +++ b/arch/x86/kernel/amd_iommu_init.c @@ -417,6 +417,30 @@ static void __init free_command_buffer(struct amd_iommu *iommu) free_pages((unsigned long)iommu->cmd_buf, get_order(CMD_BUFFER_SIZE)); } +/* allocates the memory where the IOMMU will log its events to */ +static u8 * __init alloc_event_buffer(struct amd_iommu *iommu) +{ + u64 entry; + iommu->evt_buf = (u8 *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, + get_order(EVT_BUFFER_SIZE)); + + if (iommu->evt_buf == NULL) + return NULL; + + entry = (u64)virt_to_phys(iommu->evt_buf) | EVT_LEN_MASK; + memcpy_toio(iommu->mmio_base + MMIO_EVT_BUF_OFFSET, + &entry, sizeof(entry)); + + iommu->evt_buf_size = EVT_BUFFER_SIZE; + + return iommu->evt_buf; +} + +static void __init free_event_buffer(struct amd_iommu *iommu) +{ + free_pages((unsigned long)iommu->evt_buf, get_order(EVT_BUFFER_SIZE)); +} + /* sets a specific bit in the device table entry. */ static void set_dev_entry_bit(u16 devid, u8 bit) { @@ -622,6 +646,7 @@ static int __init init_iommu_devices(struct amd_iommu *iommu) static void __init free_iommu_one(struct amd_iommu *iommu) { free_command_buffer(iommu); + free_event_buffer(iommu); iommu_unmap_mmio_space(iommu); } @@ -661,6 +686,10 @@ static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h) if (!iommu->cmd_buf) return -ENOMEM; + iommu->evt_buf = alloc_event_buffer(iommu); + if (!iommu->evt_buf) + return -ENOMEM; + init_iommu_from_pci(iommu); init_iommu_from_acpi(iommu, h); init_iommu_devices(iommu); -- cgit v1.2.3 From ee893c24edb8ebab9a3fb66566855572579ad616 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Mon, 8 Sep 2008 14:48:04 +0200 Subject: AMD IOMMU: save pci segment from ACPI tables This patch adds the pci_seg field to the amd_iommu structure and fills it with the corresponding value from the ACPI table. Signed-off-by: Joerg Roedel Signed-off-by: Ingo Molnar --- arch/x86/kernel/amd_iommu_init.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c index 41ce8d5d626..b50234ef91e 100644 --- a/arch/x86/kernel/amd_iommu_init.c +++ b/arch/x86/kernel/amd_iommu_init.c @@ -676,6 +676,7 @@ static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h) */ iommu->devid = h->devid; iommu->cap_ptr = h->cap_ptr; + iommu->pci_seg = h->pci_seg; iommu->mmio_phys = h->mmio_phys; iommu->mmio_base = iommu_map_mmio_space(h->mmio_phys); if (!iommu->mmio_base) -- cgit v1.2.3 From 3eaf28a1cd2686aaa185b54d5a5e18e91b41f7f2 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Mon, 8 Sep 2008 15:55:10 +0200 Subject: AMD IOMMU: save pci_dev instead of devid We need the pci_dev later anyways to enable MSI for the IOMMU hardware. So remove the devid pointing to the BDF and replace it with the pci_dev structure where the IOMMU is implemented. Signed-off-by: Joerg Roedel Signed-off-by: Ingo Molnar --- arch/x86/kernel/amd_iommu_init.c | 25 ++++++++++++++++--------- 1 file changed, 16 insertions(+), 9 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c index b50234ef91e..a7eb89d8923 100644 --- a/arch/x86/kernel/amd_iommu_init.c +++ b/arch/x86/kernel/amd_iommu_init.c @@ -242,9 +242,12 @@ static void __init iommu_feature_disable(struct amd_iommu *iommu, u8 bit) /* Function to enable the hardware */ void __init iommu_enable(struct amd_iommu *iommu) { - printk(KERN_INFO "AMD IOMMU: Enabling IOMMU at "); - print_devid(iommu->devid, 0); - printk(" cap 0x%hx\n", iommu->cap_ptr); + printk(KERN_INFO "AMD IOMMU: Enabling IOMMU " + "at %02x:%02x.%x cap 0x%hx\n", + iommu->dev->bus->number, + PCI_SLOT(iommu->dev->devfn), + PCI_FUNC(iommu->dev->devfn), + iommu->cap_ptr); iommu_feature_enable(iommu, CONTROL_IOMMU_EN); } @@ -511,15 +514,14 @@ static void __init set_device_exclusion_range(u16 devid, struct ivmd_header *m) */ static void __init init_iommu_from_pci(struct amd_iommu *iommu) { - int bus = PCI_BUS(iommu->devid); - int dev = PCI_SLOT(iommu->devid); - int fn = PCI_FUNC(iommu->devid); int cap_ptr = iommu->cap_ptr; u32 range; - iommu->cap = read_pci_config(bus, dev, fn, cap_ptr+MMIO_CAP_HDR_OFFSET); + pci_read_config_dword(iommu->dev, cap_ptr + MMIO_CAP_HDR_OFFSET, + &iommu->cap); + pci_read_config_dword(iommu->dev, cap_ptr + MMIO_RANGE_OFFSET, + &range); - range = read_pci_config(bus, dev, fn, cap_ptr+MMIO_RANGE_OFFSET); iommu->first_device = calc_devid(MMIO_GET_BUS(range), MMIO_GET_FD(range)); iommu->last_device = calc_devid(MMIO_GET_BUS(range), @@ -674,7 +676,10 @@ static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h) /* * Copy data from ACPI table entry to the iommu struct */ - iommu->devid = h->devid; + iommu->dev = pci_get_bus_and_slot(PCI_BUS(h->devid), h->devid & 0xff); + if (!iommu->dev) + return 1; + iommu->cap_ptr = h->cap_ptr; iommu->pci_seg = h->pci_seg; iommu->mmio_phys = h->mmio_phys; @@ -695,6 +700,8 @@ static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h) init_iommu_from_acpi(iommu, h); init_iommu_devices(iommu); + pci_enable_device(iommu->dev); + return 0; } -- cgit v1.2.3 From a80dc3e0e0dc8393158de317d66ae0f345dc58f9 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Thu, 11 Sep 2008 16:51:41 +0200 Subject: AMD IOMMU: add MSI interrupt support The AMD IOMMU can generate interrupts for various reasons. This patch adds the basic interrupt enabling infrastructure to the driver. Signed-off-by: Joerg Roedel Signed-off-by: Ingo Molnar --- arch/x86/kernel/amd_iommu.c | 11 +++++ arch/x86/kernel/amd_iommu_init.c | 99 +++++++++++++++++++++++++++++++++++++++- 2 files changed, 109 insertions(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index 15792ed082e..0e494b9d5f2 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -49,6 +49,17 @@ static int iommu_has_npcache(struct amd_iommu *iommu) return iommu->cap & IOMMU_CAP_NPCACHE; } +/**************************************************************************** + * + * Interrupt handling functions + * + ****************************************************************************/ + +irqreturn_t amd_iommu_int_handler(int irq, void *data) +{ + return IRQ_NONE; +} + /**************************************************************************** * * IOMMU command queuing functions diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c index a7eb89d8923..14a06464a69 100644 --- a/arch/x86/kernel/amd_iommu_init.c +++ b/arch/x86/kernel/amd_iommu_init.c @@ -22,6 +22,8 @@ #include #include #include +#include +#include #include #include #include @@ -515,17 +517,20 @@ static void __init set_device_exclusion_range(u16 devid, struct ivmd_header *m) static void __init init_iommu_from_pci(struct amd_iommu *iommu) { int cap_ptr = iommu->cap_ptr; - u32 range; + u32 range, misc; pci_read_config_dword(iommu->dev, cap_ptr + MMIO_CAP_HDR_OFFSET, &iommu->cap); pci_read_config_dword(iommu->dev, cap_ptr + MMIO_RANGE_OFFSET, &range); + pci_read_config_dword(iommu->dev, cap_ptr + MMIO_MISC_OFFSET, + &misc); iommu->first_device = calc_devid(MMIO_GET_BUS(range), MMIO_GET_FD(range)); iommu->last_device = calc_devid(MMIO_GET_BUS(range), MMIO_GET_LD(range)); + iommu->evt_msi_num = MMIO_MSI_NUM(misc); } /* @@ -696,6 +701,8 @@ static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h) if (!iommu->evt_buf) return -ENOMEM; + iommu->int_enabled = false; + init_iommu_from_pci(iommu); init_iommu_from_acpi(iommu, h); init_iommu_devices(iommu); @@ -741,6 +748,95 @@ static int __init init_iommu_all(struct acpi_table_header *table) return 0; } +/**************************************************************************** + * + * The following functions initialize the MSI interrupts for all IOMMUs + * in the system. Its a bit challenging because there could be multiple + * IOMMUs per PCI BDF but we can call pci_enable_msi(x) only once per + * pci_dev. + * + ****************************************************************************/ + +static int __init iommu_setup_msix(struct amd_iommu *iommu) +{ + struct amd_iommu *curr; + struct msix_entry entries[32]; /* only 32 supported by AMD IOMMU */ + int nvec = 0, i; + + list_for_each_entry(curr, &amd_iommu_list, list) { + if (curr->dev == iommu->dev) { + entries[nvec].entry = curr->evt_msi_num; + entries[nvec].vector = 0; + curr->int_enabled = true; + nvec++; + } + } + + if (pci_enable_msix(iommu->dev, entries, nvec)) { + pci_disable_msix(iommu->dev); + return 1; + } + + for (i = 0; i < nvec; ++i) { + int r = request_irq(entries->vector, amd_iommu_int_handler, + IRQF_SAMPLE_RANDOM, + "AMD IOMMU", + NULL); + if (r) + goto out_free; + } + + return 0; + +out_free: + for (i -= 1; i >= 0; --i) + free_irq(entries->vector, NULL); + + pci_disable_msix(iommu->dev); + + return 1; +} + +static int __init iommu_setup_msi(struct amd_iommu *iommu) +{ + int r; + struct amd_iommu *curr; + + list_for_each_entry(curr, &amd_iommu_list, list) { + if (curr->dev == iommu->dev) + curr->int_enabled = true; + } + + + if (pci_enable_msi(iommu->dev)) + return 1; + + r = request_irq(iommu->dev->irq, amd_iommu_int_handler, + IRQF_SAMPLE_RANDOM, + "AMD IOMMU", + NULL); + + if (r) { + pci_disable_msi(iommu->dev); + return 1; + } + + return 0; +} + +static int __init iommu_init_msi(struct amd_iommu *iommu) +{ + if (iommu->int_enabled) + return 0; + + if (pci_find_capability(iommu->dev, PCI_CAP_ID_MSIX)) + return iommu_setup_msix(iommu); + else if (pci_find_capability(iommu->dev, PCI_CAP_ID_MSI)) + return iommu_setup_msi(iommu); + + return 1; +} + /**************************************************************************** * * The next functions belong to the third pass of parsing the ACPI @@ -862,6 +958,7 @@ static void __init enable_iommus(void) list_for_each_entry(iommu, &amd_iommu_list, list) { iommu_set_exclusion_range(iommu); + iommu_init_msi(iommu); iommu_enable(iommu); } } -- cgit v1.2.3 From 90008ee4b811c944455752dcb72b291a5ba81b53 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Tue, 9 Sep 2008 16:41:05 +0200 Subject: AMD IOMMU: add event handling code This patch adds code for polling and printing out events generated by the AMD IOMMU. Signed-off-by: Joerg Roedel Signed-off-by: Ingo Molnar --- arch/x86/kernel/amd_iommu.c | 87 +++++++++++++++++++++++++++++++++++++++- arch/x86/kernel/amd_iommu_init.c | 1 - 2 files changed, 86 insertions(+), 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index 0e494b9d5f2..0cb8fd2359f 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -55,9 +55,94 @@ static int iommu_has_npcache(struct amd_iommu *iommu) * ****************************************************************************/ +static void iommu_print_event(void *__evt) +{ + u32 *event = __evt; + int type = (event[1] >> EVENT_TYPE_SHIFT) & EVENT_TYPE_MASK; + int devid = (event[0] >> EVENT_DEVID_SHIFT) & EVENT_DEVID_MASK; + int domid = (event[1] >> EVENT_DOMID_SHIFT) & EVENT_DOMID_MASK; + int flags = (event[1] >> EVENT_FLAGS_SHIFT) & EVENT_FLAGS_MASK; + u64 address = (u64)(((u64)event[3]) << 32) | event[2]; + + printk(KERN_ERR "AMD IOMMU: Event logged ["); + + switch (type) { + case EVENT_TYPE_ILL_DEV: + printk("ILLEGAL_DEV_TABLE_ENTRY device=%02x:%02x.%x " + "address=0x%016llx flags=0x%04x]\n", + PCI_BUS(devid), PCI_SLOT(devid), PCI_FUNC(devid), + address, flags); + break; + case EVENT_TYPE_IO_FAULT: + printk("IO_PAGE_FAULT device=%02x:%02x.%x " + "domain=0x%04x address=0x%016llx flags=0x%04x]\n", + PCI_BUS(devid), PCI_SLOT(devid), PCI_FUNC(devid), + domid, address, flags); + break; + case EVENT_TYPE_DEV_TAB_ERR: + printk("DEV_TAB_HARDWARE_ERROR device=%02x:%02x.%x " + "address=0x%016llx flags=0x%04x]\n", + PCI_BUS(devid), PCI_SLOT(devid), PCI_FUNC(devid), + address, flags); + break; + case EVENT_TYPE_PAGE_TAB_ERR: + printk("PAGE_TAB_HARDWARE_ERROR device=%02x:%02x.%x " + "domain=0x%04x address=0x%016llx flags=0x%04x]\n", + PCI_BUS(devid), PCI_SLOT(devid), PCI_FUNC(devid), + domid, address, flags); + break; + case EVENT_TYPE_ILL_CMD: + printk("ILLEGAL_COMMAND_ERROR address=0x%016llx]\n", address); + break; + case EVENT_TYPE_CMD_HARD_ERR: + printk("COMMAND_HARDWARE_ERROR address=0x%016llx " + "flags=0x%04x]\n", address, flags); + break; + case EVENT_TYPE_IOTLB_INV_TO: + printk("IOTLB_INV_TIMEOUT device=%02x:%02x.%x " + "address=0x%016llx]\n", + PCI_BUS(devid), PCI_SLOT(devid), PCI_FUNC(devid), + address); + break; + case EVENT_TYPE_INV_DEV_REQ: + printk("INVALID_DEVICE_REQUEST device=%02x:%02x.%x " + "address=0x%016llx flags=0x%04x]\n", + PCI_BUS(devid), PCI_SLOT(devid), PCI_FUNC(devid), + address, flags); + break; + default: + printk(KERN_ERR "UNKNOWN type=0x%02x]\n", type); + } +} + +static void iommu_poll_events(struct amd_iommu *iommu) +{ + u32 head, tail; + unsigned long flags; + + spin_lock_irqsave(&iommu->lock, flags); + + head = readl(iommu->mmio_base + MMIO_EVT_HEAD_OFFSET); + tail = readl(iommu->mmio_base + MMIO_EVT_TAIL_OFFSET); + + while (head != tail) { + iommu_print_event(iommu->evt_buf + head); + head = (head + EVENT_ENTRY_SIZE) % iommu->evt_buf_size; + } + + writel(head, iommu->mmio_base + MMIO_EVT_HEAD_OFFSET); + + spin_unlock_irqrestore(&iommu->lock, flags); +} + irqreturn_t amd_iommu_int_handler(int irq, void *data) { - return IRQ_NONE; + struct amd_iommu *iommu; + + list_for_each_entry(iommu, &amd_iommu_list, list) + iommu_poll_events(iommu); + + return IRQ_HANDLED; } /**************************************************************************** diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c index 14a06464a69..eed488892c0 100644 --- a/arch/x86/kernel/amd_iommu_init.c +++ b/arch/x86/kernel/amd_iommu_init.c @@ -32,7 +32,6 @@ /* * definitions for the ACPI scanning code */ -#define PCI_BUS(x) (((x) >> 8) & 0xff) #define IVRS_HEADER_LENGTH 48 #define ACPI_IVHD_TYPE 0x10 -- cgit v1.2.3 From 126c52be4b1d2eb667a1d140f0ceaff9d353f700 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Tue, 9 Sep 2008 16:47:35 +0200 Subject: AMD IOMMU: enable event logging The code to log IOMMU events is in place now. So enable event logging with this patch. Signed-off-by: Joerg Roedel Signed-off-by: Ingo Molnar --- arch/x86/kernel/amd_iommu_init.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c index eed488892c0..1974b73fece 100644 --- a/arch/x86/kernel/amd_iommu_init.c +++ b/arch/x86/kernel/amd_iommu_init.c @@ -253,6 +253,13 @@ void __init iommu_enable(struct amd_iommu *iommu) iommu_feature_enable(iommu, CONTROL_IOMMU_EN); } +/* Function to enable IOMMU event logging and event interrupts */ +void __init iommu_enable_event_logging(struct amd_iommu *iommu) +{ + iommu_feature_enable(iommu, CONTROL_EVT_LOG_EN); + iommu_feature_enable(iommu, CONTROL_EVT_INT_EN); +} + /* * mapping and unmapping functions for the IOMMU MMIO space. Each AMD IOMMU in * the system has one. @@ -958,6 +965,7 @@ static void __init enable_iommus(void) list_for_each_entry(iommu, &amd_iommu_list, list) { iommu_set_exclusion_range(iommu); iommu_init_msi(iommu); + iommu_enable_event_logging(iommu); iommu_enable(iommu); } } -- cgit v1.2.3 From a22131a223147016041b5e5cd0ae5ab61ef4177e Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Tue, 9 Sep 2008 17:55:28 +0200 Subject: AMD IOMMU: allow IO page faults from devices There is a bit in the device entry to suppress all IO page faults generated by a device. This bit was set until now because there was no event logging. Now that there is event logging this patch allows IO page faults from devices to see them in the kernel log. Signed-off-by: Joerg Roedel Signed-off-by: Ingo Molnar --- arch/x86/kernel/amd_iommu_init.c | 1 - 1 file changed, 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c index 1974b73fece..8c137598555 100644 --- a/arch/x86/kernel/amd_iommu_init.c +++ b/arch/x86/kernel/amd_iommu_init.c @@ -950,7 +950,6 @@ static void init_device_table(void) for (devid = 0; devid <= amd_iommu_last_bdf; ++devid) { set_dev_entry_bit(devid, DEV_ENTRY_VALID); set_dev_entry_bit(devid, DEV_ENTRY_TRANSLATION); - set_dev_entry_bit(devid, DEV_ENTRY_NO_PAGE_FAULT); } } -- cgit v1.2.3 From b39ba6ad004a31bf2a08ba2b08c1e0f9b3530bb7 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Tue, 9 Sep 2008 18:40:46 +0200 Subject: AMD IOMMU: add dma_supported callback This function determines if the AMD IOMMU implementation is responsible for a given device. So the DMA layer can get this information from the driver. Signed-off-by: Joerg Roedel Signed-off-by: Ingo Molnar --- arch/x86/kernel/amd_iommu.c | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index 0cb8fd2359f..a6a6f8ed1cf 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -1204,6 +1204,30 @@ free_mem: free_pages((unsigned long)virt_addr, get_order(size)); } +/* + * This function is called by the DMA layer to find out if we can handle a + * particular device. It is part of the dma_ops. + */ +static int amd_iommu_dma_supported(struct device *dev, u64 mask) +{ + u16 bdf; + struct pci_dev *pcidev; + + /* No device or no PCI device */ + if (!dev || dev->bus != &pci_bus_type) + return 0; + + pcidev = to_pci_dev(dev); + + bdf = calc_devid(pcidev->bus->number, pcidev->devfn); + + /* Out of our scope? */ + if (bdf > amd_iommu_last_bdf) + return 0; + + return 1; +} + /* * The function for pre-allocating protection domains. * @@ -1247,6 +1271,7 @@ static struct dma_mapping_ops amd_iommu_dma_ops = { .unmap_single = unmap_single, .map_sg = map_sg, .unmap_sg = unmap_sg, + .dma_supported = amd_iommu_dma_supported, }; /* -- cgit v1.2.3 From bd60b735c658e6e8c656e89771d281bcfcf51279 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Thu, 11 Sep 2008 10:24:48 +0200 Subject: AMD IOMMU: don't assign preallocated protection domains to devices In isolation mode the protection domains for the devices are preallocated and preassigned. This is bad if a device should be passed to a virtualization guest because the IOMMU code does not know if it is in use by a driver. This patch changes the code to assign the device to the preallocated domain only if there are dma mapping requests for it. Signed-off-by: Joerg Roedel Signed-off-by: Ingo Molnar --- arch/x86/kernel/amd_iommu.c | 43 ++++++++++++++++++++++++++++++++++++++----- 1 file changed, 38 insertions(+), 5 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index a6a6f8ed1cf..7c179144745 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -33,6 +33,10 @@ static DEFINE_RWLOCK(amd_iommu_devtable_lock); +/* A list of preallocated protection domains */ +static LIST_HEAD(iommu_pd_list); +static DEFINE_SPINLOCK(iommu_pd_list_lock); + /* * general struct to manage commands send to an IOMMU */ @@ -663,6 +667,7 @@ static struct dma_ops_domain *dma_ops_domain_alloc(struct amd_iommu *iommu, dma_dom->next_bit = 0; dma_dom->need_flush = false; + dma_dom->target_dev = 0xffff; /* Intialize the exclusion range if necessary */ if (iommu->exclusion_start && @@ -768,6 +773,33 @@ static bool check_device(struct device *dev) return true; } +/* + * In this function the list of preallocated protection domains is traversed to + * find the domain for a specific device + */ +static struct dma_ops_domain *find_protection_domain(u16 devid) +{ + struct dma_ops_domain *entry, *ret = NULL; + unsigned long flags; + + if (list_empty(&iommu_pd_list)) + return NULL; + + spin_lock_irqsave(&iommu_pd_list_lock, flags); + + list_for_each_entry(entry, &iommu_pd_list, list) { + if (entry->target_dev == devid) { + ret = entry; + list_del(&ret->list); + break; + } + } + + spin_unlock_irqrestore(&iommu_pd_list_lock, flags); + + return ret; +} + /* * In the dma_ops path we only have the struct device. This function * finds the corresponding IOMMU, the protection domain and the @@ -803,9 +835,11 @@ static int get_device_resources(struct device *dev, *iommu = amd_iommu_rlookup_table[*bdf]; if (*iommu == NULL) return 0; - dma_dom = (*iommu)->default_dom; *domain = domain_for_device(*bdf); if (*domain == NULL) { + dma_dom = find_protection_domain(*bdf); + if (!dma_dom) + dma_dom = (*iommu)->default_dom; *domain = &dma_dom->domain; set_device_domain(*iommu, *domain, *bdf); printk(KERN_INFO "AMD IOMMU: Using protection domain %d for " @@ -1257,10 +1291,9 @@ void prealloc_protection_domains(void) if (!dma_dom) continue; init_unity_mappings_for_device(dma_dom, devid); - set_device_domain(iommu, &dma_dom->domain, devid); - printk(KERN_INFO "AMD IOMMU: Allocated domain %d for device ", - dma_dom->domain.id); - print_devid(devid, 1); + dma_dom->target_dev = devid; + + list_add_tail(&dma_dom->list, &iommu_pd_list); } } -- cgit v1.2.3 From 38ddf41b198e21d3ecbe5752e875857b7ce7589e Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Thu, 11 Sep 2008 10:38:32 +0200 Subject: AMD IOMMU: some set_device_domain cleanups Remove some magic numbers and split the pte_root using standard functions. Signed-off-by: Joerg Roedel Signed-off-by: Ingo Molnar --- arch/x86/kernel/amd_iommu.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index 7c179144745..a34d8e915e3 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -739,12 +739,13 @@ static void set_device_domain(struct amd_iommu *iommu, u64 pte_root = virt_to_phys(domain->pt_root); - pte_root |= (domain->mode & 0x07) << 9; - pte_root |= IOMMU_PTE_IR | IOMMU_PTE_IW | IOMMU_PTE_P | 2; + pte_root |= (domain->mode & DEV_ENTRY_MODE_MASK) + << DEV_ENTRY_MODE_SHIFT; + pte_root |= IOMMU_PTE_IR | IOMMU_PTE_IW | IOMMU_PTE_P | IOMMU_PTE_TV; write_lock_irqsave(&amd_iommu_devtable_lock, flags); - amd_iommu_dev_table[devid].data[0] = pte_root; - amd_iommu_dev_table[devid].data[1] = pte_root >> 32; + amd_iommu_dev_table[devid].data[0] = lower_32_bits(pte_root); + amd_iommu_dev_table[devid].data[1] = upper_32_bits(pte_root); amd_iommu_dev_table[devid].data[2] = domain->id; amd_iommu_pd_table[devid] = domain; -- cgit v1.2.3 From 13d9fead3daa0efa1b8bb6ae59650e4453b39128 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Wed, 10 Sep 2008 20:19:40 +0900 Subject: AMD IOMMU: avoid unnecessary low zone allocation in alloc_coherent x86's common alloc_coherent (dma_alloc_coherent in dma-mapping.h) sets up the gfp flag according to the device dma_mask but AMD IOMMU doesn't need it for devices that the IOMMU can do virtual mappings for. This patch avoids unnecessary low zone allocation. Signed-off-by: FUJITA Tomonori Signed-off-by: Joerg Roedel Signed-off-by: Ingo Molnar --- arch/x86/kernel/amd_iommu.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index a34d8e915e3..e4866660463 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -1173,6 +1173,9 @@ static void *alloc_coherent(struct device *dev, size_t size, if (!check_device(dev)) return NULL; + if (!get_device_resources(dev, &iommu, &domain, &devid)) + flag &= ~(__GFP_DMA | __GFP_HIGHMEM | __GFP_DMA32); + virt_addr = (void *)__get_free_pages(flag, get_order(size)); if (!virt_addr) return 0; @@ -1180,8 +1183,6 @@ static void *alloc_coherent(struct device *dev, size_t size, memset(virt_addr, 0, size); paddr = virt_to_phys(virt_addr); - get_device_resources(dev, &iommu, &domain, &devid); - if (!iommu || !domain) { *dma_addr = (dma_addr_t)paddr; return virt_addr; -- cgit v1.2.3 From c97ac5359e6897abe22770740294dda185bac30d Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Thu, 11 Sep 2008 10:59:15 +0200 Subject: AMD IOMMU: replace memset with __GFP_ZERO in alloc_coherent Remove the memset and use __GFP_ZERO at allocation time instead. Signed-off-by: Joerg Roedel Signed-off-by: Ingo Molnar --- arch/x86/kernel/amd_iommu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index e4866660463..f405a61f61f 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -1176,11 +1176,11 @@ static void *alloc_coherent(struct device *dev, size_t size, if (!get_device_resources(dev, &iommu, &domain, &devid)) flag &= ~(__GFP_DMA | __GFP_HIGHMEM | __GFP_DMA32); + flag |= __GFP_ZERO; virt_addr = (void *)__get_free_pages(flag, get_order(size)); if (!virt_addr) return 0; - memset(virt_addr, 0, size); paddr = virt_to_phys(virt_addr); if (!iommu || !domain) { -- cgit v1.2.3 From 6754086ce67c0a1f5d7eac612102368781e14588 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Wed, 17 Sep 2008 12:17:00 +0200 Subject: AMD IOMMU: simplify dma_mask_to_pages The current calculation is very complicated. This patch replaces it with a much simpler version. Signed-off-by: Joerg Roedel Signed-off-by: Ingo Molnar --- arch/x86/kernel/amd_iommu.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index f405a61f61f..db64482b179 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -472,8 +472,7 @@ static int init_unity_mappings_for_device(struct dma_ops_domain *dma_dom, ****************************************************************************/ static unsigned long dma_mask_to_pages(unsigned long mask) { - return (mask >> PAGE_SHIFT) + - (PAGE_ALIGN(mask & ~PAGE_MASK) >> PAGE_SHIFT); + return PAGE_ALIGN(mask) >> PAGE_SHIFT; } /* -- cgit v1.2.3 From d58befd3a0110c93d70756537b4d01d05a9e6e12 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Wed, 17 Sep 2008 12:19:58 +0200 Subject: AMD IOMMU: free domain bitmap with its allocation order The amd_iommu_pd_alloc_bitmap is allocated with a calculated order and freed with order 1. This is not a bug since the calculated order always evaluates to 1, but its unclean code. So replace the 1 with the calculation in the release path. Signed-off-by: Joerg Roedel Signed-off-by: Ingo Molnar --- arch/x86/kernel/amd_iommu_init.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c index 8c137598555..e60f4cd29eb 100644 --- a/arch/x86/kernel/amd_iommu_init.c +++ b/arch/x86/kernel/amd_iommu_init.c @@ -1144,7 +1144,8 @@ out: return ret; free: - free_pages((unsigned long)amd_iommu_pd_alloc_bitmap, 1); + free_pages((unsigned long)amd_iommu_pd_alloc_bitmap, + get_order(MAX_DOMAIN_ID/8)); free_pages((unsigned long)amd_iommu_pd_table, get_order(rlookup_table_size)); -- cgit v1.2.3 From 199d0d501202f077fe647a5c14fe046b17abc46b Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Wed, 17 Sep 2008 16:45:59 +0200 Subject: AMD IOMMU: remove unnecessary cast to u64 in the init code The ctrl variable is only u32 and readl also returns a 32 bit value. So the cast to u64 is pointless. Remove it with this patch. Signed-off-by: Joerg Roedel Signed-off-by: Ingo Molnar --- arch/x86/kernel/amd_iommu_init.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c index e60f4cd29eb..505fc04e896 100644 --- a/arch/x86/kernel/amd_iommu_init.c +++ b/arch/x86/kernel/amd_iommu_init.c @@ -235,7 +235,7 @@ static void __init iommu_feature_disable(struct amd_iommu *iommu, u8 bit) { u32 ctrl; - ctrl = (u64)readl(iommu->mmio_base + MMIO_CONTROL_OFFSET); + ctrl = readl(iommu->mmio_base + MMIO_CONTROL_OFFSET); ctrl &= ~(1 << bit); writel(ctrl, iommu->mmio_base + MMIO_CONTROL_OFFSET); } -- cgit v1.2.3 From b514e55569855bbaab782a8ec073630ed4e99c68 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Wed, 17 Sep 2008 17:14:27 +0200 Subject: AMD IOMMU: calculate IVHD size with a function The current calculation of the IVHD entry size is hard to read. So move this code to a seperate function to make it more clear what this calculation does. Signed-off-by: Joerg Roedel Signed-off-by: Ingo Molnar --- arch/x86/kernel/amd_iommu_init.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c index 505fc04e896..80250e63bd0 100644 --- a/arch/x86/kernel/amd_iommu_init.c +++ b/arch/x86/kernel/amd_iommu_init.c @@ -296,6 +296,14 @@ static void __init iommu_unmap_mmio_space(struct amd_iommu *iommu) * ****************************************************************************/ +/* + * This function calculates the length of a given IVHD entry + */ +static inline int ivhd_entry_length(u8 *ivhd) +{ + return 0x04 << (*ivhd >> 6); +} + /* * This function reads the last device id the IOMMU has to handle from the PCI * capability header for this IOMMU @@ -340,7 +348,7 @@ static int __init find_last_devid_from_ivhd(struct ivhd_header *h) default: break; } - p += 0x04 << (*p >> 6); + p += ivhd_entry_length(p); } WARN_ON(p != end); @@ -641,7 +649,7 @@ static void __init init_iommu_from_acpi(struct amd_iommu *iommu, break; } - p += 0x04 << (e->type >> 6); + p += ivhd_entry_length(p); } } -- cgit v1.2.3 From 23c1713fe9e6ac886a4d44415298d0cbb2e83df2 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Wed, 17 Sep 2008 17:18:17 +0200 Subject: AMD IOMMU: use cmd_buf_size when freeing the command buffer The command buffer release function uses the CMD_BUF_SIZE macro for get_order. Replace this with iommu->cmd_buf_size which is more reliable about the actual size of the buffer. Signed-off-by: Joerg Roedel Signed-off-by: Ingo Molnar --- arch/x86/kernel/amd_iommu_init.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c index 80250e63bd0..db0c83af44d 100644 --- a/arch/x86/kernel/amd_iommu_init.c +++ b/arch/x86/kernel/amd_iommu_init.c @@ -433,7 +433,8 @@ static u8 * __init alloc_command_buffer(struct amd_iommu *iommu) static void __init free_command_buffer(struct amd_iommu *iommu) { - free_pages((unsigned long)iommu->cmd_buf, get_order(CMD_BUFFER_SIZE)); + free_pages((unsigned long)iommu->cmd_buf, + get_order(iommu->cmd_buf_size)); } /* allocates the memory where the IOMMU will log its events to */ -- cgit v1.2.3 From 832a90c30485117d65180cc9a8d9869c1b158570 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Thu, 18 Sep 2008 15:54:23 +0200 Subject: AMD IOMMU: use coherent_dma_mask in alloc_coherent The alloc_coherent implementation for AMD IOMMU currently uses *dev->dma_mask per default. This patch changes it to prefer dev->coherent_dma_mask if it is set. Signed-off-by: Joerg Roedel Signed-off-by: Ingo Molnar --- arch/x86/kernel/amd_iommu.c | 29 ++++++++++++++++++++++------- 1 file changed, 22 insertions(+), 7 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index db64482b179..6f7b9744573 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -483,9 +483,10 @@ static unsigned long dma_mask_to_pages(unsigned long mask) static unsigned long dma_ops_alloc_addresses(struct device *dev, struct dma_ops_domain *dom, unsigned int pages, - unsigned long align_mask) + unsigned long align_mask, + u64 dma_mask) { - unsigned long limit = dma_mask_to_pages(*dev->dma_mask); + unsigned long limit = dma_mask_to_pages(dma_mask); unsigned long address; unsigned long size = dom->aperture_size >> PAGE_SHIFT; unsigned long boundary_size; @@ -919,7 +920,8 @@ static dma_addr_t __map_single(struct device *dev, phys_addr_t paddr, size_t size, int dir, - bool align) + bool align, + u64 dma_mask) { dma_addr_t offset = paddr & ~PAGE_MASK; dma_addr_t address, start; @@ -933,7 +935,8 @@ static dma_addr_t __map_single(struct device *dev, if (align) align_mask = (1UL << get_order(size)) - 1; - address = dma_ops_alloc_addresses(dev, dma_dom, pages, align_mask); + address = dma_ops_alloc_addresses(dev, dma_dom, pages, align_mask, + dma_mask); if (unlikely(address == bad_dma_address)) goto out; @@ -997,10 +1000,13 @@ static dma_addr_t map_single(struct device *dev, phys_addr_t paddr, struct protection_domain *domain; u16 devid; dma_addr_t addr; + u64 dma_mask; if (!check_device(dev)) return bad_dma_address; + dma_mask = *dev->dma_mask; + get_device_resources(dev, &iommu, &domain, &devid); if (iommu == NULL || domain == NULL) @@ -1008,7 +1014,8 @@ static dma_addr_t map_single(struct device *dev, phys_addr_t paddr, return (dma_addr_t)paddr; spin_lock_irqsave(&domain->lock, flags); - addr = __map_single(dev, iommu, domain->priv, paddr, size, dir, false); + addr = __map_single(dev, iommu, domain->priv, paddr, size, dir, false, + dma_mask); if (addr == bad_dma_address) goto out; @@ -1080,10 +1087,13 @@ static int map_sg(struct device *dev, struct scatterlist *sglist, struct scatterlist *s; phys_addr_t paddr; int mapped_elems = 0; + u64 dma_mask; if (!check_device(dev)) return 0; + dma_mask = *dev->dma_mask; + get_device_resources(dev, &iommu, &domain, &devid); if (!iommu || !domain) @@ -1095,7 +1105,8 @@ static int map_sg(struct device *dev, struct scatterlist *sglist, paddr = sg_phys(s); s->dma_address = __map_single(dev, iommu, domain->priv, - paddr, s->length, dir, false); + paddr, s->length, dir, false, + dma_mask); if (s->dma_address) { s->dma_length = s->length; @@ -1168,6 +1179,7 @@ static void *alloc_coherent(struct device *dev, size_t size, struct protection_domain *domain; u16 devid; phys_addr_t paddr; + u64 dma_mask = dev->coherent_dma_mask; if (!check_device(dev)) return NULL; @@ -1187,10 +1199,13 @@ static void *alloc_coherent(struct device *dev, size_t size, return virt_addr; } + if (!dma_mask) + dma_mask = *dev->dma_mask; + spin_lock_irqsave(&domain->lock, flags); *dma_addr = __map_single(dev, iommu, domain->priv, paddr, - size, DMA_BIDIRECTIONAL, true); + size, DMA_BIDIRECTIONAL, true, dma_mask); if (*dma_addr == bad_dma_address) { free_pages((unsigned long)virt_addr, get_order(size)); -- cgit v1.2.3 From 5871c6b0a52bb052c3891a787655043b90ae18e3 Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Sat, 20 Sep 2008 20:35:44 -0700 Subject: x86: use round_jiffies() for the corruption check timer the exact timing of the corruption check isn't too important (it's once a minute timer), use round_jiffies() to align it and avoid extra wakeups. Signed-off-by: Arjan van de Ven Signed-off-by: Ingo Molnar --- arch/x86/kernel/setup.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 786c1886ae5..161f1b33ece 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -712,7 +712,7 @@ void check_for_bios_corruption(void) static void periodic_check_for_corruption(unsigned long data) { check_for_bios_corruption(); - mod_timer(&periodic_check_timer, jiffies + corruption_check_period*HZ); + mod_timer(&periodic_check_timer, round_jiffies(jiffies + corruption_check_period*HZ)); } void start_periodic_check_for_corruption(void) -- cgit v1.2.3 From 2216d199b1430d1c0affb1498a9ebdbd9c0de439 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Mon, 22 Sep 2008 02:52:26 -0700 Subject: x86: fix CONFIG_X86_RESERVE_LOW_64K=y The bad_bios_dmi_table() quirk never triggered because we do DMI setup too late. Move it a bit earlier. Also change the CONFIG_X86_RESERVE_LOW_64K quirk to operate on the e820 table directly instead of messing with early reservations - this handles overlaps (which do occur in this low range of RAM) more gracefully. Signed-off-by: Ingo Molnar --- arch/x86/kernel/setup.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 161f1b33ece..d29951c11be 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -735,7 +735,8 @@ static int __init dmi_low_memory_corruption(const struct dmi_system_id *d) "%s detected: BIOS may corrupt low RAM, working it around.\n", d->ident); - reserve_early_overlap_ok(0x0, 0x10000, "BIOS quirk"); + e820_update_range(0, 0x10000, E820_RAM, E820_RESERVED); + sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map); return 0; } @@ -784,8 +785,6 @@ void __init setup_arch(char **cmdline_p) printk(KERN_INFO "Command line: %s\n", boot_command_line); #endif - dmi_check_system(bad_bios_dmi_table); - early_cpu_init(); early_ioremap_init(); @@ -880,6 +879,10 @@ void __init setup_arch(char **cmdline_p) finish_e820_parsing(); + dmi_scan_machine(); + + dmi_check_system(bad_bios_dmi_table); + #ifdef CONFIG_X86_32 probe_roms(); #endif @@ -967,8 +970,6 @@ void __init setup_arch(char **cmdline_p) vsmp_init(); #endif - dmi_scan_machine(); - io_delay_init(); /* -- cgit v1.2.3 From af2d237bf574f89ae5a1b67f2556a324c8f64ff5 Mon Sep 17 00:00:00 2001 From: Akinobu Mita Date: Sun, 21 Sep 2008 23:27:13 +0900 Subject: x86: check for ioremap() failure in copy_oldmem_page() Add a check for ioremap() failure in copy_oldmem_page(). This patch also includes small coding style fixes. Signed-off-by: Akinobu Mita Signed-off-by: Ingo Molnar --- arch/x86/kernel/crash_dump_64.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/crash_dump_64.c b/arch/x86/kernel/crash_dump_64.c index 15e6c6bc4a4..280d6ef3af0 100644 --- a/arch/x86/kernel/crash_dump_64.c +++ b/arch/x86/kernel/crash_dump_64.c @@ -33,14 +33,16 @@ ssize_t copy_oldmem_page(unsigned long pfn, char *buf, return 0; vaddr = ioremap(pfn << PAGE_SHIFT, PAGE_SIZE); + if (!vaddr) + return -ENOMEM; if (userbuf) { - if (copy_to_user(buf, (vaddr + offset), csize)) { + if (copy_to_user(buf, vaddr + offset, csize)) { iounmap(vaddr); return -EFAULT; } } else - memcpy(buf, (vaddr + offset), csize); + memcpy(buf, vaddr + offset, csize); iounmap(vaddr); return csize; -- cgit v1.2.3 From 153dab77e228931f3aff74f21b762927ac710ca7 Mon Sep 17 00:00:00 2001 From: Akinobu Mita Date: Sun, 21 Sep 2008 23:25:40 +0900 Subject: x86: use platform_device_register_simple() Cleanup pcspeaker.c Signed-off-by: Akinobu Mita Signed-off-by: Ingo Molnar --- arch/x86/kernel/pcspeaker.c | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/pcspeaker.c b/arch/x86/kernel/pcspeaker.c index bc1f2d3ea27..a311ffcaad1 100644 --- a/arch/x86/kernel/pcspeaker.c +++ b/arch/x86/kernel/pcspeaker.c @@ -1,20 +1,13 @@ #include -#include +#include #include static __init int add_pcspkr(void) { struct platform_device *pd; - int ret; - pd = platform_device_alloc("pcspkr", -1); - if (!pd) - return -ENOMEM; + pd = platform_device_register_simple("pcspkr", -1, NULL, 0); - ret = platform_device_add(pd); - if (ret) - platform_device_put(pd); - - return ret; + return IS_ERR(pd) ? PTR_ERR(pd) : 0; } device_initcall(add_pcspkr); -- cgit v1.2.3 From 16dc552f35bc0ec6fec8ef83f8032eee352d17f5 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Fri, 19 Sep 2008 11:45:04 -0700 Subject: x86: use WARN_ONCE in workaround for mtrr mask so could help catch attention about bug in bios about mtrr mask setting. WARN_ONCE got into mainline already, lets use it. Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/mtrr/generic.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c index cb7d3b6a80e..4e8d77f01ee 100644 --- a/arch/x86/kernel/cpu/mtrr/generic.c +++ b/arch/x86/kernel/cpu/mtrr/generic.c @@ -401,12 +401,7 @@ static void generic_get_mtrr(unsigned int reg, unsigned long *base, tmp |= ~((1<<(hi - 1)) - 1); if (tmp != mask_lo) { - static int once = 1; - - if (once) { - printk(KERN_INFO "mtrr: your BIOS has set up an incorrect mask, fixing it up.\n"); - once = 0; - } + WARN_ONCE(1, KERN_INFO "mtrr: your BIOS has set up an incorrect mask, fixing it up.\n"); mask_lo = tmp; } } -- cgit v1.2.3 From d26dbc5cf94b0a28acc947285c3b54814a73cb2e Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Mon, 22 Sep 2008 22:35:07 +0900 Subject: iommu: export iommu_area_reserve helper function x86 has set_bit_string() that does the exact same thing that set_bit_area() in lib/iommu-helper.c does. This patch exports set_bit_area() in lib/iommu-helper.c as iommu_area_reserve(), converts GART, Calgary, and AMD IOMMU to use it. Signed-off-by: FUJITA Tomonori Acked-by: Joerg Roedel Signed-off-by: Ingo Molnar --- arch/x86/kernel/amd_iommu.c | 2 +- arch/x86/kernel/pci-calgary_64.c | 2 +- arch/x86/kernel/pci-gart_64.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index 6f7b9744573..70537d117a9 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -572,7 +572,7 @@ static void dma_ops_reserve_addresses(struct dma_ops_domain *dom, if (start_page + pages > last_page) pages = last_page - start_page; - set_bit_string(dom->bitmap, start_page, pages); + iommu_area_reserve(dom->bitmap, start_page, pages); } static void dma_ops_free_pagetable(struct dma_ops_domain *dma_dom) diff --git a/arch/x86/kernel/pci-calgary_64.c b/arch/x86/kernel/pci-calgary_64.c index fe7695e4caa..080d1d27f37 100644 --- a/arch/x86/kernel/pci-calgary_64.c +++ b/arch/x86/kernel/pci-calgary_64.c @@ -261,7 +261,7 @@ static void iommu_range_reserve(struct iommu_table *tbl, badbit, tbl, start_addr, npages); } - set_bit_string(tbl->it_map, index, npages); + iommu_area_reserve(tbl->it_map, index, npages); spin_unlock_irqrestore(&tbl->it_lock, flags); } diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c index 508ef470b27..3dcb1ad86e3 100644 --- a/arch/x86/kernel/pci-gart_64.c +++ b/arch/x86/kernel/pci-gart_64.c @@ -827,7 +827,7 @@ void __init gart_iommu_init(void) * Out of IOMMU space handling. * Reserve some invalid pages at the beginning of the GART. */ - set_bit_string(iommu_gart_bitmap, 0, EMERGENCY_PAGES); + iommu_area_reserve(iommu_gart_bitmap, 0, EMERGENCY_PAGES); agp_memory_reserved = iommu_size; printk(KERN_INFO -- cgit v1.2.3 From 28b166a700899a0f88b1cc283c449fb5bf72a635 Mon Sep 17 00:00:00 2001 From: Aristeu Rozanski Date: Mon, 22 Sep 2008 13:14:13 -0400 Subject: x86, NMI watchdog: when booting with reset_devices, clear the performance counters P4s have a quirk that makes necessary to clear P4_CCCR_OVF bit on the CCCR everytime the PMI is triggered. When booting the kernel with reset_devices (more specific kdump case), the counters reach zero and the PMI will be generated. This is not a problem on other processors but on P4s, it'll continue to generate NMIs until that bit is cleared. Since there may be other users of the performance counters, clear and disable all of them when booting with reset_devices option. We have a P4 box here that crashes because of this problem. Since the kdump kernel usually boots with only one processor active, the second logical unit won't be set up, therefore, MSR_P4_IQ_CCCR1 (and other performance counter registers) won't be cleared and P4_CCCR_OVF may be still set because the previous kernel was using this register. An NMI is triggered because of the MSR_P4_IQ_CCCR1 right after the NMI delivery is enabled, triggering the race fixed on my previous email. Signed-off-by: Aristeu Rozanski Acked-by: Don Zickus Acked-by: Prarit Bhargava Acked-by: Vivek Goyal Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perfctr-watchdog.c | 41 ++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/perfctr-watchdog.c b/arch/x86/kernel/cpu/perfctr-watchdog.c index 05cc22dbd4f..62c01006397 100644 --- a/arch/x86/kernel/cpu/perfctr-watchdog.c +++ b/arch/x86/kernel/cpu/perfctr-watchdog.c @@ -432,6 +432,27 @@ static const struct wd_ops p6_wd_ops = { #define P4_CCCR_ENABLE (1 << 12) #define P4_CCCR_OVF (1 << 31) +#define P4_CONTROLS 18 +static unsigned int p4_controls[18] = { + MSR_P4_BPU_CCCR0, + MSR_P4_BPU_CCCR1, + MSR_P4_BPU_CCCR2, + MSR_P4_BPU_CCCR3, + MSR_P4_MS_CCCR0, + MSR_P4_MS_CCCR1, + MSR_P4_MS_CCCR2, + MSR_P4_MS_CCCR3, + MSR_P4_FLAME_CCCR0, + MSR_P4_FLAME_CCCR1, + MSR_P4_FLAME_CCCR2, + MSR_P4_FLAME_CCCR3, + MSR_P4_IQ_CCCR0, + MSR_P4_IQ_CCCR1, + MSR_P4_IQ_CCCR2, + MSR_P4_IQ_CCCR3, + MSR_P4_IQ_CCCR4, + MSR_P4_IQ_CCCR5, +}; /* * Set up IQ_COUNTER0 to behave like a clock, by having IQ_CCCR0 filter * CRU_ESCR0 (with any non-null event selector) through a complemented @@ -473,6 +494,26 @@ static int setup_p4_watchdog(unsigned nmi_hz) evntsel_msr = MSR_P4_CRU_ESCR0; cccr_msr = MSR_P4_IQ_CCCR0; cccr_val = P4_CCCR_OVF_PMI0 | P4_CCCR_ESCR_SELECT(4); + + /* + * If we're on the kdump kernel or other situation, we may + * still have other performance counter registers set to + * interrupt and they'll keep interrupting forever because + * of the P4_CCCR_OVF quirk. So we need to ACK all the + * pending interrupts and disable all the registers here, + * before reenabling the NMI delivery. Refer to p4_rearm() + * about the P4_CCCR_OVF quirk. + */ + if (reset_devices) { + unsigned int low, high; + int i; + + for (i = 0; i < P4_CONTROLS; i++) { + rdmsr(p4_controls[i], low, high); + low &= ~(P4_CCCR_ENABLE | P4_CCCR_OVF); + wrmsr(p4_controls[i], low, high); + } + } } else { /* logical cpu 1 */ perfctr_msr = MSR_P4_IQ_PERFCTR1; -- cgit v1.2.3 From b3e15bdef689641e7f1bb03efbe56112c3ee82e2 Mon Sep 17 00:00:00 2001 From: Aristeu Rozanski Date: Mon, 22 Sep 2008 13:13:59 -0400 Subject: x86, NMI watchdog: setup before enabling NMI watchdog There's a small window when NMI watchdog is being set up that if any NMIs are triggered, the NMI code will make make use of not initalized wd_ops elements: void setup_apic_nmi_watchdog(void *unused) { if (__get_cpu_var(wd_enabled)) return; /* cheap hack to support suspend/resume */ /* if cpu0 is not active neither should the other cpus */ if (smp_processor_id() != 0 && atomic_read(&nmi_active) <= 0) return; switch (nmi_watchdog) { case NMI_LOCAL_APIC: /* enable it before to avoid race with handler */ --> __get_cpu_var(wd_enabled) = 1; --> if (lapic_watchdog_init(nmi_hz) < 0) { (...) asmlinkage notrace __kprobes void default_do_nmi(struct pt_regs *regs) { (...) if (nmi_watchdog_tick(regs, reason)) return; (...) notrace __kprobes int nmi_watchdog_tick(struct pt_regs *regs, unsigned reason) { (...) if (!__get_cpu_var(wd_enabled)) return rc; switch (nmi_watchdog) { case NMI_LOCAL_APIC: rc |= lapic_wd_event(nmi_hz); (...) int lapic_wd_event(unsigned nmi_hz) { struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); u64 ctr; --> rdmsrl(wd->perfctr_msr, ctr); and wd->*_msr will be initialized on each processor type specific setup, after enabling NMIs for PMIs. Since the counter was just set, the chances of an performance counter generated NMI is minimal, but any other unknown NMI would trigger the problem. This patch fixes the problem by setting everything up before enabling performance counter generated NMIs and will set wd_enabled using a callback function. Signed-off-by: Aristeu Rozanski Acked-by: Don Zickus Acked-by: Prarit Bhargava Acked-by: Vivek Goyal Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perfctr-watchdog.c | 45 +++++++++++++++++++++++++--------- arch/x86/kernel/nmi.c | 11 +++++++-- 2 files changed, 42 insertions(+), 14 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/perfctr-watchdog.c b/arch/x86/kernel/cpu/perfctr-watchdog.c index 62c01006397..6bff382094f 100644 --- a/arch/x86/kernel/cpu/perfctr-watchdog.c +++ b/arch/x86/kernel/cpu/perfctr-watchdog.c @@ -295,13 +295,19 @@ static int setup_k7_watchdog(unsigned nmi_hz) /* setup the timer */ wrmsr(evntsel_msr, evntsel, 0); write_watchdog_counter(perfctr_msr, "K7_PERFCTR0",nmi_hz); - apic_write(APIC_LVTPC, APIC_DM_NMI); - evntsel |= K7_EVNTSEL_ENABLE; - wrmsr(evntsel_msr, evntsel, 0); + /* initialize the wd struct before enabling */ wd->perfctr_msr = perfctr_msr; wd->evntsel_msr = evntsel_msr; wd->cccr_msr = 0; /* unused */ + + /* ok, everything is initialized, announce that we're set */ + cpu_nmi_set_wd_enabled(); + + apic_write(APIC_LVTPC, APIC_DM_NMI); + evntsel |= K7_EVNTSEL_ENABLE; + wrmsr(evntsel_msr, evntsel, 0); + return 1; } @@ -379,13 +385,19 @@ static int setup_p6_watchdog(unsigned nmi_hz) wrmsr(evntsel_msr, evntsel, 0); nmi_hz = adjust_for_32bit_ctr(nmi_hz); write_watchdog_counter32(perfctr_msr, "P6_PERFCTR0",nmi_hz); - apic_write(APIC_LVTPC, APIC_DM_NMI); - evntsel |= P6_EVNTSEL0_ENABLE; - wrmsr(evntsel_msr, evntsel, 0); + /* initialize the wd struct before enabling */ wd->perfctr_msr = perfctr_msr; wd->evntsel_msr = evntsel_msr; wd->cccr_msr = 0; /* unused */ + + /* ok, everything is initialized, announce that we're set */ + cpu_nmi_set_wd_enabled(); + + apic_write(APIC_LVTPC, APIC_DM_NMI); + evntsel |= P6_EVNTSEL0_ENABLE; + wrmsr(evntsel_msr, evntsel, 0); + return 1; } @@ -540,12 +552,17 @@ static int setup_p4_watchdog(unsigned nmi_hz) wrmsr(evntsel_msr, evntsel, 0); wrmsr(cccr_msr, cccr_val, 0); write_watchdog_counter(perfctr_msr, "P4_IQ_COUNTER0", nmi_hz); - apic_write(APIC_LVTPC, APIC_DM_NMI); - cccr_val |= P4_CCCR_ENABLE; - wrmsr(cccr_msr, cccr_val, 0); + wd->perfctr_msr = perfctr_msr; wd->evntsel_msr = evntsel_msr; wd->cccr_msr = cccr_msr; + + /* ok, everything is initialized, announce that we're set */ + cpu_nmi_set_wd_enabled(); + + apic_write(APIC_LVTPC, APIC_DM_NMI); + cccr_val |= P4_CCCR_ENABLE; + wrmsr(cccr_msr, cccr_val, 0); return 1; } @@ -661,13 +678,17 @@ static int setup_intel_arch_watchdog(unsigned nmi_hz) wrmsr(evntsel_msr, evntsel, 0); nmi_hz = adjust_for_32bit_ctr(nmi_hz); write_watchdog_counter32(perfctr_msr, "INTEL_ARCH_PERFCTR0", nmi_hz); - apic_write(APIC_LVTPC, APIC_DM_NMI); - evntsel |= ARCH_PERFMON_EVENTSEL0_ENABLE; - wrmsr(evntsel_msr, evntsel, 0); wd->perfctr_msr = perfctr_msr; wd->evntsel_msr = evntsel_msr; wd->cccr_msr = 0; /* unused */ + + /* ok, everything is initialized, announce that we're set */ + cpu_nmi_set_wd_enabled(); + + apic_write(APIC_LVTPC, APIC_DM_NMI); + evntsel |= ARCH_PERFMON_EVENTSEL0_ENABLE; + wrmsr(evntsel_msr, evntsel, 0); intel_arch_wd_ops.checkbit = 1ULL << (eax.split.bit_width - 1); return 1; } diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c index abb78a2cc4a..2c97f07f1c2 100644 --- a/arch/x86/kernel/nmi.c +++ b/arch/x86/kernel/nmi.c @@ -299,6 +299,15 @@ void acpi_nmi_disable(void) on_each_cpu(__acpi_nmi_disable, NULL, 1); } +/* + * This function is called as soon the LAPIC NMI watchdog driver has everything + * in place and it's ready to check if the NMIs belong to the NMI watchdog + */ +void cpu_nmi_set_wd_enabled(void) +{ + __get_cpu_var(wd_enabled) = 1; +} + void setup_apic_nmi_watchdog(void *unused) { if (__get_cpu_var(wd_enabled)) @@ -311,8 +320,6 @@ void setup_apic_nmi_watchdog(void *unused) switch (nmi_watchdog) { case NMI_LOCAL_APIC: - /* enable it before to avoid race with handler */ - __get_cpu_var(wd_enabled) = 1; if (lapic_watchdog_init(nmi_hz) < 0) { __get_cpu_var(wd_enabled) = 0; return; -- cgit v1.2.3 From afa9fdc2f5f8e4d98f3e77bfa204412cbc181346 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Sat, 20 Sep 2008 01:23:30 +0900 Subject: iommu: remove fullflush and nofullflush in IOMMU generic option This patch against tip/x86/iommu virtually reverts 2842e5bf3115193f05dc9dac20f940e7abf44c1a. But just reverting the commit breaks AMD IOMMU so this patch also includes some fixes. The above commit adds new two options to x86 IOMMU generic kernel boot options, fullflush and nofullflush. But such change that affects all the IOMMUs needs more discussion (all IOMMU parties need the chance to discuss it): http://lkml.org/lkml/2008/9/19/106 Signed-off-by: FUJITA Tomonori Acked-by: Joerg Roedel Signed-off-by: Ingo Molnar --- arch/x86/kernel/amd_iommu.c | 4 ++-- arch/x86/kernel/amd_iommu_init.c | 5 ++++- arch/x86/kernel/pci-dma.c | 13 ------------- arch/x86/kernel/pci-gart_64.c | 13 +++++++++++++ 4 files changed, 19 insertions(+), 16 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index 70537d117a9..c19212191c9 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -948,7 +948,7 @@ static dma_addr_t __map_single(struct device *dev, } address += offset; - if (unlikely(dma_dom->need_flush && !iommu_fullflush)) { + if (unlikely(dma_dom->need_flush && !amd_iommu_unmap_flush)) { iommu_flush_tlb(iommu, dma_dom->domain.id); dma_dom->need_flush = false; } else if (unlikely(iommu_has_npcache(iommu))) @@ -985,7 +985,7 @@ static void __unmap_single(struct amd_iommu *iommu, dma_ops_free_addresses(dma_dom, dma_addr, pages); - if (iommu_fullflush) + if (amd_iommu_unmap_flush) iommu_flush_pages(iommu, dma_dom->domain.id, dma_addr, size); } diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c index db0c83af44d..148fcfe22f1 100644 --- a/arch/x86/kernel/amd_iommu_init.c +++ b/arch/x86/kernel/amd_iommu_init.c @@ -122,6 +122,7 @@ LIST_HEAD(amd_iommu_unity_map); /* a list of required unity mappings we find in ACPI */ unsigned amd_iommu_aperture_order = 26; /* size of aperture in power of 2 */ int amd_iommu_isolate; /* if 1, device isolation is enabled */ +bool amd_iommu_unmap_flush; /* if true, flush on every unmap */ LIST_HEAD(amd_iommu_list); /* list of all AMD IOMMUs in the system */ @@ -1144,7 +1145,7 @@ int __init amd_iommu_init(void) else printk("disabled\n"); - if (iommu_fullflush) + if (amd_iommu_unmap_flush) printk(KERN_INFO "AMD IOMMU: IO/TLB flush on unmap enabled\n"); else printk(KERN_INFO "AMD IOMMU: Lazy IO/TLB flushing enabled\n"); @@ -1214,6 +1215,8 @@ static int __init parse_amd_iommu_options(char *str) for (; *str; ++str) { if (strncmp(str, "isolate", 7) == 0) amd_iommu_isolate = 1; + if (strncmp(str, "fullflush", 11) == 0) + amd_iommu_unmap_flush = true; } return 1; diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index d2f2c0158dc..0a1408abcc6 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c @@ -16,15 +16,6 @@ EXPORT_SYMBOL(dma_ops); static int iommu_sac_force __read_mostly; -/* - * If this is disabled the IOMMU will use an optimized flushing strategy - * of only flushing when an mapping is reused. With it true the GART is - * flushed for every mapping. Problem is that doing the lazy flush seems - * to trigger bugs with some popular PCI cards, in particular 3ware (but - * has been also also seen with Qlogic at least). - */ -int iommu_fullflush; - #ifdef CONFIG_IOMMU_DEBUG int panic_on_overflow __read_mostly = 1; int force_iommu __read_mostly = 1; @@ -180,10 +171,6 @@ static __init int iommu_setup(char *p) } if (!strncmp(p, "nomerge", 7)) iommu_merge = 0; - if (!strncmp(p, "fullflush", 8)) - iommu_fullflush = 1; - if (!strncmp(p, "nofullflush", 11)) - iommu_fullflush = 0; if (!strncmp(p, "forcesac", 8)) iommu_sac_force = 1; if (!strncmp(p, "allowdac", 8)) diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c index 3dcb1ad86e3..9e390f1bd46 100644 --- a/arch/x86/kernel/pci-gart_64.c +++ b/arch/x86/kernel/pci-gart_64.c @@ -45,6 +45,15 @@ static unsigned long iommu_pages; /* .. and in pages */ static u32 *iommu_gatt_base; /* Remapping table */ +/* + * If this is disabled the IOMMU will use an optimized flushing strategy + * of only flushing when an mapping is reused. With it true the GART is + * flushed for every mapping. Problem is that doing the lazy flush seems + * to trigger bugs with some popular PCI cards, in particular 3ware (but + * has been also also seen with Qlogic at least). + */ +int iommu_fullflush = 1; + /* Allocation bitmap for the remapping area: */ static DEFINE_SPINLOCK(iommu_bitmap_lock); /* Guarded by iommu_bitmap_lock: */ @@ -892,6 +901,10 @@ void __init gart_parse_options(char *p) #endif if (isdigit(*p) && get_option(&p, &arg)) iommu_size = arg; + if (!strncmp(p, "fullflush", 8)) + iommu_fullflush = 1; + if (!strncmp(p, "nofullflush", 11)) + iommu_fullflush = 0; if (!strncmp(p, "noagp", 5)) no_agp = 1; if (!strncmp(p, "noaperture", 10)) -- cgit v1.2.3 From a8b71a2810386a5ac8f43d2095fe3355f0d8db37 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Tue, 23 Sep 2008 00:35:33 -0700 Subject: x86: fix macro with bad_bios_dmi_table DMI tables need a blank NULL tail. fixes the crash on Ingo's test box. Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/setup.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index d29951c11be..3ce3edfcc3c 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -758,8 +758,8 @@ static struct dmi_system_id __initdata bad_bios_dmi_table[] = { DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies, LTD"), }, }, - {} #endif + {} }; /* -- cgit v1.2.3 From 05e12e1c4c09cd35ac9f4e6af1e42b0036375d72 Mon Sep 17 00:00:00 2001 From: Ravikiran G Thirumalai Date: Mon, 22 Sep 2008 22:58:47 -0700 Subject: x86: fix 27-rc crash on vsmp due to paravirt during module load 27-rc fails to boot up if configured to use modules. Turns out vsmp_patch was marked __init, and vsmp_patch being the pvops 'patch' routine for vsmp, a call to vsmp_patch just turns out to execute a code page with series of 0xcc (POISON_FREE_INITMEM -- int3). vsmp_patch has been marked with __init ever since pvops, however, apply_paravirt can be called during module load causing calls to freed memory location. Since apply_paravirt can only be called during init/module load, make vsmp_patch with "__init_or_module" Signed-off-by: Ravikiran Thirumalai Signed-off-by: Ingo Molnar --- arch/x86/kernel/vsmp_64.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/vsmp_64.c b/arch/x86/kernel/vsmp_64.c index 0c029e8959c..7766d36983f 100644 --- a/arch/x86/kernel/vsmp_64.c +++ b/arch/x86/kernel/vsmp_64.c @@ -61,7 +61,7 @@ static void vsmp_irq_enable(void) native_restore_fl((flags | X86_EFLAGS_IF) & (~X86_EFLAGS_AC)); } -static unsigned __init vsmp_patch(u8 type, u16 clobbers, void *ibuf, +static unsigned __init_or_module vsmp_patch(u8 type, u16 clobbers, void *ibuf, unsigned long addr, unsigned len) { switch (type) { -- cgit v1.2.3 From 4faac97d44ac27bdbb010a9c3597401a8f89341f Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 22 Sep 2008 18:54:29 +0200 Subject: x86: prevent stale state of c1e_mask across CPU offline/online Impact: hang which happens across CPU offline/online on AMD C1E systems. When a CPU goes offline then the corresponding bit in the broadcast mask is cleared. For AMD C1E enabled CPUs we do not reenable the broadcast when the CPU comes online again as we do not clear the corresponding bit in the c1e_mask, which keeps track which CPUs have been switched to broadcast already. So on those !$@#& machines we never switch back to broadcasting after a CPU offline/online cycle. Clear the bit when the CPU plays dead. Signed-off-by: Thomas Gleixner --- arch/x86/kernel/process.c | 11 ++++++++--- arch/x86/kernel/process_32.c | 1 + arch/x86/kernel/process_64.c | 2 ++ 3 files changed, 11 insertions(+), 3 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 7fc4d5b0a6a..2e2247117f6 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -246,6 +246,14 @@ static int __cpuinit check_c1e_idle(const struct cpuinfo_x86 *c) return 1; } +static cpumask_t c1e_mask = CPU_MASK_NONE; +static int c1e_detected; + +void c1e_remove_cpu(int cpu) +{ + cpu_clear(cpu, c1e_mask); +} + /* * C1E aware idle routine. We check for C1E active in the interrupt * pending message MSR. If we detect C1E, then we handle it the same @@ -253,9 +261,6 @@ static int __cpuinit check_c1e_idle(const struct cpuinfo_x86 *c) */ static void c1e_idle(void) { - static cpumask_t c1e_mask = CPU_MASK_NONE; - static int c1e_detected; - if (need_resched()) return; diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 3b7a1ddcc0b..4b3cfdf5421 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -88,6 +88,7 @@ static void cpu_exit_clear(void) cpu_clear(cpu, cpu_callin_map); numa_remove_cpu(cpu); + c1e_remove_cpu(cpu); } /* We don't actually take CPU down, just spin without interrupts. */ diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 71553b664e2..e12e0e4dd25 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -93,6 +93,8 @@ DECLARE_PER_CPU(int, cpu_state); static inline void play_dead(void) { idle_task_exit(); + c1e_remove_cpu(raw_smp_processor_id()); + mb(); /* Ack it */ __get_cpu_var(cpu_state) = CPU_DEAD; -- cgit v1.2.3 From a8d6829044901a67732904be5f1eacdf8539604f Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 22 Sep 2008 19:02:25 +0200 Subject: x86: prevent C-states hang on AMD C1E enabled machines Impact: System hang when AMD C1E machines switch into C2/C3 AMD C1E enabled systems do not work with normal ACPI C-states even if the BIOS is advertising them. Limit the C-states to C1 for the ACPI processor idle code. Signed-off-by: Thomas Gleixner --- arch/x86/kernel/process.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 2e2247117f6..d8c2a299bfe 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -272,6 +272,7 @@ static void c1e_idle(void) c1e_detected = 1; mark_tsc_unstable("TSC halt in C1E"); printk(KERN_INFO "System has C1E enabled\n"); + set_cpu_cap(&boot_cpu_data, X86_FEATURE_AMDC1E); } } -- cgit v1.2.3 From 09bfeea13cea843fb03eaa96b5d891fa0abdcc90 Mon Sep 17 00:00:00 2001 From: Andreas Herrmann Date: Thu, 18 Sep 2008 21:12:10 +0200 Subject: x86: c1e_idle: don't mark TSC unstable if CPU has invariant TSC Impact: Functional TSC is marked unstable on AMD family 0x10 and 0x11 CPUs. This would be wrong because for those CPUs "invariant TSC" means: "The TSC counts at the same rate in all P-states, all C states, S0, or S1" (See "Processor BIOS and Kernel Developer's Guides" for those CPUs.) [ tglx: Changed C1E to AMD C1E in the printks to avoid confusion with Intel C1E ] Signed-off-by: Andreas Herrmann Signed-off-by: Thomas Gleixner --- arch/x86/kernel/process.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index d8c2a299bfe..876e9189077 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -270,8 +270,9 @@ static void c1e_idle(void) rdmsr(MSR_K8_INT_PENDING_MSG, lo, hi); if (lo & K8_INTP_C1E_ACTIVE_MASK) { c1e_detected = 1; - mark_tsc_unstable("TSC halt in C1E"); - printk(KERN_INFO "System has C1E enabled\n"); + if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) + mark_tsc_unstable("TSC halt in AMD C1E"); + printk(KERN_INFO "System has AMD C1E enabled\n"); set_cpu_cap(&boot_cpu_data, X86_FEATURE_AMDC1E); } } -- cgit v1.2.3 From 18dbc9160507dc7df998e00cd1dcd7889557307b Mon Sep 17 00:00:00 2001 From: Dmitry Adamushko Date: Tue, 23 Sep 2008 12:08:44 +0200 Subject: x86: moved microcode.c to microcode_intel.c Combine both generic and arch-specific parts of microcode into a single module (arch-specific parts are config-dependent). Also while we are at it, move arch-specific parts from microcode.h into their respective arch-specific .c files. Signed-off-by: Dmitry Adamushko Cc: "Peter Oruba" Signed-off-by: Ingo Molnar --- arch/x86/kernel/Makefile | 8 +- arch/x86/kernel/microcode.c | 505 ------------------------------------- arch/x86/kernel/microcode_amd.c | 72 +++--- arch/x86/kernel/microcode_core.c | 509 ++++++++++++++++++++++++++++++++++++++ arch/x86/kernel/microcode_intel.c | 79 +++--- 5 files changed, 606 insertions(+), 567 deletions(-) delete mode 100644 arch/x86/kernel/microcode.c create mode 100644 arch/x86/kernel/microcode_core.c (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index be454f348c3..f891996f684 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -51,9 +51,6 @@ obj-$(CONFIG_X86_BIOS_REBOOT) += reboot.o obj-$(CONFIG_MCA) += mca_32.o obj-$(CONFIG_X86_MSR) += msr.o obj-$(CONFIG_X86_CPUID) += cpuid.o -obj-$(CONFIG_MICROCODE) += microcode.o -obj-$(CONFIG_MICROCODE_INTEL) += microcode_intel.o -obj-$(CONFIG_MICROCODE_AMD) += microcode_amd.o obj-$(CONFIG_PCI) += early-quirks.o apm-y := apm_32.o obj-$(CONFIG_APM) += apm.o @@ -101,6 +98,11 @@ scx200-y += scx200_32.o obj-$(CONFIG_OLPC) += olpc.o +microcode-y := microcode_core.o +microcode-$(CONFIG_MICROCODE_INTEL) += microcode_intel.o +microcode-$(CONFIG_MICROCODE_AMD) += microcode_amd.o +obj-$(CONFIG_MICROCODE) += microcode.o + ### # 64 bit specific files ifeq ($(CONFIG_X86_64),y) diff --git a/arch/x86/kernel/microcode.c b/arch/x86/kernel/microcode.c deleted file mode 100644 index 0c2634f4fd7..00000000000 --- a/arch/x86/kernel/microcode.c +++ /dev/null @@ -1,505 +0,0 @@ -/* - * Intel CPU Microcode Update Driver for Linux - * - * Copyright (C) 2000-2006 Tigran Aivazian - * 2006 Shaohua Li - * - * This driver allows to upgrade microcode on Intel processors - * belonging to IA-32 family - PentiumPro, Pentium II, - * Pentium III, Xeon, Pentium 4, etc. - * - * Reference: Section 8.11 of Volume 3a, IA-32 Intel? Architecture - * Software Developer's Manual - * Order Number 253668 or free download from: - * - * http://developer.intel.com/design/pentium4/manuals/253668.htm - * - * For more information, go to http://www.urbanmyth.org/microcode - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - * - * 1.0 16 Feb 2000, Tigran Aivazian - * Initial release. - * 1.01 18 Feb 2000, Tigran Aivazian - * Added read() support + cleanups. - * 1.02 21 Feb 2000, Tigran Aivazian - * Added 'device trimming' support. open(O_WRONLY) zeroes - * and frees the saved copy of applied microcode. - * 1.03 29 Feb 2000, Tigran Aivazian - * Made to use devfs (/dev/cpu/microcode) + cleanups. - * 1.04 06 Jun 2000, Simon Trimmer - * Added misc device support (now uses both devfs and misc). - * Added MICROCODE_IOCFREE ioctl to clear memory. - * 1.05 09 Jun 2000, Simon Trimmer - * Messages for error cases (non Intel & no suitable microcode). - * 1.06 03 Aug 2000, Tigran Aivazian - * Removed ->release(). Removed exclusive open and status bitmap. - * Added microcode_rwsem to serialize read()/write()/ioctl(). - * Removed global kernel lock usage. - * 1.07 07 Sep 2000, Tigran Aivazian - * Write 0 to 0x8B msr and then cpuid before reading revision, - * so that it works even if there were no update done by the - * BIOS. Otherwise, reading from 0x8B gives junk (which happened - * to be 0 on my machine which is why it worked even when I - * disabled update by the BIOS) - * Thanks to Eric W. Biederman for the fix. - * 1.08 11 Dec 2000, Richard Schaal and - * Tigran Aivazian - * Intel Pentium 4 processor support and bugfixes. - * 1.09 30 Oct 2001, Tigran Aivazian - * Bugfix for HT (Hyper-Threading) enabled processors - * whereby processor resources are shared by all logical processors - * in a single CPU package. - * 1.10 28 Feb 2002 Asit K Mallick and - * Tigran Aivazian , - * Serialize updates as required on HT processors due to - * speculative nature of implementation. - * 1.11 22 Mar 2002 Tigran Aivazian - * Fix the panic when writing zero-length microcode chunk. - * 1.12 29 Sep 2003 Nitin Kamble , - * Jun Nakajima - * Support for the microcode updates in the new format. - * 1.13 10 Oct 2003 Tigran Aivazian - * Removed ->read() method and obsoleted MICROCODE_IOCFREE ioctl - * because we no longer hold a copy of applied microcode - * in kernel memory. - * 1.14 25 Jun 2004 Tigran Aivazian - * Fix sigmatch() macro to handle old CPUs with pf == 0. - * Thanks to Stuart Swales for pointing out this bug. - */ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include - -MODULE_DESCRIPTION("Microcode Update Driver"); -MODULE_AUTHOR("Tigran Aivazian "); -MODULE_LICENSE("GPL"); - -#define MICROCODE_VERSION "2.00" - -struct microcode_ops *microcode_ops; - -/* no concurrent ->write()s are allowed on /dev/cpu/microcode */ -static DEFINE_MUTEX(microcode_mutex); - -struct ucode_cpu_info ucode_cpu_info[NR_CPUS]; -EXPORT_SYMBOL_GPL(ucode_cpu_info); - -#ifdef CONFIG_MICROCODE_OLD_INTERFACE -static int do_microcode_update(const void __user *buf, size_t size) -{ - cpumask_t old; - int error = 0; - int cpu; - - old = current->cpus_allowed; - - for_each_online_cpu(cpu) { - struct ucode_cpu_info *uci = ucode_cpu_info + cpu; - - if (!uci->valid) - continue; - - set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu)); - error = microcode_ops->request_microcode_user(cpu, buf, size); - if (error < 0) - goto out; - if (!error) - microcode_ops->apply_microcode(cpu); - } -out: - set_cpus_allowed_ptr(current, &old); - return error; -} - -static int microcode_open(struct inode *unused1, struct file *unused2) -{ - cycle_kernel_lock(); - return capable(CAP_SYS_RAWIO) ? 0 : -EPERM; -} - -static ssize_t microcode_write(struct file *file, const char __user *buf, - size_t len, loff_t *ppos) -{ - ssize_t ret; - - if ((len >> PAGE_SHIFT) > num_physpages) { - printk(KERN_ERR "microcode: too much data (max %ld pages)\n", - num_physpages); - return -EINVAL; - } - - get_online_cpus(); - mutex_lock(µcode_mutex); - - ret = do_microcode_update(buf, len); - if (!ret) - ret = (ssize_t)len; - - mutex_unlock(µcode_mutex); - put_online_cpus(); - - return ret; -} - -static const struct file_operations microcode_fops = { - .owner = THIS_MODULE, - .write = microcode_write, - .open = microcode_open, -}; - -static struct miscdevice microcode_dev = { - .minor = MICROCODE_MINOR, - .name = "microcode", - .fops = µcode_fops, -}; - -static int __init microcode_dev_init(void) -{ - int error; - - error = misc_register(µcode_dev); - if (error) { - printk(KERN_ERR - "microcode: can't misc_register on minor=%d\n", - MICROCODE_MINOR); - return error; - } - - return 0; -} - -static void microcode_dev_exit(void) -{ - misc_deregister(µcode_dev); -} - -MODULE_ALIAS_MISCDEV(MICROCODE_MINOR); -#else -#define microcode_dev_init() 0 -#define microcode_dev_exit() do { } while (0) -#endif - -/* fake device for request_firmware */ -struct platform_device *microcode_pdev; - -static ssize_t reload_store(struct sys_device *dev, - struct sysdev_attribute *attr, - const char *buf, size_t sz) -{ - struct ucode_cpu_info *uci = ucode_cpu_info + dev->id; - char *end; - unsigned long val = simple_strtoul(buf, &end, 0); - int err = 0; - int cpu = dev->id; - - if (end == buf) - return -EINVAL; - if (val == 1) { - cpumask_t old = current->cpus_allowed; - - get_online_cpus(); - if (cpu_online(cpu)) { - set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu)); - mutex_lock(µcode_mutex); - if (uci->valid) { - err = microcode_ops->request_microcode_fw(cpu, - µcode_pdev->dev); - if (!err) - microcode_ops->apply_microcode(cpu); - } - mutex_unlock(µcode_mutex); - set_cpus_allowed_ptr(current, &old); - } - put_online_cpus(); - } - if (err) - return err; - return sz; -} - -static ssize_t version_show(struct sys_device *dev, - struct sysdev_attribute *attr, char *buf) -{ - struct ucode_cpu_info *uci = ucode_cpu_info + dev->id; - - return sprintf(buf, "0x%x\n", uci->cpu_sig.rev); -} - -static ssize_t pf_show(struct sys_device *dev, - struct sysdev_attribute *attr, char *buf) -{ - struct ucode_cpu_info *uci = ucode_cpu_info + dev->id; - - return sprintf(buf, "0x%x\n", uci->cpu_sig.pf); -} - -static SYSDEV_ATTR(reload, 0200, NULL, reload_store); -static SYSDEV_ATTR(version, 0400, version_show, NULL); -static SYSDEV_ATTR(processor_flags, 0400, pf_show, NULL); - -static struct attribute *mc_default_attrs[] = { - &attr_reload.attr, - &attr_version.attr, - &attr_processor_flags.attr, - NULL -}; - -static struct attribute_group mc_attr_group = { - .attrs = mc_default_attrs, - .name = "microcode", -}; - -static void microcode_fini_cpu(int cpu) -{ - struct ucode_cpu_info *uci = ucode_cpu_info + cpu; - - mutex_lock(µcode_mutex); - microcode_ops->microcode_fini_cpu(cpu); - uci->valid = 0; - mutex_unlock(µcode_mutex); -} - -static void collect_cpu_info(int cpu) -{ - struct ucode_cpu_info *uci = ucode_cpu_info + cpu; - - memset(uci, 0, sizeof(*uci)); - if (!microcode_ops->collect_cpu_info(cpu, &uci->cpu_sig)) - uci->valid = 1; -} - -static int microcode_resume_cpu(int cpu) -{ - struct ucode_cpu_info *uci = ucode_cpu_info + cpu; - struct cpu_signature nsig; - - pr_debug("microcode: CPU%d resumed\n", cpu); - - if (!uci->mc.valid_mc) - return 1; - - /* - * Let's verify that the 'cached' ucode does belong - * to this cpu (a bit of paranoia): - */ - if (microcode_ops->collect_cpu_info(cpu, &nsig)) { - microcode_fini_cpu(cpu); - return -1; - } - - if (memcmp(&nsig, &uci->cpu_sig, sizeof(nsig))) { - microcode_fini_cpu(cpu); - /* Should we look for a new ucode here? */ - return 1; - } - - return 0; -} - -void microcode_update_cpu(int cpu) -{ - struct ucode_cpu_info *uci = ucode_cpu_info + cpu; - int err = 0; - - /* We should bind the task to the CPU */ - BUG_ON(raw_smp_processor_id() != cpu); - - mutex_lock(µcode_mutex); - /* - * Check if the system resume is in progress (uci->valid != NULL), - * otherwise just request a firmware: - */ - if (uci->valid) { - err = microcode_resume_cpu(cpu); - } else { - collect_cpu_info(cpu); - if (uci->valid && system_state == SYSTEM_RUNNING) - err = microcode_ops->request_microcode_fw(cpu, - µcode_pdev->dev); - } - - if (!err) - microcode_ops->apply_microcode(cpu); - - mutex_unlock(µcode_mutex); -} - -static void microcode_init_cpu(int cpu) -{ - cpumask_t old = current->cpus_allowed; - - set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu)); - microcode_update_cpu(cpu); - set_cpus_allowed_ptr(current, &old); -} - -static int mc_sysdev_add(struct sys_device *sys_dev) -{ - int err, cpu = sys_dev->id; - struct ucode_cpu_info *uci = ucode_cpu_info + cpu; - - if (!cpu_online(cpu)) - return 0; - - pr_debug("microcode: CPU%d added\n", cpu); - memset(uci, 0, sizeof(*uci)); - - err = sysfs_create_group(&sys_dev->kobj, &mc_attr_group); - if (err) - return err; - - microcode_init_cpu(cpu); - return 0; -} - -static int mc_sysdev_remove(struct sys_device *sys_dev) -{ - int cpu = sys_dev->id; - - if (!cpu_online(cpu)) - return 0; - - pr_debug("microcode: CPU%d removed\n", cpu); - microcode_fini_cpu(cpu); - sysfs_remove_group(&sys_dev->kobj, &mc_attr_group); - return 0; -} - -static int mc_sysdev_resume(struct sys_device *dev) -{ - int cpu = dev->id; - - if (!cpu_online(cpu)) - return 0; - - /* only CPU 0 will apply ucode here */ - microcode_update_cpu(0); - return 0; -} - -static struct sysdev_driver mc_sysdev_driver = { - .add = mc_sysdev_add, - .remove = mc_sysdev_remove, - .resume = mc_sysdev_resume, -}; - -static __cpuinit int -mc_cpu_callback(struct notifier_block *nb, unsigned long action, void *hcpu) -{ - unsigned int cpu = (unsigned long)hcpu; - struct sys_device *sys_dev; - - sys_dev = get_cpu_sysdev(cpu); - switch (action) { - case CPU_ONLINE: - case CPU_ONLINE_FROZEN: - microcode_init_cpu(cpu); - case CPU_DOWN_FAILED: - case CPU_DOWN_FAILED_FROZEN: - pr_debug("microcode: CPU%d added\n", cpu); - if (sysfs_create_group(&sys_dev->kobj, &mc_attr_group)) - printk(KERN_ERR "microcode: Failed to create the sysfs " - "group for CPU%d\n", cpu); - break; - case CPU_DOWN_PREPARE: - case CPU_DOWN_PREPARE_FROZEN: - /* Suspend is in progress, only remove the interface */ - sysfs_remove_group(&sys_dev->kobj, &mc_attr_group); - pr_debug("microcode: CPU%d removed\n", cpu); - break; - case CPU_DEAD: - case CPU_UP_CANCELED_FROZEN: - /* The CPU refused to come up during a system resume */ - microcode_fini_cpu(cpu); - break; - } - return NOTIFY_OK; -} - -static struct notifier_block __refdata mc_cpu_notifier = { - .notifier_call = mc_cpu_callback, -}; - -int microcode_init(void *opaque, struct module *module) -{ - struct microcode_ops *ops = (struct microcode_ops *)opaque; - int error; - - if (microcode_ops) { - printk(KERN_ERR "microcode: already loaded the other module\n"); - return -EEXIST; - } - - microcode_ops = ops; - - error = microcode_dev_init(); - if (error) - return error; - microcode_pdev = platform_device_register_simple("microcode", -1, - NULL, 0); - if (IS_ERR(microcode_pdev)) { - microcode_dev_exit(); - return PTR_ERR(microcode_pdev); - } - - get_online_cpus(); - error = sysdev_driver_register(&cpu_sysdev_class, &mc_sysdev_driver); - put_online_cpus(); - if (error) { - microcode_dev_exit(); - platform_device_unregister(microcode_pdev); - return error; - } - - register_hotcpu_notifier(&mc_cpu_notifier); - - printk(KERN_INFO - "Microcode Update Driver: v" MICROCODE_VERSION - " " - " \n"); - - return 0; -} -EXPORT_SYMBOL_GPL(microcode_init); - -void __exit microcode_exit(void) -{ - microcode_dev_exit(); - - unregister_hotcpu_notifier(&mc_cpu_notifier); - - get_online_cpus(); - sysdev_driver_unregister(&cpu_sysdev_class, &mc_sysdev_driver); - put_online_cpus(); - - platform_device_unregister(microcode_pdev); - - microcode_ops = NULL; - - printk(KERN_INFO - "Microcode Update Driver: v" MICROCODE_VERSION " removed.\n"); -} -EXPORT_SYMBOL_GPL(microcode_exit); diff --git a/arch/x86/kernel/microcode_amd.c b/arch/x86/kernel/microcode_amd.c index 48aec9f48e4..03ea4e52e87 100644 --- a/arch/x86/kernel/microcode_amd.c +++ b/arch/x86/kernel/microcode_amd.c @@ -46,6 +46,35 @@ MODULE_LICENSE("GPL v2"); #define UCODE_EQUIV_CPU_TABLE_TYPE 0x00000000 #define UCODE_UCODE_TYPE 0x00000001 +struct equiv_cpu_entry { + unsigned int installed_cpu; + unsigned int fixed_errata_mask; + unsigned int fixed_errata_compare; + unsigned int equiv_cpu; +}; + +struct microcode_header_amd { + unsigned int data_code; + unsigned int patch_id; + unsigned char mc_patch_data_id[2]; + unsigned char mc_patch_data_len; + unsigned char init_flag; + unsigned int mc_patch_data_checksum; + unsigned int nb_dev_id; + unsigned int sb_dev_id; + unsigned char processor_rev_id[2]; + unsigned char nb_rev_id; + unsigned char sb_rev_id; + unsigned char bios_api_rev; + unsigned char reserved1[3]; + unsigned int match_reg[8]; +}; + +struct microcode_amd { + struct microcode_header_amd hdr; + unsigned int mpb[0]; +}; + #define UCODE_MAX_SIZE (2048) #define DEFAULT_UCODE_DATASIZE (896) #define MC_HEADER_SIZE (sizeof(struct microcode_header_amd)) @@ -189,17 +218,18 @@ static void apply_microcode_amd(int cpu) unsigned int rev; int cpu_num = raw_smp_processor_id(); struct ucode_cpu_info *uci = ucode_cpu_info + cpu_num; + struct microcode_amd *mc_amd = uci->mc; unsigned long addr; /* We should bind the task to the CPU */ BUG_ON(cpu_num != cpu); - if (uci->mc.mc_amd == NULL) + if (mc_amd == NULL) return; spin_lock_irqsave(µcode_update_lock, flags); - addr = (unsigned long)&uci->mc.mc_amd->hdr.data_code; + addr = (unsigned long)&mc_amd->hdr.data_code; edx = (unsigned int)(((unsigned long)upper_32_bits(addr))); eax = (unsigned int)(((unsigned long)lower_32_bits(addr))); @@ -214,16 +244,16 @@ static void apply_microcode_amd(int cpu) spin_unlock_irqrestore(µcode_update_lock, flags); /* check current patch id and patch's id for match */ - if (rev != uci->mc.mc_amd->hdr.patch_id) { + if (rev != mc_amd->hdr.patch_id) { printk(KERN_ERR "microcode: CPU%d update from revision " "0x%x to 0x%x failed\n", cpu_num, - uci->mc.mc_amd->hdr.patch_id, rev); + mc_amd->hdr.patch_id, rev); return; } printk(KERN_INFO "microcode: CPU%d updated from revision " "0x%x to 0x%x \n", - cpu_num, uci->cpu_sig.rev, uci->mc.mc_amd->hdr.patch_id); + cpu_num, uci->cpu_sig.rev, mc_amd->hdr.patch_id); uci->cpu_sig.rev = rev; } @@ -355,12 +385,12 @@ static int generic_load_microcode(int cpu, void *data, size_t size, if (new_mc) { if (!leftover) { - if (uci->mc.mc_amd) - vfree(uci->mc.mc_amd); - uci->mc.mc_amd = (struct microcode_amd *)new_mc; + if (uci->mc) + vfree(uci->mc); + uci->mc = new_mc; pr_debug("microcode: CPU%d found a matching microcode update with" " version 0x%x (current=0x%x)\n", - cpu, uci->mc.mc_amd->hdr.patch_id, uci->cpu_sig.rev); + cpu, new_rev, uci->cpu_sig.rev); } else vfree(new_mc); } @@ -416,8 +446,8 @@ static void microcode_fini_cpu_amd(int cpu) { struct ucode_cpu_info *uci = ucode_cpu_info + cpu; - vfree(uci->mc.mc_amd); - uci->mc.mc_amd = NULL; + vfree(uci->mc); + uci->mc = NULL; } static struct microcode_ops microcode_amd_ops = { @@ -428,23 +458,7 @@ static struct microcode_ops microcode_amd_ops = { .microcode_fini_cpu = microcode_fini_cpu_amd, }; -static int __init microcode_amd_module_init(void) +struct microcode_ops * __init init_amd_microcode(void) { - struct cpuinfo_x86 *c = &cpu_data(0); - - equiv_cpu_table = NULL; - if (c->x86_vendor != X86_VENDOR_AMD) { - printk(KERN_ERR "microcode: CPU platform is not AMD-capable\n"); - return -ENODEV; - } - - return microcode_init(µcode_amd_ops, THIS_MODULE); -} - -static void __exit microcode_amd_module_exit(void) -{ - microcode_exit(); + return µcode_amd_ops; } - -module_init(microcode_amd_module_init) -module_exit(microcode_amd_module_exit) diff --git a/arch/x86/kernel/microcode_core.c b/arch/x86/kernel/microcode_core.c new file mode 100644 index 00000000000..ff031dbccdf --- /dev/null +++ b/arch/x86/kernel/microcode_core.c @@ -0,0 +1,509 @@ +/* + * Intel CPU Microcode Update Driver for Linux + * + * Copyright (C) 2000-2006 Tigran Aivazian + * 2006 Shaohua Li + * + * This driver allows to upgrade microcode on Intel processors + * belonging to IA-32 family - PentiumPro, Pentium II, + * Pentium III, Xeon, Pentium 4, etc. + * + * Reference: Section 8.11 of Volume 3a, IA-32 Intel? Architecture + * Software Developer's Manual + * Order Number 253668 or free download from: + * + * http://developer.intel.com/design/pentium4/manuals/253668.htm + * + * For more information, go to http://www.urbanmyth.org/microcode + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * 1.0 16 Feb 2000, Tigran Aivazian + * Initial release. + * 1.01 18 Feb 2000, Tigran Aivazian + * Added read() support + cleanups. + * 1.02 21 Feb 2000, Tigran Aivazian + * Added 'device trimming' support. open(O_WRONLY) zeroes + * and frees the saved copy of applied microcode. + * 1.03 29 Feb 2000, Tigran Aivazian + * Made to use devfs (/dev/cpu/microcode) + cleanups. + * 1.04 06 Jun 2000, Simon Trimmer + * Added misc device support (now uses both devfs and misc). + * Added MICROCODE_IOCFREE ioctl to clear memory. + * 1.05 09 Jun 2000, Simon Trimmer + * Messages for error cases (non Intel & no suitable microcode). + * 1.06 03 Aug 2000, Tigran Aivazian + * Removed ->release(). Removed exclusive open and status bitmap. + * Added microcode_rwsem to serialize read()/write()/ioctl(). + * Removed global kernel lock usage. + * 1.07 07 Sep 2000, Tigran Aivazian + * Write 0 to 0x8B msr and then cpuid before reading revision, + * so that it works even if there were no update done by the + * BIOS. Otherwise, reading from 0x8B gives junk (which happened + * to be 0 on my machine which is why it worked even when I + * disabled update by the BIOS) + * Thanks to Eric W. Biederman for the fix. + * 1.08 11 Dec 2000, Richard Schaal and + * Tigran Aivazian + * Intel Pentium 4 processor support and bugfixes. + * 1.09 30 Oct 2001, Tigran Aivazian + * Bugfix for HT (Hyper-Threading) enabled processors + * whereby processor resources are shared by all logical processors + * in a single CPU package. + * 1.10 28 Feb 2002 Asit K Mallick and + * Tigran Aivazian , + * Serialize updates as required on HT processors due to + * speculative nature of implementation. + * 1.11 22 Mar 2002 Tigran Aivazian + * Fix the panic when writing zero-length microcode chunk. + * 1.12 29 Sep 2003 Nitin Kamble , + * Jun Nakajima + * Support for the microcode updates in the new format. + * 1.13 10 Oct 2003 Tigran Aivazian + * Removed ->read() method and obsoleted MICROCODE_IOCFREE ioctl + * because we no longer hold a copy of applied microcode + * in kernel memory. + * 1.14 25 Jun 2004 Tigran Aivazian + * Fix sigmatch() macro to handle old CPUs with pf == 0. + * Thanks to Stuart Swales for pointing out this bug. + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +MODULE_DESCRIPTION("Microcode Update Driver"); +MODULE_AUTHOR("Tigran Aivazian "); +MODULE_LICENSE("GPL"); + +#define MICROCODE_VERSION "2.00" + +struct microcode_ops *microcode_ops; + +/* no concurrent ->write()s are allowed on /dev/cpu/microcode */ +static DEFINE_MUTEX(microcode_mutex); + +struct ucode_cpu_info ucode_cpu_info[NR_CPUS]; +EXPORT_SYMBOL_GPL(ucode_cpu_info); + +#ifdef CONFIG_MICROCODE_OLD_INTERFACE +static int do_microcode_update(const void __user *buf, size_t size) +{ + cpumask_t old; + int error = 0; + int cpu; + + old = current->cpus_allowed; + + for_each_online_cpu(cpu) { + struct ucode_cpu_info *uci = ucode_cpu_info + cpu; + + if (!uci->valid) + continue; + + set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu)); + error = microcode_ops->request_microcode_user(cpu, buf, size); + if (error < 0) + goto out; + if (!error) + microcode_ops->apply_microcode(cpu); + } +out: + set_cpus_allowed_ptr(current, &old); + return error; +} + +static int microcode_open(struct inode *unused1, struct file *unused2) +{ + cycle_kernel_lock(); + return capable(CAP_SYS_RAWIO) ? 0 : -EPERM; +} + +static ssize_t microcode_write(struct file *file, const char __user *buf, + size_t len, loff_t *ppos) +{ + ssize_t ret; + + if ((len >> PAGE_SHIFT) > num_physpages) { + printk(KERN_ERR "microcode: too much data (max %ld pages)\n", + num_physpages); + return -EINVAL; + } + + get_online_cpus(); + mutex_lock(µcode_mutex); + + ret = do_microcode_update(buf, len); + if (!ret) + ret = (ssize_t)len; + + mutex_unlock(µcode_mutex); + put_online_cpus(); + + return ret; +} + +static const struct file_operations microcode_fops = { + .owner = THIS_MODULE, + .write = microcode_write, + .open = microcode_open, +}; + +static struct miscdevice microcode_dev = { + .minor = MICROCODE_MINOR, + .name = "microcode", + .fops = µcode_fops, +}; + +static int __init microcode_dev_init(void) +{ + int error; + + error = misc_register(µcode_dev); + if (error) { + printk(KERN_ERR + "microcode: can't misc_register on minor=%d\n", + MICROCODE_MINOR); + return error; + } + + return 0; +} + +static void microcode_dev_exit(void) +{ + misc_deregister(µcode_dev); +} + +MODULE_ALIAS_MISCDEV(MICROCODE_MINOR); +#else +#define microcode_dev_init() 0 +#define microcode_dev_exit() do { } while (0) +#endif + +/* fake device for request_firmware */ +struct platform_device *microcode_pdev; + +static ssize_t reload_store(struct sys_device *dev, + struct sysdev_attribute *attr, + const char *buf, size_t sz) +{ + struct ucode_cpu_info *uci = ucode_cpu_info + dev->id; + char *end; + unsigned long val = simple_strtoul(buf, &end, 0); + int err = 0; + int cpu = dev->id; + + if (end == buf) + return -EINVAL; + if (val == 1) { + cpumask_t old = current->cpus_allowed; + + get_online_cpus(); + if (cpu_online(cpu)) { + set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu)); + mutex_lock(µcode_mutex); + if (uci->valid) { + err = microcode_ops->request_microcode_fw(cpu, + µcode_pdev->dev); + if (!err) + microcode_ops->apply_microcode(cpu); + } + mutex_unlock(µcode_mutex); + set_cpus_allowed_ptr(current, &old); + } + put_online_cpus(); + } + if (err) + return err; + return sz; +} + +static ssize_t version_show(struct sys_device *dev, + struct sysdev_attribute *attr, char *buf) +{ + struct ucode_cpu_info *uci = ucode_cpu_info + dev->id; + + return sprintf(buf, "0x%x\n", uci->cpu_sig.rev); +} + +static ssize_t pf_show(struct sys_device *dev, + struct sysdev_attribute *attr, char *buf) +{ + struct ucode_cpu_info *uci = ucode_cpu_info + dev->id; + + return sprintf(buf, "0x%x\n", uci->cpu_sig.pf); +} + +static SYSDEV_ATTR(reload, 0200, NULL, reload_store); +static SYSDEV_ATTR(version, 0400, version_show, NULL); +static SYSDEV_ATTR(processor_flags, 0400, pf_show, NULL); + +static struct attribute *mc_default_attrs[] = { + &attr_reload.attr, + &attr_version.attr, + &attr_processor_flags.attr, + NULL +}; + +static struct attribute_group mc_attr_group = { + .attrs = mc_default_attrs, + .name = "microcode", +}; + +static void microcode_fini_cpu(int cpu) +{ + struct ucode_cpu_info *uci = ucode_cpu_info + cpu; + + mutex_lock(µcode_mutex); + microcode_ops->microcode_fini_cpu(cpu); + uci->valid = 0; + mutex_unlock(µcode_mutex); +} + +static void collect_cpu_info(int cpu) +{ + struct ucode_cpu_info *uci = ucode_cpu_info + cpu; + + memset(uci, 0, sizeof(*uci)); + if (!microcode_ops->collect_cpu_info(cpu, &uci->cpu_sig)) + uci->valid = 1; +} + +static int microcode_resume_cpu(int cpu) +{ + struct ucode_cpu_info *uci = ucode_cpu_info + cpu; + struct cpu_signature nsig; + + pr_debug("microcode: CPU%d resumed\n", cpu); + + if (!uci->mc) + return 1; + + /* + * Let's verify that the 'cached' ucode does belong + * to this cpu (a bit of paranoia): + */ + if (microcode_ops->collect_cpu_info(cpu, &nsig)) { + microcode_fini_cpu(cpu); + return -1; + } + + if (memcmp(&nsig, &uci->cpu_sig, sizeof(nsig))) { + microcode_fini_cpu(cpu); + /* Should we look for a new ucode here? */ + return 1; + } + + return 0; +} + +void microcode_update_cpu(int cpu) +{ + struct ucode_cpu_info *uci = ucode_cpu_info + cpu; + int err = 0; + + /* We should bind the task to the CPU */ + BUG_ON(raw_smp_processor_id() != cpu); + + mutex_lock(µcode_mutex); + /* + * Check if the system resume is in progress (uci->valid != NULL), + * otherwise just request a firmware: + */ + if (uci->valid) { + err = microcode_resume_cpu(cpu); + } else { + collect_cpu_info(cpu); + if (uci->valid && system_state == SYSTEM_RUNNING) + err = microcode_ops->request_microcode_fw(cpu, + µcode_pdev->dev); + } + + if (!err) + microcode_ops->apply_microcode(cpu); + + mutex_unlock(µcode_mutex); +} + +static void microcode_init_cpu(int cpu) +{ + cpumask_t old = current->cpus_allowed; + + set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu)); + microcode_update_cpu(cpu); + set_cpus_allowed_ptr(current, &old); +} + +static int mc_sysdev_add(struct sys_device *sys_dev) +{ + int err, cpu = sys_dev->id; + struct ucode_cpu_info *uci = ucode_cpu_info + cpu; + + if (!cpu_online(cpu)) + return 0; + + pr_debug("microcode: CPU%d added\n", cpu); + memset(uci, 0, sizeof(*uci)); + + err = sysfs_create_group(&sys_dev->kobj, &mc_attr_group); + if (err) + return err; + + microcode_init_cpu(cpu); + return 0; +} + +static int mc_sysdev_remove(struct sys_device *sys_dev) +{ + int cpu = sys_dev->id; + + if (!cpu_online(cpu)) + return 0; + + pr_debug("microcode: CPU%d removed\n", cpu); + microcode_fini_cpu(cpu); + sysfs_remove_group(&sys_dev->kobj, &mc_attr_group); + return 0; +} + +static int mc_sysdev_resume(struct sys_device *dev) +{ + int cpu = dev->id; + + if (!cpu_online(cpu)) + return 0; + + /* only CPU 0 will apply ucode here */ + microcode_update_cpu(0); + return 0; +} + +static struct sysdev_driver mc_sysdev_driver = { + .add = mc_sysdev_add, + .remove = mc_sysdev_remove, + .resume = mc_sysdev_resume, +}; + +static __cpuinit int +mc_cpu_callback(struct notifier_block *nb, unsigned long action, void *hcpu) +{ + unsigned int cpu = (unsigned long)hcpu; + struct sys_device *sys_dev; + + sys_dev = get_cpu_sysdev(cpu); + switch (action) { + case CPU_ONLINE: + case CPU_ONLINE_FROZEN: + microcode_init_cpu(cpu); + case CPU_DOWN_FAILED: + case CPU_DOWN_FAILED_FROZEN: + pr_debug("microcode: CPU%d added\n", cpu); + if (sysfs_create_group(&sys_dev->kobj, &mc_attr_group)) + printk(KERN_ERR "microcode: Failed to create the sysfs " + "group for CPU%d\n", cpu); + break; + case CPU_DOWN_PREPARE: + case CPU_DOWN_PREPARE_FROZEN: + /* Suspend is in progress, only remove the interface */ + sysfs_remove_group(&sys_dev->kobj, &mc_attr_group); + pr_debug("microcode: CPU%d removed\n", cpu); + break; + case CPU_DEAD: + case CPU_UP_CANCELED_FROZEN: + /* The CPU refused to come up during a system resume */ + microcode_fini_cpu(cpu); + break; + } + return NOTIFY_OK; +} + +static struct notifier_block __refdata mc_cpu_notifier = { + .notifier_call = mc_cpu_callback, +}; + +static int __init microcode_init(void) +{ + struct cpuinfo_x86 *c = &cpu_data(0); + int error; + + if (c->x86_vendor == X86_VENDOR_INTEL) + microcode_ops = init_intel_microcode(); + else if (c->x86_vendor != X86_VENDOR_AMD) + microcode_ops = init_amd_microcode(); + + if (!microcode_ops) { + printk(KERN_ERR "microcode: no support for this CPU vendor\n"); + return -ENODEV; + } + + error = microcode_dev_init(); + if (error) + return error; + microcode_pdev = platform_device_register_simple("microcode", -1, + NULL, 0); + if (IS_ERR(microcode_pdev)) { + microcode_dev_exit(); + return PTR_ERR(microcode_pdev); + } + + get_online_cpus(); + error = sysdev_driver_register(&cpu_sysdev_class, &mc_sysdev_driver); + put_online_cpus(); + if (error) { + microcode_dev_exit(); + platform_device_unregister(microcode_pdev); + return error; + } + + register_hotcpu_notifier(&mc_cpu_notifier); + + printk(KERN_INFO + "Microcode Update Driver: v" MICROCODE_VERSION + " " + " \n"); + + return 0; +} + +static void __exit microcode_exit(void) +{ + microcode_dev_exit(); + + unregister_hotcpu_notifier(&mc_cpu_notifier); + + get_online_cpus(); + sysdev_driver_unregister(&cpu_sysdev_class, &mc_sysdev_driver); + put_online_cpus(); + + platform_device_unregister(microcode_pdev); + + microcode_ops = NULL; + + printk(KERN_INFO + "Microcode Update Driver: v" MICROCODE_VERSION " removed.\n"); +} + +module_init(microcode_init); +module_exit(microcode_exit); diff --git a/arch/x86/kernel/microcode_intel.c b/arch/x86/kernel/microcode_intel.c index 48ed3cef58c..622dc4a2178 100644 --- a/arch/x86/kernel/microcode_intel.c +++ b/arch/x86/kernel/microcode_intel.c @@ -97,6 +97,38 @@ MODULE_DESCRIPTION("Microcode Update Driver"); MODULE_AUTHOR("Tigran Aivazian "); MODULE_LICENSE("GPL"); +struct microcode_header_intel { + unsigned int hdrver; + unsigned int rev; + unsigned int date; + unsigned int sig; + unsigned int cksum; + unsigned int ldrver; + unsigned int pf; + unsigned int datasize; + unsigned int totalsize; + unsigned int reserved[3]; +}; + +struct microcode_intel { + struct microcode_header_intel hdr; + unsigned int bits[0]; +}; + +/* microcode format is extended from prescott processors */ +struct extended_signature { + unsigned int sig; + unsigned int pf; + unsigned int cksum; +}; + +struct extended_sigtable { + unsigned int count; + unsigned int cksum; + unsigned int reserved[3]; + struct extended_signature sigs[0]; +}; + #define DEFAULT_UCODE_DATASIZE (2000) #define MC_HEADER_SIZE (sizeof(struct microcode_header_intel)) #define DEFAULT_UCODE_TOTALSIZE (DEFAULT_UCODE_DATASIZE + MC_HEADER_SIZE) @@ -284,11 +316,12 @@ static void apply_microcode(int cpu) unsigned int val[2]; int cpu_num = raw_smp_processor_id(); struct ucode_cpu_info *uci = ucode_cpu_info + cpu; + struct microcode_intel *mc_intel = uci->mc; /* We should bind the task to the CPU */ BUG_ON(cpu_num != cpu); - if (uci->mc.mc_intel == NULL) + if (mc_intel == NULL) return; /* serialize access to the physical write to MSR 0x79 */ @@ -296,8 +329,8 @@ static void apply_microcode(int cpu) /* write microcode via MSR 0x79 */ wrmsr(MSR_IA32_UCODE_WRITE, - (unsigned long) uci->mc.mc_intel->bits, - (unsigned long) uci->mc.mc_intel->bits >> 16 >> 16); + (unsigned long) mc_intel->bits, + (unsigned long) mc_intel->bits >> 16 >> 16); wrmsr(MSR_IA32_UCODE_REV, 0, 0); /* see notes above for revision 1.07. Apparent chip bug */ @@ -307,7 +340,7 @@ static void apply_microcode(int cpu) rdmsr(MSR_IA32_UCODE_REV, val[0], val[1]); spin_unlock_irqrestore(µcode_update_lock, flags); - if (val[1] != uci->mc.mc_intel->hdr.rev) { + if (val[1] != mc_intel->hdr.rev) { printk(KERN_ERR "microcode: CPU%d update from revision " "0x%x to 0x%x failed\n", cpu_num, uci->cpu_sig.rev, val[1]); return; @@ -315,9 +348,9 @@ static void apply_microcode(int cpu) printk(KERN_INFO "microcode: CPU%d updated from revision " "0x%x to 0x%x, date = %04x-%02x-%02x \n", cpu_num, uci->cpu_sig.rev, val[1], - uci->mc.mc_intel->hdr.date & 0xffff, - uci->mc.mc_intel->hdr.date >> 24, - (uci->mc.mc_intel->hdr.date >> 16) & 0xff); + mc_intel->hdr.date & 0xffff, + mc_intel->hdr.date >> 24, + (mc_intel->hdr.date >> 16) & 0xff); uci->cpu_sig.rev = val[1]; } @@ -367,12 +400,12 @@ static int generic_load_microcode(int cpu, void *data, size_t size, if (new_mc) { if (!leftover) { - if (uci->mc.mc_intel) - vfree(uci->mc.mc_intel); - uci->mc.mc_intel = (struct microcode_intel *)new_mc; + if (uci->mc) + vfree(uci->mc); + uci->mc = (struct microcode_intel *)new_mc; pr_debug("microcode: CPU%d found a matching microcode update with" " version 0x%x (current=0x%x)\n", - cpu, uci->mc.mc_intel->hdr.rev, uci->cpu_sig.rev); + cpu, new_rev, uci->cpu_sig.rev); } else vfree(new_mc); } @@ -428,11 +461,11 @@ static void microcode_fini_cpu(int cpu) { struct ucode_cpu_info *uci = ucode_cpu_info + cpu; - vfree(uci->mc.mc_intel); - uci->mc.mc_intel = NULL; + vfree(uci->mc); + uci->mc = NULL; } -static struct microcode_ops microcode_intel_ops = { +struct microcode_ops microcode_intel_ops = { .request_microcode_user = request_microcode_user, .request_microcode_fw = request_microcode_fw, .collect_cpu_info = collect_cpu_info, @@ -440,22 +473,8 @@ static struct microcode_ops microcode_intel_ops = { .microcode_fini_cpu = microcode_fini_cpu, }; -static int __init microcode_intel_module_init(void) -{ - struct cpuinfo_x86 *c = &cpu_data(0); - - if (c->x86_vendor != X86_VENDOR_INTEL) { - printk(KERN_ERR "microcode: CPU platform is not Intel-capable\n"); - return -ENODEV; - } - - return microcode_init(µcode_intel_ops, THIS_MODULE); -} - -static void __exit microcode_intel_module_exit(void) +struct microcode_ops * __init init_intel_microcode(void) { - microcode_exit(); + return µcode_intel_ops; } -module_init(microcode_intel_module_init) -module_exit(microcode_intel_module_exit) -- cgit v1.2.3 From da654b74bda14c45a7d98c731bf3c1a43b6b74e2 Mon Sep 17 00:00:00 2001 From: Srinivasa Ds Date: Tue, 23 Sep 2008 15:23:52 +0530 Subject: signals: demultiplexing SIGTRAP signal Currently a SIGTRAP can denote any one of below reasons. - Breakpoint hit - H/W debug register hit - Single step - Signal sent through kill() or rasie() Architectures like powerpc/parisc provides infrastructure to demultiplex SIGTRAP signal by passing down the information for receiving SIGTRAP through si_code of siginfot_t structure. Here is an attempt is generalise this infrastructure by extending it to x86 and x86_64 archs. Signed-off-by: Srinivasa DS Cc: Roland McGrath Cc: akpm@linux-foundation.org Cc: paulus@samba.org Cc: linuxppc-dev@ozlabs.org Signed-off-by: Ingo Molnar --- arch/x86/kernel/ptrace.c | 7 ++++--- arch/x86/kernel/traps_32.c | 4 +++- arch/x86/kernel/traps_64.c | 2 +- 3 files changed, 8 insertions(+), 5 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index 9e43a48ad6e..bf45cdf1aac 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -1358,7 +1358,8 @@ const struct user_regset_view *task_user_regset_view(struct task_struct *task) #endif } -void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs, int error_code) +void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs, + int error_code, int si_code) { struct siginfo info; @@ -1367,7 +1368,7 @@ void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs, int error_code) memset(&info, 0, sizeof(info)); info.si_signo = SIGTRAP; - info.si_code = TRAP_BRKPT; + info.si_code = si_code; /* User-mode ip? */ info.si_addr = user_mode_vm(regs) ? (void __user *) regs->ip : NULL; @@ -1454,5 +1455,5 @@ asmregparm void syscall_trace_leave(struct pt_regs *regs) */ if (test_thread_flag(TIF_SINGLESTEP) && tracehook_consider_fatal_signal(current, SIGTRAP, SIG_DFL)) - send_sigtrap(current, regs, 0); + send_sigtrap(current, regs, 0, TRAP_BRKPT); } diff --git a/arch/x86/kernel/traps_32.c b/arch/x86/kernel/traps_32.c index da5a5964fcc..0429c5de5ea 100644 --- a/arch/x86/kernel/traps_32.c +++ b/arch/x86/kernel/traps_32.c @@ -891,6 +891,7 @@ void __kprobes do_debug(struct pt_regs *regs, long error_code) { struct task_struct *tsk = current; unsigned int condition; + int si_code; trace_hardirqs_fixup(); @@ -935,8 +936,9 @@ void __kprobes do_debug(struct pt_regs *regs, long error_code) goto clear_TF_reenable; } + si_code = get_si_code((unsigned long)condition); /* Ok, finally something we can handle */ - send_sigtrap(tsk, regs, error_code); + send_sigtrap(tsk, regs, error_code, si_code); /* * Disable additional traps. They'll be re-enabled when diff --git a/arch/x86/kernel/traps_64.c b/arch/x86/kernel/traps_64.c index 56d6f114778..011d8e1fac6 100644 --- a/arch/x86/kernel/traps_64.c +++ b/arch/x86/kernel/traps_64.c @@ -941,7 +941,7 @@ asmlinkage void __kprobes do_debug(struct pt_regs *regs, tsk->thread.error_code = error_code; info.si_signo = SIGTRAP; info.si_errno = 0; - info.si_code = TRAP_BRKPT; + info.si_code = get_si_code(condition); info.si_addr = user_mode(regs) ? (void __user *)regs->ip : NULL; force_sig_info(SIGTRAP, &info, tsk); -- cgit v1.2.3 From b6cffde1a20409f9720d5c9aba28d3efd3a4f04e Mon Sep 17 00:00:00 2001 From: Peter Oruba Date: Wed, 17 Sep 2008 15:05:52 +0200 Subject: x86, microcode rework, v2, renaming Renaming based on patch from Dmitry Adamushko. Made code more readable by renaming define and variables related to microcode _container_file_ header to make it distinguishable from microcode _patch_ header. Signed-off-by: Peter Oruba Signed-off-by: Ingo Molnar --- arch/x86/kernel/microcode_amd.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/microcode_amd.c b/arch/x86/kernel/microcode_amd.c index 03ea4e52e87..62058e9c8b4 100644 --- a/arch/x86/kernel/microcode_amd.c +++ b/arch/x86/kernel/microcode_amd.c @@ -304,12 +304,12 @@ static void * get_next_ucode(u8 *buf, unsigned int size, static int install_equiv_cpu_table(u8 *buf, int (*get_ucode_data)(void *, const void *, size_t)) { -#define UCODE_HEADER_SIZE 12 - u8 *hdr[UCODE_HEADER_SIZE]; - unsigned int *buf_pos = (unsigned int *)hdr; +#define UCODE_CONTAINER_HEADER_SIZE 12 + u8 *container_hdr[UCODE_CONTAINER_HEADER_SIZE]; + unsigned int *buf_pos = (unsigned int *)container_hdr; unsigned long size; - if (get_ucode_data(&hdr, buf, UCODE_HEADER_SIZE)) + if (get_ucode_data(&container_hdr, buf, UCODE_CONTAINER_HEADER_SIZE)) return 0; size = buf_pos[2]; @@ -326,14 +326,14 @@ static int install_equiv_cpu_table(u8 *buf, return 0; } - buf += UCODE_HEADER_SIZE; + buf += UCODE_CONTAINER_HEADER_SIZE; if (get_ucode_data(equiv_cpu_table, buf, size)) { vfree(equiv_cpu_table); return 0; } - return size + UCODE_HEADER_SIZE; /* add header length */ -#undef UCODE_HEADER_SIZE + return size + UCODE_CONTAINER_HEADER_SIZE; /* add header length */ +#undef UCODE_CONTAINER_HEADER_SIZE } static void free_equiv_cpu_table(void) -- cgit v1.2.3 From d4738792fb86600b6cb7220459d9c47e819b3580 Mon Sep 17 00:00:00 2001 From: Peter Oruba Date: Wed, 17 Sep 2008 15:39:18 +0200 Subject: x86, microcode rework, v2, renaming cont. Renaming based on patch from Dmitry Adamushko. Further clarification by renaming define and variable related to microcode container file. Signed-off-by: Peter Oruba Signed-off-by: Ingo Molnar --- arch/x86/kernel/microcode_amd.c | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/microcode_amd.c b/arch/x86/kernel/microcode_amd.c index 62058e9c8b4..829415208ff 100644 --- a/arch/x86/kernel/microcode_amd.c +++ b/arch/x86/kernel/microcode_amd.c @@ -263,21 +263,20 @@ static void * get_next_ucode(u8 *buf, unsigned int size, unsigned int *mc_size) { unsigned int total_size; -#define UCODE_UNKNOWN_HDR 8 - u8 hdr[UCODE_UNKNOWN_HDR]; +#define UCODE_CONTAINER_SECTION_HDR 8 + u8 section_hdr[UCODE_CONTAINER_SECTION_HDR]; void *mc; - if (get_ucode_data(hdr, buf, UCODE_UNKNOWN_HDR)) + if (get_ucode_data(section_hdr, buf, UCODE_CONTAINER_SECTION_HDR)) return NULL; - if (hdr[0] != UCODE_UCODE_TYPE) { + if (section_hdr[0] != UCODE_UCODE_TYPE) { printk(KERN_ERR "microcode: error! " "Wrong microcode payload type field\n"); return NULL; } - /* FIXME! dimm: Why not by means of get_totalsize(hdr)? */ - total_size = (unsigned long) (hdr[4] + (hdr[5] << 8)); + total_size = (unsigned long) (section_hdr[4] + (section_hdr[5] << 8)); printk(KERN_INFO "microcode: size %u, total_size %u\n", size, total_size); @@ -290,13 +289,13 @@ static void * get_next_ucode(u8 *buf, unsigned int size, mc = vmalloc(UCODE_MAX_SIZE); if (mc) { memset(mc, 0, UCODE_MAX_SIZE); - if (get_ucode_data(mc, buf + UCODE_UNKNOWN_HDR, total_size)) { + if (get_ucode_data(mc, buf + UCODE_CONTAINER_SECTION_HDR, total_size)) { vfree(mc); mc = NULL; } else - *mc_size = total_size + UCODE_UNKNOWN_HDR; + *mc_size = total_size + UCODE_CONTAINER_SECTION_HDR; } -#undef UCODE_UNKNOWN_HDR +#undef UCODE_CONTAINER_SECTION_HDR return mc; } -- cgit v1.2.3 From 1eda81495a49a4ee91d8863b0a441a624375efea Mon Sep 17 00:00:00 2001 From: Marc Dionne Date: Tue, 23 Sep 2008 22:40:02 -0400 Subject: x86: prevent stale state of c1e_mask across CPU offline/online, fix Fix build error introduced by commit 4faac97d44ac27 ("x86: prevent stale state of c1e_mask across CPU offline/online"). process_32.c needs to include idle.h to get the prototype for c1e_remove_cpu() Signed-off-by: Marc Dionne Signed-off-by: Ingo Molnar --- arch/x86/kernel/process_32.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 4b3cfdf5421..31f40b24bf5 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -55,6 +55,7 @@ #include #include #include +#include asmlinkage void ret_from_fork(void) __asm__("ret_from_fork"); -- cgit v1.2.3 From 77a9a768b7374cd23d1f400097eede9f1547f508 Mon Sep 17 00:00:00 2001 From: Jeremy Katz Date: Tue, 23 Sep 2008 21:54:00 -0400 Subject: x86: disable apm on the olpc The OLPC doesn't support APM but also doesn't have DMI, so we can't detect and disable it based on DMI data. So, just disable based on machine_is_olpc() Signed-off-by: Jeremy Katz Signed-off-by: Ingo Molnar --- arch/x86/kernel/apm_32.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c index 9ee24e6bc4b..732d1f4e10e 100644 --- a/arch/x86/kernel/apm_32.c +++ b/arch/x86/kernel/apm_32.c @@ -234,6 +234,7 @@ #include #include #include +#include #include #include @@ -2217,7 +2218,7 @@ static int __init apm_init(void) dmi_check_system(apm_dmi_table); - if (apm_info.bios.version == 0 || paravirt_enabled()) { + if (apm_info.bios.version == 0 || paravirt_enabled() || machine_is_olpc()) { printk(KERN_INFO "apm: BIOS not found.\n"); return -ENODEV; } -- cgit v1.2.3 From 5fd933303bd1efacbd0acbe452ba9b889440eb40 Mon Sep 17 00:00:00 2001 From: Hiroshi Shimamoto Date: Tue, 23 Sep 2008 17:19:44 -0700 Subject: x86: signal: cosmetic unification of do_signal() Make do_signal() same. Thia patch modifies only comments in signal_64.c. Signed-off-by: Hiroshi Shimamoto Signed-off-by: Ingo Molnar --- arch/x86/kernel/signal_64.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/signal_64.c b/arch/x86/kernel/signal_64.c index bf77d4789a2..5a5fbc3b1ee 100644 --- a/arch/x86/kernel/signal_64.c +++ b/arch/x86/kernel/signal_64.c @@ -410,7 +410,8 @@ static void do_signal(struct pt_regs *regs) signr = get_signal_to_deliver(&info, &ka, regs, NULL); if (signr > 0) { - /* Re-enable any watchpoints before delivering the + /* + * Re-enable any watchpoints before delivering the * signal to user space. The processor register will * have been cleared if the watchpoint triggered * inside the kernel. @@ -418,7 +419,7 @@ static void do_signal(struct pt_regs *regs) if (current->thread.debugreg7) set_debugreg(current->thread.debugreg7, 7); - /* Whee! Actually deliver the signal. */ + /* Whee! Actually deliver the signal. */ if (handle_signal(signr, &info, &ka, oldset, regs) == 0) { /* * A signal was successfully delivered; the saved @@ -441,6 +442,7 @@ static void do_signal(struct pt_regs *regs) regs->ax = regs->orig_ax; regs->ip -= 2; break; + case -ERESTART_RESTARTBLOCK: regs->ax = NR_restart_syscall; regs->ip -= 2; -- cgit v1.2.3 From ee847c54ba7fc09f85f13a5bf18f45ea6c19aa83 Mon Sep 17 00:00:00 2001 From: Hiroshi Shimamoto Date: Tue, 23 Sep 2008 17:21:45 -0700 Subject: x86: signal: cosmetic unification of do_notify_resume() Make do_notify_resume() same. Signed-off-by: Hiroshi Shimamoto Signed-off-by: Ingo Molnar --- arch/x86/kernel/signal_32.c | 8 ++++++++ arch/x86/kernel/signal_64.c | 16 ++++++++++++---- 2 files changed, 20 insertions(+), 4 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/signal_32.c b/arch/x86/kernel/signal_32.c index da3cf3270f8..b94463f264b 100644 --- a/arch/x86/kernel/signal_32.c +++ b/arch/x86/kernel/signal_32.c @@ -663,6 +663,12 @@ static void do_signal(struct pt_regs *regs) void do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags) { +#if defined(CONFIG_X86_64) && defined(CONFIG_X86_MCE) + /* notify userspace of pending MCEs */ + if (thread_info_flags & _TIF_MCE_NOTIFY) + mce_notify_user(); +#endif /* CONFIG_X86_64 && CONFIG_X86_MCE */ + /* deal with pending signal delivery */ if (thread_info_flags & _TIF_SIGPENDING) do_signal(regs); @@ -672,7 +678,9 @@ do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags) tracehook_notify_resume(regs); } +#ifdef CONFIG_X86_32 clear_thread_flag(TIF_IRET); +#endif /* CONFIG_X86_32 */ } void signal_fault(struct pt_regs *regs, void __user *frame, char *where) diff --git a/arch/x86/kernel/signal_64.c b/arch/x86/kernel/signal_64.c index 5a5fbc3b1ee..9087752f410 100644 --- a/arch/x86/kernel/signal_64.c +++ b/arch/x86/kernel/signal_64.c @@ -460,14 +460,18 @@ static void do_signal(struct pt_regs *regs) } } -void do_notify_resume(struct pt_regs *regs, void *unused, - __u32 thread_info_flags) +/* + * notification of userspace execution resumption + * - triggered by the TIF_WORK_MASK flags + */ +void +do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags) { -#ifdef CONFIG_X86_MCE +#if defined(CONFIG_X86_64) && defined(CONFIG_X86_MCE) /* notify userspace of pending MCEs */ if (thread_info_flags & _TIF_MCE_NOTIFY) mce_notify_user(); -#endif /* CONFIG_X86_MCE */ +#endif /* CONFIG_X86_64 && CONFIG_X86_MCE */ /* deal with pending signal delivery */ if (thread_info_flags & _TIF_SIGPENDING) @@ -477,6 +481,10 @@ void do_notify_resume(struct pt_regs *regs, void *unused, clear_thread_flag(TIF_NOTIFY_RESUME); tracehook_notify_resume(regs); } + +#ifdef CONFIG_X86_32 + clear_thread_flag(TIF_IRET); +#endif /* CONFIG_X86_32 */ } void signal_fault(struct pt_regs *regs, void __user *frame, char *where) -- cgit v1.2.3 From 86d3237cd1a09136b4fb3a1d73d3c3fd6331cb14 Mon Sep 17 00:00:00 2001 From: Hiroshi Shimamoto Date: Tue, 23 Sep 2008 17:22:32 -0700 Subject: x86: signal: cosmetic unification of handle_signal() Make handle_signal() same. Signed-off-by: Hiroshi Shimamoto Signed-off-by: Ingo Molnar --- arch/x86/kernel/signal_32.c | 9 +++++++++ arch/x86/kernel/signal_64.c | 2 ++ 2 files changed, 11 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/signal_32.c b/arch/x86/kernel/signal_32.c index b94463f264b..bb05917f232 100644 --- a/arch/x86/kernel/signal_32.c +++ b/arch/x86/kernel/signal_32.c @@ -550,6 +550,15 @@ handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka, if (ret) return ret; +#ifdef CONFIG_X86_64 + /* + * This has nothing to do with segment registers, + * despite the name. This magic affects uaccess.h + * macros' behavior. Reset it to the normal setting. + */ + set_fs(USER_DS); +#endif + /* * Clear the direction flag as per the ABI for function entry. */ diff --git a/arch/x86/kernel/signal_64.c b/arch/x86/kernel/signal_64.c index 9087752f410..963236f2c3c 100644 --- a/arch/x86/kernel/signal_64.c +++ b/arch/x86/kernel/signal_64.c @@ -346,12 +346,14 @@ handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka, if (ret) return ret; +#ifdef CONFIG_X86_64 /* * This has nothing to do with segment registers, * despite the name. This magic affects uaccess.h * macros' behavior. Reset it to the normal setting. */ set_fs(USER_DS); +#endif /* * Clear the direction flag as per the ABI for function entry. -- cgit v1.2.3 From 493cd9122af5bd0b219974a48f0e31da0c29ff7e Mon Sep 17 00:00:00 2001 From: Harvey Harrison Date: Tue, 23 Sep 2008 14:56:44 -0700 Subject: x86: ds.c ptrace.c integer as NULL pointer sparse fixes fix: arch/x86/kernel/ptrace.c:763:29: warning: Using plain integer as NULL pointer arch/x86/kernel/ptrace.c:777:46: warning: Using plain integer as NULL pointer arch/x86/kernel/ptrace.c:1115:45: warning: Using plain integer as NULL pointer arch/x86/kernel/ds.c:482:26: warning: Using plain integer as NULL pointer arch/x86/kernel/ds.c:487:25: warning: Using plain integer as NULL pointer Signed-off-by: Harvey Harrison Acked-by: Cyrill Gorcunov Signed-off-by: Ingo Molnar --- arch/x86/kernel/ds.c | 4 ++-- arch/x86/kernel/ptrace.c | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/ds.c b/arch/x86/kernel/ds.c index ab21c270bfa..2b69994fd3a 100644 --- a/arch/x86/kernel/ds.c +++ b/arch/x86/kernel/ds.c @@ -479,12 +479,12 @@ static int ds_release(struct task_struct *task, enum ds_qualifier qual) goto out; kfree(context->buffer[qual]); - context->buffer[qual] = 0; + context->buffer[qual] = NULL; current->mm->total_vm -= context->pages[qual]; current->mm->locked_vm -= context->pages[qual]; context->pages[qual] = 0; - context->owner[qual] = 0; + context->owner[qual] = NULL; /* * we put the context twice: diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index ba19bb49bd0..5df6093ac77 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -734,7 +734,7 @@ static int ptrace_bts_config(struct task_struct *child, goto errout; if (cfg.flags & PTRACE_BTS_O_ALLOC) { - ds_ovfl_callback_t ovfl = 0; + ds_ovfl_callback_t ovfl = NULL; unsigned int sig = 0; /* we ignore the error in case we were not tracing child */ @@ -748,7 +748,7 @@ static int ptrace_bts_config(struct task_struct *child, ovfl = ptrace_bts_ovfl; } - error = ds_request_bts(child, /* base = */ 0, cfg.size, ovfl); + error = ds_request_bts(child, /* base = */ NULL, cfg.size, ovfl); if (error < 0) goto errout; @@ -1086,7 +1086,7 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data) break; case PTRACE_BTS_SIZE: - ret = ds_get_bts_index(child, /* pos = */ 0); + ret = ds_get_bts_index(child, /* pos = */ NULL); break; case PTRACE_BTS_GET: -- cgit v1.2.3 From e51a1ac2dfca9ad869471e88f828281db7e810c0 Mon Sep 17 00:00:00 2001 From: Harvey Harrison Date: Tue, 23 Sep 2008 15:20:09 -0700 Subject: x86, olpc: fix endian bug in openfirmware workaround Boardrev is always treated as a u32 everywhere else, no reason to byteswap the 0xc2 value. The only use is to print out if it is a prerelease board, the test being: (olpc_platform_info.boardrev & 0xf) < 8 Which is currently always true as be32_to_cpu(0xc2) & 0xf = 0 but I doubt that was the intention here. The consequences of the bug are pretty minor though (incorrect boardrev displayed in dmesg when ofw support not configured) Also annotate the temporary used to read the boardrev in the ofw case. The confusion was noticed by Sparse: arch/x86/kernel/olpc.c:206:32: warning: cast to restricted __be32 Signed-off-by: Harvey Harrison Signed-off-by: Ingo Molnar --- arch/x86/kernel/olpc.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/olpc.c b/arch/x86/kernel/olpc.c index 3e667227480..7a13fac63a1 100644 --- a/arch/x86/kernel/olpc.c +++ b/arch/x86/kernel/olpc.c @@ -190,12 +190,12 @@ EXPORT_SYMBOL_GPL(olpc_ec_cmd); static void __init platform_detect(void) { size_t propsize; - u32 rev; + __be32 rev; if (ofw("getprop", 4, 1, NULL, "board-revision-int", &rev, 4, &propsize) || propsize != 4) { printk(KERN_ERR "ofw: getprop call failed!\n"); - rev = 0; + rev = cpu_to_be32(0); } olpc_platform_info.boardrev = be32_to_cpu(rev); } @@ -203,7 +203,7 @@ static void __init platform_detect(void) static void __init platform_detect(void) { /* stopgap until OFW support is added to the kernel */ - olpc_platform_info.boardrev = be32_to_cpu(0xc2); + olpc_platform_info.boardrev = 0xc2; } #endif -- cgit v1.2.3 From 2f9284e4e3be7b0b4d8d0638f9805603069a762d Mon Sep 17 00:00:00 2001 From: Dmitry Adamushko Date: Tue, 23 Sep 2008 22:56:35 +0200 Subject: x86, microcode_amd: cleanup, mark request_microcode_user() as unsupported (1) mark mc_size in generic_load_microcode() as unitialized_var to avoid gcc's (false) warning; (2) mark request_microcode_user() as unsupported. The required changes can be added later. Note, we don't break any user-space interfaces here, as there were no kernels with support for AMD-specific ucode update yet. The ucode has to be updated via 'firmware'. Signed-off-by: Dmitry Adamushko Signed-off-by: Ingo Molnar --- arch/x86/kernel/microcode_amd.c | 36 ++++-------------------------------- 1 file changed, 4 insertions(+), 32 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/microcode_amd.c b/arch/x86/kernel/microcode_amd.c index 829415208ff..7a1f8eeac2c 100644 --- a/arch/x86/kernel/microcode_amd.c +++ b/arch/x86/kernel/microcode_amd.c @@ -120,29 +120,7 @@ static int get_matching_microcode(int cpu, void *mc, int rev) unsigned int equiv_cpu_id = 0x00; unsigned int i = 0; - /* - * FIXME! dimm: do we need this? Why an update via /dev/... is different - * from the one via firmware? - * - * This is a tricky part. We might be called from a write operation - * to the device file instead of the usual process of firmware - * loading. This routine needs to be able to distinguish both - * cases. This is done by checking if there alread is a equivalent - * CPU table installed. If not, we're written through - * /dev/cpu/microcode. - * Since we ignore all checks. The error case in which going through - * firmware loading and that table is not loaded has already been - * checked earlier. - */ BUG_ON(equiv_cpu_table == NULL); -#if 0 - if (equiv_cpu_table == NULL) { - printk(KERN_INFO "microcode: CPU%d microcode update with " - "version 0x%x (current=0x%x)\n", - cpu, mc_header->patch_id, uci->cpu_sig.rev); - goto out; - } -#endif current_cpu_id = cpuid_eax(0x00000001); while (equiv_cpu_table[i].installed_cpu != 0) { @@ -362,7 +340,7 @@ static int generic_load_microcode(int cpu, void *data, size_t size, leftover = size - offset; while (leftover) { - unsigned int mc_size; + unsigned int uninitialized_var(mc_size); struct microcode_header_amd *mc_header; mc = get_next_ucode(ucode_ptr, leftover, get_ucode_data, &mc_size); @@ -428,17 +406,11 @@ static int request_microcode_fw(int cpu, struct device *device) return ret; } -static int get_ucode_user(void *to, const void *from, size_t n) -{ - return copy_from_user(to, from, n); -} - static int request_microcode_user(int cpu, const void __user *buf, size_t size) { - /* We should bind the task to the CPU */ - BUG_ON(cpu != raw_smp_processor_id()); - - return generic_load_microcode(cpu, (void*)buf, size, &get_ucode_user); + printk(KERN_WARNING "microcode: AMD microcode update via /dev/cpu/microcode" + "is not supported\n"); + return -1; } static void microcode_fini_cpu_amd(int cpu) -- cgit v1.2.3 From 82b078659ed04e1ecdebf8326e189cf76ed361af Mon Sep 17 00:00:00 2001 From: Peter Oruba Date: Wed, 24 Sep 2008 11:50:35 +0200 Subject: x86: microcode patch loader bugfix Corrected CPU vendor check condition for AMD microcode patch loader initialization. Signed-off-by: Peter Oruba Signed-off-by: Ingo Molnar --- arch/x86/kernel/microcode_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/microcode_core.c b/arch/x86/kernel/microcode_core.c index ff031dbccdf..8db2eb55e9e 100644 --- a/arch/x86/kernel/microcode_core.c +++ b/arch/x86/kernel/microcode_core.c @@ -450,7 +450,7 @@ static int __init microcode_init(void) if (c->x86_vendor == X86_VENDOR_INTEL) microcode_ops = init_intel_microcode(); - else if (c->x86_vendor != X86_VENDOR_AMD) + else if (c->x86_vendor == X86_VENDOR_AMD) microcode_ops = init_amd_microcode(); if (!microcode_ops) { -- cgit v1.2.3 From 8d8c13bdb53977319593d9efc4971a4cacc8bd03 Mon Sep 17 00:00:00 2001 From: Hiroshi Shimamoto Date: Wed, 24 Sep 2008 19:10:29 -0700 Subject: x86: signal_32.c: introduce signr_convert() Introduce signr_convert(). This function will help unification of setup_rt_frame(). Signed-off-by: Hiroshi Shimamoto Signed-off-by: Ingo Molnar --- arch/x86/kernel/signal_32.c | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/signal_32.c b/arch/x86/kernel/signal_32.c index bb05917f232..b1bc90f19b9 100644 --- a/arch/x86/kernel/signal_32.c +++ b/arch/x86/kernel/signal_32.c @@ -482,18 +482,21 @@ static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, /* * OK, we're invoking a handler: */ +static int signr_convert(int sig) +{ + struct thread_info *info = current_thread_info(); + + if (info->exec_domain && info->exec_domain->signal_invmap && sig < 32) + return info->exec_domain->signal_invmap[sig]; + return sig; +} + static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, sigset_t *set, struct pt_regs *regs) { + int usig = signr_convert(sig); int ret; - int usig; - - usig = current_thread_info()->exec_domain - && current_thread_info()->exec_domain->signal_invmap - && sig < 32 - ? current_thread_info()->exec_domain->signal_invmap[sig] - : sig; /* Set up the stack frame */ if (ka->sa.sa_flags & SA_SIGINFO) -- cgit v1.2.3 From b94fd69827b2104a2a4d9d0bc05a8e908a937ae8 Mon Sep 17 00:00:00 2001 From: Hiroshi Shimamoto Date: Wed, 24 Sep 2008 19:12:54 -0700 Subject: x86: signal_64.c: introduce helper function signr_convert() This helper function is for unification of setup_rt_frame(). No effect in binary. Signed-off-by: Hiroshi Shimamoto Signed-off-by: Ingo Molnar --- arch/x86/kernel/signal_64.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/signal_64.c b/arch/x86/kernel/signal_64.c index 963236f2c3c..c5d80024a8f 100644 --- a/arch/x86/kernel/signal_64.c +++ b/arch/x86/kernel/signal_64.c @@ -281,18 +281,24 @@ static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, /* * OK, we're invoking a handler */ +static int signr_convert(int sig) +{ + return sig; +} + static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, sigset_t *set, struct pt_regs *regs) { + int usig = signr_convert(sig); int ret; #ifdef CONFIG_IA32_EMULATION if (test_thread_flag(TIF_IA32)) { if (ka->sa.sa_flags & SA_SIGINFO) - ret = ia32_setup_rt_frame(sig, ka, info, set, regs); + ret = ia32_setup_rt_frame(usig, ka, info, set, regs); else - ret = ia32_setup_frame(sig, ka, set, regs); + ret = ia32_setup_frame(usig, ka, set, regs); } else #endif ret = __setup_rt_frame(sig, ka, info, set, regs); -- cgit v1.2.3 From 455edbc423db282bb64dec2d7c8968498ea8e619 Mon Sep 17 00:00:00 2001 From: Hiroshi Shimamoto Date: Wed, 24 Sep 2008 19:13:11 -0700 Subject: x86: signal: introduce helper macro is_ia32 Introduce new macro is_ia32 for unification of setup_rt_frame(). No effect in binary, compiler will optimize. Signed-off-by: Hiroshi Shimamoto Signed-off-by: Ingo Molnar --- arch/x86/kernel/signal_32.c | 13 +++++++++---- arch/x86/kernel/signal_64.c | 13 +++++++++---- 2 files changed, 18 insertions(+), 8 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/signal_32.c b/arch/x86/kernel/signal_32.c index b1bc90f19b9..cf62f70cc2a 100644 --- a/arch/x86/kernel/signal_32.c +++ b/arch/x86/kernel/signal_32.c @@ -491,6 +491,8 @@ static int signr_convert(int sig) return sig; } +#define is_ia32 1 + static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, sigset_t *set, struct pt_regs *regs) @@ -499,10 +501,13 @@ setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, int ret; /* Set up the stack frame */ - if (ka->sa.sa_flags & SA_SIGINFO) - ret = __setup_rt_frame(usig, ka, info, set, regs); - else - ret = __setup_frame(usig, ka, set, regs); + if (is_ia32) { + if (ka->sa.sa_flags & SA_SIGINFO) + ret = __setup_rt_frame(usig, ka, info, set, regs); + else + ret = __setup_frame(usig, ka, set, regs); + } else + ret = __setup_rt_frame(sig, ka, info, set, regs); if (ret) { force_sigsegv(sig, current); diff --git a/arch/x86/kernel/signal_64.c b/arch/x86/kernel/signal_64.c index c5d80024a8f..53f86d95985 100644 --- a/arch/x86/kernel/signal_64.c +++ b/arch/x86/kernel/signal_64.c @@ -286,6 +286,12 @@ static int signr_convert(int sig) return sig; } +#ifdef CONFIG_IA32_EMULATION +#define is_ia32 test_thread_flag(TIF_IA32) +#else +#define is_ia32 0 +#endif + static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, sigset_t *set, struct pt_regs *regs) @@ -293,15 +299,14 @@ setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, int usig = signr_convert(sig); int ret; -#ifdef CONFIG_IA32_EMULATION - if (test_thread_flag(TIF_IA32)) { + /* Set up the stack frame */ + if (is_ia32) { if (ka->sa.sa_flags & SA_SIGINFO) ret = ia32_setup_rt_frame(usig, ka, info, set, regs); else ret = ia32_setup_frame(usig, ka, set, regs); } else -#endif - ret = __setup_rt_frame(sig, ka, info, set, regs); + ret = __setup_rt_frame(sig, ka, info, set, regs); if (ret) { force_sigsegv(sig, current); -- cgit v1.2.3 From 4694d2391221e14fe671602fe34ea7f24f5a561f Mon Sep 17 00:00:00 2001 From: Hiroshi Shimamoto Date: Wed, 24 Sep 2008 19:13:29 -0700 Subject: x86: signal_32.c: introduce macro ia32_setup_frame and ia32_setup_rt_frame Make 32-bit setup_rt_frame() look like 64-bit version for unification. Signed-off-by: Hiroshi Shimamoto Signed-off-by: Ingo Molnar --- arch/x86/kernel/signal_32.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/signal_32.c b/arch/x86/kernel/signal_32.c index cf62f70cc2a..4337cd510f0 100644 --- a/arch/x86/kernel/signal_32.c +++ b/arch/x86/kernel/signal_32.c @@ -492,6 +492,8 @@ static int signr_convert(int sig) } #define is_ia32 1 +#define ia32_setup_frame __setup_frame +#define ia32_setup_rt_frame __setup_rt_frame static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, @@ -503,9 +505,9 @@ setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, /* Set up the stack frame */ if (is_ia32) { if (ka->sa.sa_flags & SA_SIGINFO) - ret = __setup_rt_frame(usig, ka, info, set, regs); + ret = ia32_setup_rt_frame(usig, ka, info, set, regs); else - ret = __setup_frame(usig, ka, set, regs); + ret = ia32_setup_frame(usig, ka, set, regs); } else ret = __setup_rt_frame(sig, ka, info, set, regs); -- cgit v1.2.3 From 9f6ac57729724b58df81ca5dc005326759a806fe Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Wed, 24 Sep 2008 20:48:35 +0900 Subject: x86: export pci-nommu's alloc_coherent This patch exports nommu_alloc_coherent (renamed dma_generic_alloc_coherent). GART needs this function. Signed-off-by: FUJITA Tomonori Signed-off-by: Ingo Molnar --- arch/x86/kernel/pci-dma.c | 31 +++++++++++++++++++++++++++++++ arch/x86/kernel/pci-nommu.c | 39 +-------------------------------------- 2 files changed, 32 insertions(+), 38 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index 0a1408abcc6..4e612d20170 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c @@ -134,6 +134,37 @@ unsigned long iommu_num_pages(unsigned long addr, unsigned long len) EXPORT_SYMBOL(iommu_num_pages); #endif +void *dma_generic_alloc_coherent(struct device *dev, size_t size, + dma_addr_t *dma_addr, gfp_t flag) +{ + unsigned long dma_mask; + struct page *page; + dma_addr_t addr; + + dma_mask = dma_alloc_coherent_mask(dev, flag); + + flag |= __GFP_ZERO; +again: + page = alloc_pages_node(dev_to_node(dev), flag, get_order(size)); + if (!page) + return NULL; + + addr = page_to_phys(page); + if (!is_buffer_dma_capable(dma_mask, addr, size)) { + __free_pages(page, get_order(size)); + + if (dma_mask < DMA_32BIT_MASK && !(flag & GFP_DMA)) { + flag = (flag & ~GFP_DMA32) | GFP_DMA; + goto again; + } + + return NULL; + } + + *dma_addr = addr; + return page_address(page); +} + /* * See for the iommu kernel parameter * documentation. diff --git a/arch/x86/kernel/pci-nommu.c b/arch/x86/kernel/pci-nommu.c index 1c1c98a31d5..c70ab5a5d4c 100644 --- a/arch/x86/kernel/pci-nommu.c +++ b/arch/x86/kernel/pci-nommu.c @@ -72,43 +72,6 @@ static int nommu_map_sg(struct device *hwdev, struct scatterlist *sg, return nents; } -static void * -nommu_alloc_coherent(struct device *hwdev, size_t size, - dma_addr_t *dma_addr, gfp_t gfp) -{ - unsigned long dma_mask; - int node; - struct page *page; - dma_addr_t addr; - - dma_mask = dma_alloc_coherent_mask(hwdev, gfp); - - gfp |= __GFP_ZERO; - - node = dev_to_node(hwdev); -again: - page = alloc_pages_node(node, gfp, get_order(size)); - if (!page) - return NULL; - - addr = page_to_phys(page); - if (!is_buffer_dma_capable(dma_mask, addr, size) && !(gfp & GFP_DMA)) { - free_pages((unsigned long)page_address(page), get_order(size)); - gfp |= GFP_DMA; - goto again; - } - - if (check_addr("alloc_coherent", hwdev, addr, size)) { - *dma_addr = addr; - flush_write_buffers(); - return page_address(page); - } - - free_pages((unsigned long)page_address(page), get_order(size)); - - return NULL; -} - static void nommu_free_coherent(struct device *dev, size_t size, void *vaddr, dma_addr_t dma_addr) { @@ -116,7 +79,7 @@ static void nommu_free_coherent(struct device *dev, size_t size, void *vaddr, } struct dma_mapping_ops nommu_dma_ops = { - .alloc_coherent = nommu_alloc_coherent, + .alloc_coherent = dma_generic_alloc_coherent, .free_coherent = nommu_free_coherent, .map_single = nommu_map_single, .map_sg = nommu_map_sg, -- cgit v1.2.3 From ecef533ea68b2fb3baaf459beb2f802a240bdb16 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Wed, 24 Sep 2008 20:48:36 +0900 Subject: revert "x86: make GART to respect device's dma_mask about virtual mappings" This reverts: commit bee44f294efd8417f5e68553778a6cc957af1547 Author: FUJITA Tomonori Date: Fri Sep 12 19:42:35 2008 +0900 x86: make GART to respect device's dma_mask about virtual mappings I wrote the above commit to fix a GART alloc_coherent regression, that can't handle a device having dma_masks > 24bit < 32bits, introduced by the alloc_coherent rewrite: http://lkml.org/lkml/2008/8/12/200 After the alloc_coherent rewrite, GART alloc_coherent tried to allocate pages with GFP_DMA32. If GART got an address that a device can't access to, GART mapped the address to a virtual I/O address. But GART mapping mechanism didn't take account of dma mask, so GART could use a virtual I/O address that the device can't access to again. Alan pointed out: " This is indeed a specific problem found with things like older AACRAID where control blocks must be below 31bits and the GART is above 0x80000000. " The above commit modified GART mapping mechanism to take care of dma mask. But Andi pointed out, "The GART is somewhere in the 4GB range so you cannot use it to map anything < 4GB. Also GART is pretty small." http://lkml.org/lkml/2008/9/12/43 That means it's possible that GART doesn't have virtual I/O address space that a device can access to. The above commit (to modify GART mapping mechanism to take care of dma mask) can't fix the regression reliably so let's avoid making GART more complicated. We need a solution that always works for dma_masks > 24bit < 32bits. That's how GART worked before the alloc_coherent rewrite. Signed-off-by: FUJITA Tomonori Acked-by: Joerg Roedel Acked-by: Alan Cox Signed-off-by: Ingo Molnar --- arch/x86/kernel/pci-gart_64.c | 39 +++++++++++---------------------------- 1 file changed, 11 insertions(+), 28 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c index 9e390f1bd46..7e08e466b8a 100644 --- a/arch/x86/kernel/pci-gart_64.c +++ b/arch/x86/kernel/pci-gart_64.c @@ -83,34 +83,23 @@ static unsigned long next_bit; /* protected by iommu_bitmap_lock */ static int need_flush; /* global flush state. set for each gart wrap */ static unsigned long alloc_iommu(struct device *dev, int size, - unsigned long align_mask, u64 dma_mask) + unsigned long align_mask) { unsigned long offset, flags; unsigned long boundary_size; unsigned long base_index; - unsigned long limit; base_index = ALIGN(iommu_bus_base & dma_get_seg_boundary(dev), PAGE_SIZE) >> PAGE_SHIFT; boundary_size = ALIGN((unsigned long long)dma_get_seg_boundary(dev) + 1, PAGE_SIZE) >> PAGE_SHIFT; - limit = iommu_device_max_index(iommu_pages, - DIV_ROUND_UP(iommu_bus_base, PAGE_SIZE), - dma_mask >> PAGE_SHIFT); - spin_lock_irqsave(&iommu_bitmap_lock, flags); - - if (limit <= next_bit) { - need_flush = 1; - next_bit = 0; - } - - offset = iommu_area_alloc(iommu_gart_bitmap, limit, next_bit, + offset = iommu_area_alloc(iommu_gart_bitmap, iommu_pages, next_bit, size, base_index, boundary_size, align_mask); - if (offset == -1 && next_bit) { + if (offset == -1) { need_flush = 1; - offset = iommu_area_alloc(iommu_gart_bitmap, limit, 0, + offset = iommu_area_alloc(iommu_gart_bitmap, iommu_pages, 0, size, base_index, boundary_size, align_mask); } @@ -239,14 +228,12 @@ nonforced_iommu(struct device *dev, unsigned long addr, size_t size) * Caller needs to check if the iommu is needed and flush. */ static dma_addr_t dma_map_area(struct device *dev, dma_addr_t phys_mem, - size_t size, int dir, unsigned long align_mask, - u64 dma_mask) + size_t size, int dir, unsigned long align_mask) { unsigned long npages = iommu_num_pages(phys_mem, size); - unsigned long iommu_page; + unsigned long iommu_page = alloc_iommu(dev, npages, align_mask); int i; - iommu_page = alloc_iommu(dev, npages, align_mask, dma_mask); if (iommu_page == -1) { if (!nonforced_iommu(dev, phys_mem, size)) return phys_mem; @@ -276,7 +263,7 @@ gart_map_single(struct device *dev, phys_addr_t paddr, size_t size, int dir) if (!need_iommu(dev, paddr, size)) return paddr; - bus = dma_map_area(dev, paddr, size, dir, 0, dma_get_mask(dev)); + bus = dma_map_area(dev, paddr, size, dir, 0); flush_gart(); return bus; @@ -327,7 +314,6 @@ static int dma_map_sg_nonforce(struct device *dev, struct scatterlist *sg, { struct scatterlist *s; int i; - u64 dma_mask = dma_get_mask(dev); #ifdef CONFIG_IOMMU_DEBUG printk(KERN_DEBUG "dma_map_sg overflow\n"); @@ -337,8 +323,7 @@ static int dma_map_sg_nonforce(struct device *dev, struct scatterlist *sg, unsigned long addr = sg_phys(s); if (nonforced_iommu(dev, addr, s->length)) { - addr = dma_map_area(dev, addr, s->length, dir, 0, - dma_mask); + addr = dma_map_area(dev, addr, s->length, dir, 0); if (addr == bad_dma_address) { if (i > 0) gart_unmap_sg(dev, sg, i, dir); @@ -360,16 +345,14 @@ static int __dma_map_cont(struct device *dev, struct scatterlist *start, int nelems, struct scatterlist *sout, unsigned long pages) { - unsigned long iommu_start; - unsigned long iommu_page; + unsigned long iommu_start = alloc_iommu(dev, pages, 0); + unsigned long iommu_page = iommu_start; struct scatterlist *s; int i; - iommu_start = alloc_iommu(dev, pages, 0, dma_get_mask(dev)); if (iommu_start == -1) return -1; - iommu_page = iommu_start; for_each_sg(start, s, nelems, i) { unsigned long pages, addr; unsigned long phys_addr = s->dma_address; @@ -522,7 +505,7 @@ gart_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_addr, align_mask = (1UL << get_order(size)) - 1; *dma_addr = dma_map_area(dev, paddr, size, DMA_BIDIRECTIONAL, - align_mask, dma_mask); + align_mask); flush_gart(); if (*dma_addr != bad_dma_address) -- cgit v1.2.3 From 1d990882153f36723f9e8717c4401689e64c7a36 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Wed, 24 Sep 2008 20:48:37 +0900 Subject: x86: restore old GART alloc_coherent behavior Currently, GART alloc_coherent tries to allocate pages with GFP_DMA32 for a device having dma_masks > 24bit < 32bits. If GART gets an address that a device can't access to, GART try to map the address to a virtual I/O address that the device can access to. But Andi pointed out, "The GART is somewhere in the 4GB range so you cannot use it to map anything < 4GB. Also GART is pretty small." http://lkml.org/lkml/2008/9/12/43 That is, it's possible that GART doesn't have virtual I/O address space that a device can access to. The above behavior doesn't work for a device having dma_masks > 24bit < 32bits. This patch restores old GART alloc_coherent behavior (before the alloc_coherent rewrite). Signed-off-by: FUJITA Tomonori Signed-off-by: Ingo Molnar --- arch/x86/kernel/pci-gart_64.c | 43 ++++++++++++++++++++----------------------- 1 file changed, 20 insertions(+), 23 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c index 7e08e466b8a..25c94fb96d7 100644 --- a/arch/x86/kernel/pci-gart_64.c +++ b/arch/x86/kernel/pci-gart_64.c @@ -487,31 +487,28 @@ static void * gart_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_addr, gfp_t flag) { - void *vaddr; dma_addr_t paddr; unsigned long align_mask; - u64 dma_mask = dma_alloc_coherent_mask(dev, flag); - - vaddr = (void *)__get_free_pages(flag | __GFP_ZERO, get_order(size)); - if (!vaddr) - return NULL; - - paddr = virt_to_phys(vaddr); - if (is_buffer_dma_capable(dma_mask, paddr, size)) { - *dma_addr = paddr; - return vaddr; - } - - align_mask = (1UL << get_order(size)) - 1; - - *dma_addr = dma_map_area(dev, paddr, size, DMA_BIDIRECTIONAL, - align_mask); - flush_gart(); - - if (*dma_addr != bad_dma_address) - return vaddr; - - free_pages((unsigned long)vaddr, get_order(size)); + struct page *page; + + if (force_iommu && !(flag & GFP_DMA)) { + flag &= ~(__GFP_DMA | __GFP_HIGHMEM | __GFP_DMA32); + page = alloc_pages(flag | __GFP_ZERO, get_order(size)); + if (!page) + return NULL; + + align_mask = (1UL << get_order(size)) - 1; + paddr = dma_map_area(dev, page_to_phys(page), size, + DMA_BIDIRECTIONAL, align_mask); + + flush_gart(); + if (paddr != bad_dma_address) { + *dma_addr = paddr; + return page_address(page); + } + __free_pages(page, get_order(size)); + } else + return dma_generic_alloc_coherent(dev, size, dma_addr, flag); return NULL; } -- cgit v1.2.3 From 1615965e54eb94d7bcd298d2163739bd79f602d4 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Wed, 24 Sep 2008 22:41:10 +0900 Subject: x86 gart: remove unnecessary initialization There is no point to have such initialization in struct dma_mapping_ops. Signed-off-by: FUJITA Tomonori Signed-off-by: Ingo Molnar --- arch/x86/kernel/pci-gart_64.c | 6 ------ 1 file changed, 6 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c index 25c94fb96d7..b85a2f9bb34 100644 --- a/arch/x86/kernel/pci-gart_64.c +++ b/arch/x86/kernel/pci-gart_64.c @@ -715,12 +715,6 @@ extern int agp_amd64_init(void); static struct dma_mapping_ops gart_dma_ops = { .map_single = gart_map_single, .unmap_single = gart_unmap_single, - .sync_single_for_cpu = NULL, - .sync_single_for_device = NULL, - .sync_single_range_for_cpu = NULL, - .sync_single_range_for_device = NULL, - .sync_sg_for_cpu = NULL, - .sync_sg_for_device = NULL, .map_sg = gart_map_sg, .unmap_sg = gart_unmap_sg, .alloc_coherent = gart_alloc_coherent, -- cgit v1.2.3 From f6476774f1fe32593d3d71903b1e98514efbf685 Mon Sep 17 00:00:00 2001 From: Bill Nottingham Date: Wed, 24 Sep 2008 14:35:17 -0400 Subject: x86_64: be less annoying on boot Remove mostly useless message on every boot. Signed-off-by: Bill Nottingham Signed-off-by: Ingo Molnar --- arch/x86/kernel/head64.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index 9bfc4d72fb2..11aa501c9f4 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c @@ -112,8 +112,6 @@ void __init x86_64_start_kernel(char * real_mode_data) x86_64_init_pda(); - early_printk("Kernel really alive\n"); - x86_64_start_reservations(real_mode_data); } -- cgit v1.2.3 From d7161a65341556bacb5e6654e133803f46f51063 Mon Sep 17 00:00:00 2001 From: Jason Wessel Date: Fri, 26 Sep 2008 10:36:41 -0500 Subject: kgdb, x86, arm, mips, powerpc: ignore user space single stepping On the x86 arch, user space single step exceptions should be ignored if they occur in the kernel space, such as ptrace stepping through a system call. First check if it is kgdb that is executing a single step, then ensure it is not an accidental traversal into the user space, while in kgdb, any other time the TIF_SINGLESTEP is set, kgdb should ignore the exception. On x86, arm, mips and powerpc, the kgdb_contthread usage was inconsistent with the way single stepping is implemented in the kgdb core. The arch specific stub should always set the kgdb_cpu_doing_single_step correctly if it is single stepping. This allows kgdb to correctly process an instruction steps if ptrace happens to be requesting an instruction step over a system call. Signed-off-by: Jason Wessel --- arch/x86/kernel/kgdb.c | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c index f47f0eb886b..00f7896c9a1 100644 --- a/arch/x86/kernel/kgdb.c +++ b/arch/x86/kernel/kgdb.c @@ -378,10 +378,8 @@ int kgdb_arch_handle_exception(int e_vector, int signo, int err_code, if (remcomInBuffer[0] == 's') { linux_regs->flags |= X86_EFLAGS_TF; kgdb_single_step = 1; - if (kgdb_contthread) { - atomic_set(&kgdb_cpu_doing_single_step, - raw_smp_processor_id()); - } + atomic_set(&kgdb_cpu_doing_single_step, + raw_smp_processor_id()); } get_debugreg(dr6, 6); @@ -466,9 +464,15 @@ static int __kgdb_notify(struct die_args *args, unsigned long cmd) case DIE_DEBUG: if (atomic_read(&kgdb_cpu_doing_single_step) == - raw_smp_processor_id() && - user_mode(regs)) - return single_step_cont(regs, args); + raw_smp_processor_id()) { + if (user_mode(regs)) + return single_step_cont(regs, args); + break; + } else if (test_thread_flag(TIF_SINGLESTEP)) + /* This means a user thread is single stepping + * a system call which should be ignored + */ + return NOTIFY_DONE; /* fall through */ default: if (user_mode(regs)) -- cgit v1.2.3 From 703a1edcd1534468fc18f733c03bd91a65c8c6f0 Mon Sep 17 00:00:00 2001 From: Jason Wessel Date: Fri, 26 Sep 2008 10:36:42 -0500 Subject: kgdb, x86_64: fix PS CS SS registers in gdb serial On x86_64 the gdb serial register structure defines the PS (also known as eflags), CS and SS registers as 4 bytes entities. This patch splits the x86_64 regnames enum into a 32 and 64 version to account for the 32 bit entities in the gdb serial packets. Also the program counter is properly filled in for the sleeping threads. Signed-off-by: Jason Wessel --- arch/x86/kernel/kgdb.c | 25 +++++++++++++++++++++---- 1 file changed, 21 insertions(+), 4 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c index 00f7896c9a1..8282a213968 100644 --- a/arch/x86/kernel/kgdb.c +++ b/arch/x86/kernel/kgdb.c @@ -69,6 +69,9 @@ static int gdb_x86vector = -1; */ void pt_regs_to_gdb_regs(unsigned long *gdb_regs, struct pt_regs *regs) { +#ifndef CONFIG_X86_32 + u32 *gdb_regs32 = (u32 *)gdb_regs; +#endif gdb_regs[GDB_AX] = regs->ax; gdb_regs[GDB_BX] = regs->bx; gdb_regs[GDB_CX] = regs->cx; @@ -76,9 +79,9 @@ void pt_regs_to_gdb_regs(unsigned long *gdb_regs, struct pt_regs *regs) gdb_regs[GDB_SI] = regs->si; gdb_regs[GDB_DI] = regs->di; gdb_regs[GDB_BP] = regs->bp; - gdb_regs[GDB_PS] = regs->flags; gdb_regs[GDB_PC] = regs->ip; #ifdef CONFIG_X86_32 + gdb_regs[GDB_PS] = regs->flags; gdb_regs[GDB_DS] = regs->ds; gdb_regs[GDB_ES] = regs->es; gdb_regs[GDB_CS] = regs->cs; @@ -94,6 +97,9 @@ void pt_regs_to_gdb_regs(unsigned long *gdb_regs, struct pt_regs *regs) gdb_regs[GDB_R13] = regs->r13; gdb_regs[GDB_R14] = regs->r14; gdb_regs[GDB_R15] = regs->r15; + gdb_regs32[GDB_PS] = regs->flags; + gdb_regs32[GDB_CS] = regs->cs; + gdb_regs32[GDB_SS] = regs->ss; #endif gdb_regs[GDB_SP] = regs->sp; } @@ -112,6 +118,9 @@ void pt_regs_to_gdb_regs(unsigned long *gdb_regs, struct pt_regs *regs) */ void sleeping_thread_to_gdb_regs(unsigned long *gdb_regs, struct task_struct *p) { +#ifndef CONFIG_X86_32 + u32 *gdb_regs32 = (u32 *)gdb_regs; +#endif gdb_regs[GDB_AX] = 0; gdb_regs[GDB_BX] = 0; gdb_regs[GDB_CX] = 0; @@ -129,8 +138,10 @@ void sleeping_thread_to_gdb_regs(unsigned long *gdb_regs, struct task_struct *p) gdb_regs[GDB_FS] = 0xFFFF; gdb_regs[GDB_GS] = 0xFFFF; #else - gdb_regs[GDB_PS] = *(unsigned long *)(p->thread.sp + 8); - gdb_regs[GDB_PC] = 0; + gdb_regs32[GDB_PS] = *(unsigned long *)(p->thread.sp + 8); + gdb_regs32[GDB_CS] = __KERNEL_CS; + gdb_regs32[GDB_SS] = __KERNEL_DS; + gdb_regs[GDB_PC] = p->thread.ip; gdb_regs[GDB_R8] = 0; gdb_regs[GDB_R9] = 0; gdb_regs[GDB_R10] = 0; @@ -153,6 +164,9 @@ void sleeping_thread_to_gdb_regs(unsigned long *gdb_regs, struct task_struct *p) */ void gdb_regs_to_pt_regs(unsigned long *gdb_regs, struct pt_regs *regs) { +#ifndef CONFIG_X86_32 + u32 *gdb_regs32 = (u32 *)gdb_regs; +#endif regs->ax = gdb_regs[GDB_AX]; regs->bx = gdb_regs[GDB_BX]; regs->cx = gdb_regs[GDB_CX]; @@ -160,9 +174,9 @@ void gdb_regs_to_pt_regs(unsigned long *gdb_regs, struct pt_regs *regs) regs->si = gdb_regs[GDB_SI]; regs->di = gdb_regs[GDB_DI]; regs->bp = gdb_regs[GDB_BP]; - regs->flags = gdb_regs[GDB_PS]; regs->ip = gdb_regs[GDB_PC]; #ifdef CONFIG_X86_32 + regs->flags = gdb_regs[GDB_PS]; regs->ds = gdb_regs[GDB_DS]; regs->es = gdb_regs[GDB_ES]; regs->cs = gdb_regs[GDB_CS]; @@ -175,6 +189,9 @@ void gdb_regs_to_pt_regs(unsigned long *gdb_regs, struct pt_regs *regs) regs->r13 = gdb_regs[GDB_R13]; regs->r14 = gdb_regs[GDB_R14]; regs->r15 = gdb_regs[GDB_R15]; + regs->flags = gdb_regs32[GDB_PS]; + regs->cs = gdb_regs32[GDB_CS]; + regs->ss = gdb_regs32[GDB_SS]; #endif } -- cgit v1.2.3 From 7fc2368d1d0dce7a778beb2fba3acac8fa7a34b6 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Sat, 27 Sep 2008 00:30:07 -0700 Subject: x86: don't need to go to chunksize to 4G change back chunksize max to 2g otherwise will get strange layout in 2G ram system like 0 - 4g WB, 2040M - 2048M UC, 2048M - 4G NC instead of 0 - 2g WB, 2040M - 2048M UC Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/mtrr/main.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c index b117d7f8a56..cc135572882 100644 --- a/arch/x86/kernel/cpu/mtrr/main.c +++ b/arch/x86/kernel/cpu/mtrr/main.c @@ -1155,10 +1155,10 @@ struct mtrr_cleanup_result { /* * gran_size: 1M, 2M, ..., 2G - * chunk size: gran_size, ..., 4G - * so we need (2+13)*6 + * chunk size: gran_size, ..., 2G + * so we need (1+12)*6 */ -#define NUM_RESULT 90 +#define NUM_RESULT 78 #define PSHIFT (PAGE_SHIFT - 10) static struct mtrr_cleanup_result __initdata result[NUM_RESULT]; @@ -1276,7 +1276,7 @@ static int __init mtrr_cleanup(unsigned address_bits) memset(min_loss_pfn, 0xff, sizeof(min_loss_pfn)); memset(result, 0, sizeof(result)); for (gran_size = (1ULL<<20); gran_size < (1ULL<<32); gran_size <<= 1) { - for (chunk_size = gran_size; chunk_size < (1ULL<<33); + for (chunk_size = gran_size; chunk_size < (1ULL<<32); chunk_size <<= 1) { int num_reg; -- cgit v1.2.3 From 2313c2793d290a8cc37c428f8622c53f3fe1d6dc Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Sat, 27 Sep 2008 00:30:08 -0700 Subject: x86: mtrr_cleanup optimization, v2 fix hpa's t61 with 4g ram: change layout from (n - 1)*chunksize + chunk_size - NC to n*chunksize - NC Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/mtrr/main.c | 79 +++++++++++++++++++---------------------- 1 file changed, 37 insertions(+), 42 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c index cc135572882..93d575ac61a 100644 --- a/arch/x86/kernel/cpu/mtrr/main.c +++ b/arch/x86/kernel/cpu/mtrr/main.c @@ -970,6 +970,8 @@ range_to_mtrr_with_hole(struct var_mtrr_state *state, unsigned long basek, /* try to append some small hole */ range0_basek = state->range_startk; range0_sizek = ALIGN(state->range_sizek, chunk_sizek); + + /* no increase */ if (range0_sizek == state->range_sizek) { if (debug_print) printk(KERN_DEBUG "rangeX: %016lx - %016lx\n", @@ -980,13 +982,13 @@ range_to_mtrr_with_hole(struct var_mtrr_state *state, unsigned long basek, return 0; } - range0_sizek -= chunk_sizek; - if (range0_sizek && sizek) { - while (range0_basek + range0_sizek > (basek + sizek)) { - range0_sizek -= chunk_sizek; - if (!range0_sizek) - break; - } + /* only cut back, when it is not the last */ + if (sizek) { + while (range0_basek + range0_sizek > (basek + sizek)) { + range0_sizek -= chunk_sizek; + if (!range0_sizek) + break; + } } if (range0_sizek) { @@ -1000,46 +1002,39 @@ range_to_mtrr_with_hole(struct var_mtrr_state *state, unsigned long basek, } range_basek = range0_basek + range0_sizek; - range_sizek = chunk_sizek; - if (range_basek + range_sizek > basek && - range_basek + range_sizek <= (basek + sizek)) { - /* one hole */ - second_basek = basek; - second_sizek = range_basek + range_sizek - basek; - } + /* one hole in the middle */ + if (range_basek > basek && range_basek <= (basek + sizek)) + second_sizek = range_basek - basek; - /* if last piece, only could one hole near end */ - if ((second_basek || !basek) && - range_sizek - (state->range_sizek - range0_sizek) - second_sizek < - (chunk_sizek >> 1)) { - /* - * one hole in middle (second_sizek is 0) or at end - * (second_sizek is 0 ) - */ - hole_sizek = range_sizek - (state->range_sizek - range0_sizek) - - second_sizek; - hole_basek = range_basek + range_sizek - hole_sizek - - second_sizek; - } else { - /* fallback for big hole, or several holes */ + if (range0_sizek > state->range_sizek) { + unsigned long hole_basek, hole_sizek; + + /* one hole in middle or at end */ + hole_sizek = range0_sizek - state->range_sizek - second_sizek; + if (hole_sizek) { + hole_basek = range_basek - hole_sizek - second_sizek; + if (debug_print) + printk(KERN_DEBUG "hole: %016lx - %016lx\n", + hole_basek<<10, + (hole_basek + hole_sizek)<<10); + state->reg = range_to_mtrr(state->reg, hole_basek, + hole_sizek, + MTRR_TYPE_UNCACHABLE); + } + } else { + /* need to handle left over */ range_sizek = state->range_sizek - range0_sizek; - second_basek = 0; - second_sizek = 0; - } - if (debug_print) - printk(KERN_DEBUG "range: %016lx - %016lx\n", range_basek<<10, - (range_basek + range_sizek)<<10); - state->reg = range_to_mtrr(state->reg, range_basek, range_sizek, + if (range_sizek) { + if (debug_print) + printk(KERN_DEBUG "range: %016lx - %016lx\n", + range_basek<<10, + (range_basek + range_sizek)<<10); + state->reg = range_to_mtrr(state->reg, range_basek, + range_sizek, MTRR_TYPE_WRBACK); - if (hole_sizek) { - if (debug_print) - printk(KERN_DEBUG "hole: %016lx - %016lx\n", - hole_basek<<10, (hole_basek + hole_sizek)<<10); - state->reg = range_to_mtrr(state->reg, hole_basek, hole_sizek, - MTRR_TYPE_UNCACHABLE); - + } } return second_sizek; -- cgit v1.2.3 From 54d45ff4208836e62536fc40b0141586dbf6641f Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Sat, 27 Sep 2008 00:30:06 -0700 Subject: x86: add mtrr_cleanup_debug command line add mtrr_cleanup_debug to print out more info about layout Signed-off-by: Yinghai Lu Cc: Yinghai Lu Cc: Andrew Morton Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/mtrr/main.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c index 93d575ac61a..aff46b99ff0 100644 --- a/arch/x86/kernel/cpu/mtrr/main.c +++ b/arch/x86/kernel/cpu/mtrr/main.c @@ -836,6 +836,13 @@ static int __init enable_mtrr_cleanup_setup(char *str) } early_param("enble_mtrr_cleanup", enable_mtrr_cleanup_setup); +static int __init mtrr_cleanup_debug_setup(char *str) +{ + debug_print = 1; + return 0; +} +early_param("mtrr_cleanup_debug", mtrr_cleanup_debug_setup); + struct var_mtrr_state { unsigned long range_startk; unsigned long range_sizek; @@ -1227,7 +1234,7 @@ static int __init mtrr_cleanup(unsigned address_bits) if (mtrr_chunk_size && mtrr_gran_size) { int num_reg; - debug_print = 1; + debug_print++; /* convert ranges to var ranges state */ num_reg = x86_setup_var_mtrrs(range, nr_range, mtrr_chunk_size, mtrr_gran_size); @@ -1263,7 +1270,7 @@ static int __init mtrr_cleanup(unsigned address_bits) } printk(KERN_INFO "invalid mtrr_gran_size or mtrr_chunk_size, " "will find optimal one\n"); - debug_print = 0; + debug_print--; memset(result, 0, sizeof(result[0])); } @@ -1366,8 +1373,9 @@ static int __init mtrr_cleanup(unsigned address_bits) chunk_size <<= 10; gran_size = result[i].gran_sizek; gran_size <<= 10; - debug_print = 1; + debug_print++; x86_setup_var_mtrrs(range, nr_range, chunk_size, gran_size); + debug_print--; set_var_mtrr_all(address_bits); return 1; } -- cgit v1.2.3 From 237a62247c2879331986a300d6ab36ad21264c68 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Thu, 25 Sep 2008 12:13:53 +0200 Subject: x86/iommu: make GART driver checkpatch clean Signed-off-by: Joerg Roedel Signed-off-by: Ingo Molnar --- arch/x86/kernel/pci-gart_64.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c index b85a2f9bb34..aa569db73a2 100644 --- a/arch/x86/kernel/pci-gart_64.c +++ b/arch/x86/kernel/pci-gart_64.c @@ -27,8 +27,8 @@ #include #include #include +#include #include -#include #include #include #include @@ -175,7 +175,8 @@ static void dump_leak(void) iommu_leak_pages); for (i = 0; i < iommu_leak_pages; i += 2) { printk(KERN_DEBUG "%lu: ", iommu_pages-i); - printk_address((unsigned long) iommu_leak_tab[iommu_pages-i], 0); + printk_address((unsigned long) iommu_leak_tab[iommu_pages-i], + 0); printk(KERN_CONT "%c", (i+1)%2 == 0 ? '\n' : ' '); } printk(KERN_DEBUG "\n"); @@ -688,7 +689,8 @@ static __init int init_k8_gatt(struct agp_kern_info *info) if (!error) error = sysdev_register(&device_gart); if (error) - panic("Could not register gart_sysdev -- would corrupt data on next suspend"); + panic("Could not register gart_sysdev -- " + "would corrupt data on next suspend"); flush_gart(); @@ -710,8 +712,6 @@ static __init int init_k8_gatt(struct agp_kern_info *info) return -1; } -extern int agp_amd64_init(void); - static struct dma_mapping_ops gart_dma_ops = { .map_single = gart_map_single, .unmap_single = gart_unmap_single, @@ -777,8 +777,8 @@ void __init gart_iommu_init(void) (no_agp && init_k8_gatt(&info) < 0)) { if (max_pfn > MAX_DMA32_PFN) { printk(KERN_WARNING "More than 4GB of memory " - "but GART IOMMU not available.\n" - KERN_WARNING "falling back to iommu=soft.\n"); + "but GART IOMMU not available.\n"); + printk(KERN_WARNING "falling back to iommu=soft.\n"); } return; } @@ -868,7 +868,8 @@ void __init gart_parse_options(char *p) if (!strncmp(p, "leak", 4)) { leak_trace = 1; p += 4; - if (*p == '=') ++p; + if (*p == '=') + ++p; if (isdigit(*p) && get_option(&p, &arg)) iommu_leak_pages = arg; } -- cgit v1.2.3 From 3610f2116e961cdcbd3546a3828470f7aa636212 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Thu, 25 Sep 2008 12:13:54 +0200 Subject: x86/iommu: convert GART need_flush to bool Signed-off-by: Joerg Roedel Signed-off-by: Ingo Molnar --- arch/x86/kernel/pci-gart_64.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c index aa569db73a2..aecea068f58 100644 --- a/arch/x86/kernel/pci-gart_64.c +++ b/arch/x86/kernel/pci-gart_64.c @@ -80,7 +80,7 @@ AGPEXTERN int agp_memory_reserved; AGPEXTERN __u32 *agp_gatt_table; static unsigned long next_bit; /* protected by iommu_bitmap_lock */ -static int need_flush; /* global flush state. set for each gart wrap */ +static bool need_flush; /* global flush state. set for each gart wrap */ static unsigned long alloc_iommu(struct device *dev, int size, unsigned long align_mask) @@ -98,7 +98,7 @@ static unsigned long alloc_iommu(struct device *dev, int size, offset = iommu_area_alloc(iommu_gart_bitmap, iommu_pages, next_bit, size, base_index, boundary_size, align_mask); if (offset == -1) { - need_flush = 1; + need_flush = true; offset = iommu_area_alloc(iommu_gart_bitmap, iommu_pages, 0, size, base_index, boundary_size, align_mask); @@ -107,11 +107,11 @@ static unsigned long alloc_iommu(struct device *dev, int size, next_bit = offset+size; if (next_bit >= iommu_pages) { next_bit = 0; - need_flush = 1; + need_flush = true; } } if (iommu_fullflush) - need_flush = 1; + need_flush = true; spin_unlock_irqrestore(&iommu_bitmap_lock, flags); return offset; @@ -136,7 +136,7 @@ static void flush_gart(void) spin_lock_irqsave(&iommu_bitmap_lock, flags); if (need_flush) { k8_flush_garts(); - need_flush = 0; + need_flush = false; } spin_unlock_irqrestore(&iommu_bitmap_lock, flags); } -- cgit v1.2.3 From 0114267be1bebc2e9c913b19579900153583d617 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Thu, 25 Sep 2008 12:42:12 +0200 Subject: x86/iommu: use __GFP_ZERO instead of memset for GART Signed-off-by: Joerg Roedel Signed-off-by: Ingo Molnar --- arch/x86/kernel/pci-gart_64.c | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c index aecea068f58..d077116fec1 100644 --- a/arch/x86/kernel/pci-gart_64.c +++ b/arch/x86/kernel/pci-gart_64.c @@ -674,13 +674,13 @@ static __init int init_k8_gatt(struct agp_kern_info *info) info->aper_size = aper_size >> 20; gatt_size = (aper_size >> PAGE_SHIFT) * sizeof(u32); - gatt = (void *)__get_free_pages(GFP_KERNEL, get_order(gatt_size)); + gatt = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, + get_order(gatt_size)); if (!gatt) panic("Cannot allocate GATT table"); if (set_memory_uc((unsigned long)gatt, gatt_size >> PAGE_SHIFT)) panic("Could not set GART PTEs to uncacheable pages"); - memset(gatt, 0, gatt_size); agp_gatt_table = gatt; enable_gart_translations(); @@ -788,19 +788,16 @@ void __init gart_iommu_init(void) iommu_size = check_iommu_size(info.aper_base, aper_size); iommu_pages = iommu_size >> PAGE_SHIFT; - iommu_gart_bitmap = (void *) __get_free_pages(GFP_KERNEL, + iommu_gart_bitmap = (void *) __get_free_pages(GFP_KERNEL | __GFP_ZERO, get_order(iommu_pages/8)); if (!iommu_gart_bitmap) panic("Cannot allocate iommu bitmap\n"); - memset(iommu_gart_bitmap, 0, iommu_pages/8); #ifdef CONFIG_IOMMU_LEAK if (leak_trace) { - iommu_leak_tab = (void *)__get_free_pages(GFP_KERNEL, + iommu_leak_tab = (void *)__get_free_pages(GFP_KERNEL|__GFP_ZERO, get_order(iommu_pages*sizeof(void *))); - if (iommu_leak_tab) - memset(iommu_leak_tab, 0, iommu_pages * 8); - else + if (!iommu_leak_tab) printk(KERN_DEBUG "PCI-DMA: Cannot allocate leak trace area\n"); } -- cgit v1.2.3 From 8f0afaa58e912bbe7d5b0bad9fb024337edf363e Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Sat, 27 Sep 2008 20:26:06 -0700 Subject: x86: mtrr_cleanup hole size should be less than half of chunk_size, v2 v2: should check with half of range0 size instead of chunk_size So don't have silly big hole. in hpa's case we could auto detect instead of adding mtrr_chunk_size in command line. Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/mtrr/main.c | 74 ++++++++++++++++++++++++----------------- 1 file changed, 43 insertions(+), 31 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c index aff46b99ff0..bccf57f5b61 100644 --- a/arch/x86/kernel/cpu/mtrr/main.c +++ b/arch/x86/kernel/cpu/mtrr/main.c @@ -992,22 +992,17 @@ range_to_mtrr_with_hole(struct var_mtrr_state *state, unsigned long basek, /* only cut back, when it is not the last */ if (sizek) { while (range0_basek + range0_sizek > (basek + sizek)) { - range0_sizek -= chunk_sizek; + if (range0_sizek >= chunk_sizek) + range0_sizek -= chunk_sizek; + else + range0_sizek = 0; + if (!range0_sizek) break; } } - if (range0_sizek) { - if (debug_print) - printk(KERN_DEBUG "range0: %016lx - %016lx\n", - range0_basek<<10, - (range0_basek + range0_sizek)<<10); - state->reg = range_to_mtrr(state->reg, range0_basek, - range0_sizek, MTRR_TYPE_WRBACK); - - } - +second_try: range_basek = range0_basek + range0_sizek; /* one hole in the middle */ @@ -1015,33 +1010,50 @@ range_to_mtrr_with_hole(struct var_mtrr_state *state, unsigned long basek, second_sizek = range_basek - basek; if (range0_sizek > state->range_sizek) { - unsigned long hole_basek, hole_sizek; /* one hole in middle or at end */ hole_sizek = range0_sizek - state->range_sizek - second_sizek; - if (hole_sizek) { - hole_basek = range_basek - hole_sizek - second_sizek; - if (debug_print) - printk(KERN_DEBUG "hole: %016lx - %016lx\n", - hole_basek<<10, - (hole_basek + hole_sizek)<<10); - state->reg = range_to_mtrr(state->reg, hole_basek, - hole_sizek, - MTRR_TYPE_UNCACHABLE); + + /* hole size should be less than half of range0 size */ + if (hole_sizek > (range0_sizek >> 1) && + range0_sizek >= chunk_sizek) { + range0_sizek -= chunk_sizek; + second_sizek = 0; + hole_sizek = 0; + + goto second_try; } - } else { + } + + if (range0_sizek) { + if (debug_print) + printk(KERN_DEBUG "range0: %016lx - %016lx\n", + range0_basek<<10, + (range0_basek + range0_sizek)<<10); + state->reg = range_to_mtrr(state->reg, range0_basek, + range0_sizek, MTRR_TYPE_WRBACK); + } + + if (range0_sizek < state->range_sizek) { /* need to handle left over */ range_sizek = state->range_sizek - range0_sizek; - if (range_sizek) { - if (debug_print) - printk(KERN_DEBUG "range: %016lx - %016lx\n", - range_basek<<10, - (range_basek + range_sizek)<<10); - state->reg = range_to_mtrr(state->reg, range_basek, - range_sizek, - MTRR_TYPE_WRBACK); - } + if (debug_print) + printk(KERN_DEBUG "range: %016lx - %016lx\n", + range_basek<<10, + (range_basek + range_sizek)<<10); + state->reg = range_to_mtrr(state->reg, range_basek, + range_sizek, MTRR_TYPE_WRBACK); + } + + if (hole_sizek) { + hole_basek = range_basek - hole_sizek - second_sizek; + if (debug_print) + printk(KERN_DEBUG "hole: %016lx - %016lx\n", + hole_basek<<10, + (hole_basek + hole_sizek)<<10); + state->reg = range_to_mtrr(state->reg, hole_basek, + hole_sizek, MTRR_TYPE_UNCACHABLE); } return second_sizek; -- cgit v1.2.3 From 12544697f12e0ecdcf971075415c7678fae502af Mon Sep 17 00:00:00 2001 From: dcg Date: Sun, 28 Sep 2008 18:49:46 +0200 Subject: x86_64: be less annoying on boot, v2 Honour "quiet" boot parameter in early_printk() calls Signed-off-by: Diego Calleja Signed-off-by: Ingo Molnar --- arch/x86/kernel/head64.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index 11aa501c9f4..d16084f9064 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c @@ -108,7 +108,8 @@ void __init x86_64_start_kernel(char * real_mode_data) } load_idt((const struct desc_ptr *)&idt_descr); - early_printk("Kernel alive\n"); + if (console_loglevel == 10) + early_printk("Kernel alive\n"); x86_64_init_pda(); -- cgit v1.2.3 From 73436a1d2501575f9c2e6ddb26889145e23cefd8 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Mon, 29 Sep 2008 13:39:17 -0700 Subject: x86: mtrr_cleanup safe to get more spare regs now Delay exit to make sure we can actually get the optimal result in as many cases as possible. Signed-off-by: Yinghai Lu Signed-off-by: H. Peter Anvin --- arch/x86/kernel/cpu/mtrr/main.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c index bccf57f5b61..ae0ca97e20b 100644 --- a/arch/x86/kernel/cpu/mtrr/main.c +++ b/arch/x86/kernel/cpu/mtrr/main.c @@ -1353,10 +1353,8 @@ static int __init mtrr_cleanup(unsigned address_bits) nr_mtrr_spare_reg = num_var_ranges - 1; num_reg_good = -1; for (i = num_var_ranges - nr_mtrr_spare_reg; i > 0; i--) { - if (!min_loss_pfn[i]) { + if (!min_loss_pfn[i]) num_reg_good = i; - break; - } } index_good = -1; -- cgit v1.2.3 From dd7e52224fd7438d701b4bb9834a9ddc06828210 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Mon, 29 Sep 2008 18:54:11 -0700 Subject: x86: mtrr_cleanup prepare to make gran_size to less 1M make the print out right with size < 1M Signed-off-by: Yinghai Lu Signed-off-by: H. Peter Anvin --- arch/x86/kernel/cpu/mtrr/main.c | 103 ++++++++++++++++++++++++++++++---------- 1 file changed, 79 insertions(+), 24 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c index ae0ca97e20b..b1bd1038c91 100644 --- a/arch/x86/kernel/cpu/mtrr/main.c +++ b/arch/x86/kernel/cpu/mtrr/main.c @@ -905,6 +905,27 @@ set_var_mtrr_all(unsigned int address_bits) } } +static unsigned long to_size_factor(unsigned long sizek, char *factorp) +{ + char factor; + unsigned long base = sizek; + + if (base & ((1<<10) - 1)) { + /* not MB alignment */ + factor = 'K'; + } else if (base & ((1<<20) - 1)){ + factor = 'M'; + base >>= 10; + } else { + factor = 'G'; + base >>= 20; + } + + *factorp = factor; + + return base; +} + static unsigned int __init range_to_mtrr(unsigned int reg, unsigned long range_startk, unsigned long range_sizek, unsigned char type) @@ -926,13 +947,21 @@ range_to_mtrr(unsigned int reg, unsigned long range_startk, align = max_align; sizek = 1 << align; - if (debug_print) + if (debug_print) { + char start_factor = 'K', size_factor = 'K'; + unsigned long start_base, size_base; + + start_base = to_size_factor(range_startk, &start_factor), + size_base = to_size_factor(sizek, &size_factor), + printk(KERN_DEBUG "Setting variable MTRR %d, " - "base: %ldMB, range: %ldMB, type %s\n", - reg, range_startk >> 10, sizek >> 10, + "base: %ld%cB, range: %ld%cB, type %s\n", + reg, start_base, start_factor, + size_base, size_factor, (type == MTRR_TYPE_UNCACHABLE)?"UC": ((type == MTRR_TYPE_WRBACK)?"WB":"Other") ); + } save_var_mtrr(reg++, range_startk, sizek, type); range_startk += sizek; range_sizek -= sizek; @@ -1245,6 +1274,8 @@ static int __init mtrr_cleanup(unsigned address_bits) if (mtrr_chunk_size && mtrr_gran_size) { int num_reg; + char gran_factor, chunk_factor, lose_factor; + unsigned long gran_base, chunk_base, lose_base; debug_print++; /* convert ranges to var ranges state */ @@ -1270,12 +1301,15 @@ static int __init mtrr_cleanup(unsigned address_bits) result[i].lose_cover_sizek = (range_sums - range_sums_new) << PSHIFT; - printk(KERN_INFO "%sgran_size: %ldM \tchunk_size: %ldM \t", - result[i].bad?"*BAD*":" ", result[i].gran_sizek >> 10, - result[i].chunk_sizek >> 10); - printk(KERN_CONT "num_reg: %d \tlose cover RAM: %s%ldM \n", + gran_base = to_size_factor(result[i].gran_sizek, &gran_factor), + chunk_base = to_size_factor(result[i].chunk_sizek, &chunk_factor), + lose_base = to_size_factor(result[i].lose_cover_sizek, &lose_factor), + printk(KERN_INFO "%sgran_size: %ld%c \tchunk_size: %ld%c \t", + result[i].bad?"*BAD*":" ", + gran_base, gran_factor, chunk_base, chunk_factor); + printk(KERN_CONT "num_reg: %d \tlose cover RAM: %s%ld%c\n", result[i].num_reg, result[i].bad?"-":"", - result[i].lose_cover_sizek >> 10); + lose_base, lose_factor); if (!result[i].bad) { set_var_mtrr_all(address_bits); return 1; @@ -1290,14 +1324,25 @@ static int __init mtrr_cleanup(unsigned address_bits) memset(min_loss_pfn, 0xff, sizeof(min_loss_pfn)); memset(result, 0, sizeof(result)); for (gran_size = (1ULL<<20); gran_size < (1ULL<<32); gran_size <<= 1) { + char gran_factor; + unsigned long gran_base; + + if (debug_print) + gran_base = to_size_factor(gran_size >> 10, &gran_factor); + for (chunk_size = gran_size; chunk_size < (1ULL<<32); chunk_size <<= 1) { int num_reg; - if (debug_print) - printk(KERN_INFO - "\ngran_size: %lldM chunk_size_size: %lldM\n", - gran_size >> 20, chunk_size >> 20); + if (debug_print) { + char chunk_factor; + unsigned long chunk_base; + + chunk_base = to_size_factor(chunk_size>>10, &chunk_factor), + printk(KERN_INFO "\n"); + printk(KERN_INFO "gran_size: %ld%c chunk_size: %ld%c \n", + gran_base, gran_factor, chunk_base, chunk_factor); + } if (i >= NUM_RESULT) continue; @@ -1340,12 +1385,18 @@ static int __init mtrr_cleanup(unsigned address_bits) /* print out all */ for (i = 0; i < NUM_RESULT; i++) { - printk(KERN_INFO "%sgran_size: %ldM \tchunk_size: %ldM \t", - result[i].bad?"*BAD* ":" ", result[i].gran_sizek >> 10, - result[i].chunk_sizek >> 10); - printk(KERN_CONT "num_reg: %d \tlose RAM: %s%ldM\n", - result[i].num_reg, result[i].bad?"-":"", - result[i].lose_cover_sizek >> 10); + char gran_factor, chunk_factor, lose_factor; + unsigned long gran_base, chunk_base, lose_base; + + gran_base = to_size_factor(result[i].gran_sizek, &gran_factor), + chunk_base = to_size_factor(result[i].chunk_sizek, &chunk_factor), + lose_base = to_size_factor(result[i].lose_cover_sizek, &lose_factor), + printk(KERN_INFO "%sgran_size: %ld%c \tchunk_size: %ld%c \t", + result[i].bad?"*BAD*":" ", + gran_base, gran_factor, chunk_base, chunk_factor); + printk(KERN_CONT "num_reg: %d \tlose cover RAM: %s%ld%c\n", + result[i].num_reg, result[i].bad?"-":"", + lose_base, lose_factor); } /* try to find the optimal index */ @@ -1370,14 +1421,18 @@ static int __init mtrr_cleanup(unsigned address_bits) } if (index_good != -1) { + char gran_factor, chunk_factor, lose_factor; + unsigned long gran_base, chunk_base, lose_base; + printk(KERN_INFO "Found optimal setting for mtrr clean up\n"); i = index_good; - printk(KERN_INFO "gran_size: %ldM \tchunk_size: %ldM \t", - result[i].gran_sizek >> 10, - result[i].chunk_sizek >> 10); - printk(KERN_CONT "num_reg: %d \tlose RAM: %ldM\n", - result[i].num_reg, - result[i].lose_cover_sizek >> 10); + gran_base = to_size_factor(result[i].gran_sizek, &gran_factor), + chunk_base = to_size_factor(result[i].chunk_sizek, &chunk_factor), + lose_base = to_size_factor(result[i].lose_cover_sizek, &lose_factor), + printk(KERN_INFO "gran_size: %ld%c \tchunk_size: %ld%c \t", + gran_base, gran_factor, chunk_base, chunk_factor); + printk(KERN_CONT "num_reg: %d \tlose RAM: %ld%c\n", + result[i].num_reg, lose_base, lose_factor); /* convert ranges to var ranges state */ chunk_size = result[i].chunk_sizek; chunk_size <<= 10; -- cgit v1.2.3 From 4624065731751a3ace88e5824d8e5654e2d7abd3 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Mon, 29 Sep 2008 18:54:12 -0700 Subject: x86: mtrr_cleanup try gran_size to less than 1M one have gran < 1M reg00: base=0xd8000000 (3456MB), size= 128MB: uncachable, count=1 reg01: base=0xe0000000 (3584MB), size= 512MB: uncachable, count=1 reg02: base=0x00000000 ( 0MB), size=4096MB: write-back, count=1 reg03: base=0x100000000 (4096MB), size= 512MB: write-back, count=1 reg04: base=0x120000000 (4608MB), size= 128MB: write-back, count=1 reg05: base=0xd7f80000 (3455MB), size= 512KB: uncachable, count=1 will get Found optimal setting for mtrr clean up gran_size: 512K chunk_size: 2M num_reg: 7 lose RAM: 0G range0: 0000000000000000 - 00000000d8000000 Setting variable MTRR 0, base: 0GB, range: 2GB, type WB Setting variable MTRR 1, base: 2GB, range: 1GB, type WB Setting variable MTRR 2, base: 3GB, range: 256MB, type WB Setting variable MTRR 3, base: 3328MB, range: 128MB, type WB hole: 00000000d7f00000 - 00000000d7f80000 Setting variable MTRR 4, base: 3455MB, range: 512KB, type UC rangeX: 0000000100000000 - 0000000128000000 Setting variable MTRR 5, base: 4GB, range: 512MB, type WB Setting variable MTRR 6, base: 4608MB, range: 128MB, type WB so start from 64k instead of 1M Signed-off-by: Yinghai Lu Signed-off-by: H. Peter Anvin --- arch/x86/kernel/cpu/mtrr/main.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c index b1bd1038c91..8f1a342b16b 100644 --- a/arch/x86/kernel/cpu/mtrr/main.c +++ b/arch/x86/kernel/cpu/mtrr/main.c @@ -1197,11 +1197,11 @@ struct mtrr_cleanup_result { }; /* - * gran_size: 1M, 2M, ..., 2G + * gran_size: 64K, 128K, 256K, 512K, 1M, 2M, ..., 2G * chunk size: gran_size, ..., 2G - * so we need (1+12)*6 + * so we need (1+16)*8 */ -#define NUM_RESULT 78 +#define NUM_RESULT 136 #define PSHIFT (PAGE_SHIFT - 10) static struct mtrr_cleanup_result __initdata result[NUM_RESULT]; @@ -1323,7 +1323,7 @@ static int __init mtrr_cleanup(unsigned address_bits) i = 0; memset(min_loss_pfn, 0xff, sizeof(min_loss_pfn)); memset(result, 0, sizeof(result)); - for (gran_size = (1ULL<<20); gran_size < (1ULL<<32); gran_size <<= 1) { + for (gran_size = (1ULL<<16); gran_size < (1ULL<<32); gran_size <<= 1) { char gran_factor; unsigned long gran_base; -- cgit v1.2.3 From 3dcd7e269d2223126f6ee9bc893f5a6166e1770d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=2EA=2E=20Magall=C3=B3n?= Date: Tue, 30 Sep 2008 10:02:52 +0200 Subject: x86: fix typo in enable_mtrr_cleanup early parameter Correct typo for 'enable_mtrr_cleanup' early boot param name. Signed-off-by: J.A. Magallon Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/mtrr/main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c index b117d7f8a56..885c8265e6b 100644 --- a/arch/x86/kernel/cpu/mtrr/main.c +++ b/arch/x86/kernel/cpu/mtrr/main.c @@ -834,7 +834,7 @@ static int __init enable_mtrr_cleanup_setup(char *str) enable_mtrr_cleanup = 1; return 0; } -early_param("enble_mtrr_cleanup", enable_mtrr_cleanup_setup); +early_param("enable_mtrr_cleanup", enable_mtrr_cleanup_setup); struct var_mtrr_state { unsigned long range_startk; -- cgit v1.2.3 From 9b1568458a3ef006361710dc12848aec891883b5 Mon Sep 17 00:00:00 2001 From: Adam Jackson Date: Mon, 29 Sep 2008 14:52:03 -0400 Subject: x86, debug printouts: IOMMU setup failures should not be KERN_ERR The number of BIOSes that have an option to enable the IOMMU, or fix anything about its configuration, is vanishingly small. There's no good reason to punish quiet boot for this. Signed-off-by: Adam Jackson Signed-off-by: Ingo Molnar --- arch/x86/kernel/aperture_64.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/aperture_64.c b/arch/x86/kernel/aperture_64.c index 44e21826db1..9a32b37ee2e 100644 --- a/arch/x86/kernel/aperture_64.c +++ b/arch/x86/kernel/aperture_64.c @@ -455,11 +455,11 @@ out: force_iommu || valid_agp || fallback_aper_force) { - printk(KERN_ERR + printk(KERN_INFO "Your BIOS doesn't leave a aperture memory hole\n"); - printk(KERN_ERR + printk(KERN_INFO "Please enable the IOMMU option in the BIOS setup\n"); - printk(KERN_ERR + printk(KERN_INFO "This costs you %d MB of RAM\n", 32 << fallback_aper_order); -- cgit v1.2.3 From de59985e3a623d4d5d6207f1777398ca0606ab1c Mon Sep 17 00:00:00 2001 From: Zachary Amsden Date: Tue, 30 Sep 2008 11:02:12 -0700 Subject: x86: Fix broken LDT access in VMI After investigating a JRE failure, I found this bug was introduced a long time ago, and had already managed to survive another bugfix which occurred on the same line. The result is a total failure of the JRE due to LDT selectors not working properly. This one took a long time to rear up because LDT usage is not very common, but the bug is quite serious. It got introduced along with another bug, already fixed, by 75b8bb3e56ca09a467fbbe5229bc68627f7445be Signed-off-by: Zachary Amsden Cc: Ingo Molnar Cc: Glauber de Oliveira Costa Cc: Signed-off-by: Linus Torvalds --- arch/x86/kernel/vmi_32.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/vmi_32.c b/arch/x86/kernel/vmi_32.c index 6ca515d6db5..edfb09f3047 100644 --- a/arch/x86/kernel/vmi_32.c +++ b/arch/x86/kernel/vmi_32.c @@ -235,7 +235,7 @@ static void vmi_write_ldt_entry(struct desc_struct *dt, int entry, const void *desc) { u32 *ldt_entry = (u32 *)desc; - vmi_ops.write_idt_entry(dt, entry, ldt_entry[0], ldt_entry[1]); + vmi_ops.write_ldt_entry(dt, entry, ldt_entry[0], ldt_entry[1]); } static void vmi_load_sp0(struct tss_struct *tss, -- cgit v1.2.3 From dc63b52673d71f9d49b9d72d263a9f32df18c3ee Mon Sep 17 00:00:00 2001 From: Zachary Amsden Date: Tue, 30 Sep 2008 11:02:12 -0700 Subject: x86, vmi: fix broken LDT access This one took a long time to rear up because LDT usage is not very common, but the bug is quite serious. It got introduced along with another bug, already fixed, by 75b8bb3e56ca09a467fbbe5229bc68627f7445be After investigating a JRE failure, I found this bug was introduced a long time ago, and had already managed to survive another bugfix which occurred on the same line. The result is a total failure of the JRE due to LDT selectors not working properly. Signed-off-by: Zachary Amsden Cc: Glauber de Oliveira Costa Cc: stable@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/kernel/vmi_32.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/vmi_32.c b/arch/x86/kernel/vmi_32.c index 6ca515d6db5..edfb09f3047 100644 --- a/arch/x86/kernel/vmi_32.c +++ b/arch/x86/kernel/vmi_32.c @@ -235,7 +235,7 @@ static void vmi_write_ldt_entry(struct desc_struct *dt, int entry, const void *desc) { u32 *ldt_entry = (u32 *)desc; - vmi_ops.write_idt_entry(dt, entry, ldt_entry[0], ldt_entry[1]); + vmi_ops.write_ldt_entry(dt, entry, ldt_entry[0], ldt_entry[1]); } static void vmi_load_sp0(struct tss_struct *tss, -- cgit v1.2.3 From 40becd8d5af03ee7935e79c3fccd0d1f380d95b4 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Mon, 29 Sep 2008 00:06:36 +0900 Subject: AMD IOMMU: use iommu_device_max_index AMD IOMMU can use iommu_device_max_index() instead of the homegrown function. Signed-off-by: FUJITA Tomonori Acked-by: Joerg Roedel Signed-off-by: Ingo Molnar --- arch/x86/kernel/amd_iommu.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index c19212191c9..3b346c6f551 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -470,10 +470,6 @@ static int init_unity_mappings_for_device(struct dma_ops_domain *dma_dom, * efficient allocator. * ****************************************************************************/ -static unsigned long dma_mask_to_pages(unsigned long mask) -{ - return PAGE_ALIGN(mask) >> PAGE_SHIFT; -} /* * The address allocator core function. @@ -486,14 +482,14 @@ static unsigned long dma_ops_alloc_addresses(struct device *dev, unsigned long align_mask, u64 dma_mask) { - unsigned long limit = dma_mask_to_pages(dma_mask); + unsigned long limit; unsigned long address; - unsigned long size = dom->aperture_size >> PAGE_SHIFT; unsigned long boundary_size; boundary_size = ALIGN(dma_get_seg_boundary(dev) + 1, PAGE_SIZE) >> PAGE_SHIFT; - limit = limit < size ? limit : size; + limit = iommu_device_max_index(dom->aperture_size >> PAGE_SHIFT, 0, + dma_mask >> PAGE_SHIFT); if (dom->next_bit >= limit) { dom->next_bit = 0; -- cgit v1.2.3 From fd1452ebf257317f24e0e285a17a2ec2ce3e6df7 Mon Sep 17 00:00:00 2001 From: Dmitry Adamushko Date: Thu, 2 Oct 2008 16:56:19 +0300 Subject: x86/microcode: fix sleeping function called from invalid context at kernel/mutex.c Fix the following "sleeping function called from invalid context" bug: ... __might_sleep mutex_lock_nested microcode_update_cpu mc_sysdev_resume __sysdev_resume sysdev_resume device_power_up ... Signed-off-by: Dmitry Adamushko Signed-off-by: Ingo Molnar --- arch/x86/kernel/microcode_core.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/microcode_core.c b/arch/x86/kernel/microcode_core.c index 8db2eb55e9e..936d8d55f23 100644 --- a/arch/x86/kernel/microcode_core.c +++ b/arch/x86/kernel/microcode_core.c @@ -324,10 +324,6 @@ void microcode_update_cpu(int cpu) struct ucode_cpu_info *uci = ucode_cpu_info + cpu; int err = 0; - /* We should bind the task to the CPU */ - BUG_ON(raw_smp_processor_id() != cpu); - - mutex_lock(µcode_mutex); /* * Check if the system resume is in progress (uci->valid != NULL), * otherwise just request a firmware: @@ -340,11 +336,8 @@ void microcode_update_cpu(int cpu) err = microcode_ops->request_microcode_fw(cpu, µcode_pdev->dev); } - if (!err) microcode_ops->apply_microcode(cpu); - - mutex_unlock(µcode_mutex); } static void microcode_init_cpu(int cpu) @@ -352,7 +345,13 @@ static void microcode_init_cpu(int cpu) cpumask_t old = current->cpus_allowed; set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu)); + /* We should bind the task to the CPU */ + BUG_ON(raw_smp_processor_id() != cpu); + + mutex_lock(µcode_mutex); microcode_update_cpu(cpu); + mutex_unlock(µcode_mutex); + set_cpus_allowed_ptr(current, &old); } -- cgit v1.2.3 From 136c82c6f7f12c9bed8bf709f92123077966512c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=2EA=2E=20Magall=C3=B3n?= Date: Fri, 3 Oct 2008 00:32:37 +0200 Subject: x86: mtrr_cleanup try gran_size to less than 1M, cleanup Patch below cleans up formatting, with space for big bases and sizes (64 Gb). Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/mtrr/if.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/mtrr/if.c b/arch/x86/kernel/cpu/mtrr/if.c index 84c480bb371..4c4214690dd 100644 --- a/arch/x86/kernel/cpu/mtrr/if.c +++ b/arch/x86/kernel/cpu/mtrr/if.c @@ -405,9 +405,9 @@ static int mtrr_seq_show(struct seq_file *seq, void *offset) } /* RED-PEN: base can be > 32bit */ len += seq_printf(seq, - "reg%02i: base=0x%05lx000 (%4luMB), size=%4lu%cB: %s, count=%d\n", + "reg%02i: base=0x%06lx000 (%5luMB), size=%5lu%cB, count=%d: %s\n", i, base, base >> (20 - PAGE_SHIFT), size, factor, - mtrr_attrib_to_str(type), mtrr_usage_table[i]); + mtrr_usage_table[i], mtrr_attrib_to_str(type)); } } return 0; -- cgit v1.2.3 From 834836ee6a3a9deadff9394b23db965a08bcde35 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 2 Oct 2008 15:46:20 -0700 Subject: x86: mtrr_cleanup try gran_size to less than 1M, v3 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit J.A. Magallón reported: >> Also, on a 64 bit box with 4Gb, it gives this: >> >> cicely:~# cat /proc/mtrr >> reg00: base=0x00000000 ( 0MB), size=4096MB: write-back, count=1 >> reg01: base=0x100000000 (4096MB), size=1024MB: write-back, count=1 >> reg02: base=0x140000000 (5120MB), size= 512MB: write-back, count=1 >> reg03: base=0x160000000 (5632MB), size= 256MB: write-back, count=1 >> reg04: base=0x80000000 (2048MB), size=2048MB: uncachable, count=1 boundary handling has a problem ... fix it. Reported-by: J.A. Magallón Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/mtrr/main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c index 8f1a342b16b..b4a3f0c1a5e 100644 --- a/arch/x86/kernel/cpu/mtrr/main.c +++ b/arch/x86/kernel/cpu/mtrr/main.c @@ -1044,7 +1044,7 @@ second_try: hole_sizek = range0_sizek - state->range_sizek - second_sizek; /* hole size should be less than half of range0 size */ - if (hole_sizek > (range0_sizek >> 1) && + if (hole_sizek >= (range0_sizek >> 1) && range0_sizek >= chunk_sizek) { range0_sizek -= chunk_sizek; second_sizek = 0; -- cgit v1.2.3 From 8bb39311bf461243f6cade3644764d848516dd23 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 2 Oct 2008 23:26:59 -0700 Subject: x86, debug: mtrr_cleanup print out var mtrr before change it Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/mtrr/main.c | 25 ++++++++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c index b4a3f0c1a5e..406074c46f1 100644 --- a/arch/x86/kernel/cpu/mtrr/main.c +++ b/arch/x86/kernel/cpu/mtrr/main.c @@ -1211,13 +1211,14 @@ static unsigned long __initdata min_loss_pfn[RANGE_NUM]; static int __init mtrr_cleanup(unsigned address_bits) { unsigned long extra_remove_base, extra_remove_size; - unsigned long i, base, size, def, dummy; + unsigned long base, size, def, dummy; mtrr_type type; int nr_range, nr_range_new; u64 chunk_size, gran_size; unsigned long range_sums, range_sums_new; int index_good; int num_reg_good; + int i; /* extra one for all 0 */ int num[MTRR_NUM_TYPES + 1]; @@ -1259,6 +1260,28 @@ static int __init mtrr_cleanup(unsigned address_bits) num_var_ranges - num[MTRR_NUM_TYPES]) return 0; + /* print original var MTRRs at first, for debugging: */ + printk(KERN_DEBUG "original variable MTRRs\n"); + for (i = 0; i < num_var_ranges; i++) { + char start_factor = 'K', size_factor = 'K'; + unsigned long start_base, size_base; + + size_base = range_state[i].size_pfn << (PAGE_SHIFT - 10); + if (!size_base) + continue; + + size_base = to_size_factor(size_base, &size_factor), + start_base = range_state[i].base_pfn << (PAGE_SHIFT - 10); + start_base = to_size_factor(start_base, &start_factor), + + printk(KERN_DEBUG "reg %d, base: %ld%cB, range: %ld%cB, type %s\n", + i, start_base, start_factor, + size_base, size_factor, + (type == MTRR_TYPE_UNCACHABLE) ? "UC" : + ((type == MTRR_TYPE_WRBACK) ? "WB" : "Other") + ); + } + memset(range, 0, sizeof(range)); extra_remove_size = 0; if (mtrr_tom2) { -- cgit v1.2.3 From a2e8d3dcfd420177aaa0c53aca60a869bad75f4b Mon Sep 17 00:00:00 2001 From: Hiroshi Shimamoto Date: Thu, 2 Oct 2008 22:09:20 -0700 Subject: x86: signal: move macros out from restore_sigcontext() move macros, COPY, COPY_SEG*, GET_SEG, out from restore_sigcontext(). x86_64: introduce COPY_SEG_STRICT for cs. Signed-off-by: Hiroshi Shimamoto Signed-off-by: Ingo Molnar --- arch/x86/kernel/signal_32.c | 38 +++++++++++++++++++++----------------- arch/x86/kernel/signal_64.c | 18 +++++++++++------- 2 files changed, 32 insertions(+), 24 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/signal_32.c b/arch/x86/kernel/signal_32.c index 4337cd510f0..545448b7aeb 100644 --- a/arch/x86/kernel/signal_32.c +++ b/arch/x86/kernel/signal_32.c @@ -113,6 +113,27 @@ asmlinkage int sys_sigaltstack(unsigned long bx) return do_sigaltstack(uss, uoss, regs->sp); } +#define COPY(x) { \ + err |= __get_user(regs->x, &sc->x); \ +} + +#define COPY_SEG(seg) { \ + unsigned short tmp; \ + err |= __get_user(tmp, &sc->seg); \ + regs->seg = tmp; \ +} + +#define COPY_SEG_STRICT(seg) { \ + unsigned short tmp; \ + err |= __get_user(tmp, &sc->seg); \ + regs->seg = tmp | 3; \ +} + +#define GET_SEG(seg) { \ + unsigned short tmp; \ + err |= __get_user(tmp, &sc->seg); \ + loadsegment(seg, tmp); \ +} /* * Do a signal return; undo the signal stack. @@ -126,23 +147,6 @@ restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc, /* Always make any pending restarted system calls return -EINTR */ current_thread_info()->restart_block.fn = do_no_restart_syscall; -#define COPY(x) err |= __get_user(regs->x, &sc->x) - -#define COPY_SEG(seg) \ - { unsigned short tmp; \ - err |= __get_user(tmp, &sc->seg); \ - regs->seg = tmp; } - -#define COPY_SEG_STRICT(seg) \ - { unsigned short tmp; \ - err |= __get_user(tmp, &sc->seg); \ - regs->seg = tmp|3; } - -#define GET_SEG(seg) \ - { unsigned short tmp; \ - err |= __get_user(tmp, &sc->seg); \ - loadsegment(seg, tmp); } - GET_SEG(gs); COPY_SEG(fs); COPY_SEG(es); diff --git a/arch/x86/kernel/signal_64.c b/arch/x86/kernel/signal_64.c index 53f86d95985..feff4a91d09 100644 --- a/arch/x86/kernel/signal_64.c +++ b/arch/x86/kernel/signal_64.c @@ -52,6 +52,16 @@ sys_sigaltstack(const stack_t __user *uss, stack_t __user *uoss, return do_sigaltstack(uss, uoss, regs->sp); } +#define COPY(x) { \ + err |= __get_user(regs->x, &sc->x); \ +} + +#define COPY_SEG_STRICT(seg) { \ + unsigned short tmp; \ + err |= __get_user(tmp, &sc->seg); \ + regs->seg = tmp | 3; \ +} + /* * Do a signal return; undo the signal stack. */ @@ -64,8 +74,6 @@ restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc, /* Always make any pending restarted system calls return -EINTR */ current_thread_info()->restart_block.fn = do_no_restart_syscall; -#define COPY(x) (err |= __get_user(regs->x, &sc->x)) - COPY(di); COPY(si); COPY(bp); COPY(sp); COPY(bx); COPY(dx); COPY(cx); COPY(ip); COPY(r8); @@ -80,11 +88,7 @@ restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc, /* Kernel saves and restores only the CS segment register on signals, * which is the bare minimum needed to allow mixed 32/64-bit code. * App's signal handler can save/restore other segments if needed. */ - { - unsigned cs; - err |= __get_user(cs, &sc->cs); - regs->cs = cs | 3; /* Force into user mode */ - } + COPY_SEG_STRICT(cs); { unsigned int tmpflags; -- cgit v1.2.3 From 69e13ad56f9e2cd81c4f8bfd6267211c10c14c08 Mon Sep 17 00:00:00 2001 From: Hiroshi Shimamoto Date: Thu, 2 Oct 2008 22:18:47 -0700 Subject: x86: signal: remove indent in restore_sigcontext() remove braces and indent for flags and fpstate in restore_sigcontext(). Signed-off-by: Hiroshi Shimamoto Signed-off-by: Ingo Molnar --- arch/x86/kernel/signal_32.c | 21 +++++++-------------- arch/x86/kernel/signal_64.c | 19 +++++++------------ 2 files changed, 14 insertions(+), 26 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/signal_32.c b/arch/x86/kernel/signal_32.c index 545448b7aeb..d6dd057d0f2 100644 --- a/arch/x86/kernel/signal_32.c +++ b/arch/x86/kernel/signal_32.c @@ -142,6 +142,8 @@ static int restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc, unsigned long *pax) { + void __user *buf; + unsigned int tmpflags; unsigned int err = 0; /* Always make any pending restarted system calls return -EINTR */ @@ -156,21 +158,12 @@ restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc, COPY_SEG_STRICT(cs); COPY_SEG_STRICT(ss); - { - unsigned int tmpflags; + err |= __get_user(tmpflags, &sc->flags); + regs->flags = (regs->flags & ~FIX_EFLAGS) | (tmpflags & FIX_EFLAGS); + regs->orig_ax = -1; /* disable syscall checks */ - err |= __get_user(tmpflags, &sc->flags); - regs->flags = (regs->flags & ~FIX_EFLAGS) | - (tmpflags & FIX_EFLAGS); - regs->orig_ax = -1; /* disable syscall checks */ - } - - { - void __user *buf; - - err |= __get_user(buf, &sc->fpstate); - err |= restore_i387_xstate(buf); - } + err |= __get_user(buf, &sc->fpstate); + err |= restore_i387_xstate(buf); err |= __get_user(*pax, &sc->ax); return err; diff --git a/arch/x86/kernel/signal_64.c b/arch/x86/kernel/signal_64.c index feff4a91d09..a5c9627f4db 100644 --- a/arch/x86/kernel/signal_64.c +++ b/arch/x86/kernel/signal_64.c @@ -69,6 +69,8 @@ static int restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc, unsigned long *pax) { + void __user *buf; + unsigned int tmpflags; unsigned int err = 0; /* Always make any pending restarted system calls return -EINTR */ @@ -90,19 +92,12 @@ restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc, * App's signal handler can save/restore other segments if needed. */ COPY_SEG_STRICT(cs); - { - unsigned int tmpflags; - err |= __get_user(tmpflags, &sc->flags); - regs->flags = (regs->flags & ~FIX_EFLAGS) | (tmpflags & FIX_EFLAGS); - regs->orig_ax = -1; /* disable syscall checks */ - } - - { - void __user *buf; + err |= __get_user(tmpflags, &sc->flags); + regs->flags = (regs->flags & ~FIX_EFLAGS) | (tmpflags & FIX_EFLAGS); + regs->orig_ax = -1; /* disable syscall checks */ - err |= __get_user(buf, &sc->fpstate); - err |= restore_i387_xstate(buf); - } + err |= __get_user(buf, &sc->fpstate); + err |= restore_i387_xstate(buf); err |= __get_user(*pax, &sc->ax); return err; -- cgit v1.2.3 From 175e438f7a2de9d94110046be48697969569736a Mon Sep 17 00:00:00 2001 From: Russ Anderson Date: Thu, 2 Oct 2008 17:32:06 -0500 Subject: x86: trivial printk fix in efi.c [patch] x86: Trivial printk fix in efi.c The following line is lacking a space between "memdesc" and "doesn't". "Kernel-defined memdescdoesn't match the one from EFI!" Fixed the printk by adding a space. Signed-off-by: Russ Anderson Cc: Russ Anderson Signed-off-by: Ingo Molnar --- arch/x86/kernel/efi.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/efi.c b/arch/x86/kernel/efi.c index 06cc8d4254b..945a31cdd81 100644 --- a/arch/x86/kernel/efi.c +++ b/arch/x86/kernel/efi.c @@ -414,9 +414,11 @@ void __init efi_init(void) if (memmap.map == NULL) printk(KERN_ERR "Could not map the EFI memory map!\n"); memmap.map_end = memmap.map + (memmap.nr_map * memmap.desc_size); + if (memmap.desc_size != sizeof(efi_memory_desc_t)) - printk(KERN_WARNING "Kernel-defined memdesc" - "doesn't match the one from EFI!\n"); + printk(KERN_WARNING + "Kernel-defined memdesc doesn't match the one from EFI!\n"); + if (add_efi_memmap) do_add_efi_memmap(); -- cgit v1.2.3 From 42fde7a05c5d6dc76e518ae12091ea897b1c132b Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Sat, 4 Oct 2008 19:34:18 -0700 Subject: x86: mtrr_cleanup: print out correct type v2 Print out the correct type when the Write Protected (WP) type is seen. Signed-off-by: Yinghai Lu Signed-off-by: H. Peter Anvin --- arch/x86/kernel/cpu/mtrr/main.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c index 406074c46f1..9086b38fbab 100644 --- a/arch/x86/kernel/cpu/mtrr/main.c +++ b/arch/x86/kernel/cpu/mtrr/main.c @@ -1273,12 +1273,14 @@ static int __init mtrr_cleanup(unsigned address_bits) size_base = to_size_factor(size_base, &size_factor), start_base = range_state[i].base_pfn << (PAGE_SHIFT - 10); start_base = to_size_factor(start_base, &start_factor), + type = range_state[i].type; printk(KERN_DEBUG "reg %d, base: %ld%cB, range: %ld%cB, type %s\n", i, start_base, start_factor, size_base, size_factor, (type == MTRR_TYPE_UNCACHABLE) ? "UC" : - ((type == MTRR_TYPE_WRBACK) ? "WB" : "Other") + ((type == MTRR_TYPE_WRPROT) ? "WP" : + ((type == MTRR_TYPE_WRBACK) ? "WB" : "Other")) ); } -- cgit v1.2.3 From 99e1aa17ce434010dd820b583628370cc15f10f3 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Sat, 4 Oct 2008 14:50:32 -0700 Subject: x86: mtrr_cleanup: first 1M may be covered in var mtrrs The first 1M is don't care when it comes to the variables MTRRs. Cover it as WB as a heuristic approximation; this is generally what we want to minimize the number of registers. Signed-off-by: Yinghai Lu Signed-off-by: H. Peter Anvin --- arch/x86/kernel/cpu/mtrr/main.c | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c index 9086b38fbab..663e530e08e 100644 --- a/arch/x86/kernel/cpu/mtrr/main.c +++ b/arch/x86/kernel/cpu/mtrr/main.c @@ -1293,6 +1293,15 @@ static int __init mtrr_cleanup(unsigned address_bits) } nr_range = x86_get_mtrr_mem_range(range, 0, extra_remove_base, extra_remove_size); + /* + * [0, 1M) should always be coverred by var mtrr with WB + * and fixed mtrrs should take effective before var mtrr for it + */ + nr_range = add_range_with_merge(range, nr_range, 0, + (1ULL<<(20 - PAGE_SHIFT)) - 1); + /* sort the ranges */ + sort(range, nr_range, sizeof(struct res_range), cmp_range, NULL); + range_sums = sum_ranges(range, nr_range); printk(KERN_INFO "total RAM coverred: %ldM\n", range_sums >> (20 - PAGE_SHIFT)); -- cgit v1.2.3 From dd5523552c2897e3fde16fc2fc8f6332addf66ab Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Sat, 4 Oct 2008 14:50:33 -0700 Subject: x86: mtrr_cleanup: treat WRPROT as UNCACHEABLE For the purpose of MTRR canonicalization, treat WRPROT as UNCACHEABLE. Signed-off-by: Yinghai Lu Signed-off-by: H. Peter Anvin --- arch/x86/kernel/cpu/mtrr/main.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c index 663e530e08e..5994a9f78f3 100644 --- a/arch/x86/kernel/cpu/mtrr/main.c +++ b/arch/x86/kernel/cpu/mtrr/main.c @@ -759,7 +759,8 @@ x86_get_mtrr_mem_range(struct res_range *range, int nr_range, /* take out UC ranges */ for (i = 0; i < num_var_ranges; i++) { type = range_state[i].type; - if (type != MTRR_TYPE_UNCACHABLE) + if (type != MTRR_TYPE_UNCACHABLE && + type != MTRR_TYPE_WRPROT) continue; size = range_state[i].size_pfn; if (!size) @@ -1248,6 +1249,8 @@ static int __init mtrr_cleanup(unsigned address_bits) continue; if (!size) type = MTRR_NUM_TYPES; + if (type == MTRR_TYPE_WRPROT) + type = MTRR_TYPE_UNCACHABLE; num[type]++; } -- cgit v1.2.3 From d99e90164e6cf2eb85fa94d547d6336f8127a107 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Sat, 4 Oct 2008 15:55:12 -0700 Subject: x86: gart iommu have direct mapping when agp is present too move init_memory_mapping() out of init_k8_gatt. for: http://bugzilla.kernel.org/show_bug.cgi?id=11676 2.6.27-rc2 to rc8, apgart fails, iommu=soft works, regression This is needed because we need to map the GART aperture even if the GATT is not initialized. Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/pci-gart_64.c | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c index 49285f8fd4d..be33a5442d8 100644 --- a/arch/x86/kernel/pci-gart_64.c +++ b/arch/x86/kernel/pci-gart_64.c @@ -626,7 +626,6 @@ static __init int init_k8_gatt(struct agp_kern_info *info) struct pci_dev *dev; void *gatt; int i, error; - unsigned long start_pfn, end_pfn; printk(KERN_INFO "PCI-DMA: Disabling AGP.\n"); aper_size = aper_base = info->aper_size = 0; @@ -672,12 +671,6 @@ static __init int init_k8_gatt(struct agp_kern_info *info) printk(KERN_INFO "PCI-DMA: aperture base @ %x size %u KB\n", aper_base, aper_size>>10); - /* need to map that range */ - end_pfn = (aper_base>>PAGE_SHIFT) + (aper_size>>PAGE_SHIFT); - if (end_pfn > max_low_pfn_mapped) { - start_pfn = (aper_base>>PAGE_SHIFT); - init_memory_mapping(start_pfn<>PAGE_SHIFT) + (aper_size>>PAGE_SHIFT); + if (end_pfn > max_low_pfn_mapped) { + start_pfn = (aper_base>>PAGE_SHIFT); + init_memory_mapping(start_pfn<> PAGE_SHIFT; -- cgit v1.2.3 From e84956f92a846246b09b34f2a728329c386d250f Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 6 Oct 2008 11:59:29 +0200 Subject: x86 ACPI: Blacklist two HP machines with buggy BIOSes There is a bug in the BIOSes of some HP boxes with AMD Turions which connects IO-APIC pins with ACPI thermal trip points in such a way that if the state of the IO-APIC is not as expected by the (buggy) BIOS, the thermal trip points are set to insanely low values (usually all of them become 16 degrees Celsius). As a result, thermal throttling kicks in and knock the system down to its shoes. Unfortunately some of the recent IO-APIC changes made the bug show up. To prevent this from happening, blacklist machines that are known to be affected (nx6115 and 6715b in this particular case). This fixes http://bugzilla.kernel.org/show_bug.cgi?id=11516 listed as a regression from 2.6.26. On my box it was caused by: commit 691874fa96d6349a8b60f8ea9c2bae52ece79941 Author: Maciej W. Rozycki Date: Tue May 27 21:19:51 2008 +0100 x86: I/O APIC: timer through 8259A second-chance Signed-off-by: Maciej W. Rozycki Signed-off-by: Ingo Molnar and the whole story is described in this (huge) thread: http://marc.info/?l=linux-kernel&m=121358440508410&w=4 Matthew Garrett told us about that happening on the nx6125: http://marc.info/?l=linux-kernel&m=121396307411930&w=4 and then Maciej analysed the breakage on the basis of a DSDT from the nx6325: http://marc.info/?l=linux-kernel&m=121401068718826&w=4 As far as the Dmitry's and Jason's boxes are concerned, I recognized the symptoms and asked them to verify that the blacklisting helped. It appears that the buggy BIOS code has been copy-pasted to the entire range of machines, for no good reason. Signed-off-by: Rafael J. Wysocki Tested-by: Dmitry Torokhov Tested-by: Jason Vas Dias Signed-off-by: Linus Torvalds --- arch/x86/kernel/acpi/boot.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index bfd10fd211c..c102af85df9 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -1603,6 +1603,14 @@ static struct dmi_system_id __initdata acpi_dmi_table[] = { * is not connected at all. Force ignoring BIOS IRQ0 pin2 * override in that cases. */ + { + .callback = dmi_ignore_irq0_timer_override, + .ident = "HP nx6115 laptop", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Hewlett-Packard"), + DMI_MATCH(DMI_PRODUCT_NAME, "HP Compaq nx6115"), + }, + }, { .callback = dmi_ignore_irq0_timer_override, .ident = "HP NX6125 laptop", @@ -1619,6 +1627,14 @@ static struct dmi_system_id __initdata acpi_dmi_table[] = { DMI_MATCH(DMI_PRODUCT_NAME, "HP Compaq nx6325"), }, }, + { + .callback = dmi_ignore_irq0_timer_override, + .ident = "HP 6715b laptop", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Hewlett-Packard"), + DMI_MATCH(DMI_PRODUCT_NAME, "HP Compaq 6715b"), + }, + }, {} }; -- cgit v1.2.3 From e85ceae9102f6e3c1d707e7ac88fa48d252e9cfa Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Mon, 6 Oct 2008 13:50:59 -0500 Subject: kgdb, x86: Avoid invoking kgdb_nmicallback twice per NMI Stress-testing KVM's latest NMI support with kgdbts inside an SMP guest, I came across spurious unhandled NMIs while running the singlestep test. Looking closer at the code path each NMI takes when KGDB is enabled, I noticed that kgdb_nmicallback is called twice per event: One time via DIE_NMI_IPI notification, the second time on DIE_NMI. Removing the first invocation cures the unhandled NMIs here. Signed-off-by: Jan Kiszka Signed-off-by: Jason Wessel --- arch/x86/kernel/kgdb.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c index 8282a213968..10435a120d2 100644 --- a/arch/x86/kernel/kgdb.c +++ b/arch/x86/kernel/kgdb.c @@ -455,12 +455,7 @@ static int __kgdb_notify(struct die_args *args, unsigned long cmd) return NOTIFY_DONE; case DIE_NMI_IPI: - if (atomic_read(&kgdb_active) != -1) { - /* KGDB CPU roundup */ - kgdb_nmicallback(raw_smp_processor_id(), regs); - was_in_debug_nmi[raw_smp_processor_id()] = 1; - touch_nmi_watchdog(); - } + /* Just ignore, we will handle the roundup on DIE_NMI. */ return NOTIFY_DONE; case DIE_NMIUNKNOWN: -- cgit v1.2.3 From 33fb0e4eb53f16af312f9698f974e2e64af39c12 Mon Sep 17 00:00:00 2001 From: Andreas Herrmann Date: Tue, 7 Oct 2008 00:11:22 +0200 Subject: x86: SB450: skip IRQ0 override if it is not routed to INT2 of IOAPIC On some HP nx6... laptops (e.g. nx6325) BIOS reports an IRQ0 override but the SB450 chipset is configured such that timer interrupts goe to INT0 of IOAPIC. Check IRQ0 routing and if it is routed to INT0 of IOAPIC skip the timer override. [ This more generic PCI ID based quirk should alleviate the need for dmi_ignore_irq0_timer_override DMI quirks. ] Signed-off-by: Andreas Herrmann Acked-by: "Maciej W. Rozycki" Tested-by: Dmitry Torokhov Cc: Signed-off-by: Ingo Molnar --- arch/x86/kernel/early-quirks.c | 48 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 48 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/early-quirks.c b/arch/x86/kernel/early-quirks.c index 4353cf5e6fa..6b839b14764 100644 --- a/arch/x86/kernel/early-quirks.c +++ b/arch/x86/kernel/early-quirks.c @@ -95,6 +95,52 @@ static void __init nvidia_bugs(int num, int slot, int func) } +static u32 ati_ixp4x0_rev(int num, int slot, int func) +{ + u32 d; + u8 b; + + b = read_pci_config_byte(num, slot, func, 0xac); + b &= ~(1<<5); + write_pci_config_byte(num, slot, func, 0xac, b); + + d = read_pci_config(num, slot, func, 0x70); + d |= 1<<8; + write_pci_config(num, slot, func, 0x70, d); + + d = read_pci_config(num, slot, func, 0x8); + d &= 0xff; + return d; +} + +static void __init ati_bugs(int num, int slot, int func) +{ +#if defined(CONFIG_ACPI) && defined (CONFIG_X86_IO_APIC) + u32 d; + u8 b; + + if (acpi_use_timer_override) + return; + + d = ati_ixp4x0_rev(num, slot, func); + if (d < 0x82) + acpi_skip_timer_override = 1; + else { + /* check for IRQ0 interrupt swap */ + outb(0x72, 0xcd6); b = inb(0xcd7); + if (!(b & 0x2)) + acpi_skip_timer_override = 1; + } + + if (acpi_skip_timer_override) { + printk(KERN_INFO "SB4X0 revision 0x%x\n", d); + printk(KERN_INFO "Ignoring ACPI timer override.\n"); + printk(KERN_INFO "If you got timer trouble " + "try acpi_use_timer_override\n"); + } +#endif +} + #define QFLAG_APPLY_ONCE 0x1 #define QFLAG_APPLIED 0x2 #define QFLAG_DONE (QFLAG_APPLY_ONCE|QFLAG_APPLIED) @@ -114,6 +160,8 @@ static struct chipset early_qrk[] __initdata = { PCI_CLASS_BRIDGE_PCI, PCI_ANY_ID, QFLAG_APPLY_ONCE, via_bugs }, { PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_K8_NB, PCI_CLASS_BRIDGE_HOST, PCI_ANY_ID, 0, fix_hypertransport_config }, + { PCI_VENDOR_ID_ATI, PCI_DEVICE_ID_ATI_IXP400_SMBUS, + PCI_CLASS_SERIAL_SMBUS, PCI_ANY_ID, 0, ati_bugs }, {} }; -- cgit v1.2.3 From 8d89adf44cf750e49691ba5b744b2ad77a05e997 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 7 Oct 2008 06:47:52 +0200 Subject: x86: SB450: deprioritize DMI quirks This PCI ID based quick should be a full solution for the IRQ0 override related slowdown problem on SB450 based systems: 33fb0e4: x86: SB450: skip IRQ0 override if it is not routed to INT2 of IOAPIC Emit a warning in those cases where the DMI quirk triggers but the PCI ID based quirk didnt. If this warning does not trigger then we can phase out the DMI quirks. Signed-off-by: Ingo Molnar --- arch/x86/kernel/acpi/boot.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index c102af85df9..096102d9b24 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -1421,8 +1421,16 @@ static int __init force_acpi_ht(const struct dmi_system_id *d) */ static int __init dmi_ignore_irq0_timer_override(const struct dmi_system_id *d) { - pr_notice("%s detected: Ignoring BIOS IRQ0 pin2 override\n", d->ident); - acpi_skip_timer_override = 1; + /* + * The ati_ixp4x0_rev() early PCI quirk should have set + * the acpi_skip_timer_override flag already: + */ + if (!acpi_skip_timer_override) { + WARN(1, KERN_ERR "ati_ixp4x0 quirk not complete.\n"); + pr_notice("%s detected: Ignoring BIOS IRQ0 pin2 override\n", + d->ident); + acpi_skip_timer_override = 1; + } return 0; } -- cgit v1.2.3 From f364eadab59b316ea0bd9f9bc01af0ad89065569 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Tue, 7 Oct 2008 14:04:27 -0700 Subject: x86: xsave: fix error condition in save_i387_xstate() Actually return failure on error. Signed-off-by: Suresh Siddha Signed-off-by: H. Peter Anvin --- arch/x86/kernel/xsave.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/xsave.c b/arch/x86/kernel/xsave.c index ed5274a5bb0..448fde96963 100644 --- a/arch/x86/kernel/xsave.c +++ b/arch/x86/kernel/xsave.c @@ -121,6 +121,8 @@ int save_i387_xstate(void __user *buf) err |= __put_user(FP_XSTATE_MAGIC2, (__u32 __user *) (buf + sig_xstate_size - FP_XSTATE_MAGIC2_SIZE)); + if (err) + return err; } return 1; -- cgit v1.2.3 From 04944b793e18ece23f63c0252646b310c1845940 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Tue, 7 Oct 2008 14:04:28 -0700 Subject: x86: xsave: set FP, SSE bits in the xsave header in the user sigcontext If a processor implementation discern that a processor state component is in its initialized state, it may modify the corresponding bit in the xsave header.xstate_bv as '0'. State in the memory layout setup by 'xsave' will be consistent with the bit values in the header. During signal handling, legacy applications may change the FP/SSE bits in the sigcontext memory layout without touching the FP/SSE header bits in the xsave header. So always set FP/SSE bits in the xsave header while saving the sigcontext state to the user space. During signal return, this will enable the kernel to capture any changes to the FP/SSE bits by the legacy applications which don't touch xsave headers. xsave aware apps can change the xstate_bv in the xsave header aswell as change any contents in the memory layout. xrestor as part of sigreturn will capture all the changes. Signed-off-by: Suresh Siddha Signed-off-by: H. Peter Anvin --- arch/x86/kernel/i387.c | 14 ++++++++++++++ arch/x86/kernel/xsave.c | 25 +++++++++++++++++++++++++ 2 files changed, 39 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c index 45723f1fe19..1f20608d4ca 100644 --- a/arch/x86/kernel/i387.c +++ b/arch/x86/kernel/i387.c @@ -468,9 +468,23 @@ static int save_i387_fxsave(struct _fpstate_ia32 __user *buf) static int save_i387_xsave(void __user *buf) { + struct task_struct *tsk = current; struct _fpstate_ia32 __user *fx = buf; int err = 0; + /* + * For legacy compatible, we always set FP/SSE bits in the bit + * vector while saving the state to the user context. + * This will enable us capturing any changes(during sigreturn) to + * the FP/SSE bits by the legacy applications which don't touch + * xstate_bv in the xsave header. + * + * xsave aware applications can change the xstate_bv in the xsave + * header as well as change any contents in the memory layout. + * xrestore as part of sigreturn will capture all the changes. + */ + tsk->thread.xstate->xsave.xsave_hdr.xstate_bv |= XSTATE_FPSSE; + if (save_i387_fxsave(fx) < 0) return -1; diff --git a/arch/x86/kernel/xsave.c b/arch/x86/kernel/xsave.c index 448fde96963..2f98323716d 100644 --- a/arch/x86/kernel/xsave.c +++ b/arch/x86/kernel/xsave.c @@ -114,6 +114,8 @@ int save_i387_xstate(void __user *buf) if (task_thread_info(tsk)->status & TS_XSAVE) { struct _fpstate __user *fx = buf; + struct _xstate __user *x = buf; + u64 xstate_bv; err = __copy_to_user(&fx->sw_reserved, &fx_sw_reserved, sizeof(struct _fpx_sw_bytes)); @@ -121,6 +123,29 @@ int save_i387_xstate(void __user *buf) err |= __put_user(FP_XSTATE_MAGIC2, (__u32 __user *) (buf + sig_xstate_size - FP_XSTATE_MAGIC2_SIZE)); + + /* + * Read the xstate_bv which we copied (directly from the cpu or + * from the state in task struct) to the user buffers and + * set the FP/SSE bits. + */ + err |= __get_user(xstate_bv, &x->xstate_hdr.xstate_bv); + + /* + * For legacy compatible, we always set FP/SSE bits in the bit + * vector while saving the state to the user context. This will + * enable us capturing any changes(during sigreturn) to + * the FP/SSE bits by the legacy applications which don't touch + * xstate_bv in the xsave header. + * + * xsave aware apps can change the xstate_bv in the xsave + * header as well as change any contents in the memory layout. + * xrestore as part of sigreturn will capture all the changes. + */ + xstate_bv |= XSTATE_FPSSE; + + err |= __put_user(xstate_bv, &x->xstate_hdr.xstate_bv); + if (err) return err; } -- cgit v1.2.3 From 8d5922572038bae9f7b16fcb974eee806727b44c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?N=C3=A9meth=20M=C3=A1rton?= Date: Thu, 9 Oct 2008 14:59:17 +0200 Subject: [CPUFREQ] correct broken links and email addresses MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace the no longer working links and email address in the documentation and in source code. Signed-off-by: Márton Németh Signed-off-by: Dave Jones --- arch/x86/kernel/cpu/cpufreq/p4-clockmod.c | 2 +- arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c b/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c index f1685fb91fb..b8e05ee4f73 100644 --- a/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c +++ b/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c @@ -171,7 +171,7 @@ static unsigned int cpufreq_p4_get_frequency(struct cpuinfo_x86 *c) } if (c->x86 != 0xF) { - printk(KERN_WARNING PFX "Unknown p4-clockmod-capable CPU. Please send an e-mail to \n"); + printk(KERN_WARNING PFX "Unknown p4-clockmod-capable CPU. Please send an e-mail to \n"); return 0; } diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c b/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c index 15e13c01cc3..3b5f06423e7 100644 --- a/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c +++ b/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c @@ -26,7 +26,7 @@ #include #define PFX "speedstep-centrino: " -#define MAINTAINER "cpufreq@lists.linux.org.uk" +#define MAINTAINER "cpufreq@vger.kernel.org" #define dprintk(msg...) \ cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "speedstep-centrino", msg) -- cgit v1.2.3 From 18c6faa9624bf5d9d7da3906a8a1b909dba5899b Mon Sep 17 00:00:00 2001 From: Paolo Ciarrocchi Date: Mon, 28 Jul 2008 13:00:49 +0200 Subject: [CPUFREQ] Coding style fixes to arch/x86/kernel/cpu/cpufreq/elanfreq.c Before: total: 15 errors, 10 warnings, 308 lines checked After: total: 0 errors, 4 warnings, 308 lines checked paolo@paolo-desktop:~/linux.trees.git$ md5sum /tmp/elafreq.o.* add1d36c2f077c5aab7682e8642a9f34 /tmp/elafreq.o.after add1d36c2f077c5aab7682e8642a9f34 /tmp/elafreq.o.before paolo@paolo-desktop:~/linux.trees.git$ size /tmp/elafreq.o.* text data bss dec hex filename 934 270 4 1208 4b8 /tmp/elafreq.o.after 934 270 4 1208 4b8 /tmp/elafreq.o.before Signed-off-by: Paolo Ciarrocchi Signed-off-by: Dave Jones --- arch/x86/kernel/cpu/cpufreq/elanfreq.c | 42 +++++++++++++++++----------------- 1 file changed, 21 insertions(+), 21 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/cpufreq/elanfreq.c b/arch/x86/kernel/cpu/cpufreq/elanfreq.c index e4a4bf870e9..fe613c93b36 100644 --- a/arch/x86/kernel/cpu/cpufreq/elanfreq.c +++ b/arch/x86/kernel/cpu/cpufreq/elanfreq.c @@ -25,8 +25,8 @@ #include #include -#include -#include +#include +#include #define REG_CSCIR 0x22 /* Chip Setup and Control Index Register */ #define REG_CSCDR 0x23 /* Chip Setup and Control Data Register */ @@ -82,7 +82,7 @@ static unsigned int elanfreq_get_cpu_frequency(unsigned int cpu) u8 clockspeed_reg; /* Clock Speed Register */ local_irq_disable(); - outb_p(0x80,REG_CSCIR); + outb_p(0x80, REG_CSCIR); clockspeed_reg = inb_p(REG_CSCDR); local_irq_enable(); @@ -98,10 +98,10 @@ static unsigned int elanfreq_get_cpu_frequency(unsigned int cpu) } /* 33 MHz is not 32 MHz... */ - if ((clockspeed_reg & 0xE0)==0xA0) + if ((clockspeed_reg & 0xE0) == 0xA0) return 33000; - return ((1<<((clockspeed_reg & 0xE0) >> 5)) * 1000); + return (1<<((clockspeed_reg & 0xE0) >> 5)) * 1000; } @@ -117,7 +117,7 @@ static unsigned int elanfreq_get_cpu_frequency(unsigned int cpu) * There is no return value. */ -static void elanfreq_set_cpu_state (unsigned int state) +static void elanfreq_set_cpu_state(unsigned int state) { struct cpufreq_freqs freqs; @@ -144,20 +144,20 @@ static void elanfreq_set_cpu_state (unsigned int state) */ local_irq_disable(); - outb_p(0x40,REG_CSCIR); /* Disable hyperspeed mode */ - outb_p(0x00,REG_CSCDR); + outb_p(0x40, REG_CSCIR); /* Disable hyperspeed mode */ + outb_p(0x00, REG_CSCDR); local_irq_enable(); /* wait till internal pipelines and */ udelay(1000); /* buffers have cleaned up */ local_irq_disable(); /* now, set the CPU clock speed register (0x80) */ - outb_p(0x80,REG_CSCIR); - outb_p(elan_multiplier[state].val80h,REG_CSCDR); + outb_p(0x80, REG_CSCIR); + outb_p(elan_multiplier[state].val80h, REG_CSCDR); /* now, the hyperspeed bit in PMU Force Mode Register (0x40) */ - outb_p(0x40,REG_CSCIR); - outb_p(elan_multiplier[state].val40h,REG_CSCDR); + outb_p(0x40, REG_CSCIR); + outb_p(elan_multiplier[state].val40h, REG_CSCDR); udelay(10000); local_irq_enable(); @@ -173,12 +173,12 @@ static void elanfreq_set_cpu_state (unsigned int state) * for the hardware supported by the driver. */ -static int elanfreq_verify (struct cpufreq_policy *policy) +static int elanfreq_verify(struct cpufreq_policy *policy) { return cpufreq_frequency_table_verify(policy, &elanfreq_table[0]); } -static int elanfreq_target (struct cpufreq_policy *policy, +static int elanfreq_target(struct cpufreq_policy *policy, unsigned int target_freq, unsigned int relation) { @@ -205,7 +205,7 @@ static int elanfreq_cpu_init(struct cpufreq_policy *policy) /* capability check */ if ((c->x86_vendor != X86_VENDOR_AMD) || - (c->x86 != 4) || (c->x86_model!=10)) + (c->x86 != 4) || (c->x86_model != 10)) return -ENODEV; /* max freq */ @@ -213,7 +213,7 @@ static int elanfreq_cpu_init(struct cpufreq_policy *policy) max_freq = elanfreq_get_cpu_frequency(0); /* table init */ - for (i=0; (elanfreq_table[i].frequency != CPUFREQ_TABLE_END); i++) { + for (i = 0; (elanfreq_table[i].frequency != CPUFREQ_TABLE_END); i++) { if (elanfreq_table[i].frequency > max_freq) elanfreq_table[i].frequency = CPUFREQ_ENTRY_INVALID; } @@ -224,7 +224,7 @@ static int elanfreq_cpu_init(struct cpufreq_policy *policy) result = cpufreq_frequency_table_cpuinfo(policy, elanfreq_table); if (result) - return (result); + return result; cpufreq_frequency_table_get_attr(elanfreq_table, policy->cpu); return 0; @@ -260,7 +260,7 @@ __setup("elanfreq=", elanfreq_setup); #endif -static struct freq_attr* elanfreq_attr[] = { +static struct freq_attr *elanfreq_attr[] = { &cpufreq_freq_attr_scaling_available_freqs, NULL, }; @@ -284,9 +284,9 @@ static int __init elanfreq_init(void) /* Test if we have the right hardware */ if ((c->x86_vendor != X86_VENDOR_AMD) || - (c->x86 != 4) || (c->x86_model!=10)) { + (c->x86 != 4) || (c->x86_model != 10)) { printk(KERN_INFO "elanfreq: error: no Elan processor found!\n"); - return -ENODEV; + return -ENODEV; } return cpufreq_register_driver(&elanfreq_driver); } @@ -298,7 +298,7 @@ static void __exit elanfreq_exit(void) } -module_param (max_freq, int, 0444); +module_param(max_freq, int, 0444); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Robert Schwebel , Sven Geggus "); -- cgit v1.2.3 From 8d2d2051e50c4ca0207a42b243369e22f7d90a5c Mon Sep 17 00:00:00 2001 From: Paolo Ciarrocchi Date: Mon, 28 Jul 2008 21:08:16 +0200 Subject: [CPUFREQ] Coding style fixes to arch/x86/kernel/cpu/cpufreq/powernow-k6.c Before: total: 11 errors, 15 warnings, 255 lines checked After: total: 0 errors, 6 warnings, 254 lines checked paolo@paolo-desktop:~/linux.trees.git$ md5sum /tmp/powernow-k6.o.* 476932f5e1ffe365db9d1dfb3f860369 /tmp/powernow-k6.o.after 476932f5e1ffe365db9d1dfb3f860369 /tmp/powernow-k6.o.before Signed-off-by: Paolo Ciarrocchi Signed-off-by: Dave Jones --- arch/x86/kernel/cpu/cpufreq/powernow-k6.c | 41 +++++++++++++++---------------- 1 file changed, 20 insertions(+), 21 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k6.c b/arch/x86/kernel/cpu/cpufreq/powernow-k6.c index eb9b62b0830..b5ced806a31 100644 --- a/arch/x86/kernel/cpu/cpufreq/powernow-k6.c +++ b/arch/x86/kernel/cpu/cpufreq/powernow-k6.c @@ -15,12 +15,11 @@ #include #include -#include -#include +#include +#include - -#define POWERNOW_IOPORT 0xfff0 /* it doesn't matter where, as long - as it is unused */ +#define POWERNOW_IOPORT 0xfff0 /* it doesn't matter where, as long + as it is unused */ static unsigned int busfreq; /* FSB, in 10 kHz */ static unsigned int max_multiplier; @@ -53,7 +52,7 @@ static int powernow_k6_get_cpu_multiplier(void) msrval = POWERNOW_IOPORT + 0x1; wrmsr(MSR_K6_EPMR, msrval, 0); /* enable the PowerNow port */ - invalue=inl(POWERNOW_IOPORT + 0x8); + invalue = inl(POWERNOW_IOPORT + 0x8); msrval = POWERNOW_IOPORT + 0x0; wrmsr(MSR_K6_EPMR, msrval, 0); /* disable it again */ @@ -67,9 +66,9 @@ static int powernow_k6_get_cpu_multiplier(void) * * Tries to change the PowerNow! multiplier */ -static void powernow_k6_set_state (unsigned int best_i) +static void powernow_k6_set_state(unsigned int best_i) { - unsigned long outvalue=0, invalue=0; + unsigned long outvalue = 0, invalue = 0; unsigned long msrval; struct cpufreq_freqs freqs; @@ -90,10 +89,10 @@ static void powernow_k6_set_state (unsigned int best_i) msrval = POWERNOW_IOPORT + 0x1; wrmsr(MSR_K6_EPMR, msrval, 0); /* enable the PowerNow port */ - invalue=inl(POWERNOW_IOPORT + 0x8); + invalue = inl(POWERNOW_IOPORT + 0x8); invalue = invalue & 0xf; outvalue = outvalue | invalue; - outl(outvalue ,(POWERNOW_IOPORT + 0x8)); + outl(outvalue , (POWERNOW_IOPORT + 0x8)); msrval = POWERNOW_IOPORT + 0x0; wrmsr(MSR_K6_EPMR, msrval, 0); /* disable it again */ @@ -124,7 +123,7 @@ static int powernow_k6_verify(struct cpufreq_policy *policy) * * sets a new CPUFreq policy */ -static int powernow_k6_target (struct cpufreq_policy *policy, +static int powernow_k6_target(struct cpufreq_policy *policy, unsigned int target_freq, unsigned int relation) { @@ -152,7 +151,7 @@ static int powernow_k6_cpu_init(struct cpufreq_policy *policy) busfreq = cpu_khz / max_multiplier; /* table init */ - for (i=0; (clock_ratio[i].frequency != CPUFREQ_TABLE_END); i++) { + for (i = 0; (clock_ratio[i].frequency != CPUFREQ_TABLE_END); i++) { if (clock_ratio[i].index > max_multiplier) clock_ratio[i].frequency = CPUFREQ_ENTRY_INVALID; else @@ -165,7 +164,7 @@ static int powernow_k6_cpu_init(struct cpufreq_policy *policy) result = cpufreq_frequency_table_cpuinfo(policy, clock_ratio); if (result) - return (result); + return result; cpufreq_frequency_table_get_attr(clock_ratio, policy->cpu); @@ -176,8 +175,8 @@ static int powernow_k6_cpu_init(struct cpufreq_policy *policy) static int powernow_k6_cpu_exit(struct cpufreq_policy *policy) { unsigned int i; - for (i=0; i<8; i++) { - if (i==max_multiplier) + for (i = 0; i < 8; i++) { + if (i == max_multiplier) powernow_k6_set_state(i); } cpufreq_frequency_table_put_attr(policy->cpu); @@ -189,7 +188,7 @@ static unsigned int powernow_k6_get(unsigned int cpu) return busfreq * powernow_k6_get_cpu_multiplier(); } -static struct freq_attr* powernow_k6_attr[] = { +static struct freq_attr *powernow_k6_attr[] = { &cpufreq_freq_attr_scaling_available_freqs, NULL, }; @@ -227,7 +226,7 @@ static int __init powernow_k6_init(void) } if (cpufreq_register_driver(&powernow_k6_driver)) { - release_region (POWERNOW_IOPORT, 16); + release_region(POWERNOW_IOPORT, 16); return -EINVAL; } @@ -243,13 +242,13 @@ static int __init powernow_k6_init(void) static void __exit powernow_k6_exit(void) { cpufreq_unregister_driver(&powernow_k6_driver); - release_region (POWERNOW_IOPORT, 16); + release_region(POWERNOW_IOPORT, 16); } -MODULE_AUTHOR ("Arjan van de Ven , Dave Jones , Dominik Brodowski "); -MODULE_DESCRIPTION ("PowerNow! driver for AMD K6-2+ / K6-3+ processors."); -MODULE_LICENSE ("GPL"); +MODULE_AUTHOR("Arjan van de Ven , Dave Jones , Dominik Brodowski "); +MODULE_DESCRIPTION("PowerNow! driver for AMD K6-2+ / K6-3+ processors."); +MODULE_LICENSE("GPL"); module_init(powernow_k6_init); module_exit(powernow_k6_exit); -- cgit v1.2.3 From 847aef6ffd85787b62395b64719f8f7c5975bf1b Mon Sep 17 00:00:00 2001 From: Akinobu Mita Date: Mon, 14 Jul 2008 11:59:44 +0900 Subject: [CPUFREQ] acpi-cpufreq: add error handling for cpufreq_register_driver() error add error handling for cpufreq_register_driver() error Signed-off-by: Akinobu Mita Cc: cpufreq@lists.linux.org.uk Signed-off-by: Dave Jones --- arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c index dd097b83583..1def5b06fa4 100644 --- a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c +++ b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c @@ -785,7 +785,11 @@ static int __init acpi_cpufreq_init(void) if (ret) return ret; - return cpufreq_register_driver(&acpi_cpufreq_driver); + ret = cpufreq_register_driver(&acpi_cpufreq_driver); + if (ret) + free_percpu(acpi_perf_data); + + return ret; } static void __exit acpi_cpufreq_exit(void) @@ -795,8 +799,6 @@ static void __exit acpi_cpufreq_exit(void) cpufreq_unregister_driver(&acpi_cpufreq_driver); free_percpu(acpi_perf_data); - - return; } module_param(acpi_pstate_strict, uint, 0644); -- cgit v1.2.3 From bf0b90e357c883e8efd72954432efe652de74c76 Mon Sep 17 00:00:00 2001 From: "venkatesh.pallipadi@intel.com" Date: Mon, 4 Aug 2008 11:59:07 -0700 Subject: [CPUFREQ][1/6] cpufreq: Add cpu number parameter to __cpufreq_driver_getavg() Add a cpu parameter to __cpufreq_driver_getavg(). This is needed for software cpufreq coordination where policy->cpu may not be same as the CPU on which we want to getavg frequency. A follow-on patch will use this parameter to getavg freq from all cpus in policy->cpus. Change since last patch. Fix the offline/online and suspend/resume oops reported by Youquan Song Signed-off-by: Venkatesh Pallipadi Signed-off-by: Dave Jones --- arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c index 1def5b06fa4..c24c4a487b7 100644 --- a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c +++ b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c @@ -256,7 +256,8 @@ static u32 get_cur_val(const cpumask_t *mask) * Only IA32_APERF/IA32_MPERF ratio is architecturally defined and * no meaning should be associated with absolute values of these MSRs. */ -static unsigned int get_measured_perf(unsigned int cpu) +static unsigned int get_measured_perf(struct cpufreq_policy *policy, + unsigned int cpu) { union { struct { @@ -326,7 +327,7 @@ static unsigned int get_measured_perf(unsigned int cpu) #endif - retval = per_cpu(drv_data, cpu)->max_freq * perf_percent / 100; + retval = per_cpu(drv_data, policy->cpu)->max_freq * perf_percent / 100; put_cpu(); set_cpus_allowed_ptr(current, &saved_mask); -- cgit v1.2.3 From 43603c8df97f246e8be7b9cc92a8f968a85108bd Mon Sep 17 00:00:00 2001 From: Hans Schou Date: Thu, 9 Oct 2008 20:47:24 +0200 Subject: x86, debug: print more information about unknown CPUs Write the name of the unknown vendor_id to output instead of just "unknown". Tag changed to 'vendor_id' as used in /proc/cpuinfo Signed-off-by: Hans Schou Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/common.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index cef3519b3bb..84c4040efa8 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -406,7 +406,7 @@ static void __cpuinit get_cpu_vendor(struct cpuinfo_x86 *c) if (!printed) { printed++; - printk(KERN_ERR "CPU: Vendor unknown, using generic init.\n"); + printk(KERN_ERR "CPU: vendor_id '%s' unknown, using generic init.\n", v); printk(KERN_ERR "CPU: Your system may be unstable.\n"); } -- cgit v1.2.3 From b2bc27314664c4d1a2f02e6f4cd0c32e4681d61e Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Tue, 23 Sep 2008 14:00:36 -0700 Subject: x86, cpa: rename PTE attribute macros for kernel direct mapping in early boot Signed-off-by: Suresh Siddha Cc: Suresh Siddha Cc: arjan@linux.intel.com Cc: venkatesh.pallipadi@intel.com Cc: jeremy@goop.org Signed-off-by: Ingo Molnar --- arch/x86/kernel/head_32.S | 34 +++++++++++++++------------------- arch/x86/kernel/head_64.S | 4 ++-- 2 files changed, 17 insertions(+), 21 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S index a7010c3a377..e835b4eea70 100644 --- a/arch/x86/kernel/head_32.S +++ b/arch/x86/kernel/head_32.S @@ -172,10 +172,6 @@ num_subarch_entries = (. - subarch_entries) / 4 * * Note that the stack is not yet set up! */ -#define PTE_ATTR 0x007 /* PRESENT+RW+USER */ -#define PDE_ATTR 0x067 /* PRESENT+RW+USER+DIRTY+ACCESSED */ -#define PGD_ATTR 0x001 /* PRESENT (no other attributes) */ - default_entry: #ifdef CONFIG_X86_PAE @@ -196,9 +192,9 @@ default_entry: movl $pa(pg0), %edi movl %edi, pa(init_pg_tables_start) movl $pa(swapper_pg_pmd), %edx - movl $PTE_ATTR, %eax + movl $PTE_IDENT_ATTR, %eax 10: - leal PDE_ATTR(%edi),%ecx /* Create PMD entry */ + leal PDE_IDENT_ATTR(%edi),%ecx /* Create PMD entry */ movl %ecx,(%edx) /* Store PMD entry */ /* Upper half already zero */ addl $8,%edx @@ -215,7 +211,7 @@ default_entry: * End condition: we must map up to and including INIT_MAP_BEYOND_END * bytes beyond the end of our own page tables. */ - leal (INIT_MAP_BEYOND_END+PTE_ATTR)(%edi),%ebp + leal (INIT_MAP_BEYOND_END+PTE_IDENT_ATTR)(%edi),%ebp cmpl %ebp,%eax jb 10b 1: @@ -224,7 +220,7 @@ default_entry: movl %eax, pa(max_pfn_mapped) /* Do early initialization of the fixmap area */ - movl $pa(swapper_pg_fixmap)+PDE_ATTR,%eax + movl $pa(swapper_pg_fixmap)+PDE_IDENT_ATTR,%eax movl %eax,pa(swapper_pg_pmd+0x1000*KPMDS-8) #else /* Not PAE */ @@ -233,9 +229,9 @@ page_pde_offset = (__PAGE_OFFSET >> 20); movl $pa(pg0), %edi movl %edi, pa(init_pg_tables_start) movl $pa(swapper_pg_dir), %edx - movl $PTE_ATTR, %eax + movl $PTE_IDENT_ATTR, %eax 10: - leal PDE_ATTR(%edi),%ecx /* Create PDE entry */ + leal PDE_IDENT_ATTR(%edi),%ecx /* Create PDE entry */ movl %ecx,(%edx) /* Store identity PDE entry */ movl %ecx,page_pde_offset(%edx) /* Store kernel PDE entry */ addl $4,%edx @@ -249,7 +245,7 @@ page_pde_offset = (__PAGE_OFFSET >> 20); * bytes beyond the end of our own page tables; the +0x007 is * the attribute bits */ - leal (INIT_MAP_BEYOND_END+PTE_ATTR)(%edi),%ebp + leal (INIT_MAP_BEYOND_END+PTE_IDENT_ATTR)(%edi),%ebp cmpl %ebp,%eax jb 10b movl %edi,pa(init_pg_tables_end) @@ -257,7 +253,7 @@ page_pde_offset = (__PAGE_OFFSET >> 20); movl %eax, pa(max_pfn_mapped) /* Do early initialization of the fixmap area */ - movl $pa(swapper_pg_fixmap)+PDE_ATTR,%eax + movl $pa(swapper_pg_fixmap)+PDE_IDENT_ATTR,%eax movl %eax,pa(swapper_pg_dir+0xffc) #endif jmp 3f @@ -634,19 +630,19 @@ ENTRY(empty_zero_page) /* Page-aligned for the benefit of paravirt? */ .align PAGE_SIZE_asm ENTRY(swapper_pg_dir) - .long pa(swapper_pg_pmd+PGD_ATTR),0 /* low identity map */ + .long pa(swapper_pg_pmd+PGD_IDENT_ATTR),0 /* low identity map */ # if KPMDS == 3 - .long pa(swapper_pg_pmd+PGD_ATTR),0 - .long pa(swapper_pg_pmd+PGD_ATTR+0x1000),0 - .long pa(swapper_pg_pmd+PGD_ATTR+0x2000),0 + .long pa(swapper_pg_pmd+PGD_IDENT_ATTR),0 + .long pa(swapper_pg_pmd+PGD_IDENT_ATTR+0x1000),0 + .long pa(swapper_pg_pmd+PGD_IDENT_ATTR+0x2000),0 # elif KPMDS == 2 .long 0,0 - .long pa(swapper_pg_pmd+PGD_ATTR),0 - .long pa(swapper_pg_pmd+PGD_ATTR+0x1000),0 + .long pa(swapper_pg_pmd+PGD_IDENT_ATTR),0 + .long pa(swapper_pg_pmd+PGD_IDENT_ATTR+0x1000),0 # elif KPMDS == 1 .long 0,0 .long 0,0 - .long pa(swapper_pg_pmd+PGD_ATTR),0 + .long pa(swapper_pg_pmd+PGD_IDENT_ATTR),0 # else # error "Kernel PMDs should be 1, 2 or 3" # endif diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index db3280afe88..26cfdc1d7c7 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S @@ -110,7 +110,7 @@ startup_64: movq %rdi, %rax shrq $PMD_SHIFT, %rax andq $(PTRS_PER_PMD - 1), %rax - leaq __PAGE_KERNEL_LARGE_EXEC(%rdi), %rdx + leaq __PAGE_KERNEL_IDENT_LARGE_EXEC(%rdi), %rdx leaq level2_spare_pgt(%rip), %rbx movq %rdx, 0(%rbx, %rax, 8) ident_complete: @@ -374,7 +374,7 @@ NEXT_PAGE(level2_ident_pgt) /* Since I easily can, map the first 1G. * Don't set NX because code runs from these pages. */ - PMDS(0, __PAGE_KERNEL_LARGE_EXEC, PTRS_PER_PMD) + PMDS(0, __PAGE_KERNEL_IDENT_LARGE_EXEC, PTRS_PER_PMD) NEXT_PAGE(level2_kernel_pgt) /* -- cgit v1.2.3 From e1e23bb0513520035ec934fa3483507cb6648b7c Mon Sep 17 00:00:00 2001 From: David Rientjes Date: Tue, 7 Oct 2008 14:15:11 -0700 Subject: x86: avoid dereferencing beyond stack + THREAD_SIZE It's possible for get_wchan() to dereference past task->stack + THREAD_SIZE while iterating through instruction pointers if fp equals the upper boundary, causing a kernel panic. Signed-off-by: David Rientjes Signed-off-by: Ingo Molnar --- arch/x86/kernel/process_64.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 2a8ccb9238b..b6b508ea711 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -754,12 +754,12 @@ unsigned long get_wchan(struct task_struct *p) if (!p || p == current || p->state == TASK_RUNNING) return 0; stack = (unsigned long)task_stack_page(p); - if (p->thread.sp < stack || p->thread.sp > stack+THREAD_SIZE) + if (p->thread.sp < stack || p->thread.sp >= stack+THREAD_SIZE) return 0; fp = *(u64 *)(p->thread.sp); do { if (fp < (unsigned long)stack || - fp > (unsigned long)stack+THREAD_SIZE) + fp >= (unsigned long)stack+THREAD_SIZE) return 0; ip = *(u64 *)(fp+8); if (!in_sched_functions(ip)) -- cgit v1.2.3 From cb58ffc3889f0545628f138f849e759a331b8ddc Mon Sep 17 00:00:00 2001 From: Petr Vandrovec Date: Sun, 12 Oct 2008 10:51:03 +0200 Subject: x86: fix early panic on amd64 due to typo in supported CPU section Do not crash when enumerating supported CPU architectures SECURITY_INIT somehow ended up in x86_cpu_dev.init section. That caused printk in code which prints supported architectures to hit #GP due to non-canonical address being used. Signed-off-by: Petr Vandrovec Cc: thomas.petazzoni@free-electrons.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/vmlinux_64.lds.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/vmlinux_64.lds.S b/arch/x86/kernel/vmlinux_64.lds.S index 201e81a91a9..46e05447405 100644 --- a/arch/x86/kernel/vmlinux_64.lds.S +++ b/arch/x86/kernel/vmlinux_64.lds.S @@ -172,8 +172,8 @@ SECTIONS .x86_cpu_dev.init : AT(ADDR(.x86_cpu_dev.init) - LOAD_OFFSET) { *(.x86_cpu_dev.init) } - SECURITY_INIT __x86_cpu_dev_end = .; + SECURITY_INIT . = ALIGN(8); .parainstructions : AT(ADDR(.parainstructions) - LOAD_OFFSET) { -- cgit v1.2.3 From 325af5fb1418c79953db0954556de048e061d8b6 Mon Sep 17 00:00:00 2001 From: Roland McGrath Date: Fri, 8 Aug 2008 15:58:39 -0700 Subject: x86: ioperm user_regset This adds a user_regset type for the x86 io permissions bitmap. This makes it appear in core dumps (when ioperm has been used). It will also make it visible to debuggers in the future. Signed-off-by: Roland McGrath Signed-off-by: H. Peter Anvin [conflict resolutions: Signed-off-by: Ingo Molnar ] --- arch/x86/kernel/ptrace.c | 37 +++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index e375b658efc..4e1ef66c2ea 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -40,7 +40,9 @@ enum x86_regset { REGSET_GENERAL, REGSET_FP, REGSET_XFP, + REGSET_IOPERM64 = REGSET_XFP, REGSET_TLS, + REGSET_IOPERM32, }; /* @@ -555,6 +557,29 @@ static int ptrace_set_debugreg(struct task_struct *child, return 0; } +/* + * These access the current or another (stopped) task's io permission + * bitmap for debugging or core dump. + */ +static int ioperm_active(struct task_struct *target, + const struct user_regset *regset) +{ + return target->thread.io_bitmap_max / regset->size; +} + +static int ioperm_get(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + void *kbuf, void __user *ubuf) +{ + if (!target->thread.io_bitmap_ptr) + return -ENXIO; + + return user_regset_copyout(&pos, &count, &kbuf, &ubuf, + target->thread.io_bitmap_ptr, + 0, IO_BITMAP_BYTES); +} + #ifdef CONFIG_X86_PTRACE_BTS /* * The configuration for a particular BTS hardware implementation. @@ -1385,6 +1410,12 @@ static const struct user_regset x86_64_regsets[] = { .size = sizeof(long), .align = sizeof(long), .active = xfpregs_active, .get = xfpregs_get, .set = xfpregs_set }, + [REGSET_IOPERM64] = { + .core_note_type = NT_386_IOPERM, + .n = IO_BITMAP_LONGS, + .size = sizeof(long), .align = sizeof(long), + .active = ioperm_active, .get = ioperm_get + }, }; static const struct user_regset_view user_x86_64_view = { @@ -1431,6 +1462,12 @@ static const struct user_regset x86_32_regsets[] = { .active = regset_tls_active, .get = regset_tls_get, .set = regset_tls_set }, + [REGSET_IOPERM32] = { + .core_note_type = NT_386_IOPERM, + .n = IO_BITMAP_BYTES / sizeof(u32), + .size = sizeof(u32), .align = sizeof(u32), + .active = ioperm_active, .get = ioperm_get + }, }; static const struct user_regset_view user_x86_32_view = { -- cgit v1.2.3 From 9f482807a6bd7e2aa1ed0d8cfc48463ec4ca3568 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 18 Aug 2008 12:59:32 +0200 Subject: x86, fpu: check __clear_user() return value MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit fix warning: arch/x86/kernel/xsave.c: In function ‘save_i387_xstate’: arch/x86/kernel/xsave.c:98: warning: ignoring return value of ‘__clear_user’, declared with attribute warn_unused_result check the return value and act on it. We should not be ignoring faults at this point. Signed-off-by: Ingo Molnar --- arch/x86/kernel/xsave.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/xsave.c b/arch/x86/kernel/xsave.c index 2f98323716d..9abac8a9d82 100644 --- a/arch/x86/kernel/xsave.c +++ b/arch/x86/kernel/xsave.c @@ -95,7 +95,9 @@ int save_i387_xstate(void __user *buf) * Start with clearing the user buffer. This will present a * clean context for the bytes not touched by the fxsave/xsave. */ - __clear_user(buf, sig_xstate_size); + err = __clear_user(buf, sig_xstate_size); + if (err) + return err; if (task_thread_info(tsk)->status & TS_XSAVE) err = xsave_user(buf); -- cgit v1.2.3 From 45e96f26f257bd873017c6244a6cafd27f6f5439 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 27 Aug 2008 10:37:14 +0200 Subject: warnings: fix arch/x86/kernel/early_printk.c MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit fix warning: arch/x86/kernel/early_printk.c:993: warning: ‘enable_debug_console’ defined but not used Eliminate dead code. Signed-off-by: Ingo Molnar --- arch/x86/kernel/early_printk.c | 18 ------------------ 1 file changed, 18 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/early_printk.c b/arch/x86/kernel/early_printk.c index 02fc5b40c14..34ad997d383 100644 --- a/arch/x86/kernel/early_printk.c +++ b/arch/x86/kernel/early_printk.c @@ -989,22 +989,4 @@ static int __init setup_early_printk(char *buf) return 0; } -static void __init enable_debug_console(char *buf) -{ -#ifdef DBGP_DEBUG - struct console *old_early_console = NULL; - - if (early_console_initialized && early_console) { - old_early_console = early_console; - unregister_console(early_console); - early_console_initialized = 0; - } - - setup_early_printk(buf); - - if (early_console == old_early_console && old_early_console) - register_console(old_early_console); -#endif -} - early_param("earlyprintk", setup_early_printk); -- cgit v1.2.3 From d562353a4533c4671af683499191707c9a77c406 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sun, 12 Oct 2008 15:22:22 +0200 Subject: warnings: fix arch/x86/kernel/io_apic_64.c MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit fix: arch/x86/kernel/io_apic_64.c: In function ‘print_local_APIC’: arch/x86/kernel/io_apic_64.c:1284: warning: format ‘%08x’ expects type ‘unsigned int’, but argument 2 has type ‘long unsigned int’ arch/x86/kernel/io_apic_64.c:1285: warning: format ‘%08x’ expects type ‘unsigned int’, but argument 2 has type ‘long unsigned int’ We want to print the two halves of 'icr' at 32 bit width. Signed-off-by: Ingo Molnar --- arch/x86/kernel/io_apic_64.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/io_apic_64.c b/arch/x86/kernel/io_apic_64.c index a1bec2969c6..02063ae042f 100644 --- a/arch/x86/kernel/io_apic_64.c +++ b/arch/x86/kernel/io_apic_64.c @@ -1281,8 +1281,8 @@ __apicdebuginit(void) print_local_APIC(void *dummy) printk(KERN_DEBUG "... APIC ESR: %08x\n", v); icr = apic_icr_read(); - printk(KERN_DEBUG "... APIC ICR: %08x\n", icr); - printk(KERN_DEBUG "... APIC ICR2: %08x\n", icr >> 32); + printk(KERN_DEBUG "... APIC ICR: %08x\n", (u32)icr); + printk(KERN_DEBUG "... APIC ICR2: %08x\n", (u32)(icr >> 32)); v = apic_read(APIC_LVTT); printk(KERN_DEBUG "... APIC LVTT: %08x\n", v); -- cgit v1.2.3 From 8a66712ba0969aea5580b9e312badfc2490fc614 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sun, 12 Oct 2008 15:24:53 +0200 Subject: x86, amd-iommu: propagate PCI device enabling error MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit propagate an error in enabling the PCI device. Also eliminates this warning: arch/x86/kernel/amd_iommu_init.c: In function ‘init_iommu_one’: arch/x86/kernel/amd_iommu_init.c:726: warning: ignoring return value of ‘pci_enable_device’, declared with attribute warn_unused_result Signed-off-by: Ingo Molnar --- arch/x86/kernel/amd_iommu_init.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c index 148fcfe22f1..4cd8083c58b 100644 --- a/arch/x86/kernel/amd_iommu_init.c +++ b/arch/x86/kernel/amd_iommu_init.c @@ -723,9 +723,7 @@ static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h) init_iommu_from_acpi(iommu, h); init_iommu_devices(iommu); - pci_enable_device(iommu->dev); - - return 0; + return pci_enable_device(iommu->dev); } /* -- cgit v1.2.3 From 0cefa5b9b0a61b62442c5d0ca00a304c5896b6e9 Mon Sep 17 00:00:00 2001 From: Manfred Spraul Date: Sun, 7 Sep 2008 11:29:58 +0200 Subject: arch/x86/kernel/smpboot.c: Clarify when irq processing begins. Secondary cpus start with local interrupts disabled. start_secondary() first initializes the new cpu, then it enables the local interrupts. (although interrupts are enabled within smp_callin() as well). Right now, the local interrupts are enabled as a side effect of calling ipi_call_lock_irq(). The attached patch clarifies when local interrupts are enabled. Signed-off-by: Manfred Spraul Signed-off-by: Ingo Molnar --- arch/x86/kernel/smpboot.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 76b6f50978f..b700c9a1064 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -334,14 +334,17 @@ static void __cpuinit start_secondary(void *unused) * does not change while we are assigning vectors to cpus. Holding * this lock ensures we don't half assign or remove an irq from a cpu. */ - ipi_call_lock_irq(); + ipi_call_lock(); lock_vector_lock(); __setup_vector_irq(smp_processor_id()); cpu_set(smp_processor_id(), cpu_online_map); unlock_vector_lock(); - ipi_call_unlock_irq(); + ipi_call_unlock(); per_cpu(cpu_state, smp_processor_id()) = CPU_ONLINE; + /* enable local interrupts */ + local_irq_enable(); + setup_secondary_clock(); wmb(); -- cgit v1.2.3 From 762db4347060c5d23e9675846e02f7d95d7f944f Mon Sep 17 00:00:00 2001 From: Alexander van Heukelum Date: Tue, 9 Sep 2008 21:55:55 +0200 Subject: i386: remove kprobes' restore_interrupts in favour of conditional_sti x86_64 uses a helper function conditional_sti in traps_64.c which is equal to restore_interrupts in kprobes.h. The only user of restore_interrupts is in traps_32.c. Introduce conditional_sti for i386 and remove restore_interrupts. Signed-off-by: Alexander van Heukelum Signed-off-by: Ingo Molnar --- arch/x86/kernel/traps_32.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/traps_32.c b/arch/x86/kernel/traps_32.c index 0429c5de5ea..203b863ac2a 100644 --- a/arch/x86/kernel/traps_32.c +++ b/arch/x86/kernel/traps_32.c @@ -84,6 +84,12 @@ static unsigned int code_bytes = 64; static int ignore_nmis; static int die_counter; +static inline void conditional_sti(struct pt_regs *regs) +{ + if (regs->flags & X86_EFLAGS_IF) + local_irq_enable(); +} + void printk_address(unsigned long address, int reliable) { #ifdef CONFIG_KALLSYMS @@ -859,7 +865,7 @@ void __kprobes do_int3(struct pt_regs *regs, long error_code) * This is an interrupt gate, because kprobes wants interrupts * disabled. Normal trap handlers don't. */ - restore_interrupts(regs); + conditional_sti(regs); do_trap(3, SIGTRAP, "int3", 1, regs, error_code, NULL); } -- cgit v1.2.3 From 61aef7d24909b95f9eddaa78d78902fbea1d7059 Mon Sep 17 00:00:00 2001 From: Alexander van Heukelum Date: Tue, 9 Sep 2008 21:55:56 +0200 Subject: i386: prepare to convert exceptions to interrupts There is some macro magic in traps_32.c to construct standard exception dispatch functions. This patch renames the DO_ERROR- like macros to DO_TRAP, and introduces new DO_ERROR ones that conditionally reenable interrupts explicitly, like x86_64. No code changes. Signed-off-by: Alexander van Heukelum Signed-off-by: Ingo Molnar --- arch/x86/kernel/traps_32.c | 76 +++++++++++++++++++++++++++++++++++++++------- 1 file changed, 65 insertions(+), 11 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/traps_32.c b/arch/x86/kernel/traps_32.c index 203b863ac2a..fe6dd972068 100644 --- a/arch/x86/kernel/traps_32.c +++ b/arch/x86/kernel/traps_32.c @@ -528,6 +528,56 @@ vm86_trap: return; } +#define DO_TRAP(trapnr, signr, str, name) \ +void do_##name(struct pt_regs *regs, long error_code) \ +{ \ + trace_hardirqs_fixup(); \ + if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \ + == NOTIFY_STOP) \ + return; \ + do_trap(trapnr, signr, str, 0, regs, error_code, NULL); \ +} + +#define DO_TRAP_INFO(trapnr, signr, str, name, sicode, siaddr, irq) \ +void do_##name(struct pt_regs *regs, long error_code) \ +{ \ + siginfo_t info; \ + if (irq) \ + local_irq_enable(); \ + info.si_signo = signr; \ + info.si_errno = 0; \ + info.si_code = sicode; \ + info.si_addr = (void __user *)siaddr; \ + if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \ + == NOTIFY_STOP) \ + return; \ + do_trap(trapnr, signr, str, 0, regs, error_code, &info); \ +} + +#define DO_VM86_TRAP(trapnr, signr, str, name) \ +void do_##name(struct pt_regs *regs, long error_code) \ +{ \ + if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \ + == NOTIFY_STOP) \ + return; \ + do_trap(trapnr, signr, str, 1, regs, error_code, NULL); \ +} + +#define DO_VM86_TRAP_INFO(trapnr, signr, str, name, sicode, siaddr) \ +void do_##name(struct pt_regs *regs, long error_code) \ +{ \ + siginfo_t info; \ + info.si_signo = signr; \ + info.si_errno = 0; \ + info.si_code = sicode; \ + info.si_addr = (void __user *)siaddr; \ + trace_hardirqs_fixup(); \ + if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \ + == NOTIFY_STOP) \ + return; \ + do_trap(trapnr, signr, str, 1, regs, error_code, &info); \ +} + #define DO_ERROR(trapnr, signr, str, name) \ void do_##name(struct pt_regs *regs, long error_code) \ { \ @@ -535,6 +585,7 @@ void do_##name(struct pt_regs *regs, long error_code) \ if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \ == NOTIFY_STOP) \ return; \ + conditional_sti(regs); \ do_trap(trapnr, signr, str, 0, regs, error_code, NULL); \ } @@ -551,6 +602,7 @@ void do_##name(struct pt_regs *regs, long error_code) \ if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \ == NOTIFY_STOP) \ return; \ + conditional_sti(regs); \ do_trap(trapnr, signr, str, 0, regs, error_code, &info); \ } @@ -560,6 +612,7 @@ void do_##name(struct pt_regs *regs, long error_code) \ if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \ == NOTIFY_STOP) \ return; \ + conditional_sti(regs); \ do_trap(trapnr, signr, str, 1, regs, error_code, NULL); \ } @@ -575,22 +628,23 @@ void do_##name(struct pt_regs *regs, long error_code) \ if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \ == NOTIFY_STOP) \ return; \ + conditional_sti(regs); \ do_trap(trapnr, signr, str, 1, regs, error_code, &info); \ } -DO_VM86_ERROR_INFO(0, SIGFPE, "divide error", divide_error, FPE_INTDIV, regs->ip) +DO_VM86_TRAP_INFO(0, SIGFPE, "divide error", divide_error, FPE_INTDIV, regs->ip) #ifndef CONFIG_KPROBES -DO_VM86_ERROR(3, SIGTRAP, "int3", int3) +DO_VM86_TRAP(3, SIGTRAP, "int3", int3) #endif -DO_VM86_ERROR(4, SIGSEGV, "overflow", overflow) -DO_VM86_ERROR(5, SIGSEGV, "bounds", bounds) -DO_ERROR_INFO(6, SIGILL, "invalid opcode", invalid_op, ILL_ILLOPN, regs->ip, 0) -DO_ERROR(9, SIGFPE, "coprocessor segment overrun", coprocessor_segment_overrun) -DO_ERROR(10, SIGSEGV, "invalid TSS", invalid_TSS) -DO_ERROR(11, SIGBUS, "segment not present", segment_not_present) -DO_ERROR(12, SIGBUS, "stack segment", stack_segment) -DO_ERROR_INFO(17, SIGBUS, "alignment check", alignment_check, BUS_ADRALN, 0, 0) -DO_ERROR_INFO(32, SIGILL, "iret exception", iret_error, ILL_BADSTK, 0, 1) +DO_VM86_TRAP(4, SIGSEGV, "overflow", overflow) +DO_VM86_TRAP(5, SIGSEGV, "bounds", bounds) +DO_TRAP_INFO(6, SIGILL, "invalid opcode", invalid_op, ILL_ILLOPN, regs->ip, 0) +DO_TRAP(9, SIGFPE, "coprocessor segment overrun", coprocessor_segment_overrun) +DO_TRAP(10, SIGSEGV, "invalid TSS", invalid_TSS) +DO_TRAP(11, SIGBUS, "segment not present", segment_not_present) +DO_TRAP(12, SIGBUS, "stack segment", stack_segment) +DO_TRAP_INFO(17, SIGBUS, "alignment check", alignment_check, BUS_ADRALN, 0, 0) +DO_TRAP_INFO(32, SIGILL, "iret exception", iret_error, ILL_BADSTK, 0, 1) void __kprobes do_general_protection(struct pt_regs *regs, long error_code) -- cgit v1.2.3 From 976382dcbe3a2233f48cda5b0840874f43a30825 Mon Sep 17 00:00:00 2001 From: Alexander van Heukelum Date: Tue, 9 Sep 2008 21:55:57 +0200 Subject: i386: convert hardware exception 0 to an interrupt gate Handle divide error exception with interrupt initially off. Signed-off-by: Alexander van Heukelum Signed-off-by: Ingo Molnar --- arch/x86/kernel/traps_32.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/traps_32.c b/arch/x86/kernel/traps_32.c index fe6dd972068..c18824b9d1a 100644 --- a/arch/x86/kernel/traps_32.c +++ b/arch/x86/kernel/traps_32.c @@ -632,7 +632,7 @@ void do_##name(struct pt_regs *regs, long error_code) \ do_trap(trapnr, signr, str, 1, regs, error_code, &info); \ } -DO_VM86_TRAP_INFO(0, SIGFPE, "divide error", divide_error, FPE_INTDIV, regs->ip) +DO_VM86_ERROR_INFO(0, SIGFPE, "divide error", divide_error, FPE_INTDIV, regs->ip) #ifndef CONFIG_KPROBES DO_VM86_TRAP(3, SIGTRAP, "int3", int3) #endif @@ -1247,7 +1247,7 @@ void __init trap_init(void) early_iounmap(p, 4); #endif - set_trap_gate(0, ÷_error); + set_intr_gate(0, ÷_error); set_intr_gate(1, &debug); set_intr_gate(2, &nmi); set_system_intr_gate(3, &int3); /* int3 can be called from all */ -- cgit v1.2.3 From b94da1e4b7d396abe10ce4d566f1a5f623602c21 Mon Sep 17 00:00:00 2001 From: Alexander van Heukelum Date: Tue, 9 Sep 2008 21:55:58 +0200 Subject: i386: expand exception 3 DO_TRAP macro The int3 exception was already takes as an interrupt and do_int3 does not fit in the new DO_ERROR macro. This patch just expands the DO_TRAP macro and rearranges the code a bit. No functional changes intended. Signed-off-by: Alexander van Heukelum Signed-off-by: Ingo Molnar --- arch/x86/kernel/traps_32.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/traps_32.c b/arch/x86/kernel/traps_32.c index c18824b9d1a..e3c4809f414 100644 --- a/arch/x86/kernel/traps_32.c +++ b/arch/x86/kernel/traps_32.c @@ -633,9 +633,6 @@ void do_##name(struct pt_regs *regs, long error_code) \ } DO_VM86_ERROR_INFO(0, SIGFPE, "divide error", divide_error, FPE_INTDIV, regs->ip) -#ifndef CONFIG_KPROBES -DO_VM86_TRAP(3, SIGTRAP, "int3", int3) -#endif DO_VM86_TRAP(4, SIGSEGV, "overflow", overflow) DO_VM86_TRAP(5, SIGSEGV, "bounds", bounds) DO_TRAP_INFO(6, SIGILL, "invalid opcode", invalid_op, ILL_ILLOPN, regs->ip, 0) @@ -907,9 +904,9 @@ void restart_nmi(void) acpi_nmi_enable(); } -#ifdef CONFIG_KPROBES void __kprobes do_int3(struct pt_regs *regs, long error_code) { +#ifdef CONFIG_KPROBES trace_hardirqs_fixup(); if (notify_die(DIE_INT3, "int3", regs, error_code, 3, SIGTRAP) @@ -920,10 +917,14 @@ void __kprobes do_int3(struct pt_regs *regs, long error_code) * disabled. Normal trap handlers don't. */ conditional_sti(regs); +#else + if (notify_die(DIE_TRAP, "int3", regs, error_code, 3, SIGTRAP) + == NOTIFY_STOP) + return; +#endif do_trap(3, SIGTRAP, "int3", 1, regs, error_code, NULL); } -#endif /* * Our handling of the processor debug registers is non-trivial. -- cgit v1.2.3 From 8d6f9d69bdc1dcf75a3ba436c9b2efb20478619b Mon Sep 17 00:00:00 2001 From: Alexander van Heukelum Date: Tue, 9 Sep 2008 21:55:59 +0200 Subject: i386: convert hardware exception 4 to an interrupt gate Handle overflow exception with interrupt initially off. Signed-off-by: Alexander van Heukelum Signed-off-by: Ingo Molnar --- arch/x86/kernel/traps_32.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/traps_32.c b/arch/x86/kernel/traps_32.c index e3c4809f414..6f685e61325 100644 --- a/arch/x86/kernel/traps_32.c +++ b/arch/x86/kernel/traps_32.c @@ -633,7 +633,7 @@ void do_##name(struct pt_regs *regs, long error_code) \ } DO_VM86_ERROR_INFO(0, SIGFPE, "divide error", divide_error, FPE_INTDIV, regs->ip) -DO_VM86_TRAP(4, SIGSEGV, "overflow", overflow) +DO_VM86_ERROR(4, SIGSEGV, "overflow", overflow) DO_VM86_TRAP(5, SIGSEGV, "bounds", bounds) DO_TRAP_INFO(6, SIGILL, "invalid opcode", invalid_op, ILL_ILLOPN, regs->ip, 0) DO_TRAP(9, SIGFPE, "coprocessor segment overrun", coprocessor_segment_overrun) @@ -1252,7 +1252,7 @@ void __init trap_init(void) set_intr_gate(1, &debug); set_intr_gate(2, &nmi); set_system_intr_gate(3, &int3); /* int3 can be called from all */ - set_system_gate(4, &overflow); /* int4 can be called from all */ + set_system_intr_gate(4, &overflow); /* int4 can be called from all */ set_trap_gate(5, &bounds); set_trap_gate(6, &invalid_op); set_trap_gate(7, &device_not_available); -- cgit v1.2.3 From 64f644c0b4b8b7818867fb27de077c98c4f0022b Mon Sep 17 00:00:00 2001 From: Alexander van Heukelum Date: Tue, 9 Sep 2008 21:56:00 +0200 Subject: i386: convert hardware exception 5 to an interrupt gate Handle bounds exception with interrupt initially off. Signed-off-by: Alexander van Heukelum Signed-off-by: Ingo Molnar --- arch/x86/kernel/traps_32.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/traps_32.c b/arch/x86/kernel/traps_32.c index 6f685e61325..06149083763 100644 --- a/arch/x86/kernel/traps_32.c +++ b/arch/x86/kernel/traps_32.c @@ -634,7 +634,7 @@ void do_##name(struct pt_regs *regs, long error_code) \ DO_VM86_ERROR_INFO(0, SIGFPE, "divide error", divide_error, FPE_INTDIV, regs->ip) DO_VM86_ERROR(4, SIGSEGV, "overflow", overflow) -DO_VM86_TRAP(5, SIGSEGV, "bounds", bounds) +DO_VM86_ERROR(5, SIGSEGV, "bounds", bounds) DO_TRAP_INFO(6, SIGILL, "invalid opcode", invalid_op, ILL_ILLOPN, regs->ip, 0) DO_TRAP(9, SIGFPE, "coprocessor segment overrun", coprocessor_segment_overrun) DO_TRAP(10, SIGSEGV, "invalid TSS", invalid_TSS) @@ -1253,7 +1253,7 @@ void __init trap_init(void) set_intr_gate(2, &nmi); set_system_intr_gate(3, &int3); /* int3 can be called from all */ set_system_intr_gate(4, &overflow); /* int4 can be called from all */ - set_trap_gate(5, &bounds); + set_intr_gate(5, &bounds); set_trap_gate(6, &invalid_op); set_trap_gate(7, &device_not_available); set_task_gate(8, GDT_ENTRY_DOUBLEFAULT_TSS); -- cgit v1.2.3 From 12394cf567d509f6ab5d94544e1f414b35286d9e Mon Sep 17 00:00:00 2001 From: Alexander van Heukelum Date: Tue, 9 Sep 2008 21:56:01 +0200 Subject: i386: convert hardware exception 6 to an interrupt gate Handle invalid opcode exception with interrupt initially off. Signed-off-by: Alexander van Heukelum Signed-off-by: Ingo Molnar --- arch/x86/kernel/traps_32.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/traps_32.c b/arch/x86/kernel/traps_32.c index 06149083763..39a6101a612 100644 --- a/arch/x86/kernel/traps_32.c +++ b/arch/x86/kernel/traps_32.c @@ -635,7 +635,7 @@ void do_##name(struct pt_regs *regs, long error_code) \ DO_VM86_ERROR_INFO(0, SIGFPE, "divide error", divide_error, FPE_INTDIV, regs->ip) DO_VM86_ERROR(4, SIGSEGV, "overflow", overflow) DO_VM86_ERROR(5, SIGSEGV, "bounds", bounds) -DO_TRAP_INFO(6, SIGILL, "invalid opcode", invalid_op, ILL_ILLOPN, regs->ip, 0) +DO_ERROR_INFO(6, SIGILL, "invalid opcode", invalid_op, ILL_ILLOPN, regs->ip, 0) DO_TRAP(9, SIGFPE, "coprocessor segment overrun", coprocessor_segment_overrun) DO_TRAP(10, SIGSEGV, "invalid TSS", invalid_TSS) DO_TRAP(11, SIGBUS, "segment not present", segment_not_present) @@ -1254,7 +1254,7 @@ void __init trap_init(void) set_system_intr_gate(3, &int3); /* int3 can be called from all */ set_system_intr_gate(4, &overflow); /* int4 can be called from all */ set_intr_gate(5, &bounds); - set_trap_gate(6, &invalid_op); + set_intr_gate(6, &invalid_op); set_trap_gate(7, &device_not_available); set_task_gate(8, GDT_ENTRY_DOUBLEFAULT_TSS); set_trap_gate(9, &coprocessor_segment_overrun); -- cgit v1.2.3 From 7643e9b936b4af31ba4851eb7d5b3a3bfad52502 Mon Sep 17 00:00:00 2001 From: Alexander van Heukelum Date: Tue, 9 Sep 2008 21:56:02 +0200 Subject: i386: convert hardware exception 7 to an interrupt gate Handle no coprocessor exception with interrupt initially off. device_not_available in entry_32.S calls either math_state_restore or math_emulate. This patch adds an extra indirection to be able to re-enable interrupts explicitly in traps_32.c Signed-off-by: Alexander van Heukelum Signed-off-by: Ingo Molnar --- arch/x86/kernel/entry_32.S | 15 ++------------- arch/x86/kernel/traps_32.c | 14 +++++++++++++- 2 files changed, 15 insertions(+), 14 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index 109792bc7cf..5a885855fc8 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S @@ -760,20 +760,9 @@ ENTRY(device_not_available) RING0_INT_FRAME pushl $-1 # mark this as an int CFI_ADJUST_CFA_OFFSET 4 - SAVE_ALL - GET_CR0_INTO_EAX - testl $0x4, %eax # EM (math emulation bit) - jne device_not_available_emulate - preempt_stop(CLBR_ANY) - call math_state_restore - jmp ret_from_exception -device_not_available_emulate: - pushl $0 # temporary storage for ORIG_EIP + pushl $do_device_not_available CFI_ADJUST_CFA_OFFSET 4 - call math_emulate - addl $4, %esp - CFI_ADJUST_CFA_OFFSET -4 - jmp ret_from_exception + jmp error_code CFI_ENDPROC END(device_not_available) diff --git a/arch/x86/kernel/traps_32.c b/arch/x86/kernel/traps_32.c index 39a6101a612..5b15f75d059 100644 --- a/arch/x86/kernel/traps_32.c +++ b/arch/x86/kernel/traps_32.c @@ -46,6 +46,7 @@ #include #endif +#include #include #include #include @@ -1236,6 +1237,17 @@ asmlinkage void math_emulate(long arg) #endif /* CONFIG_MATH_EMULATION */ +void __kprobes do_device_not_available(struct pt_regs *regs, long error) +{ + if (read_cr0() & X86_CR0_EM) { + conditional_sti(regs); + math_emulate(0); + } else { + math_state_restore(); /* interrupts still off */ + conditional_sti(regs); + } +} + void __init trap_init(void) { int i; @@ -1255,7 +1267,7 @@ void __init trap_init(void) set_system_intr_gate(4, &overflow); /* int4 can be called from all */ set_intr_gate(5, &bounds); set_intr_gate(6, &invalid_op); - set_trap_gate(7, &device_not_available); + set_intr_gate(7, &device_not_available); set_task_gate(8, GDT_ENTRY_DOUBLEFAULT_TSS); set_trap_gate(9, &coprocessor_segment_overrun); set_trap_gate(10, &invalid_TSS); -- cgit v1.2.3 From 51bc1ed6060e96de5099f604071961540880efc2 Mon Sep 17 00:00:00 2001 From: Alexander van Heukelum Date: Tue, 9 Sep 2008 21:56:03 +0200 Subject: i386: convert hardware exception 9 to an interrupt gate Handle coprocessor segment overrun exception with interrupt initially off. Signed-off-by: Alexander van Heukelum Signed-off-by: Ingo Molnar --- arch/x86/kernel/traps_32.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/traps_32.c b/arch/x86/kernel/traps_32.c index 5b15f75d059..d26f32308db 100644 --- a/arch/x86/kernel/traps_32.c +++ b/arch/x86/kernel/traps_32.c @@ -637,7 +637,7 @@ DO_VM86_ERROR_INFO(0, SIGFPE, "divide error", divide_error, FPE_INTDIV, regs->ip DO_VM86_ERROR(4, SIGSEGV, "overflow", overflow) DO_VM86_ERROR(5, SIGSEGV, "bounds", bounds) DO_ERROR_INFO(6, SIGILL, "invalid opcode", invalid_op, ILL_ILLOPN, regs->ip, 0) -DO_TRAP(9, SIGFPE, "coprocessor segment overrun", coprocessor_segment_overrun) +DO_ERROR(9, SIGFPE, "coprocessor segment overrun", coprocessor_segment_overrun) DO_TRAP(10, SIGSEGV, "invalid TSS", invalid_TSS) DO_TRAP(11, SIGBUS, "segment not present", segment_not_present) DO_TRAP(12, SIGBUS, "stack segment", stack_segment) @@ -1269,7 +1269,7 @@ void __init trap_init(void) set_intr_gate(6, &invalid_op); set_intr_gate(7, &device_not_available); set_task_gate(8, GDT_ENTRY_DOUBLEFAULT_TSS); - set_trap_gate(9, &coprocessor_segment_overrun); + set_intr_gate(9, &coprocessor_segment_overrun); set_trap_gate(10, &invalid_TSS); set_trap_gate(11, &segment_not_present); set_trap_gate(12, &stack_segment); -- cgit v1.2.3 From 6bf77bf93938ae6baad9945a0ba57b3225e8e58b Mon Sep 17 00:00:00 2001 From: Alexander van Heukelum Date: Tue, 9 Sep 2008 21:56:04 +0200 Subject: i386: convert hardware exception 10 to an interrupt gate Handle invalid TSS exception with interrupt initially off. Signed-off-by: Alexander van Heukelum Signed-off-by: Ingo Molnar --- arch/x86/kernel/traps_32.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/traps_32.c b/arch/x86/kernel/traps_32.c index d26f32308db..069075e19ea 100644 --- a/arch/x86/kernel/traps_32.c +++ b/arch/x86/kernel/traps_32.c @@ -638,7 +638,7 @@ DO_VM86_ERROR(4, SIGSEGV, "overflow", overflow) DO_VM86_ERROR(5, SIGSEGV, "bounds", bounds) DO_ERROR_INFO(6, SIGILL, "invalid opcode", invalid_op, ILL_ILLOPN, regs->ip, 0) DO_ERROR(9, SIGFPE, "coprocessor segment overrun", coprocessor_segment_overrun) -DO_TRAP(10, SIGSEGV, "invalid TSS", invalid_TSS) +DO_ERROR(10, SIGSEGV, "invalid TSS", invalid_TSS) DO_TRAP(11, SIGBUS, "segment not present", segment_not_present) DO_TRAP(12, SIGBUS, "stack segment", stack_segment) DO_TRAP_INFO(17, SIGBUS, "alignment check", alignment_check, BUS_ADRALN, 0, 0) @@ -1270,7 +1270,7 @@ void __init trap_init(void) set_intr_gate(7, &device_not_available); set_task_gate(8, GDT_ENTRY_DOUBLEFAULT_TSS); set_intr_gate(9, &coprocessor_segment_overrun); - set_trap_gate(10, &invalid_TSS); + set_intr_gate(10, &invalid_TSS); set_trap_gate(11, &segment_not_present); set_trap_gate(12, &stack_segment); set_trap_gate(13, &general_protection); -- cgit v1.2.3 From 36d936c7988ac5caf575bc0e2dc618dbfebc6065 Mon Sep 17 00:00:00 2001 From: Alexander van Heukelum Date: Tue, 9 Sep 2008 21:56:05 +0200 Subject: i386: convert hardware exception 11 to an interrupt gate Handle segment not present exception with interrupt initially off. Signed-off-by: Alexander van Heukelum Signed-off-by: Ingo Molnar --- arch/x86/kernel/traps_32.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/traps_32.c b/arch/x86/kernel/traps_32.c index 069075e19ea..6910bbe94f6 100644 --- a/arch/x86/kernel/traps_32.c +++ b/arch/x86/kernel/traps_32.c @@ -639,7 +639,7 @@ DO_VM86_ERROR(5, SIGSEGV, "bounds", bounds) DO_ERROR_INFO(6, SIGILL, "invalid opcode", invalid_op, ILL_ILLOPN, regs->ip, 0) DO_ERROR(9, SIGFPE, "coprocessor segment overrun", coprocessor_segment_overrun) DO_ERROR(10, SIGSEGV, "invalid TSS", invalid_TSS) -DO_TRAP(11, SIGBUS, "segment not present", segment_not_present) +DO_ERROR(11, SIGBUS, "segment not present", segment_not_present) DO_TRAP(12, SIGBUS, "stack segment", stack_segment) DO_TRAP_INFO(17, SIGBUS, "alignment check", alignment_check, BUS_ADRALN, 0, 0) DO_TRAP_INFO(32, SIGILL, "iret exception", iret_error, ILL_BADSTK, 0, 1) @@ -1271,7 +1271,7 @@ void __init trap_init(void) set_task_gate(8, GDT_ENTRY_DOUBLEFAULT_TSS); set_intr_gate(9, &coprocessor_segment_overrun); set_intr_gate(10, &invalid_TSS); - set_trap_gate(11, &segment_not_present); + set_intr_gate(11, &segment_not_present); set_trap_gate(12, &stack_segment); set_trap_gate(13, &general_protection); set_intr_gate(14, &page_fault); -- cgit v1.2.3 From f5ca81878b42ae7d1b00d0ed5f62bb1a158bfac1 Mon Sep 17 00:00:00 2001 From: Alexander van Heukelum Date: Tue, 9 Sep 2008 21:56:06 +0200 Subject: i386: convert hardware exception 12 to an interrupt gate Handle stack segment exception with interrupt initially off. Signed-off-by: Alexander van Heukelum Signed-off-by: Ingo Molnar --- arch/x86/kernel/traps_32.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/traps_32.c b/arch/x86/kernel/traps_32.c index 6910bbe94f6..16c056ba14e 100644 --- a/arch/x86/kernel/traps_32.c +++ b/arch/x86/kernel/traps_32.c @@ -640,7 +640,7 @@ DO_ERROR_INFO(6, SIGILL, "invalid opcode", invalid_op, ILL_ILLOPN, regs->ip, 0) DO_ERROR(9, SIGFPE, "coprocessor segment overrun", coprocessor_segment_overrun) DO_ERROR(10, SIGSEGV, "invalid TSS", invalid_TSS) DO_ERROR(11, SIGBUS, "segment not present", segment_not_present) -DO_TRAP(12, SIGBUS, "stack segment", stack_segment) +DO_ERROR(12, SIGBUS, "stack segment", stack_segment) DO_TRAP_INFO(17, SIGBUS, "alignment check", alignment_check, BUS_ADRALN, 0, 0) DO_TRAP_INFO(32, SIGILL, "iret exception", iret_error, ILL_BADSTK, 0, 1) @@ -1272,7 +1272,7 @@ void __init trap_init(void) set_intr_gate(9, &coprocessor_segment_overrun); set_intr_gate(10, &invalid_TSS); set_intr_gate(11, &segment_not_present); - set_trap_gate(12, &stack_segment); + set_intr_gate(12, &stack_segment); set_trap_gate(13, &general_protection); set_intr_gate(14, &page_fault); set_trap_gate(15, &spurious_interrupt_bug); -- cgit v1.2.3 From c6df0d71bec391e78e0a38109d63154acd69a937 Mon Sep 17 00:00:00 2001 From: Alexander van Heukelum Date: Tue, 9 Sep 2008 21:56:07 +0200 Subject: i386: convert hardware exception 13 to an interrupt gate Handle general protection exception with interrupt initially off. Signed-off-by: Alexander van Heukelum Signed-off-by: Ingo Molnar --- arch/x86/kernel/traps_32.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/traps_32.c b/arch/x86/kernel/traps_32.c index 16c056ba14e..e2598505ef5 100644 --- a/arch/x86/kernel/traps_32.c +++ b/arch/x86/kernel/traps_32.c @@ -652,6 +652,8 @@ do_general_protection(struct pt_regs *regs, long error_code) struct tss_struct *tss; int cpu; + conditional_sti(regs); + cpu = get_cpu(); tss = &per_cpu(init_tss, cpu); thread = ¤t->thread; @@ -1273,7 +1275,7 @@ void __init trap_init(void) set_intr_gate(10, &invalid_TSS); set_intr_gate(11, &segment_not_present); set_intr_gate(12, &stack_segment); - set_trap_gate(13, &general_protection); + set_intr_gate(13, &general_protection); set_intr_gate(14, &page_fault); set_trap_gate(15, &spurious_interrupt_bug); set_trap_gate(16, &coprocessor_error); -- cgit v1.2.3 From cf81978d5fb32ab75f701690b372e1126b41861f Mon Sep 17 00:00:00 2001 From: Alexander van Heukelum Date: Tue, 9 Sep 2008 21:56:08 +0200 Subject: i386: convert hardware exception 15 to an interrupt gate Handle exception 15 with interrupt initially off. Signed-off-by: Alexander van Heukelum Signed-off-by: Ingo Molnar --- arch/x86/kernel/traps_32.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/traps_32.c b/arch/x86/kernel/traps_32.c index e2598505ef5..0039856b634 100644 --- a/arch/x86/kernel/traps_32.c +++ b/arch/x86/kernel/traps_32.c @@ -1164,6 +1164,7 @@ void do_simd_coprocessor_error(struct pt_regs *regs, long error_code) void do_spurious_interrupt_bug(struct pt_regs *regs, long error_code) { + conditional_sti(regs); #if 0 /* No need to warn about this any longer. */ printk(KERN_INFO "Ignoring P6 Local APIC Spurious Interrupt Bug...\n"); @@ -1277,7 +1278,7 @@ void __init trap_init(void) set_intr_gate(12, &stack_segment); set_intr_gate(13, &general_protection); set_intr_gate(14, &page_fault); - set_trap_gate(15, &spurious_interrupt_bug); + set_intr_gate(15, &spurious_interrupt_bug); set_trap_gate(16, &coprocessor_error); set_trap_gate(17, &alignment_check); #ifdef CONFIG_X86_MCE -- cgit v1.2.3 From 252d28fe65adc2e51bc71358eae43ba94b74da91 Mon Sep 17 00:00:00 2001 From: Alexander van Heukelum Date: Tue, 9 Sep 2008 21:56:09 +0200 Subject: i386: convert hardware exception 16 to an interrupt gate Handle coprocessor error exception with interrupt initially off. Signed-off-by: Alexander van Heukelum Signed-off-by: Ingo Molnar --- arch/x86/kernel/traps_32.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/traps_32.c b/arch/x86/kernel/traps_32.c index 0039856b634..5ab4c3fc7c1 100644 --- a/arch/x86/kernel/traps_32.c +++ b/arch/x86/kernel/traps_32.c @@ -1088,6 +1088,7 @@ void math_error(void __user *ip) void do_coprocessor_error(struct pt_regs *regs, long error_code) { + conditional_sti(regs); ignore_fpu_irq = 1; math_error((void __user *)regs->ip); } @@ -1279,7 +1280,7 @@ void __init trap_init(void) set_intr_gate(13, &general_protection); set_intr_gate(14, &page_fault); set_intr_gate(15, &spurious_interrupt_bug); - set_trap_gate(16, &coprocessor_error); + set_intr_gate(16, &coprocessor_error); set_trap_gate(17, &alignment_check); #ifdef CONFIG_X86_MCE set_trap_gate(18, &machine_check); -- cgit v1.2.3 From 5feedfd401c91e41bbe31ddec93cbe809dc98309 Mon Sep 17 00:00:00 2001 From: Alexander van Heukelum Date: Tue, 9 Sep 2008 21:56:10 +0200 Subject: i386: convert hardware exception 17 to an interrupt gate Handle alignment check exception with interrupt initially off. Signed-off-by: Alexander van Heukelum Signed-off-by: Ingo Molnar --- arch/x86/kernel/traps_32.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/traps_32.c b/arch/x86/kernel/traps_32.c index 5ab4c3fc7c1..86c808b4daf 100644 --- a/arch/x86/kernel/traps_32.c +++ b/arch/x86/kernel/traps_32.c @@ -641,7 +641,7 @@ DO_ERROR(9, SIGFPE, "coprocessor segment overrun", coprocessor_segment_overrun) DO_ERROR(10, SIGSEGV, "invalid TSS", invalid_TSS) DO_ERROR(11, SIGBUS, "segment not present", segment_not_present) DO_ERROR(12, SIGBUS, "stack segment", stack_segment) -DO_TRAP_INFO(17, SIGBUS, "alignment check", alignment_check, BUS_ADRALN, 0, 0) +DO_ERROR_INFO(17, SIGBUS, "alignment check", alignment_check, BUS_ADRALN, 0, 0) DO_TRAP_INFO(32, SIGILL, "iret exception", iret_error, ILL_BADSTK, 0, 1) void __kprobes @@ -1281,7 +1281,7 @@ void __init trap_init(void) set_intr_gate(14, &page_fault); set_intr_gate(15, &spurious_interrupt_bug); set_intr_gate(16, &coprocessor_error); - set_trap_gate(17, &alignment_check); + set_intr_gate(17, &alignment_check); #ifdef CONFIG_X86_MCE set_trap_gate(18, &machine_check); #endif -- cgit v1.2.3 From eb642f62082348c33ead53f736a9698953aa517d Mon Sep 17 00:00:00 2001 From: Alexander van Heukelum Date: Tue, 9 Sep 2008 21:56:11 +0200 Subject: i386: convert hardware exception 18 to an interrupt gate Handle machine check exception with interrupt initially off. Signed-off-by: Alexander van Heukelum Signed-off-by: Ingo Molnar --- arch/x86/kernel/entry_32.S | 2 +- arch/x86/kernel/traps_32.c | 11 ++++++++++- 2 files changed, 11 insertions(+), 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index 5a885855fc8..c5fe01bca6c 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S @@ -1019,7 +1019,7 @@ ENTRY(machine_check) RING0_INT_FRAME pushl $0 CFI_ADJUST_CFA_OFFSET 4 - pushl machine_check_vector + pushl $do_machine_check CFI_ADJUST_CFA_OFFSET 4 jmp error_code CFI_ENDPROC diff --git a/arch/x86/kernel/traps_32.c b/arch/x86/kernel/traps_32.c index 86c808b4daf..54b89f497ba 100644 --- a/arch/x86/kernel/traps_32.c +++ b/arch/x86/kernel/traps_32.c @@ -62,6 +62,7 @@ #include #include "mach_traps.h" +#include "cpu/mcheck/mce.h" DECLARE_BITMAP(used_vectors, NR_VECTORS); EXPORT_SYMBOL_GPL(used_vectors); @@ -1252,6 +1253,14 @@ void __kprobes do_device_not_available(struct pt_regs *regs, long error) } } +#ifdef CONFIG_X86_MCE +void __kprobes do_machine_check(struct pt_regs *regs, long error) +{ + conditional_sti(regs); + machine_check_vector(regs, error); +} +#endif + void __init trap_init(void) { int i; @@ -1283,7 +1292,7 @@ void __init trap_init(void) set_intr_gate(16, &coprocessor_error); set_intr_gate(17, &alignment_check); #ifdef CONFIG_X86_MCE - set_trap_gate(18, &machine_check); + set_intr_gate(18, &machine_check); #endif set_trap_gate(19, &simd_coprocessor_error); -- cgit v1.2.3 From b939bde2788a7f16741e563ee90f6f3ad38935cf Mon Sep 17 00:00:00 2001 From: Alexander van Heukelum Date: Tue, 9 Sep 2008 21:56:12 +0200 Subject: i386: convert hardware exception 19 to an interrupt gate Handle SIMD coprocessor exception with interrupt initially off. Signed-off-by: Alexander van Heukelum Signed-off-by: Ingo Molnar --- arch/x86/kernel/traps_32.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/traps_32.c b/arch/x86/kernel/traps_32.c index 54b89f497ba..1bd7960b135 100644 --- a/arch/x86/kernel/traps_32.c +++ b/arch/x86/kernel/traps_32.c @@ -1144,6 +1144,8 @@ static void simd_math_error(void __user *ip) void do_simd_coprocessor_error(struct pt_regs *regs, long error_code) { + conditional_sti(regs); + if (cpu_has_xmm) { /* Handle SIMD FPU exceptions on PIII+ processors. */ ignore_fpu_irq = 1; @@ -1294,7 +1296,7 @@ void __init trap_init(void) #ifdef CONFIG_X86_MCE set_intr_gate(18, &machine_check); #endif - set_trap_gate(19, &simd_coprocessor_error); + set_intr_gate(19, &simd_coprocessor_error); if (cpu_has_fxsr) { printk(KERN_INFO "Enabling fast FPU save and restore... "); -- cgit v1.2.3 From f8e0870f589cfa45575dc7333cdf1b8f769e7900 Mon Sep 17 00:00:00 2001 From: Alexander van Heukelum Date: Tue, 9 Sep 2008 21:56:13 +0200 Subject: i386: remove temporary DO_TRAP macros, expanding the last one used Only one use of the DO_TRAP macros remains. Expand that one and remove the macros now. Signed-off-by: Alexander van Heukelum Signed-off-by: Ingo Molnar --- arch/x86/kernel/traps_32.c | 66 +++++++++++----------------------------------- 1 file changed, 15 insertions(+), 51 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/traps_32.c b/arch/x86/kernel/traps_32.c index 1bd7960b135..8fa8485b49c 100644 --- a/arch/x86/kernel/traps_32.c +++ b/arch/x86/kernel/traps_32.c @@ -530,56 +530,6 @@ vm86_trap: return; } -#define DO_TRAP(trapnr, signr, str, name) \ -void do_##name(struct pt_regs *regs, long error_code) \ -{ \ - trace_hardirqs_fixup(); \ - if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \ - == NOTIFY_STOP) \ - return; \ - do_trap(trapnr, signr, str, 0, regs, error_code, NULL); \ -} - -#define DO_TRAP_INFO(trapnr, signr, str, name, sicode, siaddr, irq) \ -void do_##name(struct pt_regs *regs, long error_code) \ -{ \ - siginfo_t info; \ - if (irq) \ - local_irq_enable(); \ - info.si_signo = signr; \ - info.si_errno = 0; \ - info.si_code = sicode; \ - info.si_addr = (void __user *)siaddr; \ - if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \ - == NOTIFY_STOP) \ - return; \ - do_trap(trapnr, signr, str, 0, regs, error_code, &info); \ -} - -#define DO_VM86_TRAP(trapnr, signr, str, name) \ -void do_##name(struct pt_regs *regs, long error_code) \ -{ \ - if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \ - == NOTIFY_STOP) \ - return; \ - do_trap(trapnr, signr, str, 1, regs, error_code, NULL); \ -} - -#define DO_VM86_TRAP_INFO(trapnr, signr, str, name, sicode, siaddr) \ -void do_##name(struct pt_regs *regs, long error_code) \ -{ \ - siginfo_t info; \ - info.si_signo = signr; \ - info.si_errno = 0; \ - info.si_code = sicode; \ - info.si_addr = (void __user *)siaddr; \ - trace_hardirqs_fixup(); \ - if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \ - == NOTIFY_STOP) \ - return; \ - do_trap(trapnr, signr, str, 1, regs, error_code, &info); \ -} - #define DO_ERROR(trapnr, signr, str, name) \ void do_##name(struct pt_regs *regs, long error_code) \ { \ @@ -643,7 +593,6 @@ DO_ERROR(10, SIGSEGV, "invalid TSS", invalid_TSS) DO_ERROR(11, SIGBUS, "segment not present", segment_not_present) DO_ERROR(12, SIGBUS, "stack segment", stack_segment) DO_ERROR_INFO(17, SIGBUS, "alignment check", alignment_check, BUS_ADRALN, 0, 0) -DO_TRAP_INFO(32, SIGILL, "iret exception", iret_error, ILL_BADSTK, 0, 1) void __kprobes do_general_protection(struct pt_regs *regs, long error_code) @@ -1263,6 +1212,21 @@ void __kprobes do_machine_check(struct pt_regs *regs, long error) } #endif +void do_iret_error(struct pt_regs *regs, long error_code) +{ + siginfo_t info; + local_irq_enable(); + + info.si_signo = SIGILL; + info.si_errno = 0; + info.si_code = ILL_BADSTK; + info.si_addr = 0; + if (notify_die(DIE_TRAP, "iret exception", + regs, error_code, 32, SIGILL) == NOTIFY_STOP) + return; + do_trap(32, SIGILL, "iret exception", 0, regs, error_code, &info); +} + void __init trap_init(void) { int i; -- cgit v1.2.3 From 85cea51d7e7b8d3408c8e933d88fa067309395fa Mon Sep 17 00:00:00 2001 From: Alexander van Heukelum Date: Tue, 9 Sep 2008 21:56:14 +0200 Subject: i386: add TRACE_IRQS_OFF to entry_32.S in 'error_code' Many exceptions use the same code path via the label 'error_code' in entry_32.S. At this point interrupts are off, so let's inform the tracing code of that fact before calling into C. Signed-off-by: Alexander van Heukelum Signed-off-by: Ingo Molnar --- arch/x86/kernel/entry_32.S | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index c5fe01bca6c..49438e58291 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S @@ -730,6 +730,7 @@ error_code: movl $(__USER_DS), %ecx movl %ecx, %ds movl %ecx, %es + TRACE_IRQS_OFF movl %esp,%eax # pt_regs pointer call *%edi jmp ret_from_exception -- cgit v1.2.3 From 43024a8a5d4c63952687286f3083f7f34d4da2cc Mon Sep 17 00:00:00 2001 From: Alexander van Heukelum Date: Tue, 9 Sep 2008 21:56:15 +0200 Subject: i386: add TRACE_IRQS_OFF for exception 1 (debug) At this point interrupts are off, so let's inform the tracing code of that fact before calling into C. Signed-off-by: Alexander van Heukelum Signed-off-by: Ingo Molnar --- arch/x86/kernel/entry_32.S | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index 49438e58291..a278505baa1 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S @@ -804,6 +804,7 @@ debug_stack_correct: pushl $-1 # mark this as an int CFI_ADJUST_CFA_OFFSET 4 SAVE_ALL + TRACE_IRQS_OFF xorl %edx,%edx # error code 0 movl %esp,%eax # pt_regs pointer call do_debug -- cgit v1.2.3 From e0c7317557c8fc8eacf611e30c2a80f4e24e47a3 Mon Sep 17 00:00:00 2001 From: Alexander van Heukelum Date: Tue, 9 Sep 2008 21:56:16 +0200 Subject: i386: add TRACE_IRQS_OFF for the nmi At this point interrupts are off, so let's inform the tracing code of that fact before calling into C. Signed-off-by: Alexander van Heukelum Signed-off-by: Ingo Molnar --- arch/x86/kernel/entry_32.S | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index a278505baa1..2abdc9a9f7b 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S @@ -849,6 +849,7 @@ nmi_stack_correct: pushl %eax CFI_ADJUST_CFA_OFFSET 4 SAVE_ALL + TRACE_IRQS_OFF xorl %edx,%edx # zero error code movl %esp,%eax # pt_regs pointer call do_nmi @@ -889,6 +890,7 @@ nmi_espfix_stack: pushl %eax CFI_ADJUST_CFA_OFFSET 4 SAVE_ALL + TRACE_IRQS_OFF FIXUP_ESPFIX_STACK # %eax == %esp xorl %edx,%edx # zero error code call do_nmi -- cgit v1.2.3 From a790392faa3a6138b6e90d0fe320a2829652ce22 Mon Sep 17 00:00:00 2001 From: Alexander van Heukelum Date: Tue, 9 Sep 2008 21:56:17 +0200 Subject: i386: add TRACE_IRQS_OFF for the exception 3 (int3) At this point interrupts are off, so let's inform the tracing code of that fact before calling into C. Signed-off-by: Alexander van Heukelum Signed-off-by: Ingo Molnar --- arch/x86/kernel/entry_32.S | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index 2abdc9a9f7b..b21fbfaffe3 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S @@ -921,6 +921,7 @@ KPROBE_ENTRY(int3) pushl $-1 # mark this as an int CFI_ADJUST_CFA_OFFSET 4 SAVE_ALL + TRACE_IRQS_OFF xorl %edx,%edx # zero error code movl %esp,%eax # pt_regs pointer call do_int3 -- cgit v1.2.3 From 07bb2f6236f11169fbd8a8916b16715b25fea9b6 Mon Sep 17 00:00:00 2001 From: Alexander van Heukelum Date: Tue, 9 Sep 2008 21:56:18 +0200 Subject: i386: trace_hardirqs_fixup should now not be necessary: irqs are off. The exception handlers in entry_32.S should now all call TRACE_IRQS_OFF before calling the C code. The calls to trace_hardirqs_fixup should now be unnecessary. Remove them. Signed-off-by: Alexander van Heukelum Signed-off-by: Ingo Molnar --- arch/x86/kernel/traps_32.c | 10 ---------- 1 file changed, 10 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/traps_32.c b/arch/x86/kernel/traps_32.c index 8fa8485b49c..ab559365089 100644 --- a/arch/x86/kernel/traps_32.c +++ b/arch/x86/kernel/traps_32.c @@ -533,7 +533,6 @@ vm86_trap: #define DO_ERROR(trapnr, signr, str, name) \ void do_##name(struct pt_regs *regs, long error_code) \ { \ - trace_hardirqs_fixup(); \ if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \ == NOTIFY_STOP) \ return; \ @@ -576,7 +575,6 @@ void do_##name(struct pt_regs *regs, long error_code) \ info.si_errno = 0; \ info.si_code = sicode; \ info.si_addr = (void __user *)siaddr; \ - trace_hardirqs_fixup(); \ if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \ == NOTIFY_STOP) \ return; \ @@ -860,15 +858,9 @@ void restart_nmi(void) void __kprobes do_int3(struct pt_regs *regs, long error_code) { #ifdef CONFIG_KPROBES - trace_hardirqs_fixup(); - if (notify_die(DIE_INT3, "int3", regs, error_code, 3, SIGTRAP) == NOTIFY_STOP) return; - /* - * This is an interrupt gate, because kprobes wants interrupts - * disabled. Normal trap handlers don't. - */ conditional_sti(regs); #else if (notify_die(DIE_TRAP, "int3", regs, error_code, 3, SIGTRAP) @@ -907,8 +899,6 @@ void __kprobes do_debug(struct pt_regs *regs, long error_code) unsigned int condition; int si_code; - trace_hardirqs_fixup(); - get_debugreg(condition, 6); /* -- cgit v1.2.3 From 88b4c146961f4178f38a8c3e6e54709fa39a3f36 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Sun, 7 Sep 2008 15:21:16 -0700 Subject: x86: use early_memremap() in setup.c The remappings in setup.c are all just ordinary memory, so use early_memremap() rather than early_ioremap(). Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Ingo Molnar --- arch/x86/kernel/setup.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 21b8e0a5978..de6a9d6d599 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -302,7 +302,7 @@ static void __init relocate_initrd(void) if (clen > MAX_MAP_CHUNK-slop) clen = MAX_MAP_CHUNK-slop; mapaddr = ramdisk_image & PAGE_MASK; - p = early_ioremap(mapaddr, clen+slop); + p = early_memremap(mapaddr, clen+slop); memcpy(q, p+slop, clen); early_iounmap(p, clen+slop); q += clen; @@ -379,7 +379,7 @@ static void __init parse_setup_data(void) return; pa_data = boot_params.hdr.setup_data; while (pa_data) { - data = early_ioremap(pa_data, PAGE_SIZE); + data = early_memremap(pa_data, PAGE_SIZE); switch (data->type) { case SETUP_E820_EXT: parse_e820_ext(data, pa_data); @@ -402,7 +402,7 @@ static void __init e820_reserve_setup_data(void) return; pa_data = boot_params.hdr.setup_data; while (pa_data) { - data = early_ioremap(pa_data, sizeof(*data)); + data = early_memremap(pa_data, sizeof(*data)); e820_update_range(pa_data, sizeof(*data)+data->len, E820_RAM, E820_RESERVED_KERN); found = 1; @@ -428,7 +428,7 @@ static void __init reserve_early_setup_data(void) return; pa_data = boot_params.hdr.setup_data; while (pa_data) { - data = early_ioremap(pa_data, sizeof(*data)); + data = early_memremap(pa_data, sizeof(*data)); sprintf(buf, "setup data %x", data->type); reserve_early(pa_data, pa_data+sizeof(*data)+data->len, buf); pa_data = data->next; -- cgit v1.2.3 From a73aaedd95703bd49f4c3f9df06fb7b7373ba905 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Sun, 14 Sep 2008 02:33:14 -0700 Subject: x86: check dsdt before find oem table for es7000, v2 v2: use __acpi_unmap_table() Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/es7000_32.c | 28 +++++++++++++++++++++++----- 1 file changed, 23 insertions(+), 5 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/es7000_32.c b/arch/x86/kernel/es7000_32.c index 849e5cd485b..f454c78fcef 100644 --- a/arch/x86/kernel/es7000_32.c +++ b/arch/x86/kernel/es7000_32.c @@ -109,6 +109,7 @@ struct oem_table { }; extern int find_unisys_acpi_oem_table(unsigned long *oem_addr); +extern void unmap_unisys_acpi_oem_table(unsigned long oem_addr); #endif struct mip_reg { @@ -243,21 +244,38 @@ parse_unisys_oem (char *oemptr) } #ifdef CONFIG_ACPI -int __init -find_unisys_acpi_oem_table(unsigned long *oem_addr) +static unsigned long oem_addrX; +static unsigned long oem_size; +int __init find_unisys_acpi_oem_table(unsigned long *oem_addr) { struct acpi_table_header *header = NULL; int i = 0; - while (ACPI_SUCCESS(acpi_get_table("OEM1", i++, &header))) { + acpi_size tbl_size; + + while (ACPI_SUCCESS(acpi_get_table_with_size("OEM1", i++, &header, &tbl_size))) { if (!memcmp((char *) &header->oem_id, "UNISYS", 6)) { struct oem_table *t = (struct oem_table *)header; - *oem_addr = (unsigned long)__acpi_map_table(t->OEMTableAddr, - t->OEMTableSize); + + oem_addrX = t->OEMTableAddr; + oem_size = t->OEMTableSize; + early_acpi_os_unmap_memory(header, tbl_size); + + *oem_addr = (unsigned long)__acpi_map_table(oem_addrX, + oem_size); return 0; } + early_acpi_os_unmap_memory(header, tbl_size); } return -1; } + +void __init unmap_unisys_acpi_oem_table(unsigned long oem_addr) +{ + if (!oem_addr) + return; + + __acpi_unmap_table((char *)oem_addr, oem_size); +} #endif static void -- cgit v1.2.3 From 3e6de5a393661c5cdabe44115e93bcbde6a742fc Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 15 Sep 2008 08:26:15 +0200 Subject: x86: print out EBDA/lowmem address it's useful for debugging purposes to know the location of the EBDA. Signed-off-by: Ingo Molnar --- arch/x86/kernel/head.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/head.c b/arch/x86/kernel/head.c index 3e66bd364a9..1dcb0f13897 100644 --- a/arch/x86/kernel/head.c +++ b/arch/x86/kernel/head.c @@ -35,6 +35,7 @@ void __init reserve_ebda_region(void) /* start of EBDA area */ ebda_addr = get_bios_ebda(); + printk(KERN_INFO "BIOS EBDA/lowmem at: %08x/%08x\n", ebda_addr, lowmem); /* Fixup: bios puts an EBDA in the top 64K segment */ /* of conventional memory, but does not adjust lowmem. */ -- cgit v1.2.3 From 59ef48a58e59cc27255d526ae3fa60ddcd977208 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Sun, 14 Sep 2008 21:58:49 +0400 Subject: x86: smpboot - check if we have ESR register in wakeup_secondary_cpu We should check if we have ESR register before reading from it. Signed-off-by: Cyrill Gorcunov Cc: Yinghai Lu Cc: "Maciej W. Rozycki" Signed-off-by: Ingo Molnar --- arch/x86/kernel/smpboot.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index b700c9a1064..a778e221ccf 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -599,10 +599,12 @@ wakeup_secondary_cpu(int logical_apicid, unsigned long start_eip) * Give the other CPU some time to accept the IPI. */ udelay(200); - maxlvt = lapic_get_maxlvt(); - if (maxlvt > 3) /* Due to the Pentium erratum 3AP. */ - apic_write(APIC_ESR, 0); - accept_status = (apic_read(APIC_ESR) & 0xEF); + if (APIC_INTEGRATED(apic_version[phys_apicid])) { + maxlvt = lapic_get_maxlvt(); + if (maxlvt > 3) /* Due to the Pentium erratum 3AP. */ + apic_write(APIC_ESR, 0); + accept_status = (apic_read(APIC_ESR) & 0xEF); + } pr_debug("NMI sent.\n"); if (send_status) -- cgit v1.2.3 From bd32a8cfa8f172bd943655a3663d8005e5c1d83c Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Fri, 19 Sep 2008 18:41:16 -0700 Subject: x86: cpu don't print duplicated vendor string Some CPUs have vendor string in the middle of model_id instead of beginning Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/common.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index fb789dd9e69..088fbdb6734 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -797,7 +797,7 @@ void __cpuinit print_cpu_info(struct cpuinfo_x86 *c) else if (c->cpuid_level >= 0) vendor = c->x86_vendor_id; - if (vendor && strncmp(c->x86_model_id, vendor, strlen(vendor))) + if (vendor && !strstr(c->x86_model_id, vendor)) printk(KERN_CONT "%s ", vendor); if (c->x86_model_id[0]) -- cgit v1.2.3 From cf4cfb225ab2b48611cb4f9e8db23c87486416d6 Mon Sep 17 00:00:00 2001 From: Glauber Costa Date: Fri, 11 Jul 2008 13:45:46 -0300 Subject: x86: use user_mode macro Instead of using SEGMENT_IS_KERNEL_CODE, use the "user_mode" macro, which can play the same role. Delete the former, since it now lacks any user. Signed-off-by: Glauber Costa Signed-off-by: Ingo Molnar --- arch/x86/kernel/time_32.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/time_32.c b/arch/x86/kernel/time_32.c index bbecf8b6bf9..8abcb7b0869 100644 --- a/arch/x86/kernel/time_32.c +++ b/arch/x86/kernel/time_32.c @@ -47,7 +47,7 @@ unsigned long profile_pc(struct pt_regs *regs) unsigned long pc = instruction_pointer(regs); #ifdef CONFIG_SMP - if (!v8086_mode(regs) && SEGMENT_IS_KERNEL_CODE(regs->cs) && + if (!v8086_mode(regs) && !user_mode(regs) && in_lock_functions(pc)) { #ifdef CONFIG_FRAME_POINTER return *(unsigned long *)(regs->bp + 4); -- cgit v1.2.3 From 2c44e66843cd50c5ef4f4271fbd63a4f4bf4d083 Mon Sep 17 00:00:00 2001 From: Glauber Costa Date: Fri, 11 Jul 2008 13:48:07 -0300 Subject: x86: coalesce tests Coalesce v8086_mode and user_mode into a single user_mode_vm() test. Signed-off-by: Glauber Costa Signed-off-by: Ingo Molnar --- arch/x86/kernel/time_32.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/time_32.c b/arch/x86/kernel/time_32.c index 8abcb7b0869..ca0a4aab12c 100644 --- a/arch/x86/kernel/time_32.c +++ b/arch/x86/kernel/time_32.c @@ -47,8 +47,7 @@ unsigned long profile_pc(struct pt_regs *regs) unsigned long pc = instruction_pointer(regs); #ifdef CONFIG_SMP - if (!v8086_mode(regs) && !user_mode(regs) && - in_lock_functions(pc)) { + if (!user_mode_vm(regs) && in_lock_functions(pc)) { #ifdef CONFIG_FRAME_POINTER return *(unsigned long *)(regs->bp + 4); #else -- cgit v1.2.3 From 097a0788df71b0f3328c70ab5f4e41c27ee66817 Mon Sep 17 00:00:00 2001 From: Glauber Costa Date: Thu, 14 Aug 2008 17:33:12 -0300 Subject: x86: set bp field in pt_regs properly Save rbp twice: One is for marking the stack frame, as usual (already there), and the other, to fill pt_regs properly. This is because bx comes right before the last saved register in that structure, and not bp. If the base pointer were in the place bx is today, this would not be needed. Signed-off-by: Glauber Costa Signed-off-by: Ingo Molnar --- arch/x86/kernel/entry_64.S | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index cf3a0b2d005..25bb3f9b255 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -667,6 +667,13 @@ END(stub_rt_sigreturn) SAVE_ARGS leaq -ARGOFFSET(%rsp),%rdi # arg1 for handler pushq %rbp + /* + * Save rbp twice: One is for marking the stack frame, as usual, and the + * other, to fill pt_regs properly. This is because bx comes right + * before the last saved register in that structure, and not bp. If the + * base pointer were in the place bx is today, this would not be needed. + */ + movq %rbp, -8(%rsp) CFI_ADJUST_CFA_OFFSET 8 CFI_REL_OFFSET rbp, 0 movq %rsp,%rbp -- cgit v1.2.3 From 3927fa9e4b5d5f346d12aa0531744daef106ebd3 Mon Sep 17 00:00:00 2001 From: Glauber Costa Date: Fri, 11 Jul 2008 13:53:43 -0300 Subject: x86: use frame pointer information on x86_64 profile_pc Signed-off-by: Glauber Costa Signed-off-by: Ingo Molnar --- arch/x86/kernel/time_64.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/time_64.c b/arch/x86/kernel/time_64.c index e3d49c553af..7fd995edb76 100644 --- a/arch/x86/kernel/time_64.c +++ b/arch/x86/kernel/time_64.c @@ -34,11 +34,15 @@ unsigned long profile_pc(struct pt_regs *regs) of flags from PUSHF Eflags always has bits 22 and up cleared unlike kernel addresses. */ if (!user_mode(regs) && in_lock_functions(pc)) { +#ifdef CONFIG_FRAME_POINTER + return *(unsigned long *)(regs->bp + sizeof(long)); +#else unsigned long *sp = (unsigned long *)regs->sp; if (sp[0] >> 22) return sp[0]; if (sp[1] >> 22) return sp[1]; +#endif } return pc; } -- cgit v1.2.3 From 8de0b8a7eaf274d197698b035090eeb805f62de6 Mon Sep 17 00:00:00 2001 From: Glauber Costa Date: Fri, 11 Jul 2008 14:10:13 -0300 Subject: x86: use user_mode_vm instead of user_mode For x86_64, it does not really matter. But makes the code equal to i386. Signed-off-by: Glauber Costa Signed-off-by: Ingo Molnar --- arch/x86/kernel/time_64.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/time_64.c b/arch/x86/kernel/time_64.c index 7fd995edb76..0469243ae1b 100644 --- a/arch/x86/kernel/time_64.c +++ b/arch/x86/kernel/time_64.c @@ -33,7 +33,7 @@ unsigned long profile_pc(struct pt_regs *regs) /* Assume the lock function has either no stack frame or a copy of flags from PUSHF Eflags always has bits 22 and up cleared unlike kernel addresses. */ - if (!user_mode(regs) && in_lock_functions(pc)) { + if (!user_mode_vm(regs) && in_lock_functions(pc)) { #ifdef CONFIG_FRAME_POINTER return *(unsigned long *)(regs->bp + sizeof(long)); #else -- cgit v1.2.3 From 2ff298372d03b01c9ca8738ee2791227a81928e2 Mon Sep 17 00:00:00 2001 From: Glauber Costa Date: Fri, 11 Jul 2008 14:21:29 -0300 Subject: x86: bind irq0 irq data to cpu0 Signed-off-by: Glauber Costa Signed-off-by: Ingo Molnar --- arch/x86/kernel/time_64.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/time_64.c b/arch/x86/kernel/time_64.c index 0469243ae1b..841938297c2 100644 --- a/arch/x86/kernel/time_64.c +++ b/arch/x86/kernel/time_64.c @@ -115,6 +115,7 @@ void __init hpet_time_init(void) if (!hpet_enable()) setup_pit_timer(); + irq0.mask = cpumask_of_cpu(0); setup_irq(0, &irq0); } -- cgit v1.2.3 From 2f97435e57926a5cdc104fe5a7c64932cb62cdda Mon Sep 17 00:00:00 2001 From: Glauber Costa Date: Fri, 11 Jul 2008 15:08:39 -0300 Subject: x86: factor out irq initialization for x86_64 Provide apic_intr_init and smp_intr_init functions. Signed-off-by: Glauber Costa Signed-off-by: Ingo Molnar --- arch/x86/kernel/irqinit_64.c | 43 ++++++++++++++++++++++++++++--------------- 1 file changed, 28 insertions(+), 15 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/irqinit_64.c b/arch/x86/kernel/irqinit_64.c index 1f26fd9ec4f..18968a78ab3 100644 --- a/arch/x86/kernel/irqinit_64.c +++ b/arch/x86/kernel/irqinit_64.c @@ -164,22 +164,8 @@ static void __init init_ISA_irqs (void) void init_IRQ(void) __attribute__((weak, alias("native_init_IRQ"))); -void __init native_init_IRQ(void) +void __init smp_intr_init(void) { - int i; - - init_ISA_irqs(); - /* - * Cover the whole vector space, no vector can escape - * us. (some of these will be overridden and become - * 'special' SMP interrupts) - */ - for (i = 0; i < (NR_VECTORS - FIRST_EXTERNAL_VECTOR); i++) { - int vector = FIRST_EXTERNAL_VECTOR + i; - if (vector != IA32_SYSCALL_VECTOR) - set_intr_gate(vector, interrupt[i]); - } - #ifdef CONFIG_SMP /* * The reschedule interrupt is a CPU-to-CPU reschedule-helper @@ -207,6 +193,14 @@ void __init native_init_IRQ(void) /* Low priority IPI to cleanup after moving an irq */ set_intr_gate(IRQ_MOVE_CLEANUP_VECTOR, irq_move_cleanup_interrupt); #endif +} + +void __init apic_intr_init(void) +{ +#ifdef CONFIG_SMP + smp_intr_init(); +#endif + alloc_intr_gate(THERMAL_APIC_VECTOR, thermal_interrupt); alloc_intr_gate(THRESHOLD_APIC_VECTOR, threshold_interrupt); @@ -216,6 +210,25 @@ void __init native_init_IRQ(void) /* IPI vectors for APIC spurious and error interrupts */ alloc_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt); alloc_intr_gate(ERROR_APIC_VECTOR, error_interrupt); +} + +void __init native_init_IRQ(void) +{ + int i; + + init_ISA_irqs(); + /* + * Cover the whole vector space, no vector can escape + * us. (some of these will be overridden and become + * 'special' SMP interrupts) + */ + for (i = 0; i < (NR_VECTORS - FIRST_EXTERNAL_VECTOR); i++) { + int vector = FIRST_EXTERNAL_VECTOR + i; + if (vector != IA32_SYSCALL_VECTOR) + set_intr_gate(vector, interrupt[i]); + } + + apic_intr_init(); if (!acpi_ioapic) setup_irq(2, &irq2); -- cgit v1.2.3 From 780209af714ba733cd9e2f3526107becfc1969bc Mon Sep 17 00:00:00 2001 From: Glauber Costa Date: Fri, 11 Jul 2008 15:12:39 -0300 Subject: x86: make init_ISA_irqs nonstatic Signed-off-by: Glauber Costa Signed-off-by: Ingo Molnar --- arch/x86/kernel/irqinit_64.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/irqinit_64.c b/arch/x86/kernel/irqinit_64.c index 18968a78ab3..b01d9549b3d 100644 --- a/arch/x86/kernel/irqinit_64.c +++ b/arch/x86/kernel/irqinit_64.c @@ -135,7 +135,7 @@ DEFINE_PER_CPU(vector_irq_t, vector_irq) = { [IRQ15_VECTOR + 1 ... NR_VECTORS - 1] = -1 }; -static void __init init_ISA_irqs (void) +void __init init_ISA_irqs(void) { int i; -- cgit v1.2.3 From 461ebd109569d666d367f774f74002add6444d49 Mon Sep 17 00:00:00 2001 From: Glauber Costa Date: Fri, 11 Jul 2008 15:25:13 -0300 Subject: x86: rename timer_event_interrupt to timer_interrupt Signed-off-by: Glauber Costa Signed-off-by: Ingo Molnar --- arch/x86/kernel/time_64.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/time_64.c b/arch/x86/kernel/time_64.c index 841938297c2..2be67c24909 100644 --- a/arch/x86/kernel/time_64.c +++ b/arch/x86/kernel/time_64.c @@ -48,7 +48,7 @@ unsigned long profile_pc(struct pt_regs *regs) } EXPORT_SYMBOL(profile_pc); -static irqreturn_t timer_event_interrupt(int irq, void *dev_id) +irqreturn_t timer_interrupt(int irq, void *dev_id) { add_pda(irq0_irqs, 1); @@ -104,7 +104,7 @@ unsigned long __init calibrate_cpu(void) } static struct irqaction irq0 = { - .handler = timer_event_interrupt, + .handler = timer_interrupt, .flags = IRQF_DISABLED | IRQF_IRQPOLL | IRQF_NOBALANCING, .mask = CPU_MASK_NONE, .name = "timer" -- cgit v1.2.3 From 2c460d0b6813a5d2a7d571b0b561e4e727036dd7 Mon Sep 17 00:00:00 2001 From: Glauber Costa Date: Fri, 11 Jul 2008 16:06:40 -0300 Subject: x86: replace hardcoded number Replace "4" in time_32.c code by sizeof(long). This way, it can work on x86_64 too. Signed-off-by: Glauber Costa Signed-off-by: Ingo Molnar --- arch/x86/kernel/time_32.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/time_32.c b/arch/x86/kernel/time_32.c index ca0a4aab12c..7215bc6adfc 100644 --- a/arch/x86/kernel/time_32.c +++ b/arch/x86/kernel/time_32.c @@ -49,7 +49,7 @@ unsigned long profile_pc(struct pt_regs *regs) #ifdef CONFIG_SMP if (!user_mode_vm(regs) && in_lock_functions(pc)) { #ifdef CONFIG_FRAME_POINTER - return *(unsigned long *)(regs->bp + 4); + return *(unsigned long *)(regs->bp + sizeof(long)); #else unsigned long *sp = (unsigned long *)®s->sp; -- cgit v1.2.3 From 33c053d0aec344c8b2ad6966d904ebeff64e590b Mon Sep 17 00:00:00 2001 From: Glauber Costa Date: Mon, 21 Jul 2008 18:42:52 -0300 Subject: x86: wrap MCA_bus test around an ifdef Only test for MCA_bus if support for MCA is compiled in. Also, for x86_64, write the code inside the conditional for consistency with i386. It won't bite us, since it'll probably never select CONFIG_MCA anyway. Signed-off-by: Glauber Costa Signed-off-by: Ingo Molnar --- arch/x86/kernel/time_32.c | 2 ++ arch/x86/kernel/time_64.c | 8 ++++++++ 2 files changed, 10 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/time_32.c b/arch/x86/kernel/time_32.c index 7215bc6adfc..77b400f06ea 100644 --- a/arch/x86/kernel/time_32.c +++ b/arch/x86/kernel/time_32.c @@ -94,6 +94,7 @@ irqreturn_t timer_interrupt(int irq, void *dev_id) do_timer_interrupt_hook(); +#ifdef CONFIG_MCA if (MCA_bus) { /* The PS/2 uses level-triggered interrupts. You can't turn them off, nor would you want to (any attempt to @@ -107,6 +108,7 @@ irqreturn_t timer_interrupt(int irq, void *dev_id) u8 irq_v = inb_p( 0x61 ); /* read the current state */ outb_p( irq_v|0x80, 0x61 ); /* reset the IRQ */ } +#endif return IRQ_HANDLED; } diff --git a/arch/x86/kernel/time_64.c b/arch/x86/kernel/time_64.c index 2be67c24909..207a7a1d7ac 100644 --- a/arch/x86/kernel/time_64.c +++ b/arch/x86/kernel/time_64.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include @@ -54,6 +55,13 @@ irqreturn_t timer_interrupt(int irq, void *dev_id) global_clock_event->event_handler(global_clock_event); +#ifdef CONFIG_MCA + if (MCA_bus) { + u8 irq_v = inb_p(0x61); /* read the current state */ + outb_p(irq_v|0x80, 0x61); /* reset the IRQ */ + } +#endif + return IRQ_HANDLED; } -- cgit v1.2.3 From e04d645f326bc8e591bc4ae21c4ab535987a17c1 Mon Sep 17 00:00:00 2001 From: Glauber Costa Date: Mon, 22 Sep 2008 14:35:08 -0300 Subject: x86: move vgetcpu mode probing to cpu detection Take it out of time initialization and move it to cpu detection time. Signed-off-by: Glauber Costa Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/common.c | 12 ++++++++++++ arch/x86/kernel/time_64.c | 4 ---- 2 files changed, 12 insertions(+), 4 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 088fbdb6734..f1af7185191 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -719,12 +719,24 @@ static void __cpuinit identify_cpu(struct cpuinfo_x86 *c) #endif } +#ifdef CONFIG_X86_64 +static void vgetcpu_set_mode(void) +{ + if (cpu_has(&boot_cpu_data, X86_FEATURE_RDTSCP)) + vgetcpu_mode = VGETCPU_RDTSCP; + else + vgetcpu_mode = VGETCPU_LSL; +} +#endif + void __init identify_boot_cpu(void) { identify_cpu(&boot_cpu_data); #ifdef CONFIG_X86_32 sysenter_setup(); enable_sep_cpu(); +#else + vgetcpu_set_mode(); #endif } diff --git a/arch/x86/kernel/time_64.c b/arch/x86/kernel/time_64.c index 207a7a1d7ac..cb19d650c21 100644 --- a/arch/x86/kernel/time_64.c +++ b/arch/x86/kernel/time_64.c @@ -130,10 +130,6 @@ void __init hpet_time_init(void) void __init time_init(void) { tsc_init(); - if (cpu_has(&boot_cpu_data, X86_FEATURE_RDTSCP)) - vgetcpu_mode = VGETCPU_RDTSCP; - else - vgetcpu_mode = VGETCPU_LSL; late_time_init = choose_time_init(); } -- cgit v1.2.3 From 2e42060c19cb79adacc48beb5e9ec5361df976a2 Mon Sep 17 00:00:00 2001 From: Jack Steiner Date: Tue, 23 Sep 2008 15:37:13 -0500 Subject: x86, uv: add early detection of UV system types Portions of the ACPI code needs to know if a system is a UV system prior to genapic initialization. This patch adds a call early_acpi_boot_init() so that the apic type is discovered earlier. V2 of the patch adding fixes from Yinghai Lu. Much cleaner and smaller. Signed-off-by: Jack Steiner Acked-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/setup.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index de6a9d6d599..2255782e8d4 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -998,6 +998,8 @@ void __init setup_arch(char **cmdline_p) */ acpi_boot_table_init(); + early_acpi_boot_init(); + #ifdef CONFIG_ACPI_NUMA /* * Parse SRAT to discover nodes. -- cgit v1.2.3 From 6b11d4ef3e4d6ce83fb177aa50fdf385abb17d1a Mon Sep 17 00:00:00 2001 From: Alexander van Heukelum Date: Fri, 26 Sep 2008 14:03:02 +0200 Subject: traps: x86_64: add TRACE_IRQS_OFF in error_entry Add TRACE_IRQS_OFF just before entering the C code. All exceptions are taken via interrupt gates. If irq tracing is enabled, it should be notified as soon as possible. Interrupts are only (conditionally) re-enabled in C code. Signed-off-by: Alexander van Heukelum Signed-off-by: Ingo Molnar --- arch/x86/kernel/entry_64.S | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index 25bb3f9b255..963b35b81bc 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -1065,7 +1065,8 @@ KPROBE_ENTRY(error_entry) je error_kernelspace error_swapgs: SWAPGS -error_sti: +error_sti: + TRACE_IRQS_OFF movq %rdi,RDI(%rsp) CFI_REL_OFFSET rdi,RDI movq %rsp,%rdi -- cgit v1.2.3 From 7e61a7932495e37685e95ec9a59ad08810dec959 Mon Sep 17 00:00:00 2001 From: Alexander van Heukelum Date: Fri, 26 Sep 2008 14:03:03 +0200 Subject: traps: x86_64: add TRACE_IRQS_OFF in paranoidentry macro Add TRACE_IRQS_OFF just before entering the C code. All exceptions are taken via interrupt gates. If irq tracing is enabled, it should be notified as soon as possible. Interrupts are only (conditionally) re-enabled in C code. Signed-off-by: Alexander van Heukelum Signed-off-by: Ingo Molnar --- arch/x86/kernel/entry_64.S | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index 963b35b81bc..977c8ea6602 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -939,6 +939,9 @@ END(spurious_interrupt) .if \ist movq %gs:pda_data_offset, %rbp .endif + .if \irqtrace + TRACE_IRQS_OFF + .endif movq %rsp,%rdi movq ORIG_RAX(%rsp),%rsi movq $-1,ORIG_RAX(%rsp) -- cgit v1.2.3 From 4b986a365253b57d8ab4ed7b796ba0893ff4c05c Mon Sep 17 00:00:00 2001 From: Alexander van Heukelum Date: Fri, 26 Sep 2008 14:03:04 +0200 Subject: traps: x86_64: remove trace_hardirqs_fixup from DO_ERROR_INFO macro All exceptions are taken via interrupt gates. TRACE_IRQS_OFF is called just before entering the C code, so the irq state is known to the irq tracer at this point. No need to call trace_hardirqs_fixup. Signed-off-by: Alexander van Heukelum Signed-off-by: Ingo Molnar --- arch/x86/kernel/traps_64.c | 1 - 1 file changed, 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/traps_64.c b/arch/x86/kernel/traps_64.c index 9c0ac0cab01..9f487f374c7 100644 --- a/arch/x86/kernel/traps_64.c +++ b/arch/x86/kernel/traps_64.c @@ -658,7 +658,6 @@ asmlinkage void do_##name(struct pt_regs *regs, long error_code) \ info.si_errno = 0; \ info.si_code = sicode; \ info.si_addr = (void __user *)siaddr; \ - trace_hardirqs_fixup(); \ if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \ == NOTIFY_STOP) \ return; \ -- cgit v1.2.3 From 8b1c870f19849235b8c7e4dfe2ec6b0d691fa9d7 Mon Sep 17 00:00:00 2001 From: Alexander van Heukelum Date: Fri, 26 Sep 2008 14:03:05 +0200 Subject: traps: x86_64: remove trace_hardirqs_fixup from int3 handler All exceptions are taken via interrupt gates. TRACE_IRQS_OFF is called just before entering the C code, so the irq state is known to the irq tracer at this point. No need to call trace_hardirqs_fixup. Signed-off-by: Alexander van Heukelum Signed-off-by: Ingo Molnar --- arch/x86/kernel/traps_64.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/traps_64.c b/arch/x86/kernel/traps_64.c index 9f487f374c7..7d56c875dd0 100644 --- a/arch/x86/kernel/traps_64.c +++ b/arch/x86/kernel/traps_64.c @@ -861,8 +861,6 @@ void restart_nmi(void) /* runs on IST stack. */ asmlinkage void __kprobes do_int3(struct pt_regs *regs, long error_code) { - trace_hardirqs_fixup(); - if (notify_die(DIE_INT3, "int3", regs, error_code, 3, SIGTRAP) == NOTIFY_STOP) return; -- cgit v1.2.3 From a491503e4d0cb739f409069826e2746e38826099 Mon Sep 17 00:00:00 2001 From: Alexander van Heukelum Date: Fri, 26 Sep 2008 14:03:06 +0200 Subject: traps: x86_64: remove trace_hardirqs_fixup from debug handler All exceptions are taken via interrupt gates. TRACE_IRQS_OFF is called just before entering the C code, so the irq state is known to the irq tracer at this point. No need to call trace_hardirqs_fixup. Signed-off-by: Alexander van Heukelum Signed-off-by: Ingo Molnar --- arch/x86/kernel/traps_64.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/traps_64.c b/arch/x86/kernel/traps_64.c index 7d56c875dd0..7d59559c2df 100644 --- a/arch/x86/kernel/traps_64.c +++ b/arch/x86/kernel/traps_64.c @@ -899,8 +899,6 @@ asmlinkage void __kprobes do_debug(struct pt_regs *regs, unsigned long condition; siginfo_t info; - trace_hardirqs_fixup(); - get_debugreg(condition, 6); /* -- cgit v1.2.3 From 3c1326f8a6d8b9815ca88c95441330f96eef7352 Mon Sep 17 00:00:00 2001 From: Alexander van Heukelum Date: Fri, 26 Sep 2008 14:03:08 +0200 Subject: traps: i386: make do_trap more like x86_64 This patch hardcodes which traps should be forwarded to handle_vm86_trap in do_trap. This allows to remove the vm86 parameter from the i386-version of do_trap, which makes the DO_VM86_ERROR and DO_VM86_ERROR_INFO macros unnecessary. x86_64 part is whitespace only. Signed-off-by: Alexander van Heukelum Signed-off-by: Ingo Molnar --- arch/x86/kernel/traps_32.c | 55 ++++++++++++++-------------------------------- arch/x86/kernel/traps_64.c | 2 +- 2 files changed, 17 insertions(+), 40 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/traps_32.c b/arch/x86/kernel/traps_32.c index ab559365089..bf4d71231d4 100644 --- a/arch/x86/kernel/traps_32.c +++ b/arch/x86/kernel/traps_32.c @@ -482,13 +482,17 @@ die_if_kernel(const char *str, struct pt_regs *regs, long err) } static void __kprobes -do_trap(int trapnr, int signr, char *str, int vm86, struct pt_regs *regs, +do_trap(int trapnr, int signr, char *str, struct pt_regs *regs, long error_code, siginfo_t *info) { struct task_struct *tsk = current; if (regs->flags & X86_VM_MASK) { - if (vm86) + /* + * traps 0, 1, 3, 4, and 5 should be forwarded to vm86. + * On nmi (interrupt 2), do_trap should not be called. + */ + if (trapnr < 6) goto vm86_trap; goto trap_signal; } @@ -537,37 +541,10 @@ void do_##name(struct pt_regs *regs, long error_code) \ == NOTIFY_STOP) \ return; \ conditional_sti(regs); \ - do_trap(trapnr, signr, str, 0, regs, error_code, NULL); \ -} - -#define DO_ERROR_INFO(trapnr, signr, str, name, sicode, siaddr, irq) \ -void do_##name(struct pt_regs *regs, long error_code) \ -{ \ - siginfo_t info; \ - if (irq) \ - local_irq_enable(); \ - info.si_signo = signr; \ - info.si_errno = 0; \ - info.si_code = sicode; \ - info.si_addr = (void __user *)siaddr; \ - if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \ - == NOTIFY_STOP) \ - return; \ - conditional_sti(regs); \ - do_trap(trapnr, signr, str, 0, regs, error_code, &info); \ -} - -#define DO_VM86_ERROR(trapnr, signr, str, name) \ -void do_##name(struct pt_regs *regs, long error_code) \ -{ \ - if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \ - == NOTIFY_STOP) \ - return; \ - conditional_sti(regs); \ - do_trap(trapnr, signr, str, 1, regs, error_code, NULL); \ + do_trap(trapnr, signr, str, regs, error_code, NULL); \ } -#define DO_VM86_ERROR_INFO(trapnr, signr, str, name, sicode, siaddr) \ +#define DO_ERROR_INFO(trapnr, signr, str, name, sicode, siaddr) \ void do_##name(struct pt_regs *regs, long error_code) \ { \ siginfo_t info; \ @@ -579,18 +556,18 @@ void do_##name(struct pt_regs *regs, long error_code) \ == NOTIFY_STOP) \ return; \ conditional_sti(regs); \ - do_trap(trapnr, signr, str, 1, regs, error_code, &info); \ + do_trap(trapnr, signr, str, regs, error_code, &info); \ } -DO_VM86_ERROR_INFO(0, SIGFPE, "divide error", divide_error, FPE_INTDIV, regs->ip) -DO_VM86_ERROR(4, SIGSEGV, "overflow", overflow) -DO_VM86_ERROR(5, SIGSEGV, "bounds", bounds) -DO_ERROR_INFO(6, SIGILL, "invalid opcode", invalid_op, ILL_ILLOPN, regs->ip, 0) +DO_ERROR_INFO(0, SIGFPE, "divide error", divide_error, FPE_INTDIV, regs->ip) +DO_ERROR(4, SIGSEGV, "overflow", overflow) +DO_ERROR(5, SIGSEGV, "bounds", bounds) +DO_ERROR_INFO(6, SIGILL, "invalid opcode", invalid_op, ILL_ILLOPN, regs->ip) DO_ERROR(9, SIGFPE, "coprocessor segment overrun", coprocessor_segment_overrun) DO_ERROR(10, SIGSEGV, "invalid TSS", invalid_TSS) DO_ERROR(11, SIGBUS, "segment not present", segment_not_present) DO_ERROR(12, SIGBUS, "stack segment", stack_segment) -DO_ERROR_INFO(17, SIGBUS, "alignment check", alignment_check, BUS_ADRALN, 0, 0) +DO_ERROR_INFO(17, SIGBUS, "alignment check", alignment_check, BUS_ADRALN, 0) void __kprobes do_general_protection(struct pt_regs *regs, long error_code) @@ -868,7 +845,7 @@ void __kprobes do_int3(struct pt_regs *regs, long error_code) return; #endif - do_trap(3, SIGTRAP, "int3", 1, regs, error_code, NULL); + do_trap(3, SIGTRAP, "int3", regs, error_code, NULL); } /* @@ -1214,7 +1191,7 @@ void do_iret_error(struct pt_regs *regs, long error_code) if (notify_die(DIE_TRAP, "iret exception", regs, error_code, 32, SIGILL) == NOTIFY_STOP) return; - do_trap(32, SIGILL, "iret exception", 0, regs, error_code, &info); + do_trap(32, SIGILL, "iret exception", regs, error_code, &info); } void __init trap_init(void) diff --git a/arch/x86/kernel/traps_64.c b/arch/x86/kernel/traps_64.c index 7d59559c2df..1efd1ea6466 100644 --- a/arch/x86/kernel/traps_64.c +++ b/arch/x86/kernel/traps_64.c @@ -640,7 +640,7 @@ kernel_trap: return; } -#define DO_ERROR(trapnr, signr, str, name) \ +#define DO_ERROR(trapnr, signr, str, name) \ asmlinkage void do_##name(struct pt_regs *regs, long error_code) \ { \ if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \ -- cgit v1.2.3 From 9b658f6f8bd30b91aec527325e398771511ea61b Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 25 Sep 2008 22:22:12 -0700 Subject: x86: cleanup, remove extra ifdef also change two functions to static. Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/irqinit_64.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/irqinit_64.c b/arch/x86/kernel/irqinit_64.c index b01d9549b3d..5b5be9d43c2 100644 --- a/arch/x86/kernel/irqinit_64.c +++ b/arch/x86/kernel/irqinit_64.c @@ -164,7 +164,7 @@ void __init init_ISA_irqs(void) void init_IRQ(void) __attribute__((weak, alias("native_init_IRQ"))); -void __init smp_intr_init(void) +static void __init smp_intr_init(void) { #ifdef CONFIG_SMP /* @@ -195,11 +195,9 @@ void __init smp_intr_init(void) #endif } -void __init apic_intr_init(void) +static void __init apic_intr_init(void) { -#ifdef CONFIG_SMP smp_intr_init(); -#endif alloc_intr_gate(THERMAL_APIC_VECTOR, thermal_interrupt); alloc_intr_gate(THRESHOLD_APIC_VECTOR, threshold_interrupt); -- cgit v1.2.3 From d2f904bb9a1ba88a58a03612abd8c6c54bdaf73a Mon Sep 17 00:00:00 2001 From: Jack Steiner Date: Thu, 25 Sep 2008 07:52:10 -0500 Subject: x86, uv: fix for size of hub mappings Fix the size of the mappings of UV hub registers. Size must be a function of the maximum node number within the SSI. Signed-off-by: Jack Steiner Signed-off-by: Ingo Molnar --- arch/x86/kernel/genx2apic_uv_x.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/genx2apic_uv_x.c b/arch/x86/kernel/genx2apic_uv_x.c index ae2ffc8a400..8cf4ec2a37d 100644 --- a/arch/x86/kernel/genx2apic_uv_x.c +++ b/arch/x86/kernel/genx2apic_uv_x.c @@ -286,12 +286,13 @@ static __init void map_low_mmrs(void) enum map_type {map_wb, map_uc}; -static __init void map_high(char *id, unsigned long base, int shift, enum map_type map_type) +static __init void map_high(char *id, unsigned long base, int shift, + int max_pnode, enum map_type map_type) { unsigned long bytes, paddr; paddr = base << shift; - bytes = (1UL << shift); + bytes = (1UL << shift) * (max_pnode + 1); printk(KERN_INFO "UV: Map %s_HI 0x%lx - 0x%lx\n", id, paddr, paddr + bytes); if (map_type == map_uc) @@ -307,7 +308,7 @@ static __init void map_gru_high(int max_pnode) gru.v = uv_read_local_mmr(UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR); if (gru.s.enable) - map_high("GRU", gru.s.base, shift, map_wb); + map_high("GRU", gru.s.base, shift, max_pnode, map_wb); } static __init void map_config_high(int max_pnode) @@ -317,7 +318,7 @@ static __init void map_config_high(int max_pnode) cfg.v = uv_read_local_mmr(UVH_RH_GAM_CFG_OVERLAY_CONFIG_MMR); if (cfg.s.enable) - map_high("CONFIG", cfg.s.base, shift, map_uc); + map_high("CONFIG", cfg.s.base, shift, max_pnode, map_uc); } static __init void map_mmr_high(int max_pnode) @@ -327,7 +328,7 @@ static __init void map_mmr_high(int max_pnode) mmr.v = uv_read_local_mmr(UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR); if (mmr.s.enable) - map_high("MMR", mmr.s.base, shift, map_uc); + map_high("MMR", mmr.s.base, shift, max_pnode, map_uc); } static __init void map_mmioh_high(int max_pnode) @@ -337,7 +338,7 @@ static __init void map_mmioh_high(int max_pnode) mmioh.v = uv_read_local_mmr(UVH_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR); if (mmioh.s.enable) - map_high("MMIOH", mmioh.s.base, shift, map_uc); + map_high("MMIOH", mmioh.s.base, shift, max_pnode, map_uc); } static __init void uv_rtc_init(void) -- cgit v1.2.3 From 14adf855baefad5ac3b545be23a64e6b61d6b74a Mon Sep 17 00:00:00 2001 From: Chuck Ebbert Date: Mon, 29 Sep 2008 18:29:42 -0400 Subject: x86: move prefill_possible_map calling early, fix, V2 Commit 4a701737 ("x86: move prefill_possible_map calling early, fix") is the wrong fix: prefill_possible_map() needs to be available even when CONFIG_HOTPLUG_CPU is not set. A followon patch will do that. Fix this correctly by making prefill_possible_map() available even when CONFIG_HOTPLUG_CPU is not set. The function is needed so that the number of possible CPUs can be determined. Tested on uniprocessor machine with CPU hotplug disabled. From boot log: Before: NR_CPUS: 512, nr_cpu_ids: 512, nr_node_ids 1 After: NR_CPUS: 512, nr_cpu_ids: 1, nr_node_ids 1 Signed-off-by: Chuck Ebbert Signed-off-by: Ingo Molnar --- arch/x86/kernel/smpboot.c | 62 +++++++++++++++++++++++------------------------ 1 file changed, 31 insertions(+), 31 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index a778e221ccf..23913785c26 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -1261,39 +1261,8 @@ void __init native_smp_cpus_done(unsigned int max_cpus) check_nmi_watchdog(); } -#ifdef CONFIG_HOTPLUG_CPU - -static void remove_siblinginfo(int cpu) -{ - int sibling; - struct cpuinfo_x86 *c = &cpu_data(cpu); - - for_each_cpu_mask_nr(sibling, per_cpu(cpu_core_map, cpu)) { - cpu_clear(cpu, per_cpu(cpu_core_map, sibling)); - /*/ - * last thread sibling in this cpu core going down - */ - if (cpus_weight(per_cpu(cpu_sibling_map, cpu)) == 1) - cpu_data(sibling).booted_cores--; - } - - for_each_cpu_mask_nr(sibling, per_cpu(cpu_sibling_map, cpu)) - cpu_clear(cpu, per_cpu(cpu_sibling_map, sibling)); - cpus_clear(per_cpu(cpu_sibling_map, cpu)); - cpus_clear(per_cpu(cpu_core_map, cpu)); - c->phys_proc_id = 0; - c->cpu_core_id = 0; - cpu_clear(cpu, cpu_sibling_setup_map); -} - static int additional_cpus __initdata = -1; -static __init int setup_additional_cpus(char *s) -{ - return s && get_option(&s, &additional_cpus) ? 0 : -EINVAL; -} -early_param("additional_cpus", setup_additional_cpus); - /* * cpu_possible_map should be static, it cannot change as cpu's * are onlined, or offlined. The reason is per-cpu data-structures @@ -1340,6 +1309,37 @@ __init void prefill_possible_map(void) nr_cpu_ids = possible; } +#ifdef CONFIG_HOTPLUG_CPU + +static void remove_siblinginfo(int cpu) +{ + int sibling; + struct cpuinfo_x86 *c = &cpu_data(cpu); + + for_each_cpu_mask_nr(sibling, per_cpu(cpu_core_map, cpu)) { + cpu_clear(cpu, per_cpu(cpu_core_map, sibling)); + /*/ + * last thread sibling in this cpu core going down + */ + if (cpus_weight(per_cpu(cpu_sibling_map, cpu)) == 1) + cpu_data(sibling).booted_cores--; + } + + for_each_cpu_mask_nr(sibling, per_cpu(cpu_sibling_map, cpu)) + cpu_clear(cpu, per_cpu(cpu_sibling_map, sibling)); + cpus_clear(per_cpu(cpu_sibling_map, cpu)); + cpus_clear(per_cpu(cpu_core_map, cpu)); + c->phys_proc_id = 0; + c->cpu_core_id = 0; + cpu_clear(cpu, cpu_sibling_setup_map); +} + +static __init int setup_additional_cpus(char *s) +{ + return s && get_option(&s, &additional_cpus) ? 0 : -EINVAL; +} +early_param("additional_cpus", setup_additional_cpus); + static void __ref remove_cpu_from_maps(int cpu) { cpu_clear(cpu, cpu_online_map); -- cgit v1.2.3 From 1e0b5d00b230ceffe1bb33284b46b8572e418423 Mon Sep 17 00:00:00 2001 From: Jack Steiner Date: Mon, 29 Sep 2008 08:45:29 -0500 Subject: x86, UV: new UV genapic functions for x2apic Add functions that use the infrastructure added by the x2apic code. These functions were originally stubbed out since the UV code went into the tree prior to the x2apic code. Signed-off-by: Jack Steiner Signed-off-by: Ingo Molnar --- arch/x86/kernel/genx2apic_uv_x.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/genx2apic_uv_x.c b/arch/x86/kernel/genx2apic_uv_x.c index 8cf4ec2a37d..33581d94a90 100644 --- a/arch/x86/kernel/genx2apic_uv_x.c +++ b/arch/x86/kernel/genx2apic_uv_x.c @@ -114,7 +114,7 @@ static void uv_send_IPI_one(int cpu, int vector) unsigned long val, apicid, lapicid; int pnode; - apicid = per_cpu(x86_cpu_to_apicid, cpu); /* ZZZ - cache node-local ? */ + apicid = per_cpu(x86_cpu_to_apicid, cpu); lapicid = apicid & 0x3f; /* ZZZ macro needed */ pnode = uv_apicid_to_pnode(apicid); val = @@ -202,12 +202,10 @@ static unsigned int phys_pkg_id(int index_msb) return uv_read_apic_id() >> index_msb; } -#ifdef ZZZ /* Needs x2apic patch */ static void uv_send_IPI_self(int vector) { apic_write(APIC_SELF_IPI, vector); } -#endif struct genapic apic_x2apic_uv_x = { .name = "UV large system", @@ -215,15 +213,15 @@ struct genapic apic_x2apic_uv_x = { .int_delivery_mode = dest_Fixed, .int_dest_mode = (APIC_DEST_PHYSICAL != 0), .target_cpus = uv_target_cpus, - .vector_allocation_domain = uv_vector_allocation_domain,/* Fixme ZZZ */ + .vector_allocation_domain = uv_vector_allocation_domain, .apic_id_registered = uv_apic_id_registered, .init_apic_ldr = uv_init_apic_ldr, .send_IPI_all = uv_send_IPI_all, .send_IPI_allbutself = uv_send_IPI_allbutself, .send_IPI_mask = uv_send_IPI_mask, - /* ZZZ.send_IPI_self = uv_send_IPI_self, */ + .send_IPI_self = uv_send_IPI_self, .cpu_mask_to_apicid = uv_cpu_mask_to_apicid, - .phys_pkg_id = phys_pkg_id, /* Fixme ZZZ */ + .phys_pkg_id = phys_pkg_id, .get_apic_id = get_apic_id, .set_apic_id = set_apic_id, .apic_id_mask = (0xFFFFFFFFu), -- cgit v1.2.3 From e2ce07c8042975e52df4cec1f41faf15b83f2e42 Mon Sep 17 00:00:00 2001 From: Pekka Enberg Date: Thu, 3 Apr 2008 16:40:48 +0300 Subject: x86: __show_registers() and __show_regs() API unification Currently the low-level function to dump user-passed registers on i386 is called __show_registers() whereas on x86-64 it's called __show_regs(). Unify the API to simplify porting of kmemcheck to x86-64. Signed-off-by: Pekka Enberg Acked-by: Vegard Nossum Signed-off-by: Ingo Molnar --- arch/x86/kernel/process_32.c | 4 ++-- arch/x86/kernel/process_64.c | 7 +++++-- arch/x86/kernel/traps_32.c | 2 +- arch/x86/kernel/traps_64.c | 2 +- 4 files changed, 9 insertions(+), 6 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 922c14058f9..0a1302fe6d4 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -123,7 +123,7 @@ void cpu_idle(void) } } -void __show_registers(struct pt_regs *regs, int all) +void __show_regs(struct pt_regs *regs, int all) { unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L; unsigned long d0, d1, d2, d3, d6, d7; @@ -189,7 +189,7 @@ void __show_registers(struct pt_regs *regs, int all) void show_regs(struct pt_regs *regs) { - __show_registers(regs, 1); + __show_regs(regs, 1); show_trace(NULL, regs, ®s->sp, regs->bp); } diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index ca80394ef5b..cd8c0ed02b7 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -136,7 +136,7 @@ void cpu_idle(void) } /* Prints also some state that isn't saved in the pt_regs */ -void __show_regs(struct pt_regs *regs) +void __show_regs(struct pt_regs *regs, int all) { unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L, fs, gs, shadowgs; unsigned long d0, d1, d2, d3, d6, d7; @@ -175,6 +175,9 @@ void __show_regs(struct pt_regs *regs) rdmsrl(MSR_GS_BASE, gs); rdmsrl(MSR_KERNEL_GS_BASE, shadowgs); + if (!all) + return; + cr0 = read_cr0(); cr2 = read_cr2(); cr3 = read_cr3(); @@ -200,7 +203,7 @@ void __show_regs(struct pt_regs *regs) void show_regs(struct pt_regs *regs) { printk(KERN_INFO "CPU %d:", smp_processor_id()); - __show_regs(regs); + __show_regs(regs, 1); show_trace(NULL, regs, (void *)(regs + 1), regs->bp); } diff --git a/arch/x86/kernel/traps_32.c b/arch/x86/kernel/traps_32.c index bf4d71231d4..4f0831593e3 100644 --- a/arch/x86/kernel/traps_32.c +++ b/arch/x86/kernel/traps_32.c @@ -319,7 +319,7 @@ void show_registers(struct pt_regs *regs) int i; print_modules(); - __show_registers(regs, 0); + __show_regs(regs, 0); printk(KERN_EMERG "Process %.*s (pid: %d, ti=%p task=%p task.ti=%p)", TASK_COMM_LEN, current->comm, task_pid_nr(current), diff --git a/arch/x86/kernel/traps_64.c b/arch/x86/kernel/traps_64.c index 1efd1ea6466..729157ee4c1 100644 --- a/arch/x86/kernel/traps_64.c +++ b/arch/x86/kernel/traps_64.c @@ -430,7 +430,7 @@ void show_registers(struct pt_regs *regs) sp = regs->sp; printk("CPU %d ", cpu); - __show_regs(regs); + __show_regs(regs, 1); printk("Process %s (pid: %d, threadinfo %p, task %p)\n", cur->comm, cur->pid, task_thread_info(cur), cur); -- cgit v1.2.3 From 94f6bac1058fd59a8bd472d18c4b77f220d930b0 Mon Sep 17 00:00:00 2001 From: Krzysztof Helt Date: Tue, 30 Sep 2008 23:17:51 +0200 Subject: x86: do not allow to optimize flag_is_changeable_p() (rev. 2) The flag_is_changeable_p() is used by has_cpuid_p() which can return different results in the code sequence below: if (!have_cpuid_p()) identify_cpu_without_cpuid(c); /* cyrix could have cpuid enabled via c_identify()*/ if (!have_cpuid_p()) return; Otherwise, the gcc 3.4.6 optimizes these two calls into one which make the code not working correctly. Cyrix cpus have the CPUID instruction enabled before the second call to the have_cpuid_p() but it is not detected due to the gcc optimization. Thus the ARR registers (mtrr like) are not detected on such a cpu. Signed-off-by: Krzysztof Helt Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/common.c | 31 +++++++++++++++++++------------ 1 file changed, 19 insertions(+), 12 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index f1af7185191..25581dcb280 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -124,18 +124,25 @@ static inline int flag_is_changeable_p(u32 flag) { u32 f1, f2; - asm("pushfl\n\t" - "pushfl\n\t" - "popl %0\n\t" - "movl %0,%1\n\t" - "xorl %2,%0\n\t" - "pushl %0\n\t" - "popfl\n\t" - "pushfl\n\t" - "popl %0\n\t" - "popfl\n\t" - : "=&r" (f1), "=&r" (f2) - : "ir" (flag)); + /* + * Cyrix and IDT cpus allow disabling of CPUID + * so the code below may return different results + * when it is executed before and after enabling + * the CPUID. Add "volatile" to not allow gcc to + * optimize the subsequent calls to this function. + */ + asm volatile ("pushfl\n\t" + "pushfl\n\t" + "popl %0\n\t" + "movl %0,%1\n\t" + "xorl %2,%0\n\t" + "pushl %0\n\t" + "popfl\n\t" + "pushfl\n\t" + "popl %0\n\t" + "popfl\n\t" + : "=&r" (f1), "=&r" (f2) + : "ir" (flag)); return ((f1^f2) & flag) != 0; } -- cgit v1.2.3 From 7f2f49a58283110083a7358d2d98025a11653373 Mon Sep 17 00:00:00 2001 From: Chuck Ebbert Date: Thu, 2 Oct 2008 15:30:07 -0400 Subject: x86: allow number of additional hotplug CPUs to be set at compile time, V2 x86: allow number of additional hotplug CPUs to be set at compile time, V2 The default number of additional CPU IDs for hotplugging is determined by asking ACPI or mptables how many "disabled" CPUs there are in the system, but many systems get this wrong so that e.g. a uniprocessor machine gets an extra CPU allocated and never switches to single CPU mode. And sometimes CPU hotplugging is enabled only for suspend/hibernate anyway, so the additional CPU IDs are not wanted. Allow the number to be set to zero at compile time. Also, force the number of extra CPUs to zero if hotplugging is disabled which allows removing some conditional code. Tested on uniprocessor x86_64 that ACPI claims has a disabled processor, with CPU hotplugging configured. ("After" has the number of additional CPUs set to 0) Before: NR_CPUS: 512, nr_cpu_ids: 2, nr_node_ids 1 After: NR_CPUS: 512, nr_cpu_ids: 1, nr_node_ids 1 [Changed the name of the option and the prompt according to Ingo's suggestion.] Signed-off-by: Ingo Molnar --- arch/x86/kernel/smpboot.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 23913785c26..857a88bb919 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -1261,7 +1261,7 @@ void __init native_smp_cpus_done(unsigned int max_cpus) check_nmi_watchdog(); } -static int additional_cpus __initdata = -1; +static int additional_cpus __initdata = CONFIG_HOTPLUG_ADDITIONAL_CPUS; /* * cpu_possible_map should be static, it cannot change as cpu's -- cgit v1.2.3 From af5c2bd16ac2e5688c3bf46ea1f95112d696d294 Mon Sep 17 00:00:00 2001 From: Vegard Nossum Date: Fri, 3 Oct 2008 17:54:25 +0200 Subject: x86: fix virt_addr_valid() with CONFIG_DEBUG_VIRTUAL=y, v2 virt_addr_valid() calls __pa(), which calls __phys_addr(). With CONFIG_DEBUG_VIRTUAL=y, __phys_addr() will kill the kernel if the address *isn't* valid. That's clearly wrong for virt_addr_valid(). We also incorporate the debugging checks into virt_addr_valid(). Signed-off-by: Vegard Nossum Acked-by: Jiri Slaby Signed-off-by: Ingo Molnar --- arch/x86/kernel/doublefault_32.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/doublefault_32.c b/arch/x86/kernel/doublefault_32.c index 395acb12b0d..b4f14c6c09d 100644 --- a/arch/x86/kernel/doublefault_32.c +++ b/arch/x86/kernel/doublefault_32.c @@ -66,6 +66,6 @@ struct tss_struct doublefault_tss __cacheline_aligned = { .ds = __USER_DS, .fs = __KERNEL_PERCPU, - .__cr3 = __phys_addr_const((unsigned long)swapper_pg_dir) + .__cr3 = __pa_nodebug(swapper_pg_dir), } }; -- cgit v1.2.3 From 2bc5f927d489f9e47b6fa71f323b653e8ec81782 Mon Sep 17 00:00:00 2001 From: Alexander van Heukelum Date: Tue, 30 Sep 2008 13:12:14 +0200 Subject: i386: split out dumpstack code from traps_32.c The dumpstack code is logically quite independent from the hardware traps. Split it out into its own file. Signed-off-by: Alexander van Heukelum Signed-off-by: Ingo Molnar --- arch/x86/kernel/Makefile | 2 +- arch/x86/kernel/dumpstack_32.c | 422 +++++++++++++++++++++++++++++++++++++++++ arch/x86/kernel/traps_32.c | 404 --------------------------------------- 3 files changed, 423 insertions(+), 405 deletions(-) create mode 100644 arch/x86/kernel/dumpstack_32.c (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 5098585f87c..d5bde5d8c2b 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -28,7 +28,7 @@ obj-y += time_$(BITS).o ioport.o ldt.o obj-y += setup.o i8259.o irqinit_$(BITS).o setup_percpu.o obj-$(CONFIG_X86_VISWS) += visws_quirks.o obj-$(CONFIG_X86_32) += probe_roms_32.o -obj-$(CONFIG_X86_32) += sys_i386_32.o i386_ksyms_32.o +obj-$(CONFIG_X86_32) += sys_i386_32.o i386_ksyms_32.o dumpstack_32.o obj-$(CONFIG_X86_64) += sys_x86_64.o x8664_ksyms_64.o obj-$(CONFIG_X86_64) += syscall_64.o vsyscall_64.o obj-y += bootflag.o e820.o diff --git a/arch/x86/kernel/dumpstack_32.c b/arch/x86/kernel/dumpstack_32.c new file mode 100644 index 00000000000..c398b27df6c --- /dev/null +++ b/arch/x86/kernel/dumpstack_32.c @@ -0,0 +1,422 @@ +/* + * Copyright (C) 1991, 1992 Linus Torvalds + * Copyright (C) 2000, 2001, 2002 Andi Kleen, SuSE Labs + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +int panic_on_unrecovered_nmi; +int kstack_depth_to_print = 24; +static unsigned int code_bytes = 64; +static int die_counter; + +void printk_address(unsigned long address, int reliable) +{ +#ifdef CONFIG_KALLSYMS + unsigned long offset = 0; + unsigned long symsize; + const char *symname; + char *modname; + char *delim = ":"; + char namebuf[KSYM_NAME_LEN]; + char reliab[4] = ""; + + symname = kallsyms_lookup(address, &symsize, &offset, + &modname, namebuf); + if (!symname) { + printk(" [<%08lx>]\n", address); + return; + } + if (!reliable) + strcpy(reliab, "? "); + + if (!modname) + modname = delim = ""; + printk(" [<%08lx>] %s%s%s%s%s+0x%lx/0x%lx\n", + address, reliab, delim, modname, delim, symname, offset, symsize); +#else + printk(" [<%08lx>]\n", address); +#endif +} + +static inline int valid_stack_ptr(struct thread_info *tinfo, + void *p, unsigned int size) +{ + void *t = tinfo; + return p > t && p <= t + THREAD_SIZE - size; +} + +/* The form of the top of the frame on the stack */ +struct stack_frame { + struct stack_frame *next_frame; + unsigned long return_address; +}; + +static inline unsigned long +print_context_stack(struct thread_info *tinfo, + unsigned long *stack, unsigned long bp, + const struct stacktrace_ops *ops, void *data) +{ + struct stack_frame *frame = (struct stack_frame *)bp; + + while (valid_stack_ptr(tinfo, stack, sizeof(*stack))) { + unsigned long addr; + + addr = *stack; + if (__kernel_text_address(addr)) { + if ((unsigned long) stack == bp + 4) { + ops->address(data, addr, 1); + frame = frame->next_frame; + bp = (unsigned long) frame; + } else { + ops->address(data, addr, bp == 0); + } + } + stack++; + } + return bp; +} + +void dump_trace(struct task_struct *task, struct pt_regs *regs, + unsigned long *stack, unsigned long bp, + const struct stacktrace_ops *ops, void *data) +{ + if (!task) + task = current; + + if (!stack) { + unsigned long dummy; + stack = &dummy; + if (task != current) + stack = (unsigned long *)task->thread.sp; + } + +#ifdef CONFIG_FRAME_POINTER + if (!bp) { + if (task == current) { + /* Grab bp right from our regs */ + asm("movl %%ebp, %0" : "=r" (bp) :); + } else { + /* bp is the last reg pushed by switch_to */ + bp = *(unsigned long *) task->thread.sp; + } + } +#endif + + for (;;) { + struct thread_info *context; + + context = (struct thread_info *) + ((unsigned long)stack & (~(THREAD_SIZE - 1))); + bp = print_context_stack(context, stack, bp, ops, data); + /* + * Should be after the line below, but somewhere + * in early boot context comes out corrupted and we + * can't reference it: + */ + if (ops->stack(data, "IRQ") < 0) + break; + stack = (unsigned long *)context->previous_esp; + if (!stack) + break; + touch_nmi_watchdog(); + } +} +EXPORT_SYMBOL(dump_trace); + +static void +print_trace_warning_symbol(void *data, char *msg, unsigned long symbol) +{ + printk(data); + print_symbol(msg, symbol); + printk("\n"); +} + +static void print_trace_warning(void *data, char *msg) +{ + printk("%s%s\n", (char *)data, msg); +} + +static int print_trace_stack(void *data, char *name) +{ + return 0; +} + +/* + * Print one address/symbol entries per line. + */ +static void print_trace_address(void *data, unsigned long addr, int reliable) +{ + printk("%s [<%08lx>] ", (char *)data, addr); + if (!reliable) + printk("? "); + print_symbol("%s\n", addr); + touch_nmi_watchdog(); +} + +static const struct stacktrace_ops print_trace_ops = { + .warning = print_trace_warning, + .warning_symbol = print_trace_warning_symbol, + .stack = print_trace_stack, + .address = print_trace_address, +}; + +static void +show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, + unsigned long *stack, unsigned long bp, char *log_lvl) +{ + dump_trace(task, regs, stack, bp, &print_trace_ops, log_lvl); + printk("%s =======================\n", log_lvl); +} + +void show_trace(struct task_struct *task, struct pt_regs *regs, + unsigned long *stack, unsigned long bp) +{ + show_trace_log_lvl(task, regs, stack, bp, ""); +} + +static void +show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, + unsigned long *sp, unsigned long bp, char *log_lvl) +{ + unsigned long *stack; + int i; + + if (sp == NULL) { + if (task) + sp = (unsigned long *)task->thread.sp; + else + sp = (unsigned long *)&sp; + } + + stack = sp; + for (i = 0; i < kstack_depth_to_print; i++) { + if (kstack_end(stack)) + break; + if (i && ((i % 8) == 0)) + printk("\n%s ", log_lvl); + printk("%08lx ", *stack++); + } + printk("\n%sCall Trace:\n", log_lvl); + + show_trace_log_lvl(task, regs, sp, bp, log_lvl); +} + +void show_stack(struct task_struct *task, unsigned long *sp) +{ + printk(" "); + show_stack_log_lvl(task, NULL, sp, 0, ""); +} + +/* + * The architecture-independent dump_stack generator + */ +void dump_stack(void) +{ + unsigned long bp = 0; + unsigned long stack; + +#ifdef CONFIG_FRAME_POINTER + if (!bp) + asm("movl %%ebp, %0" : "=r" (bp):); +#endif + + printk("Pid: %d, comm: %.20s %s %s %.*s\n", + current->pid, current->comm, print_tainted(), + init_utsname()->release, + (int)strcspn(init_utsname()->version, " "), + init_utsname()->version); + + show_trace(current, NULL, &stack, bp); +} + +EXPORT_SYMBOL(dump_stack); + +void show_registers(struct pt_regs *regs) +{ + int i; + + print_modules(); + __show_regs(regs, 0); + + printk(KERN_EMERG "Process %.*s (pid: %d, ti=%p task=%p task.ti=%p)", + TASK_COMM_LEN, current->comm, task_pid_nr(current), + current_thread_info(), current, task_thread_info(current)); + /* + * When in-kernel, we also print out the stack and code at the + * time of the fault.. + */ + if (!user_mode_vm(regs)) { + unsigned int code_prologue = code_bytes * 43 / 64; + unsigned int code_len = code_bytes; + unsigned char c; + u8 *ip; + + printk("\n" KERN_EMERG "Stack: "); + show_stack_log_lvl(NULL, regs, ®s->sp, 0, KERN_EMERG); + + printk(KERN_EMERG "Code: "); + + ip = (u8 *)regs->ip - code_prologue; + if (ip < (u8 *)PAGE_OFFSET || probe_kernel_address(ip, c)) { + /* try starting at EIP */ + ip = (u8 *)regs->ip; + code_len = code_len - code_prologue + 1; + } + for (i = 0; i < code_len; i++, ip++) { + if (ip < (u8 *)PAGE_OFFSET || + probe_kernel_address(ip, c)) { + printk(" Bad EIP value."); + break; + } + if (ip == (u8 *)regs->ip) + printk("<%02x> ", c); + else + printk("%02x ", c); + } + } + printk("\n"); +} + +int is_valid_bugaddr(unsigned long ip) +{ + unsigned short ud2; + + if (ip < PAGE_OFFSET) + return 0; + if (probe_kernel_address((unsigned short *)ip, ud2)) + return 0; + + return ud2 == 0x0b0f; +} + +static raw_spinlock_t die_lock = __RAW_SPIN_LOCK_UNLOCKED; +static int die_owner = -1; +static unsigned int die_nest_count; + +unsigned __kprobes long oops_begin(void) +{ + unsigned long flags; + + oops_enter(); + + if (die_owner != raw_smp_processor_id()) { + console_verbose(); + raw_local_irq_save(flags); + __raw_spin_lock(&die_lock); + die_owner = smp_processor_id(); + die_nest_count = 0; + bust_spinlocks(1); + } else { + raw_local_irq_save(flags); + } + die_nest_count++; + return flags; +} + +void __kprobes oops_end(unsigned long flags, struct pt_regs *regs, int signr) +{ + bust_spinlocks(0); + die_owner = -1; + add_taint(TAINT_DIE); + __raw_spin_unlock(&die_lock); + raw_local_irq_restore(flags); + + if (!regs) + return; + + if (kexec_should_crash(current)) + crash_kexec(regs); + + if (in_interrupt()) + panic("Fatal exception in interrupt"); + + if (panic_on_oops) + panic("Fatal exception"); + + oops_exit(); + do_exit(signr); +} + +int __kprobes __die(const char *str, struct pt_regs *regs, long err) +{ + unsigned short ss; + unsigned long sp; + + printk(KERN_EMERG "%s: %04lx [#%d] ", str, err & 0xffff, ++die_counter); +#ifdef CONFIG_PREEMPT + printk("PREEMPT "); +#endif +#ifdef CONFIG_SMP + printk("SMP "); +#endif +#ifdef CONFIG_DEBUG_PAGEALLOC + printk("DEBUG_PAGEALLOC"); +#endif + printk("\n"); + if (notify_die(DIE_OOPS, str, regs, err, + current->thread.trap_no, SIGSEGV) == NOTIFY_STOP) + return 1; + + show_registers(regs); + /* Executive summary in case the oops scrolled away */ + sp = (unsigned long) (®s->sp); + savesegment(ss, ss); + if (user_mode(regs)) { + sp = regs->sp; + ss = regs->ss & 0xffff; + } + printk(KERN_EMERG "EIP: [<%08lx>] ", regs->ip); + print_symbol("%s", regs->ip); + printk(" SS:ESP %04x:%08lx\n", ss, sp); + return 0; +} + +/* + * This is gone through when something in the kernel has done something bad + * and is about to be terminated: + */ +void die(const char *str, struct pt_regs *regs, long err) +{ + unsigned long flags = oops_begin(); + + if (die_nest_count < 3) { + report_bug(regs->ip, regs); + + if (__die(str, regs, err)) + regs = NULL; + } else { + printk(KERN_EMERG "Recursive die() failure, output suppressed\n"); + } + + oops_end(flags, regs, SIGSEGV); +} + +static int __init kstack_setup(char *s) +{ + kstack_depth_to_print = simple_strtoul(s, NULL, 0); + + return 1; +} +__setup("kstack=", kstack_setup); + +static int __init code_bytes_setup(char *s) +{ + code_bytes = simple_strtoul(s, NULL, 0); + if (code_bytes > 8192) + code_bytes = 8192; + + return 1; +} +__setup("code_bytes=", code_bytes_setup); diff --git a/arch/x86/kernel/traps_32.c b/arch/x86/kernel/traps_32.c index 4f0831593e3..ce1063c141f 100644 --- a/arch/x86/kernel/traps_32.c +++ b/arch/x86/kernel/traps_32.c @@ -80,11 +80,7 @@ char ignore_fpu_irq; gate_desc idt_table[256] __attribute__((__section__(".data.idt"))) = { { { { 0, 0 } } }, }; -int panic_on_unrecovered_nmi; -int kstack_depth_to_print = 24; -static unsigned int code_bytes = 64; static int ignore_nmis; -static int die_counter; static inline void conditional_sti(struct pt_regs *regs) { @@ -92,388 +88,6 @@ static inline void conditional_sti(struct pt_regs *regs) local_irq_enable(); } -void printk_address(unsigned long address, int reliable) -{ -#ifdef CONFIG_KALLSYMS - unsigned long offset = 0; - unsigned long symsize; - const char *symname; - char *modname; - char *delim = ":"; - char namebuf[KSYM_NAME_LEN]; - char reliab[4] = ""; - - symname = kallsyms_lookup(address, &symsize, &offset, - &modname, namebuf); - if (!symname) { - printk(" [<%08lx>]\n", address); - return; - } - if (!reliable) - strcpy(reliab, "? "); - - if (!modname) - modname = delim = ""; - printk(" [<%08lx>] %s%s%s%s%s+0x%lx/0x%lx\n", - address, reliab, delim, modname, delim, symname, offset, symsize); -#else - printk(" [<%08lx>]\n", address); -#endif -} - -static inline int valid_stack_ptr(struct thread_info *tinfo, - void *p, unsigned int size) -{ - void *t = tinfo; - return p > t && p <= t + THREAD_SIZE - size; -} - -/* The form of the top of the frame on the stack */ -struct stack_frame { - struct stack_frame *next_frame; - unsigned long return_address; -}; - -static inline unsigned long -print_context_stack(struct thread_info *tinfo, - unsigned long *stack, unsigned long bp, - const struct stacktrace_ops *ops, void *data) -{ - struct stack_frame *frame = (struct stack_frame *)bp; - - while (valid_stack_ptr(tinfo, stack, sizeof(*stack))) { - unsigned long addr; - - addr = *stack; - if (__kernel_text_address(addr)) { - if ((unsigned long) stack == bp + 4) { - ops->address(data, addr, 1); - frame = frame->next_frame; - bp = (unsigned long) frame; - } else { - ops->address(data, addr, bp == 0); - } - } - stack++; - } - return bp; -} - -void dump_trace(struct task_struct *task, struct pt_regs *regs, - unsigned long *stack, unsigned long bp, - const struct stacktrace_ops *ops, void *data) -{ - if (!task) - task = current; - - if (!stack) { - unsigned long dummy; - stack = &dummy; - if (task != current) - stack = (unsigned long *)task->thread.sp; - } - -#ifdef CONFIG_FRAME_POINTER - if (!bp) { - if (task == current) { - /* Grab bp right from our regs */ - asm("movl %%ebp, %0" : "=r" (bp) :); - } else { - /* bp is the last reg pushed by switch_to */ - bp = *(unsigned long *) task->thread.sp; - } - } -#endif - - for (;;) { - struct thread_info *context; - - context = (struct thread_info *) - ((unsigned long)stack & (~(THREAD_SIZE - 1))); - bp = print_context_stack(context, stack, bp, ops, data); - /* - * Should be after the line below, but somewhere - * in early boot context comes out corrupted and we - * can't reference it: - */ - if (ops->stack(data, "IRQ") < 0) - break; - stack = (unsigned long *)context->previous_esp; - if (!stack) - break; - touch_nmi_watchdog(); - } -} -EXPORT_SYMBOL(dump_trace); - -static void -print_trace_warning_symbol(void *data, char *msg, unsigned long symbol) -{ - printk(data); - print_symbol(msg, symbol); - printk("\n"); -} - -static void print_trace_warning(void *data, char *msg) -{ - printk("%s%s\n", (char *)data, msg); -} - -static int print_trace_stack(void *data, char *name) -{ - return 0; -} - -/* - * Print one address/symbol entries per line. - */ -static void print_trace_address(void *data, unsigned long addr, int reliable) -{ - printk("%s [<%08lx>] ", (char *)data, addr); - if (!reliable) - printk("? "); - print_symbol("%s\n", addr); - touch_nmi_watchdog(); -} - -static const struct stacktrace_ops print_trace_ops = { - .warning = print_trace_warning, - .warning_symbol = print_trace_warning_symbol, - .stack = print_trace_stack, - .address = print_trace_address, -}; - -static void -show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, - unsigned long *stack, unsigned long bp, char *log_lvl) -{ - dump_trace(task, regs, stack, bp, &print_trace_ops, log_lvl); - printk("%s =======================\n", log_lvl); -} - -void show_trace(struct task_struct *task, struct pt_regs *regs, - unsigned long *stack, unsigned long bp) -{ - show_trace_log_lvl(task, regs, stack, bp, ""); -} - -static void -show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, - unsigned long *sp, unsigned long bp, char *log_lvl) -{ - unsigned long *stack; - int i; - - if (sp == NULL) { - if (task) - sp = (unsigned long *)task->thread.sp; - else - sp = (unsigned long *)&sp; - } - - stack = sp; - for (i = 0; i < kstack_depth_to_print; i++) { - if (kstack_end(stack)) - break; - if (i && ((i % 8) == 0)) - printk("\n%s ", log_lvl); - printk("%08lx ", *stack++); - } - printk("\n%sCall Trace:\n", log_lvl); - - show_trace_log_lvl(task, regs, sp, bp, log_lvl); -} - -void show_stack(struct task_struct *task, unsigned long *sp) -{ - printk(" "); - show_stack_log_lvl(task, NULL, sp, 0, ""); -} - -/* - * The architecture-independent dump_stack generator - */ -void dump_stack(void) -{ - unsigned long bp = 0; - unsigned long stack; - -#ifdef CONFIG_FRAME_POINTER - if (!bp) - asm("movl %%ebp, %0" : "=r" (bp):); -#endif - - printk("Pid: %d, comm: %.20s %s %s %.*s\n", - current->pid, current->comm, print_tainted(), - init_utsname()->release, - (int)strcspn(init_utsname()->version, " "), - init_utsname()->version); - - show_trace(current, NULL, &stack, bp); -} - -EXPORT_SYMBOL(dump_stack); - -void show_registers(struct pt_regs *regs) -{ - int i; - - print_modules(); - __show_regs(regs, 0); - - printk(KERN_EMERG "Process %.*s (pid: %d, ti=%p task=%p task.ti=%p)", - TASK_COMM_LEN, current->comm, task_pid_nr(current), - current_thread_info(), current, task_thread_info(current)); - /* - * When in-kernel, we also print out the stack and code at the - * time of the fault.. - */ - if (!user_mode_vm(regs)) { - unsigned int code_prologue = code_bytes * 43 / 64; - unsigned int code_len = code_bytes; - unsigned char c; - u8 *ip; - - printk("\n" KERN_EMERG "Stack: "); - show_stack_log_lvl(NULL, regs, ®s->sp, 0, KERN_EMERG); - - printk(KERN_EMERG "Code: "); - - ip = (u8 *)regs->ip - code_prologue; - if (ip < (u8 *)PAGE_OFFSET || probe_kernel_address(ip, c)) { - /* try starting at EIP */ - ip = (u8 *)regs->ip; - code_len = code_len - code_prologue + 1; - } - for (i = 0; i < code_len; i++, ip++) { - if (ip < (u8 *)PAGE_OFFSET || - probe_kernel_address(ip, c)) { - printk(" Bad EIP value."); - break; - } - if (ip == (u8 *)regs->ip) - printk("<%02x> ", c); - else - printk("%02x ", c); - } - } - printk("\n"); -} - -int is_valid_bugaddr(unsigned long ip) -{ - unsigned short ud2; - - if (ip < PAGE_OFFSET) - return 0; - if (probe_kernel_address((unsigned short *)ip, ud2)) - return 0; - - return ud2 == 0x0b0f; -} - -static raw_spinlock_t die_lock = __RAW_SPIN_LOCK_UNLOCKED; -static int die_owner = -1; -static unsigned int die_nest_count; - -unsigned __kprobes long oops_begin(void) -{ - unsigned long flags; - - oops_enter(); - - if (die_owner != raw_smp_processor_id()) { - console_verbose(); - raw_local_irq_save(flags); - __raw_spin_lock(&die_lock); - die_owner = smp_processor_id(); - die_nest_count = 0; - bust_spinlocks(1); - } else { - raw_local_irq_save(flags); - } - die_nest_count++; - return flags; -} - -void __kprobes oops_end(unsigned long flags, struct pt_regs *regs, int signr) -{ - bust_spinlocks(0); - die_owner = -1; - add_taint(TAINT_DIE); - __raw_spin_unlock(&die_lock); - raw_local_irq_restore(flags); - - if (!regs) - return; - - if (kexec_should_crash(current)) - crash_kexec(regs); - - if (in_interrupt()) - panic("Fatal exception in interrupt"); - - if (panic_on_oops) - panic("Fatal exception"); - - oops_exit(); - do_exit(signr); -} - -int __kprobes __die(const char *str, struct pt_regs *regs, long err) -{ - unsigned short ss; - unsigned long sp; - - printk(KERN_EMERG "%s: %04lx [#%d] ", str, err & 0xffff, ++die_counter); -#ifdef CONFIG_PREEMPT - printk("PREEMPT "); -#endif -#ifdef CONFIG_SMP - printk("SMP "); -#endif -#ifdef CONFIG_DEBUG_PAGEALLOC - printk("DEBUG_PAGEALLOC"); -#endif - printk("\n"); - if (notify_die(DIE_OOPS, str, regs, err, - current->thread.trap_no, SIGSEGV) == NOTIFY_STOP) - return 1; - - show_registers(regs); - /* Executive summary in case the oops scrolled away */ - sp = (unsigned long) (®s->sp); - savesegment(ss, ss); - if (user_mode(regs)) { - sp = regs->sp; - ss = regs->ss & 0xffff; - } - printk(KERN_EMERG "EIP: [<%08lx>] ", regs->ip); - print_symbol("%s", regs->ip); - printk(" SS:ESP %04x:%08lx\n", ss, sp); - return 0; -} - -/* - * This is gone through when something in the kernel has done something bad - * and is about to be terminated: - */ -void die(const char *str, struct pt_regs *regs, long err) -{ - unsigned long flags = oops_begin(); - - if (die_nest_count < 3) { - report_bug(regs->ip, regs); - - if (__die(str, regs, err)) - regs = NULL; - } else { - printk(KERN_EMERG "Recursive die() failure, output suppressed\n"); - } - - oops_end(flags, regs, SIGSEGV); -} - static inline void die_if_kernel(const char *str, struct pt_regs *regs, long err) { @@ -1256,21 +870,3 @@ void __init trap_init(void) trap_init_hook(); } - -static int __init kstack_setup(char *s) -{ - kstack_depth_to_print = simple_strtoul(s, NULL, 0); - - return 1; -} -__setup("kstack=", kstack_setup); - -static int __init code_bytes_setup(char *s) -{ - code_bytes = simple_strtoul(s, NULL, 0); - if (code_bytes > 8192) - code_bytes = 8192; - - return 1; -} -__setup("code_bytes=", code_bytes_setup); -- cgit v1.2.3 From 6fcbede3fdfbd83d8de97296286f5a9ff5a8f371 Mon Sep 17 00:00:00 2001 From: Alexander van Heukelum Date: Tue, 30 Sep 2008 13:12:15 +0200 Subject: x86_64: split out dumpstack code from traps_64.c The dumpstack code is logically quite independent from the hardware traps. Split it out into its own file. Signed-off-by: Alexander van Heukelum Signed-off-by: Ingo Molnar --- arch/x86/kernel/Makefile | 4 +- arch/x86/kernel/dumpstack_64.c | 565 +++++++++++++++++++++++++++++++++++++++++ arch/x86/kernel/traps_64.c | 545 --------------------------------------- 3 files changed, 567 insertions(+), 547 deletions(-) create mode 100644 arch/x86/kernel/dumpstack_64.c (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index d5bde5d8c2b..de63fed9fae 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -23,12 +23,12 @@ CFLAGS_hpet.o := $(nostackp) CFLAGS_tsc.o := $(nostackp) obj-y := process_$(BITS).o signal_$(BITS).o entry_$(BITS).o -obj-y += traps_$(BITS).o irq_$(BITS).o +obj-y += traps_$(BITS).o irq_$(BITS).o dumpstack_$(BITS).o obj-y += time_$(BITS).o ioport.o ldt.o obj-y += setup.o i8259.o irqinit_$(BITS).o setup_percpu.o obj-$(CONFIG_X86_VISWS) += visws_quirks.o obj-$(CONFIG_X86_32) += probe_roms_32.o -obj-$(CONFIG_X86_32) += sys_i386_32.o i386_ksyms_32.o dumpstack_32.o +obj-$(CONFIG_X86_32) += sys_i386_32.o i386_ksyms_32.o obj-$(CONFIG_X86_64) += sys_x86_64.o x8664_ksyms_64.o obj-$(CONFIG_X86_64) += syscall_64.o vsyscall_64.o obj-y += bootflag.o e820.o diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c new file mode 100644 index 00000000000..6f1505074db --- /dev/null +++ b/arch/x86/kernel/dumpstack_64.c @@ -0,0 +1,565 @@ +/* + * Copyright (C) 1991, 1992 Linus Torvalds + * Copyright (C) 2000, 2001, 2002 Andi Kleen, SuSE Labs + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +int panic_on_unrecovered_nmi; +int kstack_depth_to_print = 12; +static unsigned int code_bytes = 64; +static int die_counter; + +void printk_address(unsigned long address, int reliable) +{ + printk(" [<%016lx>] %s%pS\n", + address, reliable ? "" : "? ", (void *) address); +} + +static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack, + unsigned *usedp, char **idp) +{ + static char ids[][8] = { + [DEBUG_STACK - 1] = "#DB", + [NMI_STACK - 1] = "NMI", + [DOUBLEFAULT_STACK - 1] = "#DF", + [STACKFAULT_STACK - 1] = "#SS", + [MCE_STACK - 1] = "#MC", +#if DEBUG_STKSZ > EXCEPTION_STKSZ + [N_EXCEPTION_STACKS ... + N_EXCEPTION_STACKS + DEBUG_STKSZ / EXCEPTION_STKSZ - 2] = "#DB[?]" +#endif + }; + unsigned k; + + /* + * Iterate over all exception stacks, and figure out whether + * 'stack' is in one of them: + */ + for (k = 0; k < N_EXCEPTION_STACKS; k++) { + unsigned long end = per_cpu(orig_ist, cpu).ist[k]; + /* + * Is 'stack' above this exception frame's end? + * If yes then skip to the next frame. + */ + if (stack >= end) + continue; + /* + * Is 'stack' above this exception frame's start address? + * If yes then we found the right frame. + */ + if (stack >= end - EXCEPTION_STKSZ) { + /* + * Make sure we only iterate through an exception + * stack once. If it comes up for the second time + * then there's something wrong going on - just + * break out and return NULL: + */ + if (*usedp & (1U << k)) + break; + *usedp |= 1U << k; + *idp = ids[k]; + return (unsigned long *)end; + } + /* + * If this is a debug stack, and if it has a larger size than + * the usual exception stacks, then 'stack' might still + * be within the lower portion of the debug stack: + */ +#if DEBUG_STKSZ > EXCEPTION_STKSZ + if (k == DEBUG_STACK - 1 && stack >= end - DEBUG_STKSZ) { + unsigned j = N_EXCEPTION_STACKS - 1; + + /* + * Black magic. A large debug stack is composed of + * multiple exception stack entries, which we + * iterate through now. Dont look: + */ + do { + ++j; + end -= EXCEPTION_STKSZ; + ids[j][4] = '1' + (j - N_EXCEPTION_STACKS); + } while (stack < end - EXCEPTION_STKSZ); + if (*usedp & (1U << j)) + break; + *usedp |= 1U << j; + *idp = ids[j]; + return (unsigned long *)end; + } +#endif + } + return NULL; +} + +/* + * x86-64 can have up to three kernel stacks: + * process stack + * interrupt stack + * severe exception (double fault, nmi, stack fault, debug, mce) hardware stack + */ + +static inline int valid_stack_ptr(struct thread_info *tinfo, + void *p, unsigned int size, void *end) +{ + void *t = tinfo; + if (end) { + if (p < end && p >= (end-THREAD_SIZE)) + return 1; + else + return 0; + } + return p > t && p < t + THREAD_SIZE - size; +} + +/* The form of the top of the frame on the stack */ +struct stack_frame { + struct stack_frame *next_frame; + unsigned long return_address; +}; + +static inline unsigned long +print_context_stack(struct thread_info *tinfo, + unsigned long *stack, unsigned long bp, + const struct stacktrace_ops *ops, void *data, + unsigned long *end) +{ + struct stack_frame *frame = (struct stack_frame *)bp; + + while (valid_stack_ptr(tinfo, stack, sizeof(*stack), end)) { + unsigned long addr; + + addr = *stack; + if (__kernel_text_address(addr)) { + if ((unsigned long) stack == bp + 8) { + ops->address(data, addr, 1); + frame = frame->next_frame; + bp = (unsigned long) frame; + } else { + ops->address(data, addr, bp == 0); + } + } + stack++; + } + return bp; +} + +void dump_trace(struct task_struct *task, struct pt_regs *regs, + unsigned long *stack, unsigned long bp, + const struct stacktrace_ops *ops, void *data) +{ + const unsigned cpu = get_cpu(); + unsigned long *irqstack_end = (unsigned long *)cpu_pda(cpu)->irqstackptr; + unsigned used = 0; + struct thread_info *tinfo; + + if (!task) + task = current; + + if (!stack) { + unsigned long dummy; + stack = &dummy; + if (task && task != current) + stack = (unsigned long *)task->thread.sp; + } + +#ifdef CONFIG_FRAME_POINTER + if (!bp) { + if (task == current) { + /* Grab bp right from our regs */ + asm("movq %%rbp, %0" : "=r" (bp) : ); + } else { + /* bp is the last reg pushed by switch_to */ + bp = *(unsigned long *) task->thread.sp; + } + } +#endif + + /* + * Print function call entries in all stacks, starting at the + * current stack address. If the stacks consist of nested + * exceptions + */ + tinfo = task_thread_info(task); + for (;;) { + char *id; + unsigned long *estack_end; + estack_end = in_exception_stack(cpu, (unsigned long)stack, + &used, &id); + + if (estack_end) { + if (ops->stack(data, id) < 0) + break; + + bp = print_context_stack(tinfo, stack, bp, ops, + data, estack_end); + ops->stack(data, ""); + /* + * We link to the next stack via the + * second-to-last pointer (index -2 to end) in the + * exception stack: + */ + stack = (unsigned long *) estack_end[-2]; + continue; + } + if (irqstack_end) { + unsigned long *irqstack; + irqstack = irqstack_end - + (IRQSTACKSIZE - 64) / sizeof(*irqstack); + + if (stack >= irqstack && stack < irqstack_end) { + if (ops->stack(data, "IRQ") < 0) + break; + bp = print_context_stack(tinfo, stack, bp, + ops, data, irqstack_end); + /* + * We link to the next stack (which would be + * the process stack normally) the last + * pointer (index -1 to end) in the IRQ stack: + */ + stack = (unsigned long *) (irqstack_end[-1]); + irqstack_end = NULL; + ops->stack(data, "EOI"); + continue; + } + } + break; + } + + /* + * This handles the process stack: + */ + bp = print_context_stack(tinfo, stack, bp, ops, data, NULL); + put_cpu(); +} +EXPORT_SYMBOL(dump_trace); + +static void +print_trace_warning_symbol(void *data, char *msg, unsigned long symbol) +{ + print_symbol(msg, symbol); + printk("\n"); +} + +static void print_trace_warning(void *data, char *msg) +{ + printk("%s\n", msg); +} + +static int print_trace_stack(void *data, char *name) +{ + printk(" <%s> ", name); + return 0; +} + +static void print_trace_address(void *data, unsigned long addr, int reliable) +{ + touch_nmi_watchdog(); + printk_address(addr, reliable); +} + +static const struct stacktrace_ops print_trace_ops = { + .warning = print_trace_warning, + .warning_symbol = print_trace_warning_symbol, + .stack = print_trace_stack, + .address = print_trace_address, +}; + +static void +show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, + unsigned long *stack, unsigned long bp, char *log_lvl) +{ + printk("Call Trace:\n"); + dump_trace(task, regs, stack, bp, &print_trace_ops, log_lvl); +} + +void show_trace(struct task_struct *task, struct pt_regs *regs, + unsigned long *stack, unsigned long bp) +{ + show_trace_log_lvl(task, regs, stack, bp, ""); +} + +static void +show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, + unsigned long *sp, unsigned long bp, char *log_lvl) +{ + unsigned long *stack; + int i; + const int cpu = smp_processor_id(); + unsigned long *irqstack_end = + (unsigned long *) (cpu_pda(cpu)->irqstackptr); + unsigned long *irqstack = + (unsigned long *) (cpu_pda(cpu)->irqstackptr - IRQSTACKSIZE); + + /* + * debugging aid: "show_stack(NULL, NULL);" prints the + * back trace for this cpu. + */ + + if (sp == NULL) { + if (task) + sp = (unsigned long *)task->thread.sp; + else + sp = (unsigned long *)&sp; + } + + stack = sp; + for (i = 0; i < kstack_depth_to_print; i++) { + if (stack >= irqstack && stack <= irqstack_end) { + if (stack == irqstack_end) { + stack = (unsigned long *) (irqstack_end[-1]); + printk(" "); + } + } else { + if (((long) stack & (THREAD_SIZE-1)) == 0) + break; + } + if (i && ((i % 4) == 0)) + printk("\n"); + printk(" %016lx", *stack++); + touch_nmi_watchdog(); + } + printk("\n"); + show_trace_log_lvl(task, regs, sp, bp, log_lvl); +} + +void show_stack(struct task_struct *task, unsigned long *sp) +{ + show_stack_log_lvl(task, NULL, sp, 0, ""); +} + +/* + * The architecture-independent dump_stack generator + */ +void dump_stack(void) +{ + unsigned long bp = 0; + unsigned long stack; + +#ifdef CONFIG_FRAME_POINTER + if (!bp) + asm("movq %%rbp, %0" : "=r" (bp) : ); +#endif + + printk("Pid: %d, comm: %.20s %s %s %.*s\n", + current->pid, current->comm, print_tainted(), + init_utsname()->release, + (int)strcspn(init_utsname()->version, " "), + init_utsname()->version); + show_trace(NULL, NULL, &stack, bp); +} +EXPORT_SYMBOL(dump_stack); + +void show_registers(struct pt_regs *regs) +{ + int i; + unsigned long sp; + const int cpu = smp_processor_id(); + struct task_struct *cur = cpu_pda(cpu)->pcurrent; + + sp = regs->sp; + printk("CPU %d ", cpu); + __show_regs(regs, 1); + printk("Process %s (pid: %d, threadinfo %p, task %p)\n", + cur->comm, cur->pid, task_thread_info(cur), cur); + + /* + * When in-kernel, we also print out the stack and code at the + * time of the fault.. + */ + if (!user_mode(regs)) { + unsigned int code_prologue = code_bytes * 43 / 64; + unsigned int code_len = code_bytes; + unsigned char c; + u8 *ip; + + printk("Stack: "); + show_stack_log_lvl(NULL, regs, (unsigned long *)sp, + regs->bp, ""); + + printk(KERN_EMERG "Code: "); + + ip = (u8 *)regs->ip - code_prologue; + if (ip < (u8 *)PAGE_OFFSET || probe_kernel_address(ip, c)) { + /* try starting at RIP */ + ip = (u8 *)regs->ip; + code_len = code_len - code_prologue + 1; + } + for (i = 0; i < code_len; i++, ip++) { + if (ip < (u8 *)PAGE_OFFSET || + probe_kernel_address(ip, c)) { + printk(" Bad RIP value."); + break; + } + if (ip == (u8 *)regs->ip) + printk("<%02x> ", c); + else + printk("%02x ", c); + } + } + printk("\n"); +} + +int is_valid_bugaddr(unsigned long ip) +{ + unsigned short ud2; + + if (__copy_from_user(&ud2, (const void __user *) ip, sizeof(ud2))) + return 0; + + return ud2 == 0x0b0f; +} + +static raw_spinlock_t die_lock = __RAW_SPIN_LOCK_UNLOCKED; +static int die_owner = -1; +static unsigned int die_nest_count; + +unsigned __kprobes long oops_begin(void) +{ + int cpu; + unsigned long flags; + + oops_enter(); + + /* racy, but better than risking deadlock. */ + raw_local_irq_save(flags); + cpu = smp_processor_id(); + if (!__raw_spin_trylock(&die_lock)) { + if (cpu == die_owner) + /* nested oops. should stop eventually */; + else + __raw_spin_lock(&die_lock); + } + die_nest_count++; + die_owner = cpu; + console_verbose(); + bust_spinlocks(1); + return flags; +} + +void __kprobes oops_end(unsigned long flags, struct pt_regs *regs, int signr) +{ + die_owner = -1; + bust_spinlocks(0); + die_nest_count--; + if (!die_nest_count) + /* Nest count reaches zero, release the lock. */ + __raw_spin_unlock(&die_lock); + raw_local_irq_restore(flags); + if (!regs) { + oops_exit(); + return; + } + if (in_interrupt()) + panic("Fatal exception in interrupt"); + if (panic_on_oops) + panic("Fatal exception"); + oops_exit(); + do_exit(signr); +} + +int __kprobes __die(const char *str, struct pt_regs *regs, long err) +{ + printk(KERN_EMERG "%s: %04lx [%u] ", str, err & 0xffff, ++die_counter); +#ifdef CONFIG_PREEMPT + printk("PREEMPT "); +#endif +#ifdef CONFIG_SMP + printk("SMP "); +#endif +#ifdef CONFIG_DEBUG_PAGEALLOC + printk("DEBUG_PAGEALLOC"); +#endif + printk("\n"); + if (notify_die(DIE_OOPS, str, regs, err, + current->thread.trap_no, SIGSEGV) == NOTIFY_STOP) + return 1; + + show_registers(regs); + add_taint(TAINT_DIE); + /* Executive summary in case the oops scrolled away */ + printk(KERN_ALERT "RIP "); + printk_address(regs->ip, 1); + printk(" RSP <%016lx>\n", regs->sp); + if (kexec_should_crash(current)) + crash_kexec(regs); + return 0; +} + +void die(const char *str, struct pt_regs *regs, long err) +{ + unsigned long flags = oops_begin(); + + if (!user_mode(regs)) + report_bug(regs->ip, regs); + + if (__die(str, regs, err)) + regs = NULL; + oops_end(flags, regs, SIGSEGV); +} + +notrace __kprobes void +die_nmi(char *str, struct pt_regs *regs, int do_panic) +{ + unsigned long flags; + + if (notify_die(DIE_NMIWATCHDOG, str, regs, 0, 2, SIGINT) == NOTIFY_STOP) + return; + + flags = oops_begin(); + /* + * We are in trouble anyway, lets at least try + * to get a message out. + */ + printk(KERN_EMERG "%s", str); + printk(" on CPU%d, ip %08lx, registers:\n", + smp_processor_id(), regs->ip); + show_registers(regs); + if (kexec_should_crash(current)) + crash_kexec(regs); + if (do_panic || panic_on_oops) + panic("Non maskable interrupt"); + oops_end(flags, NULL, SIGBUS); + nmi_exit(); + local_irq_enable(); + do_exit(SIGBUS); +} + +static int __init oops_setup(char *s) +{ + if (!s) + return -EINVAL; + if (!strcmp(s, "panic")) + panic_on_oops = 1; + return 0; +} +early_param("oops", oops_setup); + +static int __init kstack_setup(char *s) +{ + if (!s) + return -EINVAL; + kstack_depth_to_print = simple_strtoul(s, NULL, 0); + return 0; +} +early_param("kstack", kstack_setup); + +static int __init code_bytes_setup(char *s) +{ + code_bytes = simple_strtoul(s, NULL, 0); + if (code_bytes > 8192) + code_bytes = 8192; + + return 1; +} +__setup("code_bytes=", code_bytes_setup); diff --git a/arch/x86/kernel/traps_64.c b/arch/x86/kernel/traps_64.c index 729157ee4c1..1cd61ddd90b 100644 --- a/arch/x86/kernel/traps_64.c +++ b/arch/x86/kernel/traps_64.c @@ -54,11 +54,7 @@ #include -int panic_on_unrecovered_nmi; -int kstack_depth_to_print = 12; -static unsigned int code_bytes = 64; static int ignore_nmis; -static int die_counter; static inline void conditional_sti(struct pt_regs *regs) { @@ -82,518 +78,6 @@ static inline void preempt_conditional_cli(struct pt_regs *regs) dec_preempt_count(); } -void printk_address(unsigned long address, int reliable) -{ - printk(" [<%016lx>] %s%pS\n", - address, reliable ? "" : "? ", (void *) address); -} - -static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack, - unsigned *usedp, char **idp) -{ - static char ids[][8] = { - [DEBUG_STACK - 1] = "#DB", - [NMI_STACK - 1] = "NMI", - [DOUBLEFAULT_STACK - 1] = "#DF", - [STACKFAULT_STACK - 1] = "#SS", - [MCE_STACK - 1] = "#MC", -#if DEBUG_STKSZ > EXCEPTION_STKSZ - [N_EXCEPTION_STACKS ... - N_EXCEPTION_STACKS + DEBUG_STKSZ / EXCEPTION_STKSZ - 2] = "#DB[?]" -#endif - }; - unsigned k; - - /* - * Iterate over all exception stacks, and figure out whether - * 'stack' is in one of them: - */ - for (k = 0; k < N_EXCEPTION_STACKS; k++) { - unsigned long end = per_cpu(orig_ist, cpu).ist[k]; - /* - * Is 'stack' above this exception frame's end? - * If yes then skip to the next frame. - */ - if (stack >= end) - continue; - /* - * Is 'stack' above this exception frame's start address? - * If yes then we found the right frame. - */ - if (stack >= end - EXCEPTION_STKSZ) { - /* - * Make sure we only iterate through an exception - * stack once. If it comes up for the second time - * then there's something wrong going on - just - * break out and return NULL: - */ - if (*usedp & (1U << k)) - break; - *usedp |= 1U << k; - *idp = ids[k]; - return (unsigned long *)end; - } - /* - * If this is a debug stack, and if it has a larger size than - * the usual exception stacks, then 'stack' might still - * be within the lower portion of the debug stack: - */ -#if DEBUG_STKSZ > EXCEPTION_STKSZ - if (k == DEBUG_STACK - 1 && stack >= end - DEBUG_STKSZ) { - unsigned j = N_EXCEPTION_STACKS - 1; - - /* - * Black magic. A large debug stack is composed of - * multiple exception stack entries, which we - * iterate through now. Dont look: - */ - do { - ++j; - end -= EXCEPTION_STKSZ; - ids[j][4] = '1' + (j - N_EXCEPTION_STACKS); - } while (stack < end - EXCEPTION_STKSZ); - if (*usedp & (1U << j)) - break; - *usedp |= 1U << j; - *idp = ids[j]; - return (unsigned long *)end; - } -#endif - } - return NULL; -} - -/* - * x86-64 can have up to three kernel stacks: - * process stack - * interrupt stack - * severe exception (double fault, nmi, stack fault, debug, mce) hardware stack - */ - -static inline int valid_stack_ptr(struct thread_info *tinfo, - void *p, unsigned int size, void *end) -{ - void *t = tinfo; - if (end) { - if (p < end && p >= (end-THREAD_SIZE)) - return 1; - else - return 0; - } - return p > t && p < t + THREAD_SIZE - size; -} - -/* The form of the top of the frame on the stack */ -struct stack_frame { - struct stack_frame *next_frame; - unsigned long return_address; -}; - -static inline unsigned long -print_context_stack(struct thread_info *tinfo, - unsigned long *stack, unsigned long bp, - const struct stacktrace_ops *ops, void *data, - unsigned long *end) -{ - struct stack_frame *frame = (struct stack_frame *)bp; - - while (valid_stack_ptr(tinfo, stack, sizeof(*stack), end)) { - unsigned long addr; - - addr = *stack; - if (__kernel_text_address(addr)) { - if ((unsigned long) stack == bp + 8) { - ops->address(data, addr, 1); - frame = frame->next_frame; - bp = (unsigned long) frame; - } else { - ops->address(data, addr, bp == 0); - } - } - stack++; - } - return bp; -} - -void dump_trace(struct task_struct *task, struct pt_regs *regs, - unsigned long *stack, unsigned long bp, - const struct stacktrace_ops *ops, void *data) -{ - const unsigned cpu = get_cpu(); - unsigned long *irqstack_end = (unsigned long *)cpu_pda(cpu)->irqstackptr; - unsigned used = 0; - struct thread_info *tinfo; - - if (!task) - task = current; - - if (!stack) { - unsigned long dummy; - stack = &dummy; - if (task && task != current) - stack = (unsigned long *)task->thread.sp; - } - -#ifdef CONFIG_FRAME_POINTER - if (!bp) { - if (task == current) { - /* Grab bp right from our regs */ - asm("movq %%rbp, %0" : "=r" (bp) : ); - } else { - /* bp is the last reg pushed by switch_to */ - bp = *(unsigned long *) task->thread.sp; - } - } -#endif - - /* - * Print function call entries in all stacks, starting at the - * current stack address. If the stacks consist of nested - * exceptions - */ - tinfo = task_thread_info(task); - for (;;) { - char *id; - unsigned long *estack_end; - estack_end = in_exception_stack(cpu, (unsigned long)stack, - &used, &id); - - if (estack_end) { - if (ops->stack(data, id) < 0) - break; - - bp = print_context_stack(tinfo, stack, bp, ops, - data, estack_end); - ops->stack(data, ""); - /* - * We link to the next stack via the - * second-to-last pointer (index -2 to end) in the - * exception stack: - */ - stack = (unsigned long *) estack_end[-2]; - continue; - } - if (irqstack_end) { - unsigned long *irqstack; - irqstack = irqstack_end - - (IRQSTACKSIZE - 64) / sizeof(*irqstack); - - if (stack >= irqstack && stack < irqstack_end) { - if (ops->stack(data, "IRQ") < 0) - break; - bp = print_context_stack(tinfo, stack, bp, - ops, data, irqstack_end); - /* - * We link to the next stack (which would be - * the process stack normally) the last - * pointer (index -1 to end) in the IRQ stack: - */ - stack = (unsigned long *) (irqstack_end[-1]); - irqstack_end = NULL; - ops->stack(data, "EOI"); - continue; - } - } - break; - } - - /* - * This handles the process stack: - */ - bp = print_context_stack(tinfo, stack, bp, ops, data, NULL); - put_cpu(); -} -EXPORT_SYMBOL(dump_trace); - -static void -print_trace_warning_symbol(void *data, char *msg, unsigned long symbol) -{ - print_symbol(msg, symbol); - printk("\n"); -} - -static void print_trace_warning(void *data, char *msg) -{ - printk("%s\n", msg); -} - -static int print_trace_stack(void *data, char *name) -{ - printk(" <%s> ", name); - return 0; -} - -static void print_trace_address(void *data, unsigned long addr, int reliable) -{ - touch_nmi_watchdog(); - printk_address(addr, reliable); -} - -static const struct stacktrace_ops print_trace_ops = { - .warning = print_trace_warning, - .warning_symbol = print_trace_warning_symbol, - .stack = print_trace_stack, - .address = print_trace_address, -}; - -static void -show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, - unsigned long *stack, unsigned long bp, char *log_lvl) -{ - printk("Call Trace:\n"); - dump_trace(task, regs, stack, bp, &print_trace_ops, log_lvl); -} - -void show_trace(struct task_struct *task, struct pt_regs *regs, - unsigned long *stack, unsigned long bp) -{ - show_trace_log_lvl(task, regs, stack, bp, ""); -} - -static void -show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, - unsigned long *sp, unsigned long bp, char *log_lvl) -{ - unsigned long *stack; - int i; - const int cpu = smp_processor_id(); - unsigned long *irqstack_end = - (unsigned long *) (cpu_pda(cpu)->irqstackptr); - unsigned long *irqstack = - (unsigned long *) (cpu_pda(cpu)->irqstackptr - IRQSTACKSIZE); - - /* - * debugging aid: "show_stack(NULL, NULL);" prints the - * back trace for this cpu. - */ - - if (sp == NULL) { - if (task) - sp = (unsigned long *)task->thread.sp; - else - sp = (unsigned long *)&sp; - } - - stack = sp; - for (i = 0; i < kstack_depth_to_print; i++) { - if (stack >= irqstack && stack <= irqstack_end) { - if (stack == irqstack_end) { - stack = (unsigned long *) (irqstack_end[-1]); - printk(" "); - } - } else { - if (((long) stack & (THREAD_SIZE-1)) == 0) - break; - } - if (i && ((i % 4) == 0)) - printk("\n"); - printk(" %016lx", *stack++); - touch_nmi_watchdog(); - } - printk("\n"); - show_trace_log_lvl(task, regs, sp, bp, log_lvl); -} - -void show_stack(struct task_struct *task, unsigned long *sp) -{ - show_stack_log_lvl(task, NULL, sp, 0, ""); -} - -/* - * The architecture-independent dump_stack generator - */ -void dump_stack(void) -{ - unsigned long bp = 0; - unsigned long stack; - -#ifdef CONFIG_FRAME_POINTER - if (!bp) - asm("movq %%rbp, %0" : "=r" (bp) : ); -#endif - - printk("Pid: %d, comm: %.20s %s %s %.*s\n", - current->pid, current->comm, print_tainted(), - init_utsname()->release, - (int)strcspn(init_utsname()->version, " "), - init_utsname()->version); - show_trace(NULL, NULL, &stack, bp); -} -EXPORT_SYMBOL(dump_stack); - -void show_registers(struct pt_regs *regs) -{ - int i; - unsigned long sp; - const int cpu = smp_processor_id(); - struct task_struct *cur = cpu_pda(cpu)->pcurrent; - - sp = regs->sp; - printk("CPU %d ", cpu); - __show_regs(regs, 1); - printk("Process %s (pid: %d, threadinfo %p, task %p)\n", - cur->comm, cur->pid, task_thread_info(cur), cur); - - /* - * When in-kernel, we also print out the stack and code at the - * time of the fault.. - */ - if (!user_mode(regs)) { - unsigned int code_prologue = code_bytes * 43 / 64; - unsigned int code_len = code_bytes; - unsigned char c; - u8 *ip; - - printk("Stack: "); - show_stack_log_lvl(NULL, regs, (unsigned long *)sp, - regs->bp, ""); - - printk(KERN_EMERG "Code: "); - - ip = (u8 *)regs->ip - code_prologue; - if (ip < (u8 *)PAGE_OFFSET || probe_kernel_address(ip, c)) { - /* try starting at RIP */ - ip = (u8 *)regs->ip; - code_len = code_len - code_prologue + 1; - } - for (i = 0; i < code_len; i++, ip++) { - if (ip < (u8 *)PAGE_OFFSET || - probe_kernel_address(ip, c)) { - printk(" Bad RIP value."); - break; - } - if (ip == (u8 *)regs->ip) - printk("<%02x> ", c); - else - printk("%02x ", c); - } - } - printk("\n"); -} - -int is_valid_bugaddr(unsigned long ip) -{ - unsigned short ud2; - - if (__copy_from_user(&ud2, (const void __user *) ip, sizeof(ud2))) - return 0; - - return ud2 == 0x0b0f; -} - -static raw_spinlock_t die_lock = __RAW_SPIN_LOCK_UNLOCKED; -static int die_owner = -1; -static unsigned int die_nest_count; - -unsigned __kprobes long oops_begin(void) -{ - int cpu; - unsigned long flags; - - oops_enter(); - - /* racy, but better than risking deadlock. */ - raw_local_irq_save(flags); - cpu = smp_processor_id(); - if (!__raw_spin_trylock(&die_lock)) { - if (cpu == die_owner) - /* nested oops. should stop eventually */; - else - __raw_spin_lock(&die_lock); - } - die_nest_count++; - die_owner = cpu; - console_verbose(); - bust_spinlocks(1); - return flags; -} - -void __kprobes oops_end(unsigned long flags, struct pt_regs *regs, int signr) -{ - die_owner = -1; - bust_spinlocks(0); - die_nest_count--; - if (!die_nest_count) - /* Nest count reaches zero, release the lock. */ - __raw_spin_unlock(&die_lock); - raw_local_irq_restore(flags); - if (!regs) { - oops_exit(); - return; - } - if (panic_on_oops) - panic("Fatal exception"); - oops_exit(); - do_exit(signr); -} - -int __kprobes __die(const char *str, struct pt_regs *regs, long err) -{ - printk(KERN_EMERG "%s: %04lx [%u] ", str, err & 0xffff, ++die_counter); -#ifdef CONFIG_PREEMPT - printk("PREEMPT "); -#endif -#ifdef CONFIG_SMP - printk("SMP "); -#endif -#ifdef CONFIG_DEBUG_PAGEALLOC - printk("DEBUG_PAGEALLOC"); -#endif - printk("\n"); - if (notify_die(DIE_OOPS, str, regs, err, - current->thread.trap_no, SIGSEGV) == NOTIFY_STOP) - return 1; - - show_registers(regs); - add_taint(TAINT_DIE); - /* Executive summary in case the oops scrolled away */ - printk(KERN_ALERT "RIP "); - printk_address(regs->ip, 1); - printk(" RSP <%016lx>\n", regs->sp); - if (kexec_should_crash(current)) - crash_kexec(regs); - return 0; -} - -void die(const char *str, struct pt_regs *regs, long err) -{ - unsigned long flags = oops_begin(); - - if (!user_mode(regs)) - report_bug(regs->ip, regs); - - if (__die(str, regs, err)) - regs = NULL; - oops_end(flags, regs, SIGSEGV); -} - -notrace __kprobes void -die_nmi(char *str, struct pt_regs *regs, int do_panic) -{ - unsigned long flags; - - if (notify_die(DIE_NMIWATCHDOG, str, regs, 0, 2, SIGINT) == NOTIFY_STOP) - return; - - flags = oops_begin(); - /* - * We are in trouble anyway, lets at least try - * to get a message out. - */ - printk(KERN_EMERG "%s", str); - printk(" on CPU%d, ip %08lx, registers:\n", - smp_processor_id(), regs->ip); - show_registers(regs); - if (kexec_should_crash(current)) - crash_kexec(regs); - if (do_panic || panic_on_oops) - panic("Non maskable interrupt"); - oops_end(flags, NULL, SIGBUS); - nmi_exit(); - local_irq_enable(); - do_exit(SIGBUS); -} - static void __kprobes do_trap(int trapnr, int signr, char *str, struct pt_regs *regs, long error_code, siginfo_t *info) @@ -1178,32 +662,3 @@ void __init trap_init(void) */ cpu_init(); } - -static int __init oops_setup(char *s) -{ - if (!s) - return -EINVAL; - if (!strcmp(s, "panic")) - panic_on_oops = 1; - return 0; -} -early_param("oops", oops_setup); - -static int __init kstack_setup(char *s) -{ - if (!s) - return -EINVAL; - kstack_depth_to_print = simple_strtoul(s, NULL, 0); - return 0; -} -early_param("kstack", kstack_setup); - -static int __init code_bytes_setup(char *s) -{ - code_bytes = simple_strtoul(s, NULL, 0); - if (code_bytes > 8192) - code_bytes = 8192; - - return 1; -} -__setup("code_bytes=", code_bytes_setup); -- cgit v1.2.3 From a28680b4b821a262fd3b5e57a28c148b5f9e662a Mon Sep 17 00:00:00 2001 From: Alexander van Heukelum Date: Tue, 30 Sep 2008 18:41:34 +0200 Subject: x86, traps: split out math_error and simd_math_error Split out math_error from do_coprocessor_error and simd_math_error from do_simd_coprocessor_error, like on i386. While at it, add the "error_code" parameter to do_coprocessor_error, do_simd_coprocessor_error and do_spurious_interrupt_bug. This does not change the generated code, but brings the declarations in line with all the other trap handlers. Signed-off-by: Alexander van Heukelum Signed-off-by: Ingo Molnar --- arch/x86/kernel/traps_64.c | 36 +++++++++++++++++++++--------------- 1 file changed, 21 insertions(+), 15 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/traps_64.c b/arch/x86/kernel/traps_64.c index 1cd61ddd90b..b295ebf0cb3 100644 --- a/arch/x86/kernel/traps_64.c +++ b/arch/x86/kernel/traps_64.c @@ -452,18 +452,12 @@ static int kernel_math_error(struct pt_regs *regs, const char *str, int trapnr) * the correct behaviour even in the presence of the asynchronous * IRQ13 behaviour */ -asmlinkage void do_coprocessor_error(struct pt_regs *regs) +void math_error(void __user *ip) { - void __user *ip = (void __user *)(regs->ip); struct task_struct *task; siginfo_t info; unsigned short cwd, swd; - conditional_sti(regs); - if (!user_mode(regs) && - kernel_math_error(regs, "kernel x87 math error", 16)) - return; - /* * Save the info for the exception handler and clear the error. */ @@ -516,23 +510,26 @@ asmlinkage void do_coprocessor_error(struct pt_regs *regs) force_sig_info(SIGFPE, &info, task); } +asmlinkage void do_coprocessor_error(struct pt_regs *regs, long error_code) +{ + conditional_sti(regs); + if (!user_mode(regs) && + kernel_math_error(regs, "kernel x87 math error", 16)) + return; + math_error((void __user *)regs->ip); +} + asmlinkage void bad_intr(void) { printk("bad interrupt"); } -asmlinkage void do_simd_coprocessor_error(struct pt_regs *regs) +static void simd_math_error(void __user *ip) { - void __user *ip = (void __user *)(regs->ip); struct task_struct *task; siginfo_t info; unsigned short mxcsr; - conditional_sti(regs); - if (!user_mode(regs) && - kernel_math_error(regs, "kernel simd math error", 19)) - return; - /* * Save the info for the exception handler and clear the error. */ @@ -575,7 +572,16 @@ asmlinkage void do_simd_coprocessor_error(struct pt_regs *regs) force_sig_info(SIGFPE, &info, task); } -asmlinkage void do_spurious_interrupt_bug(struct pt_regs *regs) +asmlinkage void do_simd_coprocessor_error(struct pt_regs *regs, long error_code) +{ + conditional_sti(regs); + if (!user_mode(regs) && + kernel_math_error(regs, "kernel simd math error", 19)) + return; + simd_math_error((void __user *)regs->ip); +} + +asmlinkage void do_spurious_interrupt_bug(struct pt_regs *regs, long error_code) { } -- cgit v1.2.3 From ae82157b3d8bb4902f76b56c7450a945288786ac Mon Sep 17 00:00:00 2001 From: Alexander van Heukelum Date: Tue, 30 Sep 2008 18:41:35 +0200 Subject: x86, traps, i386: factor out lazy io-bitmap copy x86_64 does not do the lazy io-bitmap dance. Putting it in its own function makes i386's do_general_protection look much more like x86_64's. Signed-off-by: Alexander van Heukelum Signed-off-by: Ingo Molnar --- arch/x86/kernel/traps_32.c | 76 ++++++++++++++++++++++++++-------------------- 1 file changed, 43 insertions(+), 33 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/traps_32.c b/arch/x86/kernel/traps_32.c index ce1063c141f..0206c915748 100644 --- a/arch/x86/kernel/traps_32.c +++ b/arch/x86/kernel/traps_32.c @@ -95,6 +95,47 @@ die_if_kernel(const char *str, struct pt_regs *regs, long err) die(str, regs, err); } +/* + * Perform the lazy TSS's I/O bitmap copy. If the TSS has an + * invalid offset set (the LAZY one) and the faulting thread has + * a valid I/O bitmap pointer, we copy the I/O bitmap in the TSS, + * we set the offset field correctly and return 1. + */ +static int lazy_iobitmap_copy(void) +{ + struct thread_struct *thread; + struct tss_struct *tss; + int cpu; + + cpu = get_cpu(); + tss = &per_cpu(init_tss, cpu); + thread = ¤t->thread; + + if (tss->x86_tss.io_bitmap_base == INVALID_IO_BITMAP_OFFSET_LAZY && + thread->io_bitmap_ptr) { + memcpy(tss->io_bitmap, thread->io_bitmap_ptr, + thread->io_bitmap_max); + /* + * If the previously set map was extending to higher ports + * than the current one, pad extra space with 0xff (no access). + */ + if (thread->io_bitmap_max < tss->io_bitmap_max) { + memset((char *) tss->io_bitmap + + thread->io_bitmap_max, 0xff, + tss->io_bitmap_max - thread->io_bitmap_max); + } + tss->io_bitmap_max = thread->io_bitmap_max; + tss->x86_tss.io_bitmap_base = IO_BITMAP_OFFSET; + tss->io_bitmap_owner = thread; + put_cpu(); + + return 1; + } + put_cpu(); + + return 0; +} + static void __kprobes do_trap(int trapnr, int signr, char *str, struct pt_regs *regs, long error_code, siginfo_t *info) @@ -187,44 +228,13 @@ void __kprobes do_general_protection(struct pt_regs *regs, long error_code) { struct task_struct *tsk; - struct thread_struct *thread; - struct tss_struct *tss; - int cpu; conditional_sti(regs); - cpu = get_cpu(); - tss = &per_cpu(init_tss, cpu); - thread = ¤t->thread; - - /* - * Perform the lazy TSS's I/O bitmap copy. If the TSS has an - * invalid offset set (the LAZY one) and the faulting thread has - * a valid I/O bitmap pointer, we copy the I/O bitmap in the TSS - * and we set the offset field correctly. Then we let the CPU to - * restart the faulting instruction. - */ - if (tss->x86_tss.io_bitmap_base == INVALID_IO_BITMAP_OFFSET_LAZY && - thread->io_bitmap_ptr) { - memcpy(tss->io_bitmap, thread->io_bitmap_ptr, - thread->io_bitmap_max); - /* - * If the previously set map was extending to higher ports - * than the current one, pad extra space with 0xff (no access). - */ - if (thread->io_bitmap_max < tss->io_bitmap_max) { - memset((char *) tss->io_bitmap + - thread->io_bitmap_max, 0xff, - tss->io_bitmap_max - thread->io_bitmap_max); - } - tss->io_bitmap_max = thread->io_bitmap_max; - tss->x86_tss.io_bitmap_base = IO_BITMAP_OFFSET; - tss->io_bitmap_owner = thread; - put_cpu(); - + if (lazy_iobitmap_copy()) { + /* restart the faulting instruction */ return; } - put_cpu(); if (regs->flags & X86_VM_MASK) goto gp_in_vm86; -- cgit v1.2.3 From e407d62088b7f61f38e1086062650c75a4f2757a Mon Sep 17 00:00:00 2001 From: Alexander van Heukelum Date: Tue, 30 Sep 2008 18:41:36 +0200 Subject: x86, traps: introduce dotraplinkage Mark the exception handlers with "dotraplinkage" to hide the calling convention differences between i386 and x86_64. Signed-off-by: Alexander van Heukelum Signed-off-by: Ingo Molnar --- arch/x86/kernel/entry_64.S | 2 +- arch/x86/kernel/traps_32.c | 28 ++++++++++++++++------------ arch/x86/kernel/traps_64.c | 31 +++++++++++++++++++------------ 3 files changed, 36 insertions(+), 25 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index 977c8ea6602..1db6ce4314e 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -1243,7 +1243,7 @@ ENTRY(simd_coprocessor_error) END(simd_coprocessor_error) ENTRY(device_not_available) - zeroentry math_state_restore + zeroentry do_device_not_available END(device_not_available) /* runs on exception stack */ diff --git a/arch/x86/kernel/traps_32.c b/arch/x86/kernel/traps_32.c index 0206c915748..7eb1203c909 100644 --- a/arch/x86/kernel/traps_32.c +++ b/arch/x86/kernel/traps_32.c @@ -190,7 +190,7 @@ vm86_trap: } #define DO_ERROR(trapnr, signr, str, name) \ -void do_##name(struct pt_regs *regs, long error_code) \ +dotraplinkage void do_##name(struct pt_regs *regs, long error_code) \ { \ if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \ == NOTIFY_STOP) \ @@ -200,7 +200,7 @@ void do_##name(struct pt_regs *regs, long error_code) \ } #define DO_ERROR_INFO(trapnr, signr, str, name, sicode, siaddr) \ -void do_##name(struct pt_regs *regs, long error_code) \ +dotraplinkage void do_##name(struct pt_regs *regs, long error_code) \ { \ siginfo_t info; \ info.si_signo = signr; \ @@ -224,7 +224,7 @@ DO_ERROR(11, SIGBUS, "segment not present", segment_not_present) DO_ERROR(12, SIGBUS, "stack segment", stack_segment) DO_ERROR_INFO(17, SIGBUS, "alignment check", alignment_check, BUS_ADRALN, 0) -void __kprobes +dotraplinkage void __kprobes do_general_protection(struct pt_regs *regs, long error_code) { struct task_struct *tsk; @@ -428,7 +428,8 @@ static notrace __kprobes void default_do_nmi(struct pt_regs *regs) reassert_nmi(); } -notrace __kprobes void do_nmi(struct pt_regs *regs, long error_code) +dotraplinkage notrace __kprobes void +do_nmi(struct pt_regs *regs, long error_code) { int cpu; @@ -456,7 +457,7 @@ void restart_nmi(void) acpi_nmi_enable(); } -void __kprobes do_int3(struct pt_regs *regs, long error_code) +dotraplinkage void __kprobes do_int3(struct pt_regs *regs, long error_code) { #ifdef CONFIG_KPROBES if (notify_die(DIE_INT3, "int3", regs, error_code, 3, SIGTRAP) @@ -494,7 +495,7 @@ void __kprobes do_int3(struct pt_regs *regs, long error_code) * find every occurrence of the TF bit that could be saved away even * by user code) */ -void __kprobes do_debug(struct pt_regs *regs, long error_code) +dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code) { struct task_struct *tsk = current; unsigned int condition; @@ -627,7 +628,7 @@ void math_error(void __user *ip) force_sig_info(SIGFPE, &info, task); } -void do_coprocessor_error(struct pt_regs *regs, long error_code) +dotraplinkage void do_coprocessor_error(struct pt_regs *regs, long error_code) { conditional_sti(regs); ignore_fpu_irq = 1; @@ -682,7 +683,8 @@ static void simd_math_error(void __user *ip) force_sig_info(SIGFPE, &info, task); } -void do_simd_coprocessor_error(struct pt_regs *regs, long error_code) +dotraplinkage void +do_simd_coprocessor_error(struct pt_regs *regs, long error_code) { conditional_sti(regs); @@ -706,7 +708,8 @@ void do_simd_coprocessor_error(struct pt_regs *regs, long error_code) force_sig(SIGSEGV, current); } -void do_spurious_interrupt_bug(struct pt_regs *regs, long error_code) +dotraplinkage void +do_spurious_interrupt_bug(struct pt_regs *regs, long error_code) { conditional_sti(regs); #if 0 @@ -784,7 +787,8 @@ asmlinkage void math_emulate(long arg) #endif /* CONFIG_MATH_EMULATION */ -void __kprobes do_device_not_available(struct pt_regs *regs, long error) +dotraplinkage void __kprobes +do_device_not_available(struct pt_regs *regs, long error) { if (read_cr0() & X86_CR0_EM) { conditional_sti(regs); @@ -796,14 +800,14 @@ void __kprobes do_device_not_available(struct pt_regs *regs, long error) } #ifdef CONFIG_X86_MCE -void __kprobes do_machine_check(struct pt_regs *regs, long error) +dotraplinkage void __kprobes do_machine_check(struct pt_regs *regs, long error) { conditional_sti(regs); machine_check_vector(regs, error); } #endif -void do_iret_error(struct pt_regs *regs, long error_code) +dotraplinkage void do_iret_error(struct pt_regs *regs, long error_code) { siginfo_t info; local_irq_enable(); diff --git a/arch/x86/kernel/traps_64.c b/arch/x86/kernel/traps_64.c index b295ebf0cb3..be73323ca95 100644 --- a/arch/x86/kernel/traps_64.c +++ b/arch/x86/kernel/traps_64.c @@ -125,7 +125,7 @@ kernel_trap: } #define DO_ERROR(trapnr, signr, str, name) \ -asmlinkage void do_##name(struct pt_regs *regs, long error_code) \ +dotraplinkage void do_##name(struct pt_regs *regs, long error_code) \ { \ if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \ == NOTIFY_STOP) \ @@ -135,7 +135,7 @@ asmlinkage void do_##name(struct pt_regs *regs, long error_code) \ } #define DO_ERROR_INFO(trapnr, signr, str, name, sicode, siaddr) \ -asmlinkage void do_##name(struct pt_regs *regs, long error_code) \ +dotraplinkage void do_##name(struct pt_regs *regs, long error_code) \ { \ siginfo_t info; \ info.si_signo = signr; \ @@ -159,7 +159,7 @@ DO_ERROR(11, SIGBUS, "segment not present", segment_not_present) DO_ERROR_INFO(17, SIGBUS, "alignment check", alignment_check, BUS_ADRALN, 0) /* Runs on IST stack */ -asmlinkage void do_stack_segment(struct pt_regs *regs, long error_code) +dotraplinkage void do_stack_segment(struct pt_regs *regs, long error_code) { if (notify_die(DIE_TRAP, "stack segment", regs, error_code, 12, SIGBUS) == NOTIFY_STOP) @@ -169,7 +169,7 @@ asmlinkage void do_stack_segment(struct pt_regs *regs, long error_code) preempt_conditional_cli(regs); } -asmlinkage void do_double_fault(struct pt_regs *regs, long error_code) +dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code) { static const char str[] = "double fault"; struct task_struct *tsk = current; @@ -186,7 +186,7 @@ asmlinkage void do_double_fault(struct pt_regs *regs, long error_code) die(str, regs, error_code); } -asmlinkage void __kprobes +dotraplinkage void __kprobes do_general_protection(struct pt_regs *regs, long error_code) { struct task_struct *tsk; @@ -317,7 +317,7 @@ asmlinkage notrace __kprobes void default_do_nmi(struct pt_regs *regs) io_check_error(reason, regs); } -asmlinkage notrace __kprobes void +dotraplinkage notrace __kprobes void do_nmi(struct pt_regs *regs, long error_code) { nmi_enter(); @@ -343,7 +343,7 @@ void restart_nmi(void) } /* runs on IST stack. */ -asmlinkage void __kprobes do_int3(struct pt_regs *regs, long error_code) +dotraplinkage void __kprobes do_int3(struct pt_regs *regs, long error_code) { if (notify_die(DIE_INT3, "int3", regs, error_code, 3, SIGTRAP) == NOTIFY_STOP) @@ -376,8 +376,7 @@ asmlinkage __kprobes struct pt_regs *sync_regs(struct pt_regs *eregs) } /* runs on IST stack. */ -asmlinkage void __kprobes do_debug(struct pt_regs *regs, - unsigned long error_code) +dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code) { struct task_struct *tsk = current; unsigned long condition; @@ -510,7 +509,7 @@ void math_error(void __user *ip) force_sig_info(SIGFPE, &info, task); } -asmlinkage void do_coprocessor_error(struct pt_regs *regs, long error_code) +dotraplinkage void do_coprocessor_error(struct pt_regs *regs, long error_code) { conditional_sti(regs); if (!user_mode(regs) && @@ -572,7 +571,8 @@ static void simd_math_error(void __user *ip) force_sig_info(SIGFPE, &info, task); } -asmlinkage void do_simd_coprocessor_error(struct pt_regs *regs, long error_code) +dotraplinkage void +do_simd_coprocessor_error(struct pt_regs *regs, long error_code) { conditional_sti(regs); if (!user_mode(regs) && @@ -581,7 +581,8 @@ asmlinkage void do_simd_coprocessor_error(struct pt_regs *regs, long error_code) simd_math_error((void __user *)regs->ip); } -asmlinkage void do_spurious_interrupt_bug(struct pt_regs *regs, long error_code) +dotraplinkage void +do_spurious_interrupt_bug(struct pt_regs *regs, long error_code) { } @@ -633,6 +634,12 @@ asmlinkage void math_state_restore(void) } EXPORT_SYMBOL_GPL(math_state_restore); +dotraplinkage void __kprobes +do_device_not_available(struct pt_regs *regs, long error) +{ + math_state_restore(); +} + void __init trap_init(void) { set_intr_gate(0, ÷_error); -- cgit v1.2.3 From 3d2a71a596bd9c761c8487a2178e95f8a61da083 Mon Sep 17 00:00:00 2001 From: Alexander van Heukelum Date: Tue, 30 Sep 2008 18:41:37 +0200 Subject: x86, traps: converge do_debug handlers Make the x86_64-version and the i386-version of do_debug more similar. - introduce preempt_conditional_sti/cli to i386. The preempt-count is now elevated during the trap handler, like on x86_64. It does not run on a separate stack, however. - replace an open-coded "send_sigtrap" - copy some comments Signed-off-by: Alexander van Heukelum Signed-off-by: Ingo Molnar --- arch/x86/kernel/traps_32.c | 30 +++++++++++++++++++++--------- arch/x86/kernel/traps_64.c | 17 +++++++++-------- 2 files changed, 30 insertions(+), 17 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/traps_32.c b/arch/x86/kernel/traps_32.c index 7eb1203c909..953172af6e5 100644 --- a/arch/x86/kernel/traps_32.c +++ b/arch/x86/kernel/traps_32.c @@ -88,6 +88,20 @@ static inline void conditional_sti(struct pt_regs *regs) local_irq_enable(); } +static inline void preempt_conditional_sti(struct pt_regs *regs) +{ + inc_preempt_count(); + if (regs->flags & X86_EFLAGS_IF) + local_irq_enable(); +} + +static inline void preempt_conditional_cli(struct pt_regs *regs) +{ + if (regs->flags & X86_EFLAGS_IF) + local_irq_disable(); + dec_preempt_count(); +} + static inline void die_if_kernel(const char *str, struct pt_regs *regs, long err) { @@ -498,7 +512,7 @@ dotraplinkage void __kprobes do_int3(struct pt_regs *regs, long error_code) dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code) { struct task_struct *tsk = current; - unsigned int condition; + unsigned long condition; int si_code; get_debugreg(condition, 6); @@ -512,9 +526,9 @@ dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code) if (notify_die(DIE_DEBUG, "debug", regs, condition, error_code, SIGTRAP) == NOTIFY_STOP) return; + /* It's safe to allow irq's after DR6 has been saved */ - if (regs->flags & X86_EFLAGS_IF) - local_irq_enable(); + preempt_conditional_sti(regs); /* Mask out spurious debug traps due to lazy DR7 setting */ if (condition & (DR_TRAP0|DR_TRAP1|DR_TRAP2|DR_TRAP3)) { @@ -533,16 +547,11 @@ dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code) * kernel space (but re-enable TF when returning to user mode). */ if (condition & DR_STEP) { - /* - * We already checked v86 mode above, so we can - * check for kernel mode by just checking the CPL - * of CS. - */ if (!user_mode(regs)) goto clear_TF_reenable; } - si_code = get_si_code((unsigned long)condition); + si_code = get_si_code(condition); /* Ok, finally something we can handle */ send_sigtrap(tsk, regs, error_code, si_code); @@ -552,15 +561,18 @@ dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code) */ clear_dr7: set_debugreg(0, 7); + preempt_conditional_cli(regs); return; debug_vm86: handle_vm86_trap((struct kernel_vm86_regs *) regs, error_code, 1); + preempt_conditional_cli(regs); return; clear_TF_reenable: set_tsk_thread_flag(tsk, TIF_SINGLESTEP); regs->flags &= ~X86_EFLAGS_TF; + preempt_conditional_cli(regs); return; } diff --git a/arch/x86/kernel/traps_64.c b/arch/x86/kernel/traps_64.c index be73323ca95..a851eca6d04 100644 --- a/arch/x86/kernel/traps_64.c +++ b/arch/x86/kernel/traps_64.c @@ -380,7 +380,7 @@ dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code) { struct task_struct *tsk = current; unsigned long condition; - siginfo_t info; + int si_code; get_debugreg(condition, 6); @@ -394,6 +394,7 @@ dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code) SIGTRAP) == NOTIFY_STOP) return; + /* It's safe to allow irq's after DR6 has been saved */ preempt_conditional_sti(regs); /* Mask out spurious debug traps due to lazy DR7 setting */ @@ -402,6 +403,7 @@ dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code) goto clear_dr7; } + /* Save debug status register where ptrace can see it */ tsk->thread.debugreg6 = condition; /* @@ -413,15 +415,14 @@ dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code) goto clear_TF_reenable; } + si_code = get_si_code(condition); /* Ok, finally something we can handle */ - tsk->thread.trap_no = 1; - tsk->thread.error_code = error_code; - info.si_signo = SIGTRAP; - info.si_errno = 0; - info.si_code = get_si_code(condition); - info.si_addr = user_mode(regs) ? (void __user *)regs->ip : NULL; - force_sig_info(SIGTRAP, &info, tsk); + send_sigtrap(tsk, regs, error_code, si_code); + /* + * Disable additional traps. They'll be re-enabled when + * the signal is delivered. + */ clear_dr7: set_debugreg(0, 7); preempt_conditional_cli(regs); -- cgit v1.2.3 From 699d2937d45d9dabc1772d0d07501ccc43885c23 Mon Sep 17 00:00:00 2001 From: Alexander van Heukelum Date: Fri, 3 Oct 2008 22:00:32 +0200 Subject: traps: x86: converge trap_init functions - set_system_gate on i386 is really set_system_trap_gate - set_system_gate on x86_64 is really set_system_intr_gate - ist=0 means no special stack switch is done: - introduce STACKFAULT_STACK, DOUBLEFAULT_STACK, NMI_STACK, DEBUG_STACK and MCE_STACK as on x86_64. - use the _ist variants with XXX_STACK set to zero - remove set_system_gate Signed-off-by: Alexander van Heukelum Signed-off-by: Ingo Molnar traps: x86: correct copy/paste bug: a trap is a GATE_TRAP Fix copy/paste/forgot-to-edit bug in desc.h. Signed-off-by: Alexander van Heukelum Signed-off-by: Ingo Molnar --- arch/x86/kernel/traps_32.c | 16 +++++++++------- arch/x86/kernel/traps_64.c | 6 +++--- 2 files changed, 12 insertions(+), 10 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/traps_32.c b/arch/x86/kernel/traps_32.c index 953172af6e5..2c7ea382771 100644 --- a/arch/x86/kernel/traps_32.c +++ b/arch/x86/kernel/traps_32.c @@ -847,10 +847,12 @@ void __init trap_init(void) #endif set_intr_gate(0, ÷_error); - set_intr_gate(1, &debug); - set_intr_gate(2, &nmi); - set_system_intr_gate(3, &int3); /* int3 can be called from all */ - set_system_intr_gate(4, &overflow); /* int4 can be called from all */ + set_intr_gate_ist(1, &debug, DEBUG_STACK); + set_intr_gate_ist(2, &nmi, NMI_STACK); + /* int3 can be called from all */ + set_system_intr_gate_ist(3, &int3, DEBUG_STACK); + /* int4 can be called from all */ + set_system_intr_gate(4, &overflow); set_intr_gate(5, &bounds); set_intr_gate(6, &invalid_op); set_intr_gate(7, &device_not_available); @@ -858,14 +860,14 @@ void __init trap_init(void) set_intr_gate(9, &coprocessor_segment_overrun); set_intr_gate(10, &invalid_TSS); set_intr_gate(11, &segment_not_present); - set_intr_gate(12, &stack_segment); + set_intr_gate_ist(12, &stack_segment, STACKFAULT_STACK); set_intr_gate(13, &general_protection); set_intr_gate(14, &page_fault); set_intr_gate(15, &spurious_interrupt_bug); set_intr_gate(16, &coprocessor_error); set_intr_gate(17, &alignment_check); #ifdef CONFIG_X86_MCE - set_intr_gate(18, &machine_check); + set_intr_gate_ist(18, &machine_check, MCE_STACK); #endif set_intr_gate(19, &simd_coprocessor_error); @@ -881,7 +883,7 @@ void __init trap_init(void) printk("done.\n"); } - set_system_gate(SYSCALL_VECTOR, &system_call); + set_system_trap_gate(SYSCALL_VECTOR, &system_call); /* Reserve all the builtin and the syscall vector: */ for (i = 0; i < FIRST_EXTERNAL_VECTOR; i++) diff --git a/arch/x86/kernel/traps_64.c b/arch/x86/kernel/traps_64.c index a851eca6d04..ea091dfe0cd 100644 --- a/arch/x86/kernel/traps_64.c +++ b/arch/x86/kernel/traps_64.c @@ -647,9 +647,9 @@ void __init trap_init(void) set_intr_gate_ist(1, &debug, DEBUG_STACK); set_intr_gate_ist(2, &nmi, NMI_STACK); /* int3 can be called from all */ - set_system_gate_ist(3, &int3, DEBUG_STACK); + set_system_intr_gate_ist(3, &int3, DEBUG_STACK); /* int4 can be called from all */ - set_system_gate(4, &overflow); + set_system_intr_gate(4, &overflow); set_intr_gate(5, &bounds); set_intr_gate(6, &invalid_op); set_intr_gate(7, &device_not_available); @@ -669,7 +669,7 @@ void __init trap_init(void) set_intr_gate(19, &simd_coprocessor_error); #ifdef CONFIG_IA32_EMULATION - set_system_gate(IA32_SYSCALL_VECTOR, ia32_syscall); + set_system_intr_gate(IA32_SYSCALL_VECTOR, ia32_syscall); #endif /* * Should be a barrier for any external CPU state: -- cgit v1.2.3 From 091d30c8f7744f43b0bb507fd30ceb95f9ff9e1b Mon Sep 17 00:00:00 2001 From: Alexander van Heukelum Date: Fri, 3 Oct 2008 23:16:12 +0200 Subject: traps: x86_64: make math_state_restore more like i386 - rename variable me -> tsk - get thread and tsk like i386 - expand used_math() - copy comment Signed-off-by: Alexander van Heukelum Signed-off-by: Ingo Molnar --- arch/x86/kernel/traps_64.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/traps_64.c b/arch/x86/kernel/traps_64.c index ea091dfe0cd..00406c99aee 100644 --- a/arch/x86/kernel/traps_64.c +++ b/arch/x86/kernel/traps_64.c @@ -604,14 +604,15 @@ asmlinkage void __attribute__((weak)) mce_threshold_interrupt(void) */ asmlinkage void math_state_restore(void) { - struct task_struct *me = current; + struct thread_info *thread = current_thread_info(); + struct task_struct *tsk = thread->task; - if (!used_math()) { + if (!tsk_used_math(tsk)) { local_irq_enable(); /* * does a slab alloc which can sleep */ - if (init_fpu(me)) { + if (init_fpu(tsk)) { /* * ran out of memory! */ @@ -625,13 +626,13 @@ asmlinkage void math_state_restore(void) /* * Paranoid restore. send a SIGSEGV if we fail to restore the state. */ - if (unlikely(restore_fpu_checking(me))) { + if (unlikely(restore_fpu_checking(tsk))) { stts(); - force_sig(SIGSEGV, me); + force_sig(SIGSEGV, tsk); return; } - task_thread_info(me)->status |= TS_USEDFPU; - me->fpu_counter++; + thread->status |= TS_USEDFPU; /* So we fnsave on switch_to() */ + tsk->fpu_counter++; } EXPORT_SYMBOL_GPL(math_state_restore); -- cgit v1.2.3 From 4915a35e35a037254550a2ba9f367a812bc37d40 Mon Sep 17 00:00:00 2001 From: Alexander van Heukelum Date: Fri, 3 Oct 2008 22:00:34 +0200 Subject: traps: i386: use preempt_conditional_sti/cli in do_int3 Use preempt_conditional_sti/cli in do_int3, like on x86_64. Signed-off-by: Alexander van Heukelum Signed-off-by: Ingo Molnar --- arch/x86/kernel/traps_32.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/traps_32.c b/arch/x86/kernel/traps_32.c index 2c7ea382771..67953bbe193 100644 --- a/arch/x86/kernel/traps_32.c +++ b/arch/x86/kernel/traps_32.c @@ -477,14 +477,15 @@ dotraplinkage void __kprobes do_int3(struct pt_regs *regs, long error_code) if (notify_die(DIE_INT3, "int3", regs, error_code, 3, SIGTRAP) == NOTIFY_STOP) return; - conditional_sti(regs); #else if (notify_die(DIE_TRAP, "int3", regs, error_code, 3, SIGTRAP) == NOTIFY_STOP) return; #endif + preempt_conditional_sti(regs); do_trap(3, SIGTRAP, "int3", regs, error_code, NULL); + preempt_conditional_cli(regs); } /* -- cgit v1.2.3 From 1c9af8a9f448abfe13f17fa76b7ca72b588a1edb Mon Sep 17 00:00:00 2001 From: Alexander van Heukelum Date: Fri, 3 Oct 2008 22:00:35 +0200 Subject: traps: x86_64: make io_check_error equal to the one on i386 Make io_check_error equal to the one on i386. Signed-off-by: Alexander van Heukelum Signed-off-by: Ingo Molnar --- arch/x86/kernel/traps_64.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/traps_64.c b/arch/x86/kernel/traps_64.c index 00406c99aee..7853f488cd6 100644 --- a/arch/x86/kernel/traps_64.c +++ b/arch/x86/kernel/traps_64.c @@ -252,13 +252,19 @@ mem_parity_error(unsigned char reason, struct pt_regs *regs) static notrace __kprobes void io_check_error(unsigned char reason, struct pt_regs *regs) { - printk("NMI: IOCK error (debug interrupt?)\n"); + unsigned long i; + + printk(KERN_EMERG "NMI: IOCK error (debug interrupt?)\n"); show_registers(regs); /* Re-enable the IOCK line, wait for a few seconds */ reason = (reason & 0xf) | 8; outb(reason, 0x61); - mdelay(2000); + + i = 2000; + while (--i) + udelay(1000); + reason &= ~8; outb(reason, 0x61); } -- cgit v1.2.3 From 7970479c4881e156a0c07c1a7fdc564c8e3b2bfc Mon Sep 17 00:00:00 2001 From: Alexander van Heukelum Date: Fri, 3 Oct 2008 22:00:36 +0200 Subject: traps: i386: expand clear_mem_error and remove from mach_traps.h This is the last user of clear_mem_error, which is defined only on i386. Expand the inline function and remove it from include/asm-x86/mach-default/mach_traps.h Signed-off-by: Alexander van Heukelum Signed-off-by: Ingo Molnar --- arch/x86/kernel/traps_32.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/traps_32.c b/arch/x86/kernel/traps_32.c index 67953bbe193..01e7ca8c766 100644 --- a/arch/x86/kernel/traps_32.c +++ b/arch/x86/kernel/traps_32.c @@ -313,7 +313,8 @@ mem_parity_error(unsigned char reason, struct pt_regs *regs) printk(KERN_EMERG "Dazed and confused, but trying to continue\n"); /* Clear and disable the memory parity error line. */ - clear_mem_error(reason); + reason = (reason & 0xf) | 4; + outb(reason, 0x61); } static notrace __kprobes void -- cgit v1.2.3 From a5ae2330a5a6e7948866715a845ad2e8900bd4c2 Mon Sep 17 00:00:00 2001 From: Alexander van Heukelum Date: Fri, 3 Oct 2008 22:00:37 +0200 Subject: traps: x86_64: use task_pid_nr(tsk) instead of tsk->pid in do_general_protection Use task_pid_nr(tsk) instead of tsk->pid in do_general_protection. Signed-off-by: Alexander van Heukelum Signed-off-by: Ingo Molnar --- arch/x86/kernel/traps_64.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/traps_64.c b/arch/x86/kernel/traps_64.c index 7853f488cd6..71c2629997b 100644 --- a/arch/x86/kernel/traps_64.c +++ b/arch/x86/kernel/traps_64.c @@ -204,7 +204,7 @@ do_general_protection(struct pt_regs *regs, long error_code) printk_ratelimit()) { printk(KERN_INFO "%s[%d] general protection ip:%lx sp:%lx error:%lx", - tsk->comm, tsk->pid, + tsk->comm, task_pid_nr(tsk), regs->ip, regs->sp, error_code); print_vma_addr(" in ", regs->ip); printk("\n"); -- cgit v1.2.3 From c1d518c8422ff7d3f377958771b265753028579c Mon Sep 17 00:00:00 2001 From: Alexander van Heukelum Date: Fri, 3 Oct 2008 23:17:11 +0200 Subject: traps: x86: various noop-changes preparing for unification of traps_xx.c - reordering include files - whitespace changes - comment changes - removed unused bad_intr() - make default_do_nmi static Signed-off-by: Alexander van Heukelum Signed-off-by: Ingo Molnar --- arch/x86/kernel/traps_32.c | 28 +++++++++++--------- arch/x86/kernel/traps_64.c | 65 ++++++++++++++++++++++++++++++---------------- 2 files changed, 58 insertions(+), 35 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/traps_32.c b/arch/x86/kernel/traps_32.c index 01e7ca8c766..076739863d2 100644 --- a/arch/x86/kernel/traps_32.c +++ b/arch/x86/kernel/traps_32.c @@ -7,13 +7,11 @@ */ /* - * 'Traps.c' handles hardware traps and faults after we have saved some - * state in 'asm.s'. + * Handle hardware traps and faults. */ #include #include #include -#include #include #include #include @@ -32,6 +30,8 @@ #include #include #include +#include +#include #ifdef CONFIG_EISA #include @@ -46,22 +46,26 @@ #include #endif -#include -#include #include #include +#include #include #include #include #include +#include #include #include + +#include + +#include +#include #include #include #include #include -#include "mach_traps.h" #include "cpu/mcheck/mce.h" DECLARE_BITMAP(used_vectors, NR_VECTORS); @@ -340,7 +344,8 @@ io_check_error(unsigned char reason, struct pt_regs *regs) static notrace __kprobes void unknown_nmi_error(unsigned char reason, struct pt_regs *regs) { - if (notify_die(DIE_NMIUNKNOWN, "nmi", regs, reason, 2, SIGINT) == NOTIFY_STOP) + if (notify_die(DIE_NMIUNKNOWN, "nmi", regs, reason, 2, SIGINT) == + NOTIFY_STOP) return; #ifdef CONFIG_MCA /* @@ -446,13 +451,9 @@ static notrace __kprobes void default_do_nmi(struct pt_regs *regs) dotraplinkage notrace __kprobes void do_nmi(struct pt_regs *regs, long error_code) { - int cpu; - nmi_enter(); - cpu = smp_processor_id(); - - ++nmi_count(cpu); + { int cpu; cpu = smp_processor_id(); ++nmi_count(cpu); } if (!ignore_nmis) default_do_nmi(regs); @@ -472,6 +473,7 @@ void restart_nmi(void) acpi_nmi_enable(); } +/* May run on IST stack. */ dotraplinkage void __kprobes do_int3(struct pt_regs *regs, long error_code) { #ifdef CONFIG_KPROBES @@ -510,6 +512,8 @@ dotraplinkage void __kprobes do_int3(struct pt_regs *regs, long error_code) * about restoring all the debug state, and ptrace doesn't have to * find every occurrence of the TF bit that could be saved away even * by user code) + * + * May run on IST stack. */ dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code) { diff --git a/arch/x86/kernel/traps_64.c b/arch/x86/kernel/traps_64.c index 71c2629997b..22fe62a24ed 100644 --- a/arch/x86/kernel/traps_64.c +++ b/arch/x86/kernel/traps_64.c @@ -7,10 +7,8 @@ */ /* - * 'Traps.c' handles hardware traps and faults after we have saved some - * state in 'entry.S'. + * Handle hardware traps and faults. */ -#include #include #include #include @@ -41,18 +39,21 @@ #include #include +#include #include #include #include #include +#include #include #include + +#include + #include #include #include -#include -#include static int ignore_nmis; @@ -73,8 +74,6 @@ static inline void preempt_conditional_cli(struct pt_regs *regs) { if (regs->flags & X86_EFLAGS_IF) local_irq_disable(); - /* Make sure to not schedule here because we could be running - on an exception stack. */ dec_preempt_count(); } @@ -228,9 +227,12 @@ gp_in_kernel: static notrace __kprobes void mem_parity_error(unsigned char reason, struct pt_regs *regs) { - printk(KERN_EMERG "Uhhuh. NMI received for unknown reason %02x.\n", - reason); - printk(KERN_EMERG "You have some hardware problem, likely on the PCI bus.\n"); + printk(KERN_EMERG + "Uhhuh. NMI received for unknown reason %02x on CPU %d.\n", + reason, smp_processor_id()); + + printk(KERN_EMERG + "You have some hardware problem, likely on the PCI bus.\n"); #if defined(CONFIG_EDAC) if (edac_handler_set()) { @@ -275,19 +277,18 @@ unknown_nmi_error(unsigned char reason, struct pt_regs *regs) if (notify_die(DIE_NMIUNKNOWN, "nmi", regs, reason, 2, SIGINT) == NOTIFY_STOP) return; - printk(KERN_EMERG "Uhhuh. NMI received for unknown reason %02x.\n", - reason); - printk(KERN_EMERG "Do you have a strange power saving mode enabled?\n"); + printk(KERN_EMERG + "Uhhuh. NMI received for unknown reason %02x on CPU %d.\n", + reason, smp_processor_id()); + printk(KERN_EMERG "Do you have a strange power saving mode enabled?\n"); if (panic_on_unrecovered_nmi) panic("NMI: Not continuing"); printk(KERN_EMERG "Dazed and confused, but trying to continue\n"); } -/* Runs on IST stack. This code must keep interrupts off all the time. - Nested NMIs are prevented by the CPU. */ -asmlinkage notrace __kprobes void default_do_nmi(struct pt_regs *regs) +static notrace __kprobes void default_do_nmi(struct pt_regs *regs) { unsigned char reason = 0; int cpu; @@ -348,7 +349,7 @@ void restart_nmi(void) acpi_nmi_enable(); } -/* runs on IST stack. */ +/* May run on IST stack. */ dotraplinkage void __kprobes do_int3(struct pt_regs *regs, long error_code) { if (notify_die(DIE_INT3, "int3", regs, error_code, 3, SIGTRAP) @@ -381,7 +382,30 @@ asmlinkage __kprobes struct pt_regs *sync_regs(struct pt_regs *eregs) return regs; } -/* runs on IST stack. */ +/* + * Our handling of the processor debug registers is non-trivial. + * We do not clear them on entry and exit from the kernel. Therefore + * it is possible to get a watchpoint trap here from inside the kernel. + * However, the code in ./ptrace.c has ensured that the user can + * only set watchpoints on userspace addresses. Therefore the in-kernel + * watchpoint trap can only occur in code which is reading/writing + * from user space. Such code must not hold kernel locks (since it + * can equally take a page fault), therefore it is safe to call + * force_sig_info even though that claims and releases locks. + * + * Code in ./signal.c ensures that the debug control register + * is restored before we deliver any signal, and therefore that + * user code runs with the correct debug control register even though + * we clear it here. + * + * Being careful here means that we don't have to be as careful in a + * lot of more complicated places (task switching can be a bit lazy + * about restoring all the debug state, and ptrace doesn't have to + * find every occurrence of the TF bit that could be saved away even + * by user code) + * + * May run on IST stack. + */ dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code) { struct task_struct *tsk = current; @@ -525,11 +549,6 @@ dotraplinkage void do_coprocessor_error(struct pt_regs *regs, long error_code) math_error((void __user *)regs->ip); } -asmlinkage void bad_intr(void) -{ - printk("bad interrupt"); -} - static void simd_math_error(void __user *ip) { struct task_struct *task; -- cgit v1.2.3 From 081f75bbdc86de53537e1b5aca01de72bd2fea6b Mon Sep 17 00:00:00 2001 From: Alexander van Heukelum Date: Fri, 3 Oct 2008 22:00:39 +0200 Subject: traps: x86: make traps_32.c and traps_64.c equal Use CONFIG_X86_64/CONFIG_X86_32 to condtionally compile the parts needed for x86_64 or i386 only. Runs a small userspace for a number of minimal configurations and boots the defconfigs. Signed-off-by: Alexander van Heukelum Signed-off-by: Ingo Molnar --- arch/x86/kernel/traps_32.c | 173 ++++++++++++++++++++- arch/x86/kernel/traps_64.c | 368 ++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 536 insertions(+), 5 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/traps_32.c b/arch/x86/kernel/traps_32.c index 076739863d2..ffb131f74f7 100644 --- a/arch/x86/kernel/traps_32.c +++ b/arch/x86/kernel/traps_32.c @@ -48,7 +48,6 @@ #include #include -#include #include #include #include @@ -59,6 +58,11 @@ #include +#ifdef CONFIG_X86_64 +#include +#include +#include +#else #include #include #include @@ -83,6 +87,7 @@ char ignore_fpu_irq; */ gate_desc idt_table[256] __attribute__((__section__(".data.idt"))) = { { { { 0, 0 } } }, }; +#endif static int ignore_nmis; @@ -106,6 +111,7 @@ static inline void preempt_conditional_cli(struct pt_regs *regs) dec_preempt_count(); } +#ifdef CONFIG_X86_32 static inline void die_if_kernel(const char *str, struct pt_regs *regs, long err) { @@ -153,6 +159,7 @@ static int lazy_iobitmap_copy(void) return 0; } +#endif static void __kprobes do_trap(int trapnr, int signr, char *str, struct pt_regs *regs, @@ -160,6 +167,7 @@ do_trap(int trapnr, int signr, char *str, struct pt_regs *regs, { struct task_struct *tsk = current; +#ifdef CONFIG_X86_32 if (regs->flags & X86_VM_MASK) { /* * traps 0, 1, 3, 4, and 5 should be forwarded to vm86. @@ -169,11 +177,14 @@ do_trap(int trapnr, int signr, char *str, struct pt_regs *regs, goto vm86_trap; goto trap_signal; } +#endif if (!user_mode(regs)) goto kernel_trap; +#ifdef CONFIG_X86_32 trap_signal: +#endif /* * We want error_code and trap_no set for userspace faults and * kernelspace faults which result in die(), but not @@ -186,6 +197,18 @@ trap_signal: tsk->thread.error_code = error_code; tsk->thread.trap_no = trapnr; +#ifdef CONFIG_X86_64 + if (show_unhandled_signals && unhandled_signal(tsk, signr) && + printk_ratelimit()) { + printk(KERN_INFO + "%s[%d] trap %s ip:%lx sp:%lx error:%lx", + tsk->comm, tsk->pid, str, + regs->ip, regs->sp, error_code); + print_vma_addr(" in ", regs->ip); + printk("\n"); + } +#endif + if (info) force_sig_info(signr, info, tsk); else @@ -200,11 +223,13 @@ kernel_trap: } return; +#ifdef CONFIG_X86_32 vm86_trap: if (handle_vm86_trap((struct kernel_vm86_regs *) regs, error_code, trapnr)) goto trap_signal; return; +#endif } #define DO_ERROR(trapnr, signr, str, name) \ @@ -239,9 +264,41 @@ DO_ERROR_INFO(6, SIGILL, "invalid opcode", invalid_op, ILL_ILLOPN, regs->ip) DO_ERROR(9, SIGFPE, "coprocessor segment overrun", coprocessor_segment_overrun) DO_ERROR(10, SIGSEGV, "invalid TSS", invalid_TSS) DO_ERROR(11, SIGBUS, "segment not present", segment_not_present) +#ifdef CONFIG_X86_32 DO_ERROR(12, SIGBUS, "stack segment", stack_segment) +#endif DO_ERROR_INFO(17, SIGBUS, "alignment check", alignment_check, BUS_ADRALN, 0) +#ifdef CONFIG_X86_64 +/* Runs on IST stack */ +dotraplinkage void do_stack_segment(struct pt_regs *regs, long error_code) +{ + if (notify_die(DIE_TRAP, "stack segment", regs, error_code, + 12, SIGBUS) == NOTIFY_STOP) + return; + preempt_conditional_sti(regs); + do_trap(12, SIGBUS, "stack segment", regs, error_code, NULL); + preempt_conditional_cli(regs); +} + +dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code) +{ + static const char str[] = "double fault"; + struct task_struct *tsk = current; + + /* Return not checked because double check cannot be ignored */ + notify_die(DIE_TRAP, str, regs, error_code, 8, SIGSEGV); + + tsk->thread.error_code = error_code; + tsk->thread.trap_no = 8; + + /* This is always a kernel trap and never fixable (and thus must + never return). */ + for (;;) + die(str, regs, error_code); +} +#endif + dotraplinkage void __kprobes do_general_protection(struct pt_regs *regs, long error_code) { @@ -249,6 +306,7 @@ do_general_protection(struct pt_regs *regs, long error_code) conditional_sti(regs); +#ifdef CONFIG_X86_32 if (lazy_iobitmap_copy()) { /* restart the faulting instruction */ return; @@ -256,6 +314,7 @@ do_general_protection(struct pt_regs *regs, long error_code) if (regs->flags & X86_VM_MASK) goto gp_in_vm86; +#endif tsk = current; if (!user_mode(regs)) @@ -277,10 +336,12 @@ do_general_protection(struct pt_regs *regs, long error_code) force_sig(SIGSEGV, tsk); return; +#ifdef CONFIG_X86_32 gp_in_vm86: local_irq_enable(); handle_vm86_fault((struct kernel_vm86_regs *) regs, error_code); return; +#endif gp_in_kernel: if (fixup_exception(regs)) @@ -368,6 +429,7 @@ unknown_nmi_error(unsigned char reason, struct pt_regs *regs) printk(KERN_EMERG "Dazed and confused, but trying to continue\n"); } +#ifdef CONFIG_X86_32 static DEFINE_SPINLOCK(nmi_print_lock); void notrace __kprobes die_nmi(char *str, struct pt_regs *regs, int do_panic) @@ -402,6 +464,7 @@ void notrace __kprobes die_nmi(char *str, struct pt_regs *regs, int do_panic) do_exit(SIGSEGV); } +#endif static notrace __kprobes void default_do_nmi(struct pt_regs *regs) { @@ -441,11 +504,13 @@ static notrace __kprobes void default_do_nmi(struct pt_regs *regs) mem_parity_error(reason, regs); if (reason & 0x40) io_check_error(reason, regs); +#ifdef CONFIG_X86_32 /* * Reassert NMI in case it became active meanwhile * as it's edge-triggered: */ reassert_nmi(); +#endif } dotraplinkage notrace __kprobes void @@ -453,7 +518,11 @@ do_nmi(struct pt_regs *regs, long error_code) { nmi_enter(); +#ifdef CONFIG_X86_32 { int cpu; cpu = smp_processor_id(); ++nmi_count(cpu); } +#else + add_pda(__nmi_count, 1); +#endif if (!ignore_nmis) default_do_nmi(regs); @@ -491,6 +560,29 @@ dotraplinkage void __kprobes do_int3(struct pt_regs *regs, long error_code) preempt_conditional_cli(regs); } +#ifdef CONFIG_X86_64 +/* Help handler running on IST stack to switch back to user stack + for scheduling or signal handling. The actual stack switch is done in + entry.S */ +asmlinkage __kprobes struct pt_regs *sync_regs(struct pt_regs *eregs) +{ + struct pt_regs *regs = eregs; + /* Did already sync */ + if (eregs == (struct pt_regs *)eregs->sp) + ; + /* Exception from user space */ + else if (user_mode(eregs)) + regs = task_pt_regs(current); + /* Exception from kernel and interrupts are enabled. Move to + kernel process stack. */ + else if (eregs->flags & X86_EFLAGS_IF) + regs = (struct pt_regs *)(eregs->sp -= sizeof(struct pt_regs)); + if (eregs != regs) + *regs = *eregs; + return regs; +} +#endif + /* * Our handling of the processor debug registers is non-trivial. * We do not clear them on entry and exit from the kernel. Therefore @@ -542,8 +634,10 @@ dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code) goto clear_dr7; } +#ifdef CONFIG_X86_32 if (regs->flags & X86_VM_MASK) goto debug_vm86; +#endif /* Save debug status register where ptrace can see it */ tsk->thread.debugreg6 = condition; @@ -570,10 +664,12 @@ clear_dr7: preempt_conditional_cli(regs); return; +#ifdef CONFIG_X86_32 debug_vm86: handle_vm86_trap((struct kernel_vm86_regs *) regs, error_code, 1); preempt_conditional_cli(regs); return; +#endif clear_TF_reenable: set_tsk_thread_flag(tsk, TIF_SINGLESTEP); @@ -582,6 +678,20 @@ clear_TF_reenable: return; } +#ifdef CONFIG_X86_64 +static int kernel_math_error(struct pt_regs *regs, const char *str, int trapnr) +{ + if (fixup_exception(regs)) + return 1; + + notify_die(DIE_GPF, str, regs, 0, trapnr, SIGFPE); + /* Illegal floating point operation in the kernel */ + current->thread.trap_no = trapnr; + die(str, regs, 0); + return 0; +} +#endif + /* * Note that we play around with the 'TS' bit in an attempt to get * the correct behaviour even in the presence of the asynchronous @@ -618,7 +728,9 @@ void math_error(void __user *ip) swd = get_fpu_swd(task); switch (swd & ~cwd & 0x3f) { case 0x000: /* No unmasked exception */ +#ifdef CONFIG_X86_32 return; +#endif default: /* Multiple exceptions */ break; case 0x001: /* Invalid Op */ @@ -649,7 +761,15 @@ void math_error(void __user *ip) dotraplinkage void do_coprocessor_error(struct pt_regs *regs, long error_code) { conditional_sti(regs); + +#ifdef CONFIG_X86_32 ignore_fpu_irq = 1; +#else + if (!user_mode(regs) && + kernel_math_error(regs, "kernel x87 math error", 16)) + return; +#endif + math_error((void __user *)regs->ip); } @@ -706,6 +826,7 @@ do_simd_coprocessor_error(struct pt_regs *regs, long error_code) { conditional_sti(regs); +#ifdef CONFIG_X86_32 if (cpu_has_xmm) { /* Handle SIMD FPU exceptions on PIII+ processors. */ ignore_fpu_irq = 1; @@ -724,6 +845,12 @@ do_simd_coprocessor_error(struct pt_regs *regs, long error_code) current->thread.error_code = error_code; die_if_kernel("cache flush denied", regs, error_code); force_sig(SIGSEGV, current); +#else + if (!user_mode(regs) && + kernel_math_error(regs, "kernel simd math error", 19)) + return; + simd_math_error((void __user *)regs->ip); +#endif } dotraplinkage void @@ -736,6 +863,7 @@ do_spurious_interrupt_bug(struct pt_regs *regs, long error_code) #endif } +#ifdef CONFIG_X86_32 unsigned long patch_espfix_desc(unsigned long uesp, unsigned long kesp) { struct desc_struct *gdt = get_cpu_gdt_table(smp_processor_id()); @@ -754,6 +882,15 @@ unsigned long patch_espfix_desc(unsigned long uesp, unsigned long kesp) return new_kesp; } +#else +asmlinkage void __attribute__((weak)) smp_thermal_interrupt(void) +{ +} + +asmlinkage void __attribute__((weak)) mce_threshold_interrupt(void) +{ +} +#endif /* * 'math_state_restore()' saves the current math information in the @@ -786,14 +923,24 @@ asmlinkage void math_state_restore(void) } clts(); /* Allow maths ops (or we recurse) */ +#ifdef CONFIG_X86_32 restore_fpu(tsk); +#else + /* + * Paranoid restore. send a SIGSEGV if we fail to restore the state. + */ + if (unlikely(restore_fpu_checking(tsk))) { + stts(); + force_sig(SIGSEGV, tsk); + return; + } +#endif thread->status |= TS_USEDFPU; /* So we fnsave on switch_to() */ tsk->fpu_counter++; } EXPORT_SYMBOL_GPL(math_state_restore); #ifndef CONFIG_MATH_EMULATION - asmlinkage void math_emulate(long arg) { printk(KERN_EMERG @@ -802,12 +949,12 @@ asmlinkage void math_emulate(long arg) force_sig(SIGFPE, current); schedule(); } - #endif /* CONFIG_MATH_EMULATION */ dotraplinkage void __kprobes do_device_not_available(struct pt_regs *regs, long error) { +#ifdef CONFIG_X86_32 if (read_cr0() & X86_CR0_EM) { conditional_sti(regs); math_emulate(0); @@ -815,8 +962,12 @@ do_device_not_available(struct pt_regs *regs, long error) math_state_restore(); /* interrupts still off */ conditional_sti(regs); } +#else + math_state_restore(); +#endif } +#ifdef CONFIG_X86_32 #ifdef CONFIG_X86_MCE dotraplinkage void __kprobes do_machine_check(struct pt_regs *regs, long error) { @@ -839,10 +990,13 @@ dotraplinkage void do_iret_error(struct pt_regs *regs, long error_code) return; do_trap(32, SIGILL, "iret exception", regs, error_code, &info); } +#endif void __init trap_init(void) { +#ifdef CONFIG_X86_32 int i; +#endif #ifdef CONFIG_EISA void __iomem *p = early_ioremap(0x0FFFD9, 4); @@ -862,7 +1016,11 @@ void __init trap_init(void) set_intr_gate(5, &bounds); set_intr_gate(6, &invalid_op); set_intr_gate(7, &device_not_available); +#ifdef CONFIG_X86_32 set_task_gate(8, GDT_ENTRY_DOUBLEFAULT_TSS); +#else + set_intr_gate_ist(8, &double_fault, DOUBLEFAULT_STACK); +#endif set_intr_gate(9, &coprocessor_segment_overrun); set_intr_gate(10, &invalid_TSS); set_intr_gate(11, &segment_not_present); @@ -877,6 +1035,11 @@ void __init trap_init(void) #endif set_intr_gate(19, &simd_coprocessor_error); +#ifdef CONFIG_IA32_EMULATION + set_system_intr_gate(IA32_SYSCALL_VECTOR, ia32_syscall); +#endif + +#ifdef CONFIG_X86_32 if (cpu_has_fxsr) { printk(KERN_INFO "Enabling fast FPU save and restore... "); set_in_cr4(X86_CR4_OSFXSR); @@ -896,11 +1059,13 @@ void __init trap_init(void) set_bit(i, used_vectors); set_bit(SYSCALL_VECTOR, used_vectors); - +#endif /* * Should be a barrier for any external CPU state: */ cpu_init(); +#ifdef CONFIG_X86_32 trap_init_hook(); +#endif } diff --git a/arch/x86/kernel/traps_64.c b/arch/x86/kernel/traps_64.c index 22fe62a24ed..60ecc855ab8 100644 --- a/arch/x86/kernel/traps_64.c +++ b/arch/x86/kernel/traps_64.c @@ -33,13 +33,21 @@ #include #include +#ifdef CONFIG_EISA +#include +#include +#endif + +#ifdef CONFIG_MCA +#include +#endif + #if defined(CONFIG_EDAC) #include #endif #include #include -#include #include #include #include @@ -50,10 +58,35 @@ #include +#ifdef CONFIG_X86_64 #include #include #include +#else +#include +#include +#include +#include +#include + +#include "cpu/mcheck/mce.h" +DECLARE_BITMAP(used_vectors, NR_VECTORS); +EXPORT_SYMBOL_GPL(used_vectors); + +asmlinkage int system_call(void); + +/* Do we ignore FPU interrupts ? */ +char ignore_fpu_irq; + +/* + * The IDT has to be page-aligned to simplify the Pentium + * F0 0F bug workaround.. We have a special link segment + * for this. + */ +gate_desc idt_table[256] + __attribute__((__section__(".data.idt"))) = { { { { 0, 0 } } }, }; +#endif static int ignore_nmis; @@ -77,15 +110,80 @@ static inline void preempt_conditional_cli(struct pt_regs *regs) dec_preempt_count(); } +#ifdef CONFIG_X86_32 +static inline void +die_if_kernel(const char *str, struct pt_regs *regs, long err) +{ + if (!user_mode_vm(regs)) + die(str, regs, err); +} + +/* + * Perform the lazy TSS's I/O bitmap copy. If the TSS has an + * invalid offset set (the LAZY one) and the faulting thread has + * a valid I/O bitmap pointer, we copy the I/O bitmap in the TSS, + * we set the offset field correctly and return 1. + */ +static int lazy_iobitmap_copy(void) +{ + struct thread_struct *thread; + struct tss_struct *tss; + int cpu; + + cpu = get_cpu(); + tss = &per_cpu(init_tss, cpu); + thread = ¤t->thread; + + if (tss->x86_tss.io_bitmap_base == INVALID_IO_BITMAP_OFFSET_LAZY && + thread->io_bitmap_ptr) { + memcpy(tss->io_bitmap, thread->io_bitmap_ptr, + thread->io_bitmap_max); + /* + * If the previously set map was extending to higher ports + * than the current one, pad extra space with 0xff (no access). + */ + if (thread->io_bitmap_max < tss->io_bitmap_max) { + memset((char *) tss->io_bitmap + + thread->io_bitmap_max, 0xff, + tss->io_bitmap_max - thread->io_bitmap_max); + } + tss->io_bitmap_max = thread->io_bitmap_max; + tss->x86_tss.io_bitmap_base = IO_BITMAP_OFFSET; + tss->io_bitmap_owner = thread; + put_cpu(); + + return 1; + } + put_cpu(); + + return 0; +} +#endif + static void __kprobes do_trap(int trapnr, int signr, char *str, struct pt_regs *regs, long error_code, siginfo_t *info) { struct task_struct *tsk = current; +#ifdef CONFIG_X86_32 + if (regs->flags & X86_VM_MASK) { + /* + * traps 0, 1, 3, 4, and 5 should be forwarded to vm86. + * On nmi (interrupt 2), do_trap should not be called. + */ + if (trapnr < 6) + goto vm86_trap; + goto trap_signal; + } +#endif + if (!user_mode(regs)) goto kernel_trap; +#ifdef CONFIG_X86_32 +trap_signal: +#endif /* * We want error_code and trap_no set for userspace faults and * kernelspace faults which result in die(), but not @@ -98,6 +196,7 @@ do_trap(int trapnr, int signr, char *str, struct pt_regs *regs, tsk->thread.error_code = error_code; tsk->thread.trap_no = trapnr; +#ifdef CONFIG_X86_64 if (show_unhandled_signals && unhandled_signal(tsk, signr) && printk_ratelimit()) { printk(KERN_INFO @@ -107,6 +206,7 @@ do_trap(int trapnr, int signr, char *str, struct pt_regs *regs, print_vma_addr(" in ", regs->ip); printk("\n"); } +#endif if (info) force_sig_info(signr, info, tsk); @@ -121,6 +221,14 @@ kernel_trap: die(str, regs, error_code); } return; + +#ifdef CONFIG_X86_32 +vm86_trap: + if (handle_vm86_trap((struct kernel_vm86_regs *) regs, + error_code, trapnr)) + goto trap_signal; + return; +#endif } #define DO_ERROR(trapnr, signr, str, name) \ @@ -155,8 +263,12 @@ DO_ERROR_INFO(6, SIGILL, "invalid opcode", invalid_op, ILL_ILLOPN, regs->ip) DO_ERROR(9, SIGFPE, "coprocessor segment overrun", coprocessor_segment_overrun) DO_ERROR(10, SIGSEGV, "invalid TSS", invalid_TSS) DO_ERROR(11, SIGBUS, "segment not present", segment_not_present) +#ifdef CONFIG_X86_32 +DO_ERROR(12, SIGBUS, "stack segment", stack_segment) +#endif DO_ERROR_INFO(17, SIGBUS, "alignment check", alignment_check, BUS_ADRALN, 0) +#ifdef CONFIG_X86_64 /* Runs on IST stack */ dotraplinkage void do_stack_segment(struct pt_regs *regs, long error_code) { @@ -184,6 +296,7 @@ dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code) for (;;) die(str, regs, error_code); } +#endif dotraplinkage void __kprobes do_general_protection(struct pt_regs *regs, long error_code) @@ -192,6 +305,16 @@ do_general_protection(struct pt_regs *regs, long error_code) conditional_sti(regs); +#ifdef CONFIG_X86_32 + if (lazy_iobitmap_copy()) { + /* restart the faulting instruction */ + return; + } + + if (regs->flags & X86_VM_MASK) + goto gp_in_vm86; +#endif + tsk = current; if (!user_mode(regs)) goto gp_in_kernel; @@ -212,6 +335,13 @@ do_general_protection(struct pt_regs *regs, long error_code) force_sig(SIGSEGV, tsk); return; +#ifdef CONFIG_X86_32 +gp_in_vm86: + local_irq_enable(); + handle_vm86_fault((struct kernel_vm86_regs *) regs, error_code); + return; +#endif + gp_in_kernel: if (fixup_exception(regs)) return; @@ -277,6 +407,16 @@ unknown_nmi_error(unsigned char reason, struct pt_regs *regs) if (notify_die(DIE_NMIUNKNOWN, "nmi", regs, reason, 2, SIGINT) == NOTIFY_STOP) return; +#ifdef CONFIG_MCA + /* + * Might actually be able to figure out what the guilty party + * is: + */ + if (MCA_bus) { + mca_handle_nmi(); + return; + } +#endif printk(KERN_EMERG "Uhhuh. NMI received for unknown reason %02x on CPU %d.\n", reason, smp_processor_id()); @@ -288,6 +428,43 @@ unknown_nmi_error(unsigned char reason, struct pt_regs *regs) printk(KERN_EMERG "Dazed and confused, but trying to continue\n"); } +#ifdef CONFIG_X86_32 +static DEFINE_SPINLOCK(nmi_print_lock); + +void notrace __kprobes die_nmi(char *str, struct pt_regs *regs, int do_panic) +{ + if (notify_die(DIE_NMIWATCHDOG, str, regs, 0, 2, SIGINT) == NOTIFY_STOP) + return; + + spin_lock(&nmi_print_lock); + /* + * We are in trouble anyway, lets at least try + * to get a message out: + */ + bust_spinlocks(1); + printk(KERN_EMERG "%s", str); + printk(" on CPU%d, ip %08lx, registers:\n", + smp_processor_id(), regs->ip); + show_registers(regs); + if (do_panic) + panic("Non maskable interrupt"); + console_silent(); + spin_unlock(&nmi_print_lock); + bust_spinlocks(0); + + /* + * If we are in kernel we are probably nested up pretty bad + * and might aswell get out now while we still can: + */ + if (!user_mode_vm(regs)) { + current->thread.trap_no = 2; + crash_kexec(regs); + } + + do_exit(SIGSEGV); +} +#endif + static notrace __kprobes void default_do_nmi(struct pt_regs *regs) { unsigned char reason = 0; @@ -303,6 +480,7 @@ static notrace __kprobes void default_do_nmi(struct pt_regs *regs) if (notify_die(DIE_NMI_IPI, "nmi_ipi", regs, reason, 2, SIGINT) == NOTIFY_STOP) return; +#ifdef CONFIG_X86_LOCAL_APIC /* * Ok, so this is none of the documented NMI sources, * so it must be the NMI watchdog. @@ -311,6 +489,9 @@ static notrace __kprobes void default_do_nmi(struct pt_regs *regs) return; if (!do_nmi_callback(regs, cpu)) unknown_nmi_error(reason, regs); +#else + unknown_nmi_error(reason, regs); +#endif return; } @@ -322,6 +503,13 @@ static notrace __kprobes void default_do_nmi(struct pt_regs *regs) mem_parity_error(reason, regs); if (reason & 0x40) io_check_error(reason, regs); +#ifdef CONFIG_X86_32 + /* + * Reassert NMI in case it became active meanwhile + * as it's edge-triggered: + */ + reassert_nmi(); +#endif } dotraplinkage notrace __kprobes void @@ -329,7 +517,11 @@ do_nmi(struct pt_regs *regs, long error_code) { nmi_enter(); +#ifdef CONFIG_X86_32 + { int cpu; cpu = smp_processor_id(); ++nmi_count(cpu); } +#else add_pda(__nmi_count, 1); +#endif if (!ignore_nmis) default_do_nmi(regs); @@ -352,15 +544,22 @@ void restart_nmi(void) /* May run on IST stack. */ dotraplinkage void __kprobes do_int3(struct pt_regs *regs, long error_code) { +#ifdef CONFIG_KPROBES if (notify_die(DIE_INT3, "int3", regs, error_code, 3, SIGTRAP) == NOTIFY_STOP) return; +#else + if (notify_die(DIE_TRAP, "int3", regs, error_code, 3, SIGTRAP) + == NOTIFY_STOP) + return; +#endif preempt_conditional_sti(regs); do_trap(3, SIGTRAP, "int3", regs, error_code, NULL); preempt_conditional_cli(regs); } +#ifdef CONFIG_X86_64 /* Help handler running on IST stack to switch back to user stack for scheduling or signal handling. The actual stack switch is done in entry.S */ @@ -381,6 +580,7 @@ asmlinkage __kprobes struct pt_regs *sync_regs(struct pt_regs *eregs) *regs = *eregs; return regs; } +#endif /* * Our handling of the processor debug registers is non-trivial. @@ -433,6 +633,11 @@ dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code) goto clear_dr7; } +#ifdef CONFIG_X86_32 + if (regs->flags & X86_VM_MASK) + goto debug_vm86; +#endif + /* Save debug status register where ptrace can see it */ tsk->thread.debugreg6 = condition; @@ -458,6 +663,13 @@ clear_dr7: preempt_conditional_cli(regs); return; +#ifdef CONFIG_X86_32 +debug_vm86: + handle_vm86_trap((struct kernel_vm86_regs *) regs, error_code, 1); + preempt_conditional_cli(regs); + return; +#endif + clear_TF_reenable: set_tsk_thread_flag(tsk, TIF_SINGLESTEP); regs->flags &= ~X86_EFLAGS_TF; @@ -465,6 +677,7 @@ clear_TF_reenable: return; } +#ifdef CONFIG_X86_64 static int kernel_math_error(struct pt_regs *regs, const char *str, int trapnr) { if (fixup_exception(regs)) @@ -476,6 +689,7 @@ static int kernel_math_error(struct pt_regs *regs, const char *str, int trapnr) die(str, regs, 0); return 0; } +#endif /* * Note that we play around with the 'TS' bit in an attempt to get @@ -513,6 +727,9 @@ void math_error(void __user *ip) swd = get_fpu_swd(task); switch (swd & ~cwd & 0x3f) { case 0x000: /* No unmasked exception */ +#ifdef CONFIG_X86_32 + return; +#endif default: /* Multiple exceptions */ break; case 0x001: /* Invalid Op */ @@ -543,9 +760,15 @@ void math_error(void __user *ip) dotraplinkage void do_coprocessor_error(struct pt_regs *regs, long error_code) { conditional_sti(regs); + +#ifdef CONFIG_X86_32 + ignore_fpu_irq = 1; +#else if (!user_mode(regs) && kernel_math_error(regs, "kernel x87 math error", 16)) return; +#endif + math_error((void __user *)regs->ip); } @@ -601,17 +824,64 @@ dotraplinkage void do_simd_coprocessor_error(struct pt_regs *regs, long error_code) { conditional_sti(regs); + +#ifdef CONFIG_X86_32 + if (cpu_has_xmm) { + /* Handle SIMD FPU exceptions on PIII+ processors. */ + ignore_fpu_irq = 1; + simd_math_error((void __user *)regs->ip); + return; + } + /* + * Handle strange cache flush from user space exception + * in all other cases. This is undocumented behaviour. + */ + if (regs->flags & X86_VM_MASK) { + handle_vm86_fault((struct kernel_vm86_regs *)regs, error_code); + return; + } + current->thread.trap_no = 19; + current->thread.error_code = error_code; + die_if_kernel("cache flush denied", regs, error_code); + force_sig(SIGSEGV, current); +#else if (!user_mode(regs) && kernel_math_error(regs, "kernel simd math error", 19)) return; simd_math_error((void __user *)regs->ip); +#endif } dotraplinkage void do_spurious_interrupt_bug(struct pt_regs *regs, long error_code) { + conditional_sti(regs); +#if 0 + /* No need to warn about this any longer. */ + printk(KERN_INFO "Ignoring P6 Local APIC Spurious Interrupt Bug...\n"); +#endif } +#ifdef CONFIG_X86_32 +unsigned long patch_espfix_desc(unsigned long uesp, unsigned long kesp) +{ + struct desc_struct *gdt = get_cpu_gdt_table(smp_processor_id()); + unsigned long base = (kesp - uesp) & -THREAD_SIZE; + unsigned long new_kesp = kesp - base; + unsigned long lim_pages = (new_kesp | (THREAD_SIZE - 1)) >> PAGE_SHIFT; + __u64 desc = *(__u64 *)&gdt[GDT_ENTRY_ESPFIX_SS]; + + /* Set up base for espfix segment */ + desc &= 0x00f0ff0000000000ULL; + desc |= ((((__u64)base) << 16) & 0x000000ffffff0000ULL) | + ((((__u64)base) << 32) & 0xff00000000000000ULL) | + ((((__u64)lim_pages) << 32) & 0x000f000000000000ULL) | + (lim_pages & 0xffff); + *(__u64 *)&gdt[GDT_ENTRY_ESPFIX_SS] = desc; + + return new_kesp; +} +#else asmlinkage void __attribute__((weak)) smp_thermal_interrupt(void) { } @@ -619,6 +889,7 @@ asmlinkage void __attribute__((weak)) smp_thermal_interrupt(void) asmlinkage void __attribute__((weak)) mce_threshold_interrupt(void) { } +#endif /* * 'math_state_restore()' saves the current math information in the @@ -626,6 +897,9 @@ asmlinkage void __attribute__((weak)) mce_threshold_interrupt(void) * * Careful.. There are problems with IBM-designed IRQ13 behaviour. * Don't touch unless you *really* know how it works. + * + * Must be called with kernel preemption disabled (in this case, + * local interrupts are disabled at the call-site in entry.S). */ asmlinkage void math_state_restore(void) { @@ -648,6 +922,9 @@ asmlinkage void math_state_restore(void) } clts(); /* Allow maths ops (or we recurse) */ +#ifdef CONFIG_X86_32 + restore_fpu(tsk); +#else /* * Paranoid restore. send a SIGSEGV if we fail to restore the state. */ @@ -656,19 +933,78 @@ asmlinkage void math_state_restore(void) force_sig(SIGSEGV, tsk); return; } +#endif thread->status |= TS_USEDFPU; /* So we fnsave on switch_to() */ tsk->fpu_counter++; } EXPORT_SYMBOL_GPL(math_state_restore); +#ifndef CONFIG_MATH_EMULATION +asmlinkage void math_emulate(long arg) +{ + printk(KERN_EMERG + "math-emulation not enabled and no coprocessor found.\n"); + printk(KERN_EMERG "killing %s.\n", current->comm); + force_sig(SIGFPE, current); + schedule(); +} +#endif /* CONFIG_MATH_EMULATION */ + dotraplinkage void __kprobes do_device_not_available(struct pt_regs *regs, long error) { +#ifdef CONFIG_X86_32 + if (read_cr0() & X86_CR0_EM) { + conditional_sti(regs); + math_emulate(0); + } else { + math_state_restore(); /* interrupts still off */ + conditional_sti(regs); + } +#else math_state_restore(); +#endif +} + +#ifdef CONFIG_X86_32 +#ifdef CONFIG_X86_MCE +dotraplinkage void __kprobes do_machine_check(struct pt_regs *regs, long error) +{ + conditional_sti(regs); + machine_check_vector(regs, error); } +#endif + +dotraplinkage void do_iret_error(struct pt_regs *regs, long error_code) +{ + siginfo_t info; + local_irq_enable(); + + info.si_signo = SIGILL; + info.si_errno = 0; + info.si_code = ILL_BADSTK; + info.si_addr = 0; + if (notify_die(DIE_TRAP, "iret exception", + regs, error_code, 32, SIGILL) == NOTIFY_STOP) + return; + do_trap(32, SIGILL, "iret exception", regs, error_code, &info); +} +#endif void __init trap_init(void) { +#ifdef CONFIG_X86_32 + int i; +#endif + +#ifdef CONFIG_EISA + void __iomem *p = early_ioremap(0x0FFFD9, 4); + + if (readl(p) == 'E' + ('I'<<8) + ('S'<<16) + ('A'<<24)) + EISA_bus = 1; + early_iounmap(p, 4); +#endif + set_intr_gate(0, ÷_error); set_intr_gate_ist(1, &debug, DEBUG_STACK); set_intr_gate_ist(2, &nmi, NMI_STACK); @@ -679,7 +1015,11 @@ void __init trap_init(void) set_intr_gate(5, &bounds); set_intr_gate(6, &invalid_op); set_intr_gate(7, &device_not_available); +#ifdef CONFIG_X86_32 + set_task_gate(8, GDT_ENTRY_DOUBLEFAULT_TSS); +#else set_intr_gate_ist(8, &double_fault, DOUBLEFAULT_STACK); +#endif set_intr_gate(9, &coprocessor_segment_overrun); set_intr_gate(10, &invalid_TSS); set_intr_gate(11, &segment_not_present); @@ -697,8 +1037,34 @@ void __init trap_init(void) #ifdef CONFIG_IA32_EMULATION set_system_intr_gate(IA32_SYSCALL_VECTOR, ia32_syscall); #endif + +#ifdef CONFIG_X86_32 + if (cpu_has_fxsr) { + printk(KERN_INFO "Enabling fast FPU save and restore... "); + set_in_cr4(X86_CR4_OSFXSR); + printk("done.\n"); + } + if (cpu_has_xmm) { + printk(KERN_INFO + "Enabling unmasked SIMD FPU exception support... "); + set_in_cr4(X86_CR4_OSXMMEXCPT); + printk("done.\n"); + } + + set_system_trap_gate(SYSCALL_VECTOR, &system_call); + + /* Reserve all the builtin and the syscall vector: */ + for (i = 0; i < FIRST_EXTERNAL_VECTOR; i++) + set_bit(i, used_vectors); + + set_bit(SYSCALL_VECTOR, used_vectors); +#endif /* * Should be a barrier for any external CPU state: */ cpu_init(); + +#ifdef CONFIG_X86_32 + trap_init_hook(); +#endif } -- cgit v1.2.3 From 8728861b4fead8119a1b7bb856a387320859cd98 Mon Sep 17 00:00:00 2001 From: Alexander van Heukelum Date: Fri, 3 Oct 2008 22:00:40 +0200 Subject: traps: x86: finalize unification of traps.c traps_32.c and traps_64.c are now equal. Move one to traps.c, delete the other one and change the Makefile Signed-off-by: Alexander van Heukelum Signed-off-by: Ingo Molnar --- arch/x86/kernel/Makefile | 2 +- arch/x86/kernel/traps.c | 1071 ++++++++++++++++++++++++++++++++++++++++++++ arch/x86/kernel/traps_32.c | 1071 -------------------------------------------- arch/x86/kernel/traps_64.c | 1070 ------------------------------------------- 4 files changed, 1072 insertions(+), 2142 deletions(-) create mode 100644 arch/x86/kernel/traps.c delete mode 100644 arch/x86/kernel/traps_32.c delete mode 100644 arch/x86/kernel/traps_64.c (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index de63fed9fae..0d41f0343dc 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -23,7 +23,7 @@ CFLAGS_hpet.o := $(nostackp) CFLAGS_tsc.o := $(nostackp) obj-y := process_$(BITS).o signal_$(BITS).o entry_$(BITS).o -obj-y += traps_$(BITS).o irq_$(BITS).o dumpstack_$(BITS).o +obj-y += traps.o irq_$(BITS).o dumpstack_$(BITS).o obj-y += time_$(BITS).o ioport.o ldt.o obj-y += setup.o i8259.o irqinit_$(BITS).o setup_percpu.o obj-$(CONFIG_X86_VISWS) += visws_quirks.o diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c new file mode 100644 index 00000000000..ffb131f74f7 --- /dev/null +++ b/arch/x86/kernel/traps.c @@ -0,0 +1,1071 @@ +/* + * Copyright (C) 1991, 1992 Linus Torvalds + * Copyright (C) 2000, 2001, 2002 Andi Kleen, SuSE Labs + * + * Pentium III FXSR, SSE support + * Gareth Hughes , May 2000 + */ + +/* + * Handle hardware traps and faults. + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef CONFIG_EISA +#include +#include +#endif + +#ifdef CONFIG_MCA +#include +#endif + +#if defined(CONFIG_EDAC) +#include +#endif + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#ifdef CONFIG_X86_64 +#include +#include +#include +#else +#include +#include +#include +#include +#include +#include + +#include "cpu/mcheck/mce.h" + +DECLARE_BITMAP(used_vectors, NR_VECTORS); +EXPORT_SYMBOL_GPL(used_vectors); + +asmlinkage int system_call(void); + +/* Do we ignore FPU interrupts ? */ +char ignore_fpu_irq; + +/* + * The IDT has to be page-aligned to simplify the Pentium + * F0 0F bug workaround.. We have a special link segment + * for this. + */ +gate_desc idt_table[256] + __attribute__((__section__(".data.idt"))) = { { { { 0, 0 } } }, }; +#endif + +static int ignore_nmis; + +static inline void conditional_sti(struct pt_regs *regs) +{ + if (regs->flags & X86_EFLAGS_IF) + local_irq_enable(); +} + +static inline void preempt_conditional_sti(struct pt_regs *regs) +{ + inc_preempt_count(); + if (regs->flags & X86_EFLAGS_IF) + local_irq_enable(); +} + +static inline void preempt_conditional_cli(struct pt_regs *regs) +{ + if (regs->flags & X86_EFLAGS_IF) + local_irq_disable(); + dec_preempt_count(); +} + +#ifdef CONFIG_X86_32 +static inline void +die_if_kernel(const char *str, struct pt_regs *regs, long err) +{ + if (!user_mode_vm(regs)) + die(str, regs, err); +} + +/* + * Perform the lazy TSS's I/O bitmap copy. If the TSS has an + * invalid offset set (the LAZY one) and the faulting thread has + * a valid I/O bitmap pointer, we copy the I/O bitmap in the TSS, + * we set the offset field correctly and return 1. + */ +static int lazy_iobitmap_copy(void) +{ + struct thread_struct *thread; + struct tss_struct *tss; + int cpu; + + cpu = get_cpu(); + tss = &per_cpu(init_tss, cpu); + thread = ¤t->thread; + + if (tss->x86_tss.io_bitmap_base == INVALID_IO_BITMAP_OFFSET_LAZY && + thread->io_bitmap_ptr) { + memcpy(tss->io_bitmap, thread->io_bitmap_ptr, + thread->io_bitmap_max); + /* + * If the previously set map was extending to higher ports + * than the current one, pad extra space with 0xff (no access). + */ + if (thread->io_bitmap_max < tss->io_bitmap_max) { + memset((char *) tss->io_bitmap + + thread->io_bitmap_max, 0xff, + tss->io_bitmap_max - thread->io_bitmap_max); + } + tss->io_bitmap_max = thread->io_bitmap_max; + tss->x86_tss.io_bitmap_base = IO_BITMAP_OFFSET; + tss->io_bitmap_owner = thread; + put_cpu(); + + return 1; + } + put_cpu(); + + return 0; +} +#endif + +static void __kprobes +do_trap(int trapnr, int signr, char *str, struct pt_regs *regs, + long error_code, siginfo_t *info) +{ + struct task_struct *tsk = current; + +#ifdef CONFIG_X86_32 + if (regs->flags & X86_VM_MASK) { + /* + * traps 0, 1, 3, 4, and 5 should be forwarded to vm86. + * On nmi (interrupt 2), do_trap should not be called. + */ + if (trapnr < 6) + goto vm86_trap; + goto trap_signal; + } +#endif + + if (!user_mode(regs)) + goto kernel_trap; + +#ifdef CONFIG_X86_32 +trap_signal: +#endif + /* + * We want error_code and trap_no set for userspace faults and + * kernelspace faults which result in die(), but not + * kernelspace faults which are fixed up. die() gives the + * process no chance to handle the signal and notice the + * kernel fault information, so that won't result in polluting + * the information about previously queued, but not yet + * delivered, faults. See also do_general_protection below. + */ + tsk->thread.error_code = error_code; + tsk->thread.trap_no = trapnr; + +#ifdef CONFIG_X86_64 + if (show_unhandled_signals && unhandled_signal(tsk, signr) && + printk_ratelimit()) { + printk(KERN_INFO + "%s[%d] trap %s ip:%lx sp:%lx error:%lx", + tsk->comm, tsk->pid, str, + regs->ip, regs->sp, error_code); + print_vma_addr(" in ", regs->ip); + printk("\n"); + } +#endif + + if (info) + force_sig_info(signr, info, tsk); + else + force_sig(signr, tsk); + return; + +kernel_trap: + if (!fixup_exception(regs)) { + tsk->thread.error_code = error_code; + tsk->thread.trap_no = trapnr; + die(str, regs, error_code); + } + return; + +#ifdef CONFIG_X86_32 +vm86_trap: + if (handle_vm86_trap((struct kernel_vm86_regs *) regs, + error_code, trapnr)) + goto trap_signal; + return; +#endif +} + +#define DO_ERROR(trapnr, signr, str, name) \ +dotraplinkage void do_##name(struct pt_regs *regs, long error_code) \ +{ \ + if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \ + == NOTIFY_STOP) \ + return; \ + conditional_sti(regs); \ + do_trap(trapnr, signr, str, regs, error_code, NULL); \ +} + +#define DO_ERROR_INFO(trapnr, signr, str, name, sicode, siaddr) \ +dotraplinkage void do_##name(struct pt_regs *regs, long error_code) \ +{ \ + siginfo_t info; \ + info.si_signo = signr; \ + info.si_errno = 0; \ + info.si_code = sicode; \ + info.si_addr = (void __user *)siaddr; \ + if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \ + == NOTIFY_STOP) \ + return; \ + conditional_sti(regs); \ + do_trap(trapnr, signr, str, regs, error_code, &info); \ +} + +DO_ERROR_INFO(0, SIGFPE, "divide error", divide_error, FPE_INTDIV, regs->ip) +DO_ERROR(4, SIGSEGV, "overflow", overflow) +DO_ERROR(5, SIGSEGV, "bounds", bounds) +DO_ERROR_INFO(6, SIGILL, "invalid opcode", invalid_op, ILL_ILLOPN, regs->ip) +DO_ERROR(9, SIGFPE, "coprocessor segment overrun", coprocessor_segment_overrun) +DO_ERROR(10, SIGSEGV, "invalid TSS", invalid_TSS) +DO_ERROR(11, SIGBUS, "segment not present", segment_not_present) +#ifdef CONFIG_X86_32 +DO_ERROR(12, SIGBUS, "stack segment", stack_segment) +#endif +DO_ERROR_INFO(17, SIGBUS, "alignment check", alignment_check, BUS_ADRALN, 0) + +#ifdef CONFIG_X86_64 +/* Runs on IST stack */ +dotraplinkage void do_stack_segment(struct pt_regs *regs, long error_code) +{ + if (notify_die(DIE_TRAP, "stack segment", regs, error_code, + 12, SIGBUS) == NOTIFY_STOP) + return; + preempt_conditional_sti(regs); + do_trap(12, SIGBUS, "stack segment", regs, error_code, NULL); + preempt_conditional_cli(regs); +} + +dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code) +{ + static const char str[] = "double fault"; + struct task_struct *tsk = current; + + /* Return not checked because double check cannot be ignored */ + notify_die(DIE_TRAP, str, regs, error_code, 8, SIGSEGV); + + tsk->thread.error_code = error_code; + tsk->thread.trap_no = 8; + + /* This is always a kernel trap and never fixable (and thus must + never return). */ + for (;;) + die(str, regs, error_code); +} +#endif + +dotraplinkage void __kprobes +do_general_protection(struct pt_regs *regs, long error_code) +{ + struct task_struct *tsk; + + conditional_sti(regs); + +#ifdef CONFIG_X86_32 + if (lazy_iobitmap_copy()) { + /* restart the faulting instruction */ + return; + } + + if (regs->flags & X86_VM_MASK) + goto gp_in_vm86; +#endif + + tsk = current; + if (!user_mode(regs)) + goto gp_in_kernel; + + tsk->thread.error_code = error_code; + tsk->thread.trap_no = 13; + + if (show_unhandled_signals && unhandled_signal(tsk, SIGSEGV) && + printk_ratelimit()) { + printk(KERN_INFO + "%s[%d] general protection ip:%lx sp:%lx error:%lx", + tsk->comm, task_pid_nr(tsk), + regs->ip, regs->sp, error_code); + print_vma_addr(" in ", regs->ip); + printk("\n"); + } + + force_sig(SIGSEGV, tsk); + return; + +#ifdef CONFIG_X86_32 +gp_in_vm86: + local_irq_enable(); + handle_vm86_fault((struct kernel_vm86_regs *) regs, error_code); + return; +#endif + +gp_in_kernel: + if (fixup_exception(regs)) + return; + + tsk->thread.error_code = error_code; + tsk->thread.trap_no = 13; + if (notify_die(DIE_GPF, "general protection fault", regs, + error_code, 13, SIGSEGV) == NOTIFY_STOP) + return; + die("general protection fault", regs, error_code); +} + +static notrace __kprobes void +mem_parity_error(unsigned char reason, struct pt_regs *regs) +{ + printk(KERN_EMERG + "Uhhuh. NMI received for unknown reason %02x on CPU %d.\n", + reason, smp_processor_id()); + + printk(KERN_EMERG + "You have some hardware problem, likely on the PCI bus.\n"); + +#if defined(CONFIG_EDAC) + if (edac_handler_set()) { + edac_atomic_assert_error(); + return; + } +#endif + + if (panic_on_unrecovered_nmi) + panic("NMI: Not continuing"); + + printk(KERN_EMERG "Dazed and confused, but trying to continue\n"); + + /* Clear and disable the memory parity error line. */ + reason = (reason & 0xf) | 4; + outb(reason, 0x61); +} + +static notrace __kprobes void +io_check_error(unsigned char reason, struct pt_regs *regs) +{ + unsigned long i; + + printk(KERN_EMERG "NMI: IOCK error (debug interrupt?)\n"); + show_registers(regs); + + /* Re-enable the IOCK line, wait for a few seconds */ + reason = (reason & 0xf) | 8; + outb(reason, 0x61); + + i = 2000; + while (--i) + udelay(1000); + + reason &= ~8; + outb(reason, 0x61); +} + +static notrace __kprobes void +unknown_nmi_error(unsigned char reason, struct pt_regs *regs) +{ + if (notify_die(DIE_NMIUNKNOWN, "nmi", regs, reason, 2, SIGINT) == + NOTIFY_STOP) + return; +#ifdef CONFIG_MCA + /* + * Might actually be able to figure out what the guilty party + * is: + */ + if (MCA_bus) { + mca_handle_nmi(); + return; + } +#endif + printk(KERN_EMERG + "Uhhuh. NMI received for unknown reason %02x on CPU %d.\n", + reason, smp_processor_id()); + + printk(KERN_EMERG "Do you have a strange power saving mode enabled?\n"); + if (panic_on_unrecovered_nmi) + panic("NMI: Not continuing"); + + printk(KERN_EMERG "Dazed and confused, but trying to continue\n"); +} + +#ifdef CONFIG_X86_32 +static DEFINE_SPINLOCK(nmi_print_lock); + +void notrace __kprobes die_nmi(char *str, struct pt_regs *regs, int do_panic) +{ + if (notify_die(DIE_NMIWATCHDOG, str, regs, 0, 2, SIGINT) == NOTIFY_STOP) + return; + + spin_lock(&nmi_print_lock); + /* + * We are in trouble anyway, lets at least try + * to get a message out: + */ + bust_spinlocks(1); + printk(KERN_EMERG "%s", str); + printk(" on CPU%d, ip %08lx, registers:\n", + smp_processor_id(), regs->ip); + show_registers(regs); + if (do_panic) + panic("Non maskable interrupt"); + console_silent(); + spin_unlock(&nmi_print_lock); + bust_spinlocks(0); + + /* + * If we are in kernel we are probably nested up pretty bad + * and might aswell get out now while we still can: + */ + if (!user_mode_vm(regs)) { + current->thread.trap_no = 2; + crash_kexec(regs); + } + + do_exit(SIGSEGV); +} +#endif + +static notrace __kprobes void default_do_nmi(struct pt_regs *regs) +{ + unsigned char reason = 0; + int cpu; + + cpu = smp_processor_id(); + + /* Only the BSP gets external NMIs from the system. */ + if (!cpu) + reason = get_nmi_reason(); + + if (!(reason & 0xc0)) { + if (notify_die(DIE_NMI_IPI, "nmi_ipi", regs, reason, 2, SIGINT) + == NOTIFY_STOP) + return; +#ifdef CONFIG_X86_LOCAL_APIC + /* + * Ok, so this is none of the documented NMI sources, + * so it must be the NMI watchdog. + */ + if (nmi_watchdog_tick(regs, reason)) + return; + if (!do_nmi_callback(regs, cpu)) + unknown_nmi_error(reason, regs); +#else + unknown_nmi_error(reason, regs); +#endif + + return; + } + if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT) == NOTIFY_STOP) + return; + + /* AK: following checks seem to be broken on modern chipsets. FIXME */ + if (reason & 0x80) + mem_parity_error(reason, regs); + if (reason & 0x40) + io_check_error(reason, regs); +#ifdef CONFIG_X86_32 + /* + * Reassert NMI in case it became active meanwhile + * as it's edge-triggered: + */ + reassert_nmi(); +#endif +} + +dotraplinkage notrace __kprobes void +do_nmi(struct pt_regs *regs, long error_code) +{ + nmi_enter(); + +#ifdef CONFIG_X86_32 + { int cpu; cpu = smp_processor_id(); ++nmi_count(cpu); } +#else + add_pda(__nmi_count, 1); +#endif + + if (!ignore_nmis) + default_do_nmi(regs); + + nmi_exit(); +} + +void stop_nmi(void) +{ + acpi_nmi_disable(); + ignore_nmis++; +} + +void restart_nmi(void) +{ + ignore_nmis--; + acpi_nmi_enable(); +} + +/* May run on IST stack. */ +dotraplinkage void __kprobes do_int3(struct pt_regs *regs, long error_code) +{ +#ifdef CONFIG_KPROBES + if (notify_die(DIE_INT3, "int3", regs, error_code, 3, SIGTRAP) + == NOTIFY_STOP) + return; +#else + if (notify_die(DIE_TRAP, "int3", regs, error_code, 3, SIGTRAP) + == NOTIFY_STOP) + return; +#endif + + preempt_conditional_sti(regs); + do_trap(3, SIGTRAP, "int3", regs, error_code, NULL); + preempt_conditional_cli(regs); +} + +#ifdef CONFIG_X86_64 +/* Help handler running on IST stack to switch back to user stack + for scheduling or signal handling. The actual stack switch is done in + entry.S */ +asmlinkage __kprobes struct pt_regs *sync_regs(struct pt_regs *eregs) +{ + struct pt_regs *regs = eregs; + /* Did already sync */ + if (eregs == (struct pt_regs *)eregs->sp) + ; + /* Exception from user space */ + else if (user_mode(eregs)) + regs = task_pt_regs(current); + /* Exception from kernel and interrupts are enabled. Move to + kernel process stack. */ + else if (eregs->flags & X86_EFLAGS_IF) + regs = (struct pt_regs *)(eregs->sp -= sizeof(struct pt_regs)); + if (eregs != regs) + *regs = *eregs; + return regs; +} +#endif + +/* + * Our handling of the processor debug registers is non-trivial. + * We do not clear them on entry and exit from the kernel. Therefore + * it is possible to get a watchpoint trap here from inside the kernel. + * However, the code in ./ptrace.c has ensured that the user can + * only set watchpoints on userspace addresses. Therefore the in-kernel + * watchpoint trap can only occur in code which is reading/writing + * from user space. Such code must not hold kernel locks (since it + * can equally take a page fault), therefore it is safe to call + * force_sig_info even though that claims and releases locks. + * + * Code in ./signal.c ensures that the debug control register + * is restored before we deliver any signal, and therefore that + * user code runs with the correct debug control register even though + * we clear it here. + * + * Being careful here means that we don't have to be as careful in a + * lot of more complicated places (task switching can be a bit lazy + * about restoring all the debug state, and ptrace doesn't have to + * find every occurrence of the TF bit that could be saved away even + * by user code) + * + * May run on IST stack. + */ +dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code) +{ + struct task_struct *tsk = current; + unsigned long condition; + int si_code; + + get_debugreg(condition, 6); + + /* + * The processor cleared BTF, so don't mark that we need it set. + */ + clear_tsk_thread_flag(tsk, TIF_DEBUGCTLMSR); + tsk->thread.debugctlmsr = 0; + + if (notify_die(DIE_DEBUG, "debug", regs, condition, error_code, + SIGTRAP) == NOTIFY_STOP) + return; + + /* It's safe to allow irq's after DR6 has been saved */ + preempt_conditional_sti(regs); + + /* Mask out spurious debug traps due to lazy DR7 setting */ + if (condition & (DR_TRAP0|DR_TRAP1|DR_TRAP2|DR_TRAP3)) { + if (!tsk->thread.debugreg7) + goto clear_dr7; + } + +#ifdef CONFIG_X86_32 + if (regs->flags & X86_VM_MASK) + goto debug_vm86; +#endif + + /* Save debug status register where ptrace can see it */ + tsk->thread.debugreg6 = condition; + + /* + * Single-stepping through TF: make sure we ignore any events in + * kernel space (but re-enable TF when returning to user mode). + */ + if (condition & DR_STEP) { + if (!user_mode(regs)) + goto clear_TF_reenable; + } + + si_code = get_si_code(condition); + /* Ok, finally something we can handle */ + send_sigtrap(tsk, regs, error_code, si_code); + + /* + * Disable additional traps. They'll be re-enabled when + * the signal is delivered. + */ +clear_dr7: + set_debugreg(0, 7); + preempt_conditional_cli(regs); + return; + +#ifdef CONFIG_X86_32 +debug_vm86: + handle_vm86_trap((struct kernel_vm86_regs *) regs, error_code, 1); + preempt_conditional_cli(regs); + return; +#endif + +clear_TF_reenable: + set_tsk_thread_flag(tsk, TIF_SINGLESTEP); + regs->flags &= ~X86_EFLAGS_TF; + preempt_conditional_cli(regs); + return; +} + +#ifdef CONFIG_X86_64 +static int kernel_math_error(struct pt_regs *regs, const char *str, int trapnr) +{ + if (fixup_exception(regs)) + return 1; + + notify_die(DIE_GPF, str, regs, 0, trapnr, SIGFPE); + /* Illegal floating point operation in the kernel */ + current->thread.trap_no = trapnr; + die(str, regs, 0); + return 0; +} +#endif + +/* + * Note that we play around with the 'TS' bit in an attempt to get + * the correct behaviour even in the presence of the asynchronous + * IRQ13 behaviour + */ +void math_error(void __user *ip) +{ + struct task_struct *task; + siginfo_t info; + unsigned short cwd, swd; + + /* + * Save the info for the exception handler and clear the error. + */ + task = current; + save_init_fpu(task); + task->thread.trap_no = 16; + task->thread.error_code = 0; + info.si_signo = SIGFPE; + info.si_errno = 0; + info.si_code = __SI_FAULT; + info.si_addr = ip; + /* + * (~cwd & swd) will mask out exceptions that are not set to unmasked + * status. 0x3f is the exception bits in these regs, 0x200 is the + * C1 reg you need in case of a stack fault, 0x040 is the stack + * fault bit. We should only be taking one exception at a time, + * so if this combination doesn't produce any single exception, + * then we have a bad program that isn't synchronizing its FPU usage + * and it will suffer the consequences since we won't be able to + * fully reproduce the context of the exception + */ + cwd = get_fpu_cwd(task); + swd = get_fpu_swd(task); + switch (swd & ~cwd & 0x3f) { + case 0x000: /* No unmasked exception */ +#ifdef CONFIG_X86_32 + return; +#endif + default: /* Multiple exceptions */ + break; + case 0x001: /* Invalid Op */ + /* + * swd & 0x240 == 0x040: Stack Underflow + * swd & 0x240 == 0x240: Stack Overflow + * User must clear the SF bit (0x40) if set + */ + info.si_code = FPE_FLTINV; + break; + case 0x002: /* Denormalize */ + case 0x010: /* Underflow */ + info.si_code = FPE_FLTUND; + break; + case 0x004: /* Zero Divide */ + info.si_code = FPE_FLTDIV; + break; + case 0x008: /* Overflow */ + info.si_code = FPE_FLTOVF; + break; + case 0x020: /* Precision */ + info.si_code = FPE_FLTRES; + break; + } + force_sig_info(SIGFPE, &info, task); +} + +dotraplinkage void do_coprocessor_error(struct pt_regs *regs, long error_code) +{ + conditional_sti(regs); + +#ifdef CONFIG_X86_32 + ignore_fpu_irq = 1; +#else + if (!user_mode(regs) && + kernel_math_error(regs, "kernel x87 math error", 16)) + return; +#endif + + math_error((void __user *)regs->ip); +} + +static void simd_math_error(void __user *ip) +{ + struct task_struct *task; + siginfo_t info; + unsigned short mxcsr; + + /* + * Save the info for the exception handler and clear the error. + */ + task = current; + save_init_fpu(task); + task->thread.trap_no = 19; + task->thread.error_code = 0; + info.si_signo = SIGFPE; + info.si_errno = 0; + info.si_code = __SI_FAULT; + info.si_addr = ip; + /* + * The SIMD FPU exceptions are handled a little differently, as there + * is only a single status/control register. Thus, to determine which + * unmasked exception was caught we must mask the exception mask bits + * at 0x1f80, and then use these to mask the exception bits at 0x3f. + */ + mxcsr = get_fpu_mxcsr(task); + switch (~((mxcsr & 0x1f80) >> 7) & (mxcsr & 0x3f)) { + case 0x000: + default: + break; + case 0x001: /* Invalid Op */ + info.si_code = FPE_FLTINV; + break; + case 0x002: /* Denormalize */ + case 0x010: /* Underflow */ + info.si_code = FPE_FLTUND; + break; + case 0x004: /* Zero Divide */ + info.si_code = FPE_FLTDIV; + break; + case 0x008: /* Overflow */ + info.si_code = FPE_FLTOVF; + break; + case 0x020: /* Precision */ + info.si_code = FPE_FLTRES; + break; + } + force_sig_info(SIGFPE, &info, task); +} + +dotraplinkage void +do_simd_coprocessor_error(struct pt_regs *regs, long error_code) +{ + conditional_sti(regs); + +#ifdef CONFIG_X86_32 + if (cpu_has_xmm) { + /* Handle SIMD FPU exceptions on PIII+ processors. */ + ignore_fpu_irq = 1; + simd_math_error((void __user *)regs->ip); + return; + } + /* + * Handle strange cache flush from user space exception + * in all other cases. This is undocumented behaviour. + */ + if (regs->flags & X86_VM_MASK) { + handle_vm86_fault((struct kernel_vm86_regs *)regs, error_code); + return; + } + current->thread.trap_no = 19; + current->thread.error_code = error_code; + die_if_kernel("cache flush denied", regs, error_code); + force_sig(SIGSEGV, current); +#else + if (!user_mode(regs) && + kernel_math_error(regs, "kernel simd math error", 19)) + return; + simd_math_error((void __user *)regs->ip); +#endif +} + +dotraplinkage void +do_spurious_interrupt_bug(struct pt_regs *regs, long error_code) +{ + conditional_sti(regs); +#if 0 + /* No need to warn about this any longer. */ + printk(KERN_INFO "Ignoring P6 Local APIC Spurious Interrupt Bug...\n"); +#endif +} + +#ifdef CONFIG_X86_32 +unsigned long patch_espfix_desc(unsigned long uesp, unsigned long kesp) +{ + struct desc_struct *gdt = get_cpu_gdt_table(smp_processor_id()); + unsigned long base = (kesp - uesp) & -THREAD_SIZE; + unsigned long new_kesp = kesp - base; + unsigned long lim_pages = (new_kesp | (THREAD_SIZE - 1)) >> PAGE_SHIFT; + __u64 desc = *(__u64 *)&gdt[GDT_ENTRY_ESPFIX_SS]; + + /* Set up base for espfix segment */ + desc &= 0x00f0ff0000000000ULL; + desc |= ((((__u64)base) << 16) & 0x000000ffffff0000ULL) | + ((((__u64)base) << 32) & 0xff00000000000000ULL) | + ((((__u64)lim_pages) << 32) & 0x000f000000000000ULL) | + (lim_pages & 0xffff); + *(__u64 *)&gdt[GDT_ENTRY_ESPFIX_SS] = desc; + + return new_kesp; +} +#else +asmlinkage void __attribute__((weak)) smp_thermal_interrupt(void) +{ +} + +asmlinkage void __attribute__((weak)) mce_threshold_interrupt(void) +{ +} +#endif + +/* + * 'math_state_restore()' saves the current math information in the + * old math state array, and gets the new ones from the current task + * + * Careful.. There are problems with IBM-designed IRQ13 behaviour. + * Don't touch unless you *really* know how it works. + * + * Must be called with kernel preemption disabled (in this case, + * local interrupts are disabled at the call-site in entry.S). + */ +asmlinkage void math_state_restore(void) +{ + struct thread_info *thread = current_thread_info(); + struct task_struct *tsk = thread->task; + + if (!tsk_used_math(tsk)) { + local_irq_enable(); + /* + * does a slab alloc which can sleep + */ + if (init_fpu(tsk)) { + /* + * ran out of memory! + */ + do_group_exit(SIGKILL); + return; + } + local_irq_disable(); + } + + clts(); /* Allow maths ops (or we recurse) */ +#ifdef CONFIG_X86_32 + restore_fpu(tsk); +#else + /* + * Paranoid restore. send a SIGSEGV if we fail to restore the state. + */ + if (unlikely(restore_fpu_checking(tsk))) { + stts(); + force_sig(SIGSEGV, tsk); + return; + } +#endif + thread->status |= TS_USEDFPU; /* So we fnsave on switch_to() */ + tsk->fpu_counter++; +} +EXPORT_SYMBOL_GPL(math_state_restore); + +#ifndef CONFIG_MATH_EMULATION +asmlinkage void math_emulate(long arg) +{ + printk(KERN_EMERG + "math-emulation not enabled and no coprocessor found.\n"); + printk(KERN_EMERG "killing %s.\n", current->comm); + force_sig(SIGFPE, current); + schedule(); +} +#endif /* CONFIG_MATH_EMULATION */ + +dotraplinkage void __kprobes +do_device_not_available(struct pt_regs *regs, long error) +{ +#ifdef CONFIG_X86_32 + if (read_cr0() & X86_CR0_EM) { + conditional_sti(regs); + math_emulate(0); + } else { + math_state_restore(); /* interrupts still off */ + conditional_sti(regs); + } +#else + math_state_restore(); +#endif +} + +#ifdef CONFIG_X86_32 +#ifdef CONFIG_X86_MCE +dotraplinkage void __kprobes do_machine_check(struct pt_regs *regs, long error) +{ + conditional_sti(regs); + machine_check_vector(regs, error); +} +#endif + +dotraplinkage void do_iret_error(struct pt_regs *regs, long error_code) +{ + siginfo_t info; + local_irq_enable(); + + info.si_signo = SIGILL; + info.si_errno = 0; + info.si_code = ILL_BADSTK; + info.si_addr = 0; + if (notify_die(DIE_TRAP, "iret exception", + regs, error_code, 32, SIGILL) == NOTIFY_STOP) + return; + do_trap(32, SIGILL, "iret exception", regs, error_code, &info); +} +#endif + +void __init trap_init(void) +{ +#ifdef CONFIG_X86_32 + int i; +#endif + +#ifdef CONFIG_EISA + void __iomem *p = early_ioremap(0x0FFFD9, 4); + + if (readl(p) == 'E' + ('I'<<8) + ('S'<<16) + ('A'<<24)) + EISA_bus = 1; + early_iounmap(p, 4); +#endif + + set_intr_gate(0, ÷_error); + set_intr_gate_ist(1, &debug, DEBUG_STACK); + set_intr_gate_ist(2, &nmi, NMI_STACK); + /* int3 can be called from all */ + set_system_intr_gate_ist(3, &int3, DEBUG_STACK); + /* int4 can be called from all */ + set_system_intr_gate(4, &overflow); + set_intr_gate(5, &bounds); + set_intr_gate(6, &invalid_op); + set_intr_gate(7, &device_not_available); +#ifdef CONFIG_X86_32 + set_task_gate(8, GDT_ENTRY_DOUBLEFAULT_TSS); +#else + set_intr_gate_ist(8, &double_fault, DOUBLEFAULT_STACK); +#endif + set_intr_gate(9, &coprocessor_segment_overrun); + set_intr_gate(10, &invalid_TSS); + set_intr_gate(11, &segment_not_present); + set_intr_gate_ist(12, &stack_segment, STACKFAULT_STACK); + set_intr_gate(13, &general_protection); + set_intr_gate(14, &page_fault); + set_intr_gate(15, &spurious_interrupt_bug); + set_intr_gate(16, &coprocessor_error); + set_intr_gate(17, &alignment_check); +#ifdef CONFIG_X86_MCE + set_intr_gate_ist(18, &machine_check, MCE_STACK); +#endif + set_intr_gate(19, &simd_coprocessor_error); + +#ifdef CONFIG_IA32_EMULATION + set_system_intr_gate(IA32_SYSCALL_VECTOR, ia32_syscall); +#endif + +#ifdef CONFIG_X86_32 + if (cpu_has_fxsr) { + printk(KERN_INFO "Enabling fast FPU save and restore... "); + set_in_cr4(X86_CR4_OSFXSR); + printk("done.\n"); + } + if (cpu_has_xmm) { + printk(KERN_INFO + "Enabling unmasked SIMD FPU exception support... "); + set_in_cr4(X86_CR4_OSXMMEXCPT); + printk("done.\n"); + } + + set_system_trap_gate(SYSCALL_VECTOR, &system_call); + + /* Reserve all the builtin and the syscall vector: */ + for (i = 0; i < FIRST_EXTERNAL_VECTOR; i++) + set_bit(i, used_vectors); + + set_bit(SYSCALL_VECTOR, used_vectors); +#endif + /* + * Should be a barrier for any external CPU state: + */ + cpu_init(); + +#ifdef CONFIG_X86_32 + trap_init_hook(); +#endif +} diff --git a/arch/x86/kernel/traps_32.c b/arch/x86/kernel/traps_32.c deleted file mode 100644 index ffb131f74f7..00000000000 --- a/arch/x86/kernel/traps_32.c +++ /dev/null @@ -1,1071 +0,0 @@ -/* - * Copyright (C) 1991, 1992 Linus Torvalds - * Copyright (C) 2000, 2001, 2002 Andi Kleen, SuSE Labs - * - * Pentium III FXSR, SSE support - * Gareth Hughes , May 2000 - */ - -/* - * Handle hardware traps and faults. - */ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#ifdef CONFIG_EISA -#include -#include -#endif - -#ifdef CONFIG_MCA -#include -#endif - -#if defined(CONFIG_EDAC) -#include -#endif - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include - -#ifdef CONFIG_X86_64 -#include -#include -#include -#else -#include -#include -#include -#include -#include -#include - -#include "cpu/mcheck/mce.h" - -DECLARE_BITMAP(used_vectors, NR_VECTORS); -EXPORT_SYMBOL_GPL(used_vectors); - -asmlinkage int system_call(void); - -/* Do we ignore FPU interrupts ? */ -char ignore_fpu_irq; - -/* - * The IDT has to be page-aligned to simplify the Pentium - * F0 0F bug workaround.. We have a special link segment - * for this. - */ -gate_desc idt_table[256] - __attribute__((__section__(".data.idt"))) = { { { { 0, 0 } } }, }; -#endif - -static int ignore_nmis; - -static inline void conditional_sti(struct pt_regs *regs) -{ - if (regs->flags & X86_EFLAGS_IF) - local_irq_enable(); -} - -static inline void preempt_conditional_sti(struct pt_regs *regs) -{ - inc_preempt_count(); - if (regs->flags & X86_EFLAGS_IF) - local_irq_enable(); -} - -static inline void preempt_conditional_cli(struct pt_regs *regs) -{ - if (regs->flags & X86_EFLAGS_IF) - local_irq_disable(); - dec_preempt_count(); -} - -#ifdef CONFIG_X86_32 -static inline void -die_if_kernel(const char *str, struct pt_regs *regs, long err) -{ - if (!user_mode_vm(regs)) - die(str, regs, err); -} - -/* - * Perform the lazy TSS's I/O bitmap copy. If the TSS has an - * invalid offset set (the LAZY one) and the faulting thread has - * a valid I/O bitmap pointer, we copy the I/O bitmap in the TSS, - * we set the offset field correctly and return 1. - */ -static int lazy_iobitmap_copy(void) -{ - struct thread_struct *thread; - struct tss_struct *tss; - int cpu; - - cpu = get_cpu(); - tss = &per_cpu(init_tss, cpu); - thread = ¤t->thread; - - if (tss->x86_tss.io_bitmap_base == INVALID_IO_BITMAP_OFFSET_LAZY && - thread->io_bitmap_ptr) { - memcpy(tss->io_bitmap, thread->io_bitmap_ptr, - thread->io_bitmap_max); - /* - * If the previously set map was extending to higher ports - * than the current one, pad extra space with 0xff (no access). - */ - if (thread->io_bitmap_max < tss->io_bitmap_max) { - memset((char *) tss->io_bitmap + - thread->io_bitmap_max, 0xff, - tss->io_bitmap_max - thread->io_bitmap_max); - } - tss->io_bitmap_max = thread->io_bitmap_max; - tss->x86_tss.io_bitmap_base = IO_BITMAP_OFFSET; - tss->io_bitmap_owner = thread; - put_cpu(); - - return 1; - } - put_cpu(); - - return 0; -} -#endif - -static void __kprobes -do_trap(int trapnr, int signr, char *str, struct pt_regs *regs, - long error_code, siginfo_t *info) -{ - struct task_struct *tsk = current; - -#ifdef CONFIG_X86_32 - if (regs->flags & X86_VM_MASK) { - /* - * traps 0, 1, 3, 4, and 5 should be forwarded to vm86. - * On nmi (interrupt 2), do_trap should not be called. - */ - if (trapnr < 6) - goto vm86_trap; - goto trap_signal; - } -#endif - - if (!user_mode(regs)) - goto kernel_trap; - -#ifdef CONFIG_X86_32 -trap_signal: -#endif - /* - * We want error_code and trap_no set for userspace faults and - * kernelspace faults which result in die(), but not - * kernelspace faults which are fixed up. die() gives the - * process no chance to handle the signal and notice the - * kernel fault information, so that won't result in polluting - * the information about previously queued, but not yet - * delivered, faults. See also do_general_protection below. - */ - tsk->thread.error_code = error_code; - tsk->thread.trap_no = trapnr; - -#ifdef CONFIG_X86_64 - if (show_unhandled_signals && unhandled_signal(tsk, signr) && - printk_ratelimit()) { - printk(KERN_INFO - "%s[%d] trap %s ip:%lx sp:%lx error:%lx", - tsk->comm, tsk->pid, str, - regs->ip, regs->sp, error_code); - print_vma_addr(" in ", regs->ip); - printk("\n"); - } -#endif - - if (info) - force_sig_info(signr, info, tsk); - else - force_sig(signr, tsk); - return; - -kernel_trap: - if (!fixup_exception(regs)) { - tsk->thread.error_code = error_code; - tsk->thread.trap_no = trapnr; - die(str, regs, error_code); - } - return; - -#ifdef CONFIG_X86_32 -vm86_trap: - if (handle_vm86_trap((struct kernel_vm86_regs *) regs, - error_code, trapnr)) - goto trap_signal; - return; -#endif -} - -#define DO_ERROR(trapnr, signr, str, name) \ -dotraplinkage void do_##name(struct pt_regs *regs, long error_code) \ -{ \ - if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \ - == NOTIFY_STOP) \ - return; \ - conditional_sti(regs); \ - do_trap(trapnr, signr, str, regs, error_code, NULL); \ -} - -#define DO_ERROR_INFO(trapnr, signr, str, name, sicode, siaddr) \ -dotraplinkage void do_##name(struct pt_regs *regs, long error_code) \ -{ \ - siginfo_t info; \ - info.si_signo = signr; \ - info.si_errno = 0; \ - info.si_code = sicode; \ - info.si_addr = (void __user *)siaddr; \ - if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \ - == NOTIFY_STOP) \ - return; \ - conditional_sti(regs); \ - do_trap(trapnr, signr, str, regs, error_code, &info); \ -} - -DO_ERROR_INFO(0, SIGFPE, "divide error", divide_error, FPE_INTDIV, regs->ip) -DO_ERROR(4, SIGSEGV, "overflow", overflow) -DO_ERROR(5, SIGSEGV, "bounds", bounds) -DO_ERROR_INFO(6, SIGILL, "invalid opcode", invalid_op, ILL_ILLOPN, regs->ip) -DO_ERROR(9, SIGFPE, "coprocessor segment overrun", coprocessor_segment_overrun) -DO_ERROR(10, SIGSEGV, "invalid TSS", invalid_TSS) -DO_ERROR(11, SIGBUS, "segment not present", segment_not_present) -#ifdef CONFIG_X86_32 -DO_ERROR(12, SIGBUS, "stack segment", stack_segment) -#endif -DO_ERROR_INFO(17, SIGBUS, "alignment check", alignment_check, BUS_ADRALN, 0) - -#ifdef CONFIG_X86_64 -/* Runs on IST stack */ -dotraplinkage void do_stack_segment(struct pt_regs *regs, long error_code) -{ - if (notify_die(DIE_TRAP, "stack segment", regs, error_code, - 12, SIGBUS) == NOTIFY_STOP) - return; - preempt_conditional_sti(regs); - do_trap(12, SIGBUS, "stack segment", regs, error_code, NULL); - preempt_conditional_cli(regs); -} - -dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code) -{ - static const char str[] = "double fault"; - struct task_struct *tsk = current; - - /* Return not checked because double check cannot be ignored */ - notify_die(DIE_TRAP, str, regs, error_code, 8, SIGSEGV); - - tsk->thread.error_code = error_code; - tsk->thread.trap_no = 8; - - /* This is always a kernel trap and never fixable (and thus must - never return). */ - for (;;) - die(str, regs, error_code); -} -#endif - -dotraplinkage void __kprobes -do_general_protection(struct pt_regs *regs, long error_code) -{ - struct task_struct *tsk; - - conditional_sti(regs); - -#ifdef CONFIG_X86_32 - if (lazy_iobitmap_copy()) { - /* restart the faulting instruction */ - return; - } - - if (regs->flags & X86_VM_MASK) - goto gp_in_vm86; -#endif - - tsk = current; - if (!user_mode(regs)) - goto gp_in_kernel; - - tsk->thread.error_code = error_code; - tsk->thread.trap_no = 13; - - if (show_unhandled_signals && unhandled_signal(tsk, SIGSEGV) && - printk_ratelimit()) { - printk(KERN_INFO - "%s[%d] general protection ip:%lx sp:%lx error:%lx", - tsk->comm, task_pid_nr(tsk), - regs->ip, regs->sp, error_code); - print_vma_addr(" in ", regs->ip); - printk("\n"); - } - - force_sig(SIGSEGV, tsk); - return; - -#ifdef CONFIG_X86_32 -gp_in_vm86: - local_irq_enable(); - handle_vm86_fault((struct kernel_vm86_regs *) regs, error_code); - return; -#endif - -gp_in_kernel: - if (fixup_exception(regs)) - return; - - tsk->thread.error_code = error_code; - tsk->thread.trap_no = 13; - if (notify_die(DIE_GPF, "general protection fault", regs, - error_code, 13, SIGSEGV) == NOTIFY_STOP) - return; - die("general protection fault", regs, error_code); -} - -static notrace __kprobes void -mem_parity_error(unsigned char reason, struct pt_regs *regs) -{ - printk(KERN_EMERG - "Uhhuh. NMI received for unknown reason %02x on CPU %d.\n", - reason, smp_processor_id()); - - printk(KERN_EMERG - "You have some hardware problem, likely on the PCI bus.\n"); - -#if defined(CONFIG_EDAC) - if (edac_handler_set()) { - edac_atomic_assert_error(); - return; - } -#endif - - if (panic_on_unrecovered_nmi) - panic("NMI: Not continuing"); - - printk(KERN_EMERG "Dazed and confused, but trying to continue\n"); - - /* Clear and disable the memory parity error line. */ - reason = (reason & 0xf) | 4; - outb(reason, 0x61); -} - -static notrace __kprobes void -io_check_error(unsigned char reason, struct pt_regs *regs) -{ - unsigned long i; - - printk(KERN_EMERG "NMI: IOCK error (debug interrupt?)\n"); - show_registers(regs); - - /* Re-enable the IOCK line, wait for a few seconds */ - reason = (reason & 0xf) | 8; - outb(reason, 0x61); - - i = 2000; - while (--i) - udelay(1000); - - reason &= ~8; - outb(reason, 0x61); -} - -static notrace __kprobes void -unknown_nmi_error(unsigned char reason, struct pt_regs *regs) -{ - if (notify_die(DIE_NMIUNKNOWN, "nmi", regs, reason, 2, SIGINT) == - NOTIFY_STOP) - return; -#ifdef CONFIG_MCA - /* - * Might actually be able to figure out what the guilty party - * is: - */ - if (MCA_bus) { - mca_handle_nmi(); - return; - } -#endif - printk(KERN_EMERG - "Uhhuh. NMI received for unknown reason %02x on CPU %d.\n", - reason, smp_processor_id()); - - printk(KERN_EMERG "Do you have a strange power saving mode enabled?\n"); - if (panic_on_unrecovered_nmi) - panic("NMI: Not continuing"); - - printk(KERN_EMERG "Dazed and confused, but trying to continue\n"); -} - -#ifdef CONFIG_X86_32 -static DEFINE_SPINLOCK(nmi_print_lock); - -void notrace __kprobes die_nmi(char *str, struct pt_regs *regs, int do_panic) -{ - if (notify_die(DIE_NMIWATCHDOG, str, regs, 0, 2, SIGINT) == NOTIFY_STOP) - return; - - spin_lock(&nmi_print_lock); - /* - * We are in trouble anyway, lets at least try - * to get a message out: - */ - bust_spinlocks(1); - printk(KERN_EMERG "%s", str); - printk(" on CPU%d, ip %08lx, registers:\n", - smp_processor_id(), regs->ip); - show_registers(regs); - if (do_panic) - panic("Non maskable interrupt"); - console_silent(); - spin_unlock(&nmi_print_lock); - bust_spinlocks(0); - - /* - * If we are in kernel we are probably nested up pretty bad - * and might aswell get out now while we still can: - */ - if (!user_mode_vm(regs)) { - current->thread.trap_no = 2; - crash_kexec(regs); - } - - do_exit(SIGSEGV); -} -#endif - -static notrace __kprobes void default_do_nmi(struct pt_regs *regs) -{ - unsigned char reason = 0; - int cpu; - - cpu = smp_processor_id(); - - /* Only the BSP gets external NMIs from the system. */ - if (!cpu) - reason = get_nmi_reason(); - - if (!(reason & 0xc0)) { - if (notify_die(DIE_NMI_IPI, "nmi_ipi", regs, reason, 2, SIGINT) - == NOTIFY_STOP) - return; -#ifdef CONFIG_X86_LOCAL_APIC - /* - * Ok, so this is none of the documented NMI sources, - * so it must be the NMI watchdog. - */ - if (nmi_watchdog_tick(regs, reason)) - return; - if (!do_nmi_callback(regs, cpu)) - unknown_nmi_error(reason, regs); -#else - unknown_nmi_error(reason, regs); -#endif - - return; - } - if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT) == NOTIFY_STOP) - return; - - /* AK: following checks seem to be broken on modern chipsets. FIXME */ - if (reason & 0x80) - mem_parity_error(reason, regs); - if (reason & 0x40) - io_check_error(reason, regs); -#ifdef CONFIG_X86_32 - /* - * Reassert NMI in case it became active meanwhile - * as it's edge-triggered: - */ - reassert_nmi(); -#endif -} - -dotraplinkage notrace __kprobes void -do_nmi(struct pt_regs *regs, long error_code) -{ - nmi_enter(); - -#ifdef CONFIG_X86_32 - { int cpu; cpu = smp_processor_id(); ++nmi_count(cpu); } -#else - add_pda(__nmi_count, 1); -#endif - - if (!ignore_nmis) - default_do_nmi(regs); - - nmi_exit(); -} - -void stop_nmi(void) -{ - acpi_nmi_disable(); - ignore_nmis++; -} - -void restart_nmi(void) -{ - ignore_nmis--; - acpi_nmi_enable(); -} - -/* May run on IST stack. */ -dotraplinkage void __kprobes do_int3(struct pt_regs *regs, long error_code) -{ -#ifdef CONFIG_KPROBES - if (notify_die(DIE_INT3, "int3", regs, error_code, 3, SIGTRAP) - == NOTIFY_STOP) - return; -#else - if (notify_die(DIE_TRAP, "int3", regs, error_code, 3, SIGTRAP) - == NOTIFY_STOP) - return; -#endif - - preempt_conditional_sti(regs); - do_trap(3, SIGTRAP, "int3", regs, error_code, NULL); - preempt_conditional_cli(regs); -} - -#ifdef CONFIG_X86_64 -/* Help handler running on IST stack to switch back to user stack - for scheduling or signal handling. The actual stack switch is done in - entry.S */ -asmlinkage __kprobes struct pt_regs *sync_regs(struct pt_regs *eregs) -{ - struct pt_regs *regs = eregs; - /* Did already sync */ - if (eregs == (struct pt_regs *)eregs->sp) - ; - /* Exception from user space */ - else if (user_mode(eregs)) - regs = task_pt_regs(current); - /* Exception from kernel and interrupts are enabled. Move to - kernel process stack. */ - else if (eregs->flags & X86_EFLAGS_IF) - regs = (struct pt_regs *)(eregs->sp -= sizeof(struct pt_regs)); - if (eregs != regs) - *regs = *eregs; - return regs; -} -#endif - -/* - * Our handling of the processor debug registers is non-trivial. - * We do not clear them on entry and exit from the kernel. Therefore - * it is possible to get a watchpoint trap here from inside the kernel. - * However, the code in ./ptrace.c has ensured that the user can - * only set watchpoints on userspace addresses. Therefore the in-kernel - * watchpoint trap can only occur in code which is reading/writing - * from user space. Such code must not hold kernel locks (since it - * can equally take a page fault), therefore it is safe to call - * force_sig_info even though that claims and releases locks. - * - * Code in ./signal.c ensures that the debug control register - * is restored before we deliver any signal, and therefore that - * user code runs with the correct debug control register even though - * we clear it here. - * - * Being careful here means that we don't have to be as careful in a - * lot of more complicated places (task switching can be a bit lazy - * about restoring all the debug state, and ptrace doesn't have to - * find every occurrence of the TF bit that could be saved away even - * by user code) - * - * May run on IST stack. - */ -dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code) -{ - struct task_struct *tsk = current; - unsigned long condition; - int si_code; - - get_debugreg(condition, 6); - - /* - * The processor cleared BTF, so don't mark that we need it set. - */ - clear_tsk_thread_flag(tsk, TIF_DEBUGCTLMSR); - tsk->thread.debugctlmsr = 0; - - if (notify_die(DIE_DEBUG, "debug", regs, condition, error_code, - SIGTRAP) == NOTIFY_STOP) - return; - - /* It's safe to allow irq's after DR6 has been saved */ - preempt_conditional_sti(regs); - - /* Mask out spurious debug traps due to lazy DR7 setting */ - if (condition & (DR_TRAP0|DR_TRAP1|DR_TRAP2|DR_TRAP3)) { - if (!tsk->thread.debugreg7) - goto clear_dr7; - } - -#ifdef CONFIG_X86_32 - if (regs->flags & X86_VM_MASK) - goto debug_vm86; -#endif - - /* Save debug status register where ptrace can see it */ - tsk->thread.debugreg6 = condition; - - /* - * Single-stepping through TF: make sure we ignore any events in - * kernel space (but re-enable TF when returning to user mode). - */ - if (condition & DR_STEP) { - if (!user_mode(regs)) - goto clear_TF_reenable; - } - - si_code = get_si_code(condition); - /* Ok, finally something we can handle */ - send_sigtrap(tsk, regs, error_code, si_code); - - /* - * Disable additional traps. They'll be re-enabled when - * the signal is delivered. - */ -clear_dr7: - set_debugreg(0, 7); - preempt_conditional_cli(regs); - return; - -#ifdef CONFIG_X86_32 -debug_vm86: - handle_vm86_trap((struct kernel_vm86_regs *) regs, error_code, 1); - preempt_conditional_cli(regs); - return; -#endif - -clear_TF_reenable: - set_tsk_thread_flag(tsk, TIF_SINGLESTEP); - regs->flags &= ~X86_EFLAGS_TF; - preempt_conditional_cli(regs); - return; -} - -#ifdef CONFIG_X86_64 -static int kernel_math_error(struct pt_regs *regs, const char *str, int trapnr) -{ - if (fixup_exception(regs)) - return 1; - - notify_die(DIE_GPF, str, regs, 0, trapnr, SIGFPE); - /* Illegal floating point operation in the kernel */ - current->thread.trap_no = trapnr; - die(str, regs, 0); - return 0; -} -#endif - -/* - * Note that we play around with the 'TS' bit in an attempt to get - * the correct behaviour even in the presence of the asynchronous - * IRQ13 behaviour - */ -void math_error(void __user *ip) -{ - struct task_struct *task; - siginfo_t info; - unsigned short cwd, swd; - - /* - * Save the info for the exception handler and clear the error. - */ - task = current; - save_init_fpu(task); - task->thread.trap_no = 16; - task->thread.error_code = 0; - info.si_signo = SIGFPE; - info.si_errno = 0; - info.si_code = __SI_FAULT; - info.si_addr = ip; - /* - * (~cwd & swd) will mask out exceptions that are not set to unmasked - * status. 0x3f is the exception bits in these regs, 0x200 is the - * C1 reg you need in case of a stack fault, 0x040 is the stack - * fault bit. We should only be taking one exception at a time, - * so if this combination doesn't produce any single exception, - * then we have a bad program that isn't synchronizing its FPU usage - * and it will suffer the consequences since we won't be able to - * fully reproduce the context of the exception - */ - cwd = get_fpu_cwd(task); - swd = get_fpu_swd(task); - switch (swd & ~cwd & 0x3f) { - case 0x000: /* No unmasked exception */ -#ifdef CONFIG_X86_32 - return; -#endif - default: /* Multiple exceptions */ - break; - case 0x001: /* Invalid Op */ - /* - * swd & 0x240 == 0x040: Stack Underflow - * swd & 0x240 == 0x240: Stack Overflow - * User must clear the SF bit (0x40) if set - */ - info.si_code = FPE_FLTINV; - break; - case 0x002: /* Denormalize */ - case 0x010: /* Underflow */ - info.si_code = FPE_FLTUND; - break; - case 0x004: /* Zero Divide */ - info.si_code = FPE_FLTDIV; - break; - case 0x008: /* Overflow */ - info.si_code = FPE_FLTOVF; - break; - case 0x020: /* Precision */ - info.si_code = FPE_FLTRES; - break; - } - force_sig_info(SIGFPE, &info, task); -} - -dotraplinkage void do_coprocessor_error(struct pt_regs *regs, long error_code) -{ - conditional_sti(regs); - -#ifdef CONFIG_X86_32 - ignore_fpu_irq = 1; -#else - if (!user_mode(regs) && - kernel_math_error(regs, "kernel x87 math error", 16)) - return; -#endif - - math_error((void __user *)regs->ip); -} - -static void simd_math_error(void __user *ip) -{ - struct task_struct *task; - siginfo_t info; - unsigned short mxcsr; - - /* - * Save the info for the exception handler and clear the error. - */ - task = current; - save_init_fpu(task); - task->thread.trap_no = 19; - task->thread.error_code = 0; - info.si_signo = SIGFPE; - info.si_errno = 0; - info.si_code = __SI_FAULT; - info.si_addr = ip; - /* - * The SIMD FPU exceptions are handled a little differently, as there - * is only a single status/control register. Thus, to determine which - * unmasked exception was caught we must mask the exception mask bits - * at 0x1f80, and then use these to mask the exception bits at 0x3f. - */ - mxcsr = get_fpu_mxcsr(task); - switch (~((mxcsr & 0x1f80) >> 7) & (mxcsr & 0x3f)) { - case 0x000: - default: - break; - case 0x001: /* Invalid Op */ - info.si_code = FPE_FLTINV; - break; - case 0x002: /* Denormalize */ - case 0x010: /* Underflow */ - info.si_code = FPE_FLTUND; - break; - case 0x004: /* Zero Divide */ - info.si_code = FPE_FLTDIV; - break; - case 0x008: /* Overflow */ - info.si_code = FPE_FLTOVF; - break; - case 0x020: /* Precision */ - info.si_code = FPE_FLTRES; - break; - } - force_sig_info(SIGFPE, &info, task); -} - -dotraplinkage void -do_simd_coprocessor_error(struct pt_regs *regs, long error_code) -{ - conditional_sti(regs); - -#ifdef CONFIG_X86_32 - if (cpu_has_xmm) { - /* Handle SIMD FPU exceptions on PIII+ processors. */ - ignore_fpu_irq = 1; - simd_math_error((void __user *)regs->ip); - return; - } - /* - * Handle strange cache flush from user space exception - * in all other cases. This is undocumented behaviour. - */ - if (regs->flags & X86_VM_MASK) { - handle_vm86_fault((struct kernel_vm86_regs *)regs, error_code); - return; - } - current->thread.trap_no = 19; - current->thread.error_code = error_code; - die_if_kernel("cache flush denied", regs, error_code); - force_sig(SIGSEGV, current); -#else - if (!user_mode(regs) && - kernel_math_error(regs, "kernel simd math error", 19)) - return; - simd_math_error((void __user *)regs->ip); -#endif -} - -dotraplinkage void -do_spurious_interrupt_bug(struct pt_regs *regs, long error_code) -{ - conditional_sti(regs); -#if 0 - /* No need to warn about this any longer. */ - printk(KERN_INFO "Ignoring P6 Local APIC Spurious Interrupt Bug...\n"); -#endif -} - -#ifdef CONFIG_X86_32 -unsigned long patch_espfix_desc(unsigned long uesp, unsigned long kesp) -{ - struct desc_struct *gdt = get_cpu_gdt_table(smp_processor_id()); - unsigned long base = (kesp - uesp) & -THREAD_SIZE; - unsigned long new_kesp = kesp - base; - unsigned long lim_pages = (new_kesp | (THREAD_SIZE - 1)) >> PAGE_SHIFT; - __u64 desc = *(__u64 *)&gdt[GDT_ENTRY_ESPFIX_SS]; - - /* Set up base for espfix segment */ - desc &= 0x00f0ff0000000000ULL; - desc |= ((((__u64)base) << 16) & 0x000000ffffff0000ULL) | - ((((__u64)base) << 32) & 0xff00000000000000ULL) | - ((((__u64)lim_pages) << 32) & 0x000f000000000000ULL) | - (lim_pages & 0xffff); - *(__u64 *)&gdt[GDT_ENTRY_ESPFIX_SS] = desc; - - return new_kesp; -} -#else -asmlinkage void __attribute__((weak)) smp_thermal_interrupt(void) -{ -} - -asmlinkage void __attribute__((weak)) mce_threshold_interrupt(void) -{ -} -#endif - -/* - * 'math_state_restore()' saves the current math information in the - * old math state array, and gets the new ones from the current task - * - * Careful.. There are problems with IBM-designed IRQ13 behaviour. - * Don't touch unless you *really* know how it works. - * - * Must be called with kernel preemption disabled (in this case, - * local interrupts are disabled at the call-site in entry.S). - */ -asmlinkage void math_state_restore(void) -{ - struct thread_info *thread = current_thread_info(); - struct task_struct *tsk = thread->task; - - if (!tsk_used_math(tsk)) { - local_irq_enable(); - /* - * does a slab alloc which can sleep - */ - if (init_fpu(tsk)) { - /* - * ran out of memory! - */ - do_group_exit(SIGKILL); - return; - } - local_irq_disable(); - } - - clts(); /* Allow maths ops (or we recurse) */ -#ifdef CONFIG_X86_32 - restore_fpu(tsk); -#else - /* - * Paranoid restore. send a SIGSEGV if we fail to restore the state. - */ - if (unlikely(restore_fpu_checking(tsk))) { - stts(); - force_sig(SIGSEGV, tsk); - return; - } -#endif - thread->status |= TS_USEDFPU; /* So we fnsave on switch_to() */ - tsk->fpu_counter++; -} -EXPORT_SYMBOL_GPL(math_state_restore); - -#ifndef CONFIG_MATH_EMULATION -asmlinkage void math_emulate(long arg) -{ - printk(KERN_EMERG - "math-emulation not enabled and no coprocessor found.\n"); - printk(KERN_EMERG "killing %s.\n", current->comm); - force_sig(SIGFPE, current); - schedule(); -} -#endif /* CONFIG_MATH_EMULATION */ - -dotraplinkage void __kprobes -do_device_not_available(struct pt_regs *regs, long error) -{ -#ifdef CONFIG_X86_32 - if (read_cr0() & X86_CR0_EM) { - conditional_sti(regs); - math_emulate(0); - } else { - math_state_restore(); /* interrupts still off */ - conditional_sti(regs); - } -#else - math_state_restore(); -#endif -} - -#ifdef CONFIG_X86_32 -#ifdef CONFIG_X86_MCE -dotraplinkage void __kprobes do_machine_check(struct pt_regs *regs, long error) -{ - conditional_sti(regs); - machine_check_vector(regs, error); -} -#endif - -dotraplinkage void do_iret_error(struct pt_regs *regs, long error_code) -{ - siginfo_t info; - local_irq_enable(); - - info.si_signo = SIGILL; - info.si_errno = 0; - info.si_code = ILL_BADSTK; - info.si_addr = 0; - if (notify_die(DIE_TRAP, "iret exception", - regs, error_code, 32, SIGILL) == NOTIFY_STOP) - return; - do_trap(32, SIGILL, "iret exception", regs, error_code, &info); -} -#endif - -void __init trap_init(void) -{ -#ifdef CONFIG_X86_32 - int i; -#endif - -#ifdef CONFIG_EISA - void __iomem *p = early_ioremap(0x0FFFD9, 4); - - if (readl(p) == 'E' + ('I'<<8) + ('S'<<16) + ('A'<<24)) - EISA_bus = 1; - early_iounmap(p, 4); -#endif - - set_intr_gate(0, ÷_error); - set_intr_gate_ist(1, &debug, DEBUG_STACK); - set_intr_gate_ist(2, &nmi, NMI_STACK); - /* int3 can be called from all */ - set_system_intr_gate_ist(3, &int3, DEBUG_STACK); - /* int4 can be called from all */ - set_system_intr_gate(4, &overflow); - set_intr_gate(5, &bounds); - set_intr_gate(6, &invalid_op); - set_intr_gate(7, &device_not_available); -#ifdef CONFIG_X86_32 - set_task_gate(8, GDT_ENTRY_DOUBLEFAULT_TSS); -#else - set_intr_gate_ist(8, &double_fault, DOUBLEFAULT_STACK); -#endif - set_intr_gate(9, &coprocessor_segment_overrun); - set_intr_gate(10, &invalid_TSS); - set_intr_gate(11, &segment_not_present); - set_intr_gate_ist(12, &stack_segment, STACKFAULT_STACK); - set_intr_gate(13, &general_protection); - set_intr_gate(14, &page_fault); - set_intr_gate(15, &spurious_interrupt_bug); - set_intr_gate(16, &coprocessor_error); - set_intr_gate(17, &alignment_check); -#ifdef CONFIG_X86_MCE - set_intr_gate_ist(18, &machine_check, MCE_STACK); -#endif - set_intr_gate(19, &simd_coprocessor_error); - -#ifdef CONFIG_IA32_EMULATION - set_system_intr_gate(IA32_SYSCALL_VECTOR, ia32_syscall); -#endif - -#ifdef CONFIG_X86_32 - if (cpu_has_fxsr) { - printk(KERN_INFO "Enabling fast FPU save and restore... "); - set_in_cr4(X86_CR4_OSFXSR); - printk("done.\n"); - } - if (cpu_has_xmm) { - printk(KERN_INFO - "Enabling unmasked SIMD FPU exception support... "); - set_in_cr4(X86_CR4_OSXMMEXCPT); - printk("done.\n"); - } - - set_system_trap_gate(SYSCALL_VECTOR, &system_call); - - /* Reserve all the builtin and the syscall vector: */ - for (i = 0; i < FIRST_EXTERNAL_VECTOR; i++) - set_bit(i, used_vectors); - - set_bit(SYSCALL_VECTOR, used_vectors); -#endif - /* - * Should be a barrier for any external CPU state: - */ - cpu_init(); - -#ifdef CONFIG_X86_32 - trap_init_hook(); -#endif -} diff --git a/arch/x86/kernel/traps_64.c b/arch/x86/kernel/traps_64.c deleted file mode 100644 index 60ecc855ab8..00000000000 --- a/arch/x86/kernel/traps_64.c +++ /dev/null @@ -1,1070 +0,0 @@ -/* - * Copyright (C) 1991, 1992 Linus Torvalds - * Copyright (C) 2000, 2001, 2002 Andi Kleen, SuSE Labs - * - * Pentium III FXSR, SSE support - * Gareth Hughes , May 2000 - */ - -/* - * Handle hardware traps and faults. - */ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#ifdef CONFIG_EISA -#include -#include -#endif - -#ifdef CONFIG_MCA -#include -#endif - -#if defined(CONFIG_EDAC) -#include -#endif - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include - -#ifdef CONFIG_X86_64 -#include -#include -#include -#else -#include -#include -#include -#include -#include - -#include "cpu/mcheck/mce.h" - -DECLARE_BITMAP(used_vectors, NR_VECTORS); -EXPORT_SYMBOL_GPL(used_vectors); - -asmlinkage int system_call(void); - -/* Do we ignore FPU interrupts ? */ -char ignore_fpu_irq; - -/* - * The IDT has to be page-aligned to simplify the Pentium - * F0 0F bug workaround.. We have a special link segment - * for this. - */ -gate_desc idt_table[256] - __attribute__((__section__(".data.idt"))) = { { { { 0, 0 } } }, }; -#endif - -static int ignore_nmis; - -static inline void conditional_sti(struct pt_regs *regs) -{ - if (regs->flags & X86_EFLAGS_IF) - local_irq_enable(); -} - -static inline void preempt_conditional_sti(struct pt_regs *regs) -{ - inc_preempt_count(); - if (regs->flags & X86_EFLAGS_IF) - local_irq_enable(); -} - -static inline void preempt_conditional_cli(struct pt_regs *regs) -{ - if (regs->flags & X86_EFLAGS_IF) - local_irq_disable(); - dec_preempt_count(); -} - -#ifdef CONFIG_X86_32 -static inline void -die_if_kernel(const char *str, struct pt_regs *regs, long err) -{ - if (!user_mode_vm(regs)) - die(str, regs, err); -} - -/* - * Perform the lazy TSS's I/O bitmap copy. If the TSS has an - * invalid offset set (the LAZY one) and the faulting thread has - * a valid I/O bitmap pointer, we copy the I/O bitmap in the TSS, - * we set the offset field correctly and return 1. - */ -static int lazy_iobitmap_copy(void) -{ - struct thread_struct *thread; - struct tss_struct *tss; - int cpu; - - cpu = get_cpu(); - tss = &per_cpu(init_tss, cpu); - thread = ¤t->thread; - - if (tss->x86_tss.io_bitmap_base == INVALID_IO_BITMAP_OFFSET_LAZY && - thread->io_bitmap_ptr) { - memcpy(tss->io_bitmap, thread->io_bitmap_ptr, - thread->io_bitmap_max); - /* - * If the previously set map was extending to higher ports - * than the current one, pad extra space with 0xff (no access). - */ - if (thread->io_bitmap_max < tss->io_bitmap_max) { - memset((char *) tss->io_bitmap + - thread->io_bitmap_max, 0xff, - tss->io_bitmap_max - thread->io_bitmap_max); - } - tss->io_bitmap_max = thread->io_bitmap_max; - tss->x86_tss.io_bitmap_base = IO_BITMAP_OFFSET; - tss->io_bitmap_owner = thread; - put_cpu(); - - return 1; - } - put_cpu(); - - return 0; -} -#endif - -static void __kprobes -do_trap(int trapnr, int signr, char *str, struct pt_regs *regs, - long error_code, siginfo_t *info) -{ - struct task_struct *tsk = current; - -#ifdef CONFIG_X86_32 - if (regs->flags & X86_VM_MASK) { - /* - * traps 0, 1, 3, 4, and 5 should be forwarded to vm86. - * On nmi (interrupt 2), do_trap should not be called. - */ - if (trapnr < 6) - goto vm86_trap; - goto trap_signal; - } -#endif - - if (!user_mode(regs)) - goto kernel_trap; - -#ifdef CONFIG_X86_32 -trap_signal: -#endif - /* - * We want error_code and trap_no set for userspace faults and - * kernelspace faults which result in die(), but not - * kernelspace faults which are fixed up. die() gives the - * process no chance to handle the signal and notice the - * kernel fault information, so that won't result in polluting - * the information about previously queued, but not yet - * delivered, faults. See also do_general_protection below. - */ - tsk->thread.error_code = error_code; - tsk->thread.trap_no = trapnr; - -#ifdef CONFIG_X86_64 - if (show_unhandled_signals && unhandled_signal(tsk, signr) && - printk_ratelimit()) { - printk(KERN_INFO - "%s[%d] trap %s ip:%lx sp:%lx error:%lx", - tsk->comm, tsk->pid, str, - regs->ip, regs->sp, error_code); - print_vma_addr(" in ", regs->ip); - printk("\n"); - } -#endif - - if (info) - force_sig_info(signr, info, tsk); - else - force_sig(signr, tsk); - return; - -kernel_trap: - if (!fixup_exception(regs)) { - tsk->thread.error_code = error_code; - tsk->thread.trap_no = trapnr; - die(str, regs, error_code); - } - return; - -#ifdef CONFIG_X86_32 -vm86_trap: - if (handle_vm86_trap((struct kernel_vm86_regs *) regs, - error_code, trapnr)) - goto trap_signal; - return; -#endif -} - -#define DO_ERROR(trapnr, signr, str, name) \ -dotraplinkage void do_##name(struct pt_regs *regs, long error_code) \ -{ \ - if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \ - == NOTIFY_STOP) \ - return; \ - conditional_sti(regs); \ - do_trap(trapnr, signr, str, regs, error_code, NULL); \ -} - -#define DO_ERROR_INFO(trapnr, signr, str, name, sicode, siaddr) \ -dotraplinkage void do_##name(struct pt_regs *regs, long error_code) \ -{ \ - siginfo_t info; \ - info.si_signo = signr; \ - info.si_errno = 0; \ - info.si_code = sicode; \ - info.si_addr = (void __user *)siaddr; \ - if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \ - == NOTIFY_STOP) \ - return; \ - conditional_sti(regs); \ - do_trap(trapnr, signr, str, regs, error_code, &info); \ -} - -DO_ERROR_INFO(0, SIGFPE, "divide error", divide_error, FPE_INTDIV, regs->ip) -DO_ERROR(4, SIGSEGV, "overflow", overflow) -DO_ERROR(5, SIGSEGV, "bounds", bounds) -DO_ERROR_INFO(6, SIGILL, "invalid opcode", invalid_op, ILL_ILLOPN, regs->ip) -DO_ERROR(9, SIGFPE, "coprocessor segment overrun", coprocessor_segment_overrun) -DO_ERROR(10, SIGSEGV, "invalid TSS", invalid_TSS) -DO_ERROR(11, SIGBUS, "segment not present", segment_not_present) -#ifdef CONFIG_X86_32 -DO_ERROR(12, SIGBUS, "stack segment", stack_segment) -#endif -DO_ERROR_INFO(17, SIGBUS, "alignment check", alignment_check, BUS_ADRALN, 0) - -#ifdef CONFIG_X86_64 -/* Runs on IST stack */ -dotraplinkage void do_stack_segment(struct pt_regs *regs, long error_code) -{ - if (notify_die(DIE_TRAP, "stack segment", regs, error_code, - 12, SIGBUS) == NOTIFY_STOP) - return; - preempt_conditional_sti(regs); - do_trap(12, SIGBUS, "stack segment", regs, error_code, NULL); - preempt_conditional_cli(regs); -} - -dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code) -{ - static const char str[] = "double fault"; - struct task_struct *tsk = current; - - /* Return not checked because double check cannot be ignored */ - notify_die(DIE_TRAP, str, regs, error_code, 8, SIGSEGV); - - tsk->thread.error_code = error_code; - tsk->thread.trap_no = 8; - - /* This is always a kernel trap and never fixable (and thus must - never return). */ - for (;;) - die(str, regs, error_code); -} -#endif - -dotraplinkage void __kprobes -do_general_protection(struct pt_regs *regs, long error_code) -{ - struct task_struct *tsk; - - conditional_sti(regs); - -#ifdef CONFIG_X86_32 - if (lazy_iobitmap_copy()) { - /* restart the faulting instruction */ - return; - } - - if (regs->flags & X86_VM_MASK) - goto gp_in_vm86; -#endif - - tsk = current; - if (!user_mode(regs)) - goto gp_in_kernel; - - tsk->thread.error_code = error_code; - tsk->thread.trap_no = 13; - - if (show_unhandled_signals && unhandled_signal(tsk, SIGSEGV) && - printk_ratelimit()) { - printk(KERN_INFO - "%s[%d] general protection ip:%lx sp:%lx error:%lx", - tsk->comm, task_pid_nr(tsk), - regs->ip, regs->sp, error_code); - print_vma_addr(" in ", regs->ip); - printk("\n"); - } - - force_sig(SIGSEGV, tsk); - return; - -#ifdef CONFIG_X86_32 -gp_in_vm86: - local_irq_enable(); - handle_vm86_fault((struct kernel_vm86_regs *) regs, error_code); - return; -#endif - -gp_in_kernel: - if (fixup_exception(regs)) - return; - - tsk->thread.error_code = error_code; - tsk->thread.trap_no = 13; - if (notify_die(DIE_GPF, "general protection fault", regs, - error_code, 13, SIGSEGV) == NOTIFY_STOP) - return; - die("general protection fault", regs, error_code); -} - -static notrace __kprobes void -mem_parity_error(unsigned char reason, struct pt_regs *regs) -{ - printk(KERN_EMERG - "Uhhuh. NMI received for unknown reason %02x on CPU %d.\n", - reason, smp_processor_id()); - - printk(KERN_EMERG - "You have some hardware problem, likely on the PCI bus.\n"); - -#if defined(CONFIG_EDAC) - if (edac_handler_set()) { - edac_atomic_assert_error(); - return; - } -#endif - - if (panic_on_unrecovered_nmi) - panic("NMI: Not continuing"); - - printk(KERN_EMERG "Dazed and confused, but trying to continue\n"); - - /* Clear and disable the memory parity error line. */ - reason = (reason & 0xf) | 4; - outb(reason, 0x61); -} - -static notrace __kprobes void -io_check_error(unsigned char reason, struct pt_regs *regs) -{ - unsigned long i; - - printk(KERN_EMERG "NMI: IOCK error (debug interrupt?)\n"); - show_registers(regs); - - /* Re-enable the IOCK line, wait for a few seconds */ - reason = (reason & 0xf) | 8; - outb(reason, 0x61); - - i = 2000; - while (--i) - udelay(1000); - - reason &= ~8; - outb(reason, 0x61); -} - -static notrace __kprobes void -unknown_nmi_error(unsigned char reason, struct pt_regs *regs) -{ - if (notify_die(DIE_NMIUNKNOWN, "nmi", regs, reason, 2, SIGINT) == - NOTIFY_STOP) - return; -#ifdef CONFIG_MCA - /* - * Might actually be able to figure out what the guilty party - * is: - */ - if (MCA_bus) { - mca_handle_nmi(); - return; - } -#endif - printk(KERN_EMERG - "Uhhuh. NMI received for unknown reason %02x on CPU %d.\n", - reason, smp_processor_id()); - - printk(KERN_EMERG "Do you have a strange power saving mode enabled?\n"); - if (panic_on_unrecovered_nmi) - panic("NMI: Not continuing"); - - printk(KERN_EMERG "Dazed and confused, but trying to continue\n"); -} - -#ifdef CONFIG_X86_32 -static DEFINE_SPINLOCK(nmi_print_lock); - -void notrace __kprobes die_nmi(char *str, struct pt_regs *regs, int do_panic) -{ - if (notify_die(DIE_NMIWATCHDOG, str, regs, 0, 2, SIGINT) == NOTIFY_STOP) - return; - - spin_lock(&nmi_print_lock); - /* - * We are in trouble anyway, lets at least try - * to get a message out: - */ - bust_spinlocks(1); - printk(KERN_EMERG "%s", str); - printk(" on CPU%d, ip %08lx, registers:\n", - smp_processor_id(), regs->ip); - show_registers(regs); - if (do_panic) - panic("Non maskable interrupt"); - console_silent(); - spin_unlock(&nmi_print_lock); - bust_spinlocks(0); - - /* - * If we are in kernel we are probably nested up pretty bad - * and might aswell get out now while we still can: - */ - if (!user_mode_vm(regs)) { - current->thread.trap_no = 2; - crash_kexec(regs); - } - - do_exit(SIGSEGV); -} -#endif - -static notrace __kprobes void default_do_nmi(struct pt_regs *regs) -{ - unsigned char reason = 0; - int cpu; - - cpu = smp_processor_id(); - - /* Only the BSP gets external NMIs from the system. */ - if (!cpu) - reason = get_nmi_reason(); - - if (!(reason & 0xc0)) { - if (notify_die(DIE_NMI_IPI, "nmi_ipi", regs, reason, 2, SIGINT) - == NOTIFY_STOP) - return; -#ifdef CONFIG_X86_LOCAL_APIC - /* - * Ok, so this is none of the documented NMI sources, - * so it must be the NMI watchdog. - */ - if (nmi_watchdog_tick(regs, reason)) - return; - if (!do_nmi_callback(regs, cpu)) - unknown_nmi_error(reason, regs); -#else - unknown_nmi_error(reason, regs); -#endif - - return; - } - if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT) == NOTIFY_STOP) - return; - - /* AK: following checks seem to be broken on modern chipsets. FIXME */ - if (reason & 0x80) - mem_parity_error(reason, regs); - if (reason & 0x40) - io_check_error(reason, regs); -#ifdef CONFIG_X86_32 - /* - * Reassert NMI in case it became active meanwhile - * as it's edge-triggered: - */ - reassert_nmi(); -#endif -} - -dotraplinkage notrace __kprobes void -do_nmi(struct pt_regs *regs, long error_code) -{ - nmi_enter(); - -#ifdef CONFIG_X86_32 - { int cpu; cpu = smp_processor_id(); ++nmi_count(cpu); } -#else - add_pda(__nmi_count, 1); -#endif - - if (!ignore_nmis) - default_do_nmi(regs); - - nmi_exit(); -} - -void stop_nmi(void) -{ - acpi_nmi_disable(); - ignore_nmis++; -} - -void restart_nmi(void) -{ - ignore_nmis--; - acpi_nmi_enable(); -} - -/* May run on IST stack. */ -dotraplinkage void __kprobes do_int3(struct pt_regs *regs, long error_code) -{ -#ifdef CONFIG_KPROBES - if (notify_die(DIE_INT3, "int3", regs, error_code, 3, SIGTRAP) - == NOTIFY_STOP) - return; -#else - if (notify_die(DIE_TRAP, "int3", regs, error_code, 3, SIGTRAP) - == NOTIFY_STOP) - return; -#endif - - preempt_conditional_sti(regs); - do_trap(3, SIGTRAP, "int3", regs, error_code, NULL); - preempt_conditional_cli(regs); -} - -#ifdef CONFIG_X86_64 -/* Help handler running on IST stack to switch back to user stack - for scheduling or signal handling. The actual stack switch is done in - entry.S */ -asmlinkage __kprobes struct pt_regs *sync_regs(struct pt_regs *eregs) -{ - struct pt_regs *regs = eregs; - /* Did already sync */ - if (eregs == (struct pt_regs *)eregs->sp) - ; - /* Exception from user space */ - else if (user_mode(eregs)) - regs = task_pt_regs(current); - /* Exception from kernel and interrupts are enabled. Move to - kernel process stack. */ - else if (eregs->flags & X86_EFLAGS_IF) - regs = (struct pt_regs *)(eregs->sp -= sizeof(struct pt_regs)); - if (eregs != regs) - *regs = *eregs; - return regs; -} -#endif - -/* - * Our handling of the processor debug registers is non-trivial. - * We do not clear them on entry and exit from the kernel. Therefore - * it is possible to get a watchpoint trap here from inside the kernel. - * However, the code in ./ptrace.c has ensured that the user can - * only set watchpoints on userspace addresses. Therefore the in-kernel - * watchpoint trap can only occur in code which is reading/writing - * from user space. Such code must not hold kernel locks (since it - * can equally take a page fault), therefore it is safe to call - * force_sig_info even though that claims and releases locks. - * - * Code in ./signal.c ensures that the debug control register - * is restored before we deliver any signal, and therefore that - * user code runs with the correct debug control register even though - * we clear it here. - * - * Being careful here means that we don't have to be as careful in a - * lot of more complicated places (task switching can be a bit lazy - * about restoring all the debug state, and ptrace doesn't have to - * find every occurrence of the TF bit that could be saved away even - * by user code) - * - * May run on IST stack. - */ -dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code) -{ - struct task_struct *tsk = current; - unsigned long condition; - int si_code; - - get_debugreg(condition, 6); - - /* - * The processor cleared BTF, so don't mark that we need it set. - */ - clear_tsk_thread_flag(tsk, TIF_DEBUGCTLMSR); - tsk->thread.debugctlmsr = 0; - - if (notify_die(DIE_DEBUG, "debug", regs, condition, error_code, - SIGTRAP) == NOTIFY_STOP) - return; - - /* It's safe to allow irq's after DR6 has been saved */ - preempt_conditional_sti(regs); - - /* Mask out spurious debug traps due to lazy DR7 setting */ - if (condition & (DR_TRAP0|DR_TRAP1|DR_TRAP2|DR_TRAP3)) { - if (!tsk->thread.debugreg7) - goto clear_dr7; - } - -#ifdef CONFIG_X86_32 - if (regs->flags & X86_VM_MASK) - goto debug_vm86; -#endif - - /* Save debug status register where ptrace can see it */ - tsk->thread.debugreg6 = condition; - - /* - * Single-stepping through TF: make sure we ignore any events in - * kernel space (but re-enable TF when returning to user mode). - */ - if (condition & DR_STEP) { - if (!user_mode(regs)) - goto clear_TF_reenable; - } - - si_code = get_si_code(condition); - /* Ok, finally something we can handle */ - send_sigtrap(tsk, regs, error_code, si_code); - - /* - * Disable additional traps. They'll be re-enabled when - * the signal is delivered. - */ -clear_dr7: - set_debugreg(0, 7); - preempt_conditional_cli(regs); - return; - -#ifdef CONFIG_X86_32 -debug_vm86: - handle_vm86_trap((struct kernel_vm86_regs *) regs, error_code, 1); - preempt_conditional_cli(regs); - return; -#endif - -clear_TF_reenable: - set_tsk_thread_flag(tsk, TIF_SINGLESTEP); - regs->flags &= ~X86_EFLAGS_TF; - preempt_conditional_cli(regs); - return; -} - -#ifdef CONFIG_X86_64 -static int kernel_math_error(struct pt_regs *regs, const char *str, int trapnr) -{ - if (fixup_exception(regs)) - return 1; - - notify_die(DIE_GPF, str, regs, 0, trapnr, SIGFPE); - /* Illegal floating point operation in the kernel */ - current->thread.trap_no = trapnr; - die(str, regs, 0); - return 0; -} -#endif - -/* - * Note that we play around with the 'TS' bit in an attempt to get - * the correct behaviour even in the presence of the asynchronous - * IRQ13 behaviour - */ -void math_error(void __user *ip) -{ - struct task_struct *task; - siginfo_t info; - unsigned short cwd, swd; - - /* - * Save the info for the exception handler and clear the error. - */ - task = current; - save_init_fpu(task); - task->thread.trap_no = 16; - task->thread.error_code = 0; - info.si_signo = SIGFPE; - info.si_errno = 0; - info.si_code = __SI_FAULT; - info.si_addr = ip; - /* - * (~cwd & swd) will mask out exceptions that are not set to unmasked - * status. 0x3f is the exception bits in these regs, 0x200 is the - * C1 reg you need in case of a stack fault, 0x040 is the stack - * fault bit. We should only be taking one exception at a time, - * so if this combination doesn't produce any single exception, - * then we have a bad program that isn't synchronizing its FPU usage - * and it will suffer the consequences since we won't be able to - * fully reproduce the context of the exception - */ - cwd = get_fpu_cwd(task); - swd = get_fpu_swd(task); - switch (swd & ~cwd & 0x3f) { - case 0x000: /* No unmasked exception */ -#ifdef CONFIG_X86_32 - return; -#endif - default: /* Multiple exceptions */ - break; - case 0x001: /* Invalid Op */ - /* - * swd & 0x240 == 0x040: Stack Underflow - * swd & 0x240 == 0x240: Stack Overflow - * User must clear the SF bit (0x40) if set - */ - info.si_code = FPE_FLTINV; - break; - case 0x002: /* Denormalize */ - case 0x010: /* Underflow */ - info.si_code = FPE_FLTUND; - break; - case 0x004: /* Zero Divide */ - info.si_code = FPE_FLTDIV; - break; - case 0x008: /* Overflow */ - info.si_code = FPE_FLTOVF; - break; - case 0x020: /* Precision */ - info.si_code = FPE_FLTRES; - break; - } - force_sig_info(SIGFPE, &info, task); -} - -dotraplinkage void do_coprocessor_error(struct pt_regs *regs, long error_code) -{ - conditional_sti(regs); - -#ifdef CONFIG_X86_32 - ignore_fpu_irq = 1; -#else - if (!user_mode(regs) && - kernel_math_error(regs, "kernel x87 math error", 16)) - return; -#endif - - math_error((void __user *)regs->ip); -} - -static void simd_math_error(void __user *ip) -{ - struct task_struct *task; - siginfo_t info; - unsigned short mxcsr; - - /* - * Save the info for the exception handler and clear the error. - */ - task = current; - save_init_fpu(task); - task->thread.trap_no = 19; - task->thread.error_code = 0; - info.si_signo = SIGFPE; - info.si_errno = 0; - info.si_code = __SI_FAULT; - info.si_addr = ip; - /* - * The SIMD FPU exceptions are handled a little differently, as there - * is only a single status/control register. Thus, to determine which - * unmasked exception was caught we must mask the exception mask bits - * at 0x1f80, and then use these to mask the exception bits at 0x3f. - */ - mxcsr = get_fpu_mxcsr(task); - switch (~((mxcsr & 0x1f80) >> 7) & (mxcsr & 0x3f)) { - case 0x000: - default: - break; - case 0x001: /* Invalid Op */ - info.si_code = FPE_FLTINV; - break; - case 0x002: /* Denormalize */ - case 0x010: /* Underflow */ - info.si_code = FPE_FLTUND; - break; - case 0x004: /* Zero Divide */ - info.si_code = FPE_FLTDIV; - break; - case 0x008: /* Overflow */ - info.si_code = FPE_FLTOVF; - break; - case 0x020: /* Precision */ - info.si_code = FPE_FLTRES; - break; - } - force_sig_info(SIGFPE, &info, task); -} - -dotraplinkage void -do_simd_coprocessor_error(struct pt_regs *regs, long error_code) -{ - conditional_sti(regs); - -#ifdef CONFIG_X86_32 - if (cpu_has_xmm) { - /* Handle SIMD FPU exceptions on PIII+ processors. */ - ignore_fpu_irq = 1; - simd_math_error((void __user *)regs->ip); - return; - } - /* - * Handle strange cache flush from user space exception - * in all other cases. This is undocumented behaviour. - */ - if (regs->flags & X86_VM_MASK) { - handle_vm86_fault((struct kernel_vm86_regs *)regs, error_code); - return; - } - current->thread.trap_no = 19; - current->thread.error_code = error_code; - die_if_kernel("cache flush denied", regs, error_code); - force_sig(SIGSEGV, current); -#else - if (!user_mode(regs) && - kernel_math_error(regs, "kernel simd math error", 19)) - return; - simd_math_error((void __user *)regs->ip); -#endif -} - -dotraplinkage void -do_spurious_interrupt_bug(struct pt_regs *regs, long error_code) -{ - conditional_sti(regs); -#if 0 - /* No need to warn about this any longer. */ - printk(KERN_INFO "Ignoring P6 Local APIC Spurious Interrupt Bug...\n"); -#endif -} - -#ifdef CONFIG_X86_32 -unsigned long patch_espfix_desc(unsigned long uesp, unsigned long kesp) -{ - struct desc_struct *gdt = get_cpu_gdt_table(smp_processor_id()); - unsigned long base = (kesp - uesp) & -THREAD_SIZE; - unsigned long new_kesp = kesp - base; - unsigned long lim_pages = (new_kesp | (THREAD_SIZE - 1)) >> PAGE_SHIFT; - __u64 desc = *(__u64 *)&gdt[GDT_ENTRY_ESPFIX_SS]; - - /* Set up base for espfix segment */ - desc &= 0x00f0ff0000000000ULL; - desc |= ((((__u64)base) << 16) & 0x000000ffffff0000ULL) | - ((((__u64)base) << 32) & 0xff00000000000000ULL) | - ((((__u64)lim_pages) << 32) & 0x000f000000000000ULL) | - (lim_pages & 0xffff); - *(__u64 *)&gdt[GDT_ENTRY_ESPFIX_SS] = desc; - - return new_kesp; -} -#else -asmlinkage void __attribute__((weak)) smp_thermal_interrupt(void) -{ -} - -asmlinkage void __attribute__((weak)) mce_threshold_interrupt(void) -{ -} -#endif - -/* - * 'math_state_restore()' saves the current math information in the - * old math state array, and gets the new ones from the current task - * - * Careful.. There are problems with IBM-designed IRQ13 behaviour. - * Don't touch unless you *really* know how it works. - * - * Must be called with kernel preemption disabled (in this case, - * local interrupts are disabled at the call-site in entry.S). - */ -asmlinkage void math_state_restore(void) -{ - struct thread_info *thread = current_thread_info(); - struct task_struct *tsk = thread->task; - - if (!tsk_used_math(tsk)) { - local_irq_enable(); - /* - * does a slab alloc which can sleep - */ - if (init_fpu(tsk)) { - /* - * ran out of memory! - */ - do_group_exit(SIGKILL); - return; - } - local_irq_disable(); - } - - clts(); /* Allow maths ops (or we recurse) */ -#ifdef CONFIG_X86_32 - restore_fpu(tsk); -#else - /* - * Paranoid restore. send a SIGSEGV if we fail to restore the state. - */ - if (unlikely(restore_fpu_checking(tsk))) { - stts(); - force_sig(SIGSEGV, tsk); - return; - } -#endif - thread->status |= TS_USEDFPU; /* So we fnsave on switch_to() */ - tsk->fpu_counter++; -} -EXPORT_SYMBOL_GPL(math_state_restore); - -#ifndef CONFIG_MATH_EMULATION -asmlinkage void math_emulate(long arg) -{ - printk(KERN_EMERG - "math-emulation not enabled and no coprocessor found.\n"); - printk(KERN_EMERG "killing %s.\n", current->comm); - force_sig(SIGFPE, current); - schedule(); -} -#endif /* CONFIG_MATH_EMULATION */ - -dotraplinkage void __kprobes -do_device_not_available(struct pt_regs *regs, long error) -{ -#ifdef CONFIG_X86_32 - if (read_cr0() & X86_CR0_EM) { - conditional_sti(regs); - math_emulate(0); - } else { - math_state_restore(); /* interrupts still off */ - conditional_sti(regs); - } -#else - math_state_restore(); -#endif -} - -#ifdef CONFIG_X86_32 -#ifdef CONFIG_X86_MCE -dotraplinkage void __kprobes do_machine_check(struct pt_regs *regs, long error) -{ - conditional_sti(regs); - machine_check_vector(regs, error); -} -#endif - -dotraplinkage void do_iret_error(struct pt_regs *regs, long error_code) -{ - siginfo_t info; - local_irq_enable(); - - info.si_signo = SIGILL; - info.si_errno = 0; - info.si_code = ILL_BADSTK; - info.si_addr = 0; - if (notify_die(DIE_TRAP, "iret exception", - regs, error_code, 32, SIGILL) == NOTIFY_STOP) - return; - do_trap(32, SIGILL, "iret exception", regs, error_code, &info); -} -#endif - -void __init trap_init(void) -{ -#ifdef CONFIG_X86_32 - int i; -#endif - -#ifdef CONFIG_EISA - void __iomem *p = early_ioremap(0x0FFFD9, 4); - - if (readl(p) == 'E' + ('I'<<8) + ('S'<<16) + ('A'<<24)) - EISA_bus = 1; - early_iounmap(p, 4); -#endif - - set_intr_gate(0, ÷_error); - set_intr_gate_ist(1, &debug, DEBUG_STACK); - set_intr_gate_ist(2, &nmi, NMI_STACK); - /* int3 can be called from all */ - set_system_intr_gate_ist(3, &int3, DEBUG_STACK); - /* int4 can be called from all */ - set_system_intr_gate(4, &overflow); - set_intr_gate(5, &bounds); - set_intr_gate(6, &invalid_op); - set_intr_gate(7, &device_not_available); -#ifdef CONFIG_X86_32 - set_task_gate(8, GDT_ENTRY_DOUBLEFAULT_TSS); -#else - set_intr_gate_ist(8, &double_fault, DOUBLEFAULT_STACK); -#endif - set_intr_gate(9, &coprocessor_segment_overrun); - set_intr_gate(10, &invalid_TSS); - set_intr_gate(11, &segment_not_present); - set_intr_gate_ist(12, &stack_segment, STACKFAULT_STACK); - set_intr_gate(13, &general_protection); - set_intr_gate(14, &page_fault); - set_intr_gate(15, &spurious_interrupt_bug); - set_intr_gate(16, &coprocessor_error); - set_intr_gate(17, &alignment_check); -#ifdef CONFIG_X86_MCE - set_intr_gate_ist(18, &machine_check, MCE_STACK); -#endif - set_intr_gate(19, &simd_coprocessor_error); - -#ifdef CONFIG_IA32_EMULATION - set_system_intr_gate(IA32_SYSCALL_VECTOR, ia32_syscall); -#endif - -#ifdef CONFIG_X86_32 - if (cpu_has_fxsr) { - printk(KERN_INFO "Enabling fast FPU save and restore... "); - set_in_cr4(X86_CR4_OSFXSR); - printk("done.\n"); - } - if (cpu_has_xmm) { - printk(KERN_INFO - "Enabling unmasked SIMD FPU exception support... "); - set_in_cr4(X86_CR4_OSXMMEXCPT); - printk("done.\n"); - } - - set_system_trap_gate(SYSCALL_VECTOR, &system_call); - - /* Reserve all the builtin and the syscall vector: */ - for (i = 0; i < FIRST_EXTERNAL_VECTOR; i++) - set_bit(i, used_vectors); - - set_bit(SYSCALL_VECTOR, used_vectors); -#endif - /* - * Should be a barrier for any external CPU state: - */ - cpu_init(); - -#ifdef CONFIG_X86_32 - trap_init_hook(); -#endif -} -- cgit v1.2.3 From dd6e4eba1c03c9562fced21736453396c045f869 Mon Sep 17 00:00:00 2001 From: Alexander van Heukelum Date: Sat, 4 Oct 2008 23:12:40 +0200 Subject: dumpstack: x86: move die_nmi to dumpstack_32.c For some reason die_nmi is still defined in traps.c for i386, but is found in dumpstack_64.c for x86_64. Move it to dumpstack_32.c Signed-off-by: Alexander van Heukelum Signed-off-by: Ingo Molnar --- arch/x86/kernel/dumpstack_32.c | 36 ++++++++++++++++++++++++++++++++++++ arch/x86/kernel/traps.c | 37 ------------------------------------- 2 files changed, 36 insertions(+), 37 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/dumpstack_32.c b/arch/x86/kernel/dumpstack_32.c index c398b27df6c..dc9ca7ee1c4 100644 --- a/arch/x86/kernel/dumpstack_32.c +++ b/arch/x86/kernel/dumpstack_32.c @@ -403,6 +403,42 @@ void die(const char *str, struct pt_regs *regs, long err) oops_end(flags, regs, SIGSEGV); } +static DEFINE_SPINLOCK(nmi_print_lock); + +void notrace __kprobes +die_nmi(char *str, struct pt_regs *regs, int do_panic) +{ + if (notify_die(DIE_NMIWATCHDOG, str, regs, 0, 2, SIGINT) == NOTIFY_STOP) + return; + + spin_lock(&nmi_print_lock); + /* + * We are in trouble anyway, lets at least try + * to get a message out: + */ + bust_spinlocks(1); + printk(KERN_EMERG "%s", str); + printk(" on CPU%d, ip %08lx, registers:\n", + smp_processor_id(), regs->ip); + show_registers(regs); + if (do_panic) + panic("Non maskable interrupt"); + console_silent(); + spin_unlock(&nmi_print_lock); + bust_spinlocks(0); + + /* + * If we are in kernel we are probably nested up pretty bad + * and might aswell get out now while we still can: + */ + if (!user_mode_vm(regs)) { + current->thread.trap_no = 2; + crash_kexec(regs); + } + + do_exit(SIGSEGV); +} + static int __init kstack_setup(char *s) { kstack_depth_to_print = simple_strtoul(s, NULL, 0); diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index ffb131f74f7..e062974cce3 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -429,43 +429,6 @@ unknown_nmi_error(unsigned char reason, struct pt_regs *regs) printk(KERN_EMERG "Dazed and confused, but trying to continue\n"); } -#ifdef CONFIG_X86_32 -static DEFINE_SPINLOCK(nmi_print_lock); - -void notrace __kprobes die_nmi(char *str, struct pt_regs *regs, int do_panic) -{ - if (notify_die(DIE_NMIWATCHDOG, str, regs, 0, 2, SIGINT) == NOTIFY_STOP) - return; - - spin_lock(&nmi_print_lock); - /* - * We are in trouble anyway, lets at least try - * to get a message out: - */ - bust_spinlocks(1); - printk(KERN_EMERG "%s", str); - printk(" on CPU%d, ip %08lx, registers:\n", - smp_processor_id(), regs->ip); - show_registers(regs); - if (do_panic) - panic("Non maskable interrupt"); - console_silent(); - spin_unlock(&nmi_print_lock); - bust_spinlocks(0); - - /* - * If we are in kernel we are probably nested up pretty bad - * and might aswell get out now while we still can: - */ - if (!user_mode_vm(regs)) { - current->thread.trap_no = 2; - crash_kexec(regs); - } - - do_exit(SIGSEGV); -} -#endif - static notrace __kprobes void default_do_nmi(struct pt_regs *regs) { unsigned char reason = 0; -- cgit v1.2.3 From 161827903bdc124655f4cd976b9f0a5ac6ebf21c Mon Sep 17 00:00:00 2001 From: Alexander van Heukelum Date: Sat, 4 Oct 2008 23:12:41 +0200 Subject: dumpstack: x86: make printk_address equal - x86_64: use %p to print an address - make i386-version the same as the above The result should be the same on x86_64; on i386 the output only changes if CONFIG_KALLSYMS is turned off, in which case the address is printed twice. Signed-off-by: Alexander van Heukelum Signed-off-by: Ingo Molnar --- arch/x86/kernel/dumpstack_32.c | 27 ++------------------------- arch/x86/kernel/dumpstack_64.c | 4 ++-- 2 files changed, 4 insertions(+), 27 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/dumpstack_32.c b/arch/x86/kernel/dumpstack_32.c index dc9ca7ee1c4..4e67a005c7a 100644 --- a/arch/x86/kernel/dumpstack_32.c +++ b/arch/x86/kernel/dumpstack_32.c @@ -23,31 +23,8 @@ static int die_counter; void printk_address(unsigned long address, int reliable) { -#ifdef CONFIG_KALLSYMS - unsigned long offset = 0; - unsigned long symsize; - const char *symname; - char *modname; - char *delim = ":"; - char namebuf[KSYM_NAME_LEN]; - char reliab[4] = ""; - - symname = kallsyms_lookup(address, &symsize, &offset, - &modname, namebuf); - if (!symname) { - printk(" [<%08lx>]\n", address); - return; - } - if (!reliable) - strcpy(reliab, "? "); - - if (!modname) - modname = delim = ""; - printk(" [<%08lx>] %s%s%s%s%s+0x%lx/0x%lx\n", - address, reliab, delim, modname, delim, symname, offset, symsize); -#else - printk(" [<%08lx>]\n", address); -#endif + printk(" [<%p>] %s%pS\n", (void *) address, + reliable ? "" : "? ", (void *) address); } static inline int valid_stack_ptr(struct thread_info *tinfo, diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c index 6f1505074db..563554c9068 100644 --- a/arch/x86/kernel/dumpstack_64.c +++ b/arch/x86/kernel/dumpstack_64.c @@ -23,8 +23,8 @@ static int die_counter; void printk_address(unsigned long address, int reliable) { - printk(" [<%016lx>] %s%pS\n", - address, reliable ? "" : "? ", (void *) address); + printk(" [<%p>] %s%pS\n", (void *) address, + reliable ? "" : "? ", (void *) address); } static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack, -- cgit v1.2.3 From 3a18512db00e0eedca86e3db4d2e81f8fe0b1774 Mon Sep 17 00:00:00 2001 From: Alexander van Heukelum Date: Sat, 4 Oct 2008 23:12:42 +0200 Subject: dumpstack: x86: add "end" parameter to valid_stack_ptr and print_context_stack - Add "end" parameter to valid_stack_ptr and print_context_stack - use sizeof(long) as the size of a word on the stack Signed-off-by: Alexander van Heukelum Signed-off-by: Ingo Molnar --- arch/x86/kernel/dumpstack_32.c | 19 +++++++++++++------ arch/x86/kernel/dumpstack_64.c | 2 +- 2 files changed, 14 insertions(+), 7 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/dumpstack_32.c b/arch/x86/kernel/dumpstack_32.c index 4e67a005c7a..466d55dfeb8 100644 --- a/arch/x86/kernel/dumpstack_32.c +++ b/arch/x86/kernel/dumpstack_32.c @@ -28,10 +28,16 @@ void printk_address(unsigned long address, int reliable) } static inline int valid_stack_ptr(struct thread_info *tinfo, - void *p, unsigned int size) + void *p, unsigned int size, void *end) { void *t = tinfo; - return p > t && p <= t + THREAD_SIZE - size; + if (end) { + if (p < end && p >= (end-THREAD_SIZE)) + return 1; + else + return 0; + } + return p > t && p < t + THREAD_SIZE - size; } /* The form of the top of the frame on the stack */ @@ -43,16 +49,17 @@ struct stack_frame { static inline unsigned long print_context_stack(struct thread_info *tinfo, unsigned long *stack, unsigned long bp, - const struct stacktrace_ops *ops, void *data) + const struct stacktrace_ops *ops, void *data, + unsigned long *end) { struct stack_frame *frame = (struct stack_frame *)bp; - while (valid_stack_ptr(tinfo, stack, sizeof(*stack))) { + while (valid_stack_ptr(tinfo, stack, sizeof(*stack), end)) { unsigned long addr; addr = *stack; if (__kernel_text_address(addr)) { - if ((unsigned long) stack == bp + 4) { + if ((unsigned long) stack == bp + sizeof(long)) { ops->address(data, addr, 1); frame = frame->next_frame; bp = (unsigned long) frame; @@ -96,7 +103,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs, context = (struct thread_info *) ((unsigned long)stack & (~(THREAD_SIZE - 1))); - bp = print_context_stack(context, stack, bp, ops, data); + bp = print_context_stack(context, stack, bp, ops, data, NULL); /* * Should be after the line below, but somewhere * in early boot context comes out corrupted and we diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c index 563554c9068..361afa8864b 100644 --- a/arch/x86/kernel/dumpstack_64.c +++ b/arch/x86/kernel/dumpstack_64.c @@ -141,7 +141,7 @@ print_context_stack(struct thread_info *tinfo, addr = *stack; if (__kernel_text_address(addr)) { - if ((unsigned long) stack == bp + 8) { + if ((unsigned long) stack == bp + sizeof(long)) { ops->address(data, addr, 1); frame = frame->next_frame; bp = (unsigned long) frame; -- cgit v1.2.3 From 2ac53721f37c79acddaf60f6ff232f56b7abddba Mon Sep 17 00:00:00 2001 From: Alexander van Heukelum Date: Sat, 4 Oct 2008 23:12:43 +0200 Subject: dumptrace: x86: consistently include loglevel, print stack switch - i386 and x86_64: always printk the 'data' parameter - i386: announce stack switch (irq -> normal) - i386: check if there is a stack switch before announcing it There is a warning that 'context' might come out corrupt in early boot. If this is true it should be fixed, not worked around. Signed-off-by: Alexander van Heukelum Signed-off-by: Ingo Molnar --- arch/x86/kernel/dumpstack_32.c | 17 ++++++----------- arch/x86/kernel/dumpstack_64.c | 9 +++++++-- 2 files changed, 13 insertions(+), 13 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/dumpstack_32.c b/arch/x86/kernel/dumpstack_32.c index 466d55dfeb8..517b507bc56 100644 --- a/arch/x86/kernel/dumpstack_32.c +++ b/arch/x86/kernel/dumpstack_32.c @@ -104,16 +104,12 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs, context = (struct thread_info *) ((unsigned long)stack & (~(THREAD_SIZE - 1))); bp = print_context_stack(context, stack, bp, ops, data, NULL); - /* - * Should be after the line below, but somewhere - * in early boot context comes out corrupted and we - * can't reference it: - */ - if (ops->stack(data, "IRQ") < 0) - break; + stack = (unsigned long *)context->previous_esp; if (!stack) break; + if (ops->stack(data, "IRQ") < 0) + break; touch_nmi_watchdog(); } } @@ -134,6 +130,7 @@ static void print_trace_warning(void *data, char *msg) static int print_trace_stack(void *data, char *name) { + printk("%s <%s> ", (char *)data, name); return 0; } @@ -142,11 +139,9 @@ static int print_trace_stack(void *data, char *name) */ static void print_trace_address(void *data, unsigned long addr, int reliable) { - printk("%s [<%08lx>] ", (char *)data, addr); - if (!reliable) - printk("? "); - print_symbol("%s\n", addr); touch_nmi_watchdog(); + printk(data); + printk_address(addr, reliable); } static const struct stacktrace_ops print_trace_ops = { diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c index 361afa8864b..521c833cdc3 100644 --- a/arch/x86/kernel/dumpstack_64.c +++ b/arch/x86/kernel/dumpstack_64.c @@ -247,24 +247,29 @@ EXPORT_SYMBOL(dump_trace); static void print_trace_warning_symbol(void *data, char *msg, unsigned long symbol) { + printk(data); print_symbol(msg, symbol); printk("\n"); } static void print_trace_warning(void *data, char *msg) { - printk("%s\n", msg); + printk("%s%s\n", (char *)data, msg); } static int print_trace_stack(void *data, char *name) { - printk(" <%s> ", name); + printk("%s <%s> ", (char *)data, name); return 0; } +/* + * Print one address/symbol entries per line. + */ static void print_trace_address(void *data, unsigned long addr, int reliable) { touch_nmi_watchdog(); + printk(data); printk_address(addr, reliable); } -- cgit v1.2.3 From ca0a816403c53411bb6b6fb8bf60cef30695b09d Mon Sep 17 00:00:00 2001 From: Alexander van Heukelum Date: Sat, 4 Oct 2008 23:12:44 +0200 Subject: dumpstack: x86: use log_lvl and unify trace formatting - x86: Write log_lvl strings if available - start raw stack dumps on new line - i386: Remove extra indentation for raw stack dumps Signed-off-by: Alexander van Heukelum Signed-off-by: Ingo Molnar --- arch/x86/kernel/dumpstack_32.c | 18 +++++++++--------- arch/x86/kernel/dumpstack_64.c | 8 ++++---- 2 files changed, 13 insertions(+), 13 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/dumpstack_32.c b/arch/x86/kernel/dumpstack_32.c index 517b507bc56..09bc3330d55 100644 --- a/arch/x86/kernel/dumpstack_32.c +++ b/arch/x86/kernel/dumpstack_32.c @@ -155,8 +155,8 @@ static void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, unsigned long *stack, unsigned long bp, char *log_lvl) { + printk("%sCall Trace:\n", log_lvl); dump_trace(task, regs, stack, bp, &print_trace_ops, log_lvl); - printk("%s =======================\n", log_lvl); } void show_trace(struct task_struct *task, struct pt_regs *regs, @@ -184,17 +184,16 @@ show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, if (kstack_end(stack)) break; if (i && ((i % 8) == 0)) - printk("\n%s ", log_lvl); - printk("%08lx ", *stack++); + printk("\n%s", log_lvl); + printk(" %08lx", *stack++); + touch_nmi_watchdog(); } - printk("\n%sCall Trace:\n", log_lvl); - + printk("\n"); show_trace_log_lvl(task, regs, sp, bp, log_lvl); } void show_stack(struct task_struct *task, unsigned long *sp) { - printk(" "); show_stack_log_lvl(task, NULL, sp, 0, ""); } @@ -229,7 +228,7 @@ void show_registers(struct pt_regs *regs) print_modules(); __show_regs(regs, 0); - printk(KERN_EMERG "Process %.*s (pid: %d, ti=%p task=%p task.ti=%p)", + printk(KERN_EMERG "Process %.*s (pid: %d, ti=%p task=%p task.ti=%p)\n", TASK_COMM_LEN, current->comm, task_pid_nr(current), current_thread_info(), current, task_thread_info(current)); /* @@ -242,8 +241,9 @@ void show_registers(struct pt_regs *regs) unsigned char c; u8 *ip; - printk("\n" KERN_EMERG "Stack: "); - show_stack_log_lvl(NULL, regs, ®s->sp, 0, KERN_EMERG); + printk(KERN_EMERG "Stack:\n"); + show_stack_log_lvl(NULL, regs, ®s->sp, + 0, KERN_EMERG); printk(KERN_EMERG "Code: "); diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c index 521c833cdc3..7fd32944cea 100644 --- a/arch/x86/kernel/dumpstack_64.c +++ b/arch/x86/kernel/dumpstack_64.c @@ -284,7 +284,7 @@ static void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, unsigned long *stack, unsigned long bp, char *log_lvl) { - printk("Call Trace:\n"); + printk("%sCall Trace:\n", log_lvl); dump_trace(task, regs, stack, bp, &print_trace_ops, log_lvl); } @@ -330,7 +330,7 @@ show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, break; } if (i && ((i % 4) == 0)) - printk("\n"); + printk("\n%s", log_lvl); printk(" %016lx", *stack++); touch_nmi_watchdog(); } @@ -388,9 +388,9 @@ void show_registers(struct pt_regs *regs) unsigned char c; u8 *ip; - printk("Stack: "); + printk(KERN_EMERG "Stack:\n"); show_stack_log_lvl(NULL, regs, (unsigned long *)sp, - regs->bp, ""); + regs->bp, KERN_EMERG); printk(KERN_EMERG "Code: "); -- cgit v1.2.3 From 802a67de0cbd1ef10df80ad48b840e2103b13e52 Mon Sep 17 00:00:00 2001 From: Alexander van Heukelum Date: Sat, 4 Oct 2008 23:12:45 +0200 Subject: dumpstack: i386: make kstack= an early boot-param and add oops=panic - make kstack= and early_param - add oops=panic, setting panic_on_oops Signed-off-by: Alexander van Heukelum Signed-off-by: Ingo Molnar --- arch/x86/kernel/dumpstack_32.c | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/dumpstack_32.c b/arch/x86/kernel/dumpstack_32.c index 09bc3330d55..9a8920abe4b 100644 --- a/arch/x86/kernel/dumpstack_32.c +++ b/arch/x86/kernel/dumpstack_32.c @@ -418,13 +418,24 @@ die_nmi(char *str, struct pt_regs *regs, int do_panic) do_exit(SIGSEGV); } +static int __init oops_setup(char *s) +{ + if (!s) + return -EINVAL; + if (!strcmp(s, "panic")) + panic_on_oops = 1; + return 0; +} +early_param("oops", oops_setup); + static int __init kstack_setup(char *s) { + if (!s) + return -EINVAL; kstack_depth_to_print = simple_strtoul(s, NULL, 0); - - return 1; + return 0; } -__setup("kstack=", kstack_setup); +early_param("kstack", kstack_setup); static int __init code_bytes_setup(char *s) { -- cgit v1.2.3 From 8a541665b9063c5006b370d4488cf9f6beb6083f Mon Sep 17 00:00:00 2001 From: Alexander van Heukelum Date: Sat, 4 Oct 2008 23:12:46 +0200 Subject: dumpstack: x86: various small unification steps - define STACKSLOTS_PER_LINE and use it - define get_bp macro to hide the %%ebp/%%rbp difference - i386: check task==NULL in dump_trace, like x86_64 - i386: show_trace(NULL, ...) uses current automatically - x86_64: use [#%d] for die_counter, like i386 - whitespace and comments Signed-off-by: Alexander van Heukelum Signed-off-by: Ingo Molnar --- arch/x86/kernel/dumpstack_32.c | 23 +++++++++++------------ arch/x86/kernel/dumpstack_64.c | 15 +++++++++------ 2 files changed, 20 insertions(+), 18 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/dumpstack_32.c b/arch/x86/kernel/dumpstack_32.c index 9a8920abe4b..201ee359a1a 100644 --- a/arch/x86/kernel/dumpstack_32.c +++ b/arch/x86/kernel/dumpstack_32.c @@ -16,8 +16,11 @@ #include +#define STACKSLOTS_PER_LINE 8 +#define get_bp(bp) asm("movl %%ebp, %0" : "=r" (bp) :) + int panic_on_unrecovered_nmi; -int kstack_depth_to_print = 24; +int kstack_depth_to_print = 3 * STACKSLOTS_PER_LINE; static unsigned int code_bytes = 64; static int die_counter; @@ -82,7 +85,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs, if (!stack) { unsigned long dummy; stack = &dummy; - if (task != current) + if (task && task != current) stack = (unsigned long *)task->thread.sp; } @@ -90,7 +93,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs, if (!bp) { if (task == current) { /* Grab bp right from our regs */ - asm("movl %%ebp, %0" : "=r" (bp) :); + get_bp(bp); } else { /* bp is the last reg pushed by switch_to */ bp = *(unsigned long *) task->thread.sp; @@ -167,7 +170,7 @@ void show_trace(struct task_struct *task, struct pt_regs *regs, static void show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, - unsigned long *sp, unsigned long bp, char *log_lvl) + unsigned long *sp, unsigned long bp, char *log_lvl) { unsigned long *stack; int i; @@ -183,7 +186,7 @@ show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, for (i = 0; i < kstack_depth_to_print; i++) { if (kstack_end(stack)) break; - if (i && ((i % 8) == 0)) + if (i && ((i % STACKSLOTS_PER_LINE) == 0)) printk("\n%s", log_lvl); printk(" %08lx", *stack++); touch_nmi_watchdog(); @@ -207,7 +210,7 @@ void dump_stack(void) #ifdef CONFIG_FRAME_POINTER if (!bp) - asm("movl %%ebp, %0" : "=r" (bp):); + get_bp(bp); #endif printk("Pid: %d, comm: %.20s %s %s %.*s\n", @@ -215,8 +218,7 @@ void dump_stack(void) init_utsname()->release, (int)strcspn(init_utsname()->version, " "), init_utsname()->version); - - show_trace(current, NULL, &stack, bp); + show_trace(NULL, NULL, &stack, bp); } EXPORT_SYMBOL(dump_stack); @@ -249,7 +251,7 @@ void show_registers(struct pt_regs *regs) ip = (u8 *)regs->ip - code_prologue; if (ip < (u8 *)PAGE_OFFSET || probe_kernel_address(ip, c)) { - /* try starting at EIP */ + /* try starting at IP */ ip = (u8 *)regs->ip; code_len = code_len - code_prologue + 1; } @@ -317,13 +319,10 @@ void __kprobes oops_end(unsigned long flags, struct pt_regs *regs, int signr) if (kexec_should_crash(current)) crash_kexec(regs); - if (in_interrupt()) panic("Fatal exception in interrupt"); - if (panic_on_oops) panic("Fatal exception"); - oops_exit(); do_exit(signr); } diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c index 7fd32944cea..13379a98829 100644 --- a/arch/x86/kernel/dumpstack_64.c +++ b/arch/x86/kernel/dumpstack_64.c @@ -16,8 +16,11 @@ #include +#define STACKSLOTS_PER_LINE 4 +#define get_bp(bp) asm("movl %%rbp, %0" : "=r" (bp) :) + int panic_on_unrecovered_nmi; -int kstack_depth_to_print = 12; +int kstack_depth_to_print = 3 * STACKSLOTS_PER_LINE; static unsigned int code_bytes = 64; static int die_counter; @@ -177,7 +180,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs, if (!bp) { if (task == current) { /* Grab bp right from our regs */ - asm("movq %%rbp, %0" : "=r" (bp) : ); + get_bp(bp); } else { /* bp is the last reg pushed by switch_to */ bp = *(unsigned long *) task->thread.sp; @@ -329,7 +332,7 @@ show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, if (((long) stack & (THREAD_SIZE-1)) == 0) break; } - if (i && ((i % 4) == 0)) + if (i && ((i % STACKSLOTS_PER_LINE) == 0)) printk("\n%s", log_lvl); printk(" %016lx", *stack++); touch_nmi_watchdog(); @@ -353,7 +356,7 @@ void dump_stack(void) #ifdef CONFIG_FRAME_POINTER if (!bp) - asm("movq %%rbp, %0" : "=r" (bp) : ); + get_bp(bp); #endif printk("Pid: %d, comm: %.20s %s %s %.*s\n", @@ -396,7 +399,7 @@ void show_registers(struct pt_regs *regs) ip = (u8 *)regs->ip - code_prologue; if (ip < (u8 *)PAGE_OFFSET || probe_kernel_address(ip, c)) { - /* try starting at RIP */ + /* try starting at IP */ ip = (u8 *)regs->ip; code_len = code_len - code_prologue + 1; } @@ -475,7 +478,7 @@ void __kprobes oops_end(unsigned long flags, struct pt_regs *regs, int signr) int __kprobes __die(const char *str, struct pt_regs *regs, long err) { - printk(KERN_EMERG "%s: %04lx [%u] ", str, err & 0xffff, ++die_counter); + printk(KERN_EMERG "%s: %04lx [#%d] ", str, err & 0xffff, ++die_counter); #ifdef CONFIG_PREEMPT printk("PREEMPT "); #endif -- cgit v1.2.3 From 649c6653fa94ec8f3ea32b19c97b790ec4e8e4ac Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 5 Oct 2008 16:52:24 +0200 Subject: x86: improve UP kernel when CPU-hotplug and SMP is enabled num_possible_cpus() can be > 1 when disabled CPUs have been accounted. Disabled CPUs are not in the cpu_present_map, so we can use num_present_cpus() as a safe indicator to switch to UP alternatives. Reported-by: Chuck Ebbert Signed-off-by: Thomas Gleixner Cc: Signed-off-by: Ingo Molnar --- arch/x86/kernel/alternative.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index fb04e49776b..a84ac7b570e 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -444,7 +444,7 @@ void __init alternative_instructions(void) _text, _etext); /* Only switch to UP mode if we don't immediately boot others */ - if (num_possible_cpus() == 1 || setup_max_cpus <= 1) + if (num_present_cpus() == 1 || setup_max_cpus <= 1) alternatives_smp_switch(0); } #endif -- cgit v1.2.3 From b807305059c28fb8197496c944bfaa6b372a40ad Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sun, 5 Oct 2008 17:12:36 +0200 Subject: x86: remove additional_cpus configurability additional_cpus= parameter is dangerous and broken: for example if we boot additional_cpus=-2 on a stock dual-core system it will crash the box on bootup. So reduce the maze of code a bit by removingthe user-configurability angle. Signed-off-by: Ingo Molnar --- arch/x86/kernel/smpboot.c | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 857a88bb919..8dd201c3132 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -1261,7 +1261,7 @@ void __init native_smp_cpus_done(unsigned int max_cpus) check_nmi_watchdog(); } -static int additional_cpus __initdata = CONFIG_HOTPLUG_ADDITIONAL_CPUS; +static int additional_cpus = -1; /* * cpu_possible_map should be static, it cannot change as cpu's @@ -1334,12 +1334,6 @@ static void remove_siblinginfo(int cpu) cpu_clear(cpu, cpu_sibling_setup_map); } -static __init int setup_additional_cpus(char *s) -{ - return s && get_option(&s, &additional_cpus) ? 0 : -EINVAL; -} -early_param("additional_cpus", setup_additional_cpus); - static void __ref remove_cpu_from_maps(int cpu) { cpu_clear(cpu, cpu_online_map); -- cgit v1.2.3 From cb48bb59995d2d14a0c732835c80bbcfb354de31 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 5 Oct 2008 17:51:52 +0200 Subject: x86: remove additional_cpus remove remainder of additional_cpus logic. We now just listen to the disabled_cpus value like we did for years. disabled_cpus is always >= 0 so no need for an extra check. Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar --- arch/x86/kernel/smpboot.c | 14 ++------------ 1 file changed, 2 insertions(+), 12 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 8dd201c3132..8c3aca7cb34 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -1261,8 +1261,6 @@ void __init native_smp_cpus_done(unsigned int max_cpus) check_nmi_watchdog(); } -static int additional_cpus = -1; - /* * cpu_possible_map should be static, it cannot change as cpu's * are onlined, or offlined. The reason is per-cpu data-structures @@ -1282,21 +1280,13 @@ static int additional_cpus = -1; */ __init void prefill_possible_map(void) { - int i; - int possible; + int i, possible; /* no processor from mptable or madt */ if (!num_processors) num_processors = 1; - if (additional_cpus == -1) { - if (disabled_cpus > 0) - additional_cpus = disabled_cpus; - else - additional_cpus = 0; - } - - possible = num_processors + additional_cpus; + possible = num_processors + disabled_cpus; if (possible > NR_CPUS) possible = NR_CPUS; -- cgit v1.2.3 From 6a2ae2d9f9212de47c16e23080162aada882c8b6 Mon Sep 17 00:00:00 2001 From: Alexander van Heukelum Date: Sun, 5 Oct 2008 12:39:36 +0200 Subject: dumpstack: x86: various small unification steps, fix After "dumpstack: x86: various small unification steps", the assembler gives the following compile error. The error is in dumpstack_64.c. {standard input}: Assembler messages: {standard input}:720: Error: Incorrect register `%rbx' used with `l' suffix {standard input}:1340: Error: Incorrect register `%r12' used with `l' suffix Indeed the suffix in get_bp() was wrong. Signed-off-by: Alexander van Heukelum Signed-off-by: Ingo Molnar --- arch/x86/kernel/dumpstack_64.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c index 13379a98829..086cc8118e3 100644 --- a/arch/x86/kernel/dumpstack_64.c +++ b/arch/x86/kernel/dumpstack_64.c @@ -17,7 +17,7 @@ #include #define STACKSLOTS_PER_LINE 4 -#define get_bp(bp) asm("movl %%rbp, %0" : "=r" (bp) :) +#define get_bp(bp) asm("movq %%rbp, %0" : "=r" (bp) :) int panic_on_unrecovered_nmi; int kstack_depth_to_print = 3 * STACKSLOTS_PER_LINE; -- cgit v1.2.3 From 758a7f7bb86b520aadc484f23da85e547b3bf3d8 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Tue, 14 Oct 2008 17:01:03 -0600 Subject: x86: register a platform RTC device if PNP doesn't describe it Most if not all x86 platforms have an RTC device, but sometimes the RTC is not exposed as a PNP0b00/PNP0b01/PNP0b02 device in PNPBIOS or ACPI: http://bugzilla.kernel.org/show_bug.cgi?id=11580 https://bugzilla.redhat.com/show_bug.cgi?id=451188 It's best if we can discover the RTC via PNP because then we know which flavor of device it is, where it lives, and which IRQ it uses. But if we can't, we should register a platform device using the compiled-in RTC_PORT/RTC_IRQ resource assumptions. Signed-off-by: Bjorn Helgaas Acked-by: Rafael J. Wysocki Acked-by: David Brownell Reported-by: Rik Theys Reported-by: shr_msn@yahoo.com.tw Signed-off-by: Linus Torvalds --- arch/x86/kernel/rtc.c | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/rtc.c b/arch/x86/kernel/rtc.c index 05191bbc68b..0a23b5795b2 100644 --- a/arch/x86/kernel/rtc.c +++ b/arch/x86/kernel/rtc.c @@ -223,11 +223,25 @@ static struct platform_device rtc_device = { static __init int add_rtc_cmos(void) { #ifdef CONFIG_PNP - if (!pnp_platform_devices) - platform_device_register(&rtc_device); -#else + static const char *ids[] __initconst = + { "PNP0b00", "PNP0b01", "PNP0b02", }; + struct pnp_dev *dev; + struct pnp_id *id; + int i; + + pnp_for_each_dev(dev) { + for (id = dev->id; id; id = id->next) { + for (i = 0; i < ARRAY_SIZE(ids); i++) { + if (compare_pnp_id(id, ids[i]) != 0) + return 0; + } + } + } +#endif + platform_device_register(&rtc_device); -#endif /* CONFIG_PNP */ + dev_info(&rtc_device.dev, + "registered platform RTC device (no PNP device found)\n"); return 0; } device_initcall(add_rtc_cmos); -- cgit v1.2.3 From 3807f345b2c610336c17c7624a0d496a38df75a0 Mon Sep 17 00:00:00 2001 From: Glauber Costa Date: Mon, 28 Jul 2008 11:47:52 -0300 Subject: x86: paravirt: factor out cpu_khz to common code KVM intends to use paravirt code to calibrate khz. Xen current code will do just fine. So as a first step, factor out code to pvclock.c. Signed-off-by: Glauber Costa Signed-off-by: Avi Kivity --- arch/x86/kernel/pvclock.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/pvclock.c b/arch/x86/kernel/pvclock.c index 05fbe9a0325..1c54b5fb7ae 100644 --- a/arch/x86/kernel/pvclock.c +++ b/arch/x86/kernel/pvclock.c @@ -97,6 +97,18 @@ static unsigned pvclock_get_time_values(struct pvclock_shadow_time *dst, return dst->version; } +unsigned long pvclock_tsc_khz(struct pvclock_vcpu_time_info *src) +{ + u64 tsc_khz = 1000000ULL << 32; + + do_div(tsc_khz, src->tsc_to_system_mul); + if (src->tsc_shift < 0) + tsc_khz <<= -src->tsc_shift; + else + tsc_khz >>= src->tsc_shift; + return tsc_khz; +} + cycle_t pvclock_clocksource_read(struct pvclock_vcpu_time_info *src) { struct pvclock_shadow_time shadow; -- cgit v1.2.3 From 0293615f3fb9886b6b23800c121be293bb7483e9 Mon Sep 17 00:00:00 2001 From: Glauber Costa Date: Mon, 28 Jul 2008 11:47:53 -0300 Subject: x86: KVM guest: use paravirt function to calculate cpu khz We're currently facing timing problems in guests that do calibration under heavy load, and then the load vanishes. This means we'll have a much lower lpj than we actually should, and delays end up taking less time than they should, which is a nasty bug. Solution is to pass on the lpj value from host to guest, and have it preset. Signed-off-by: Glauber Costa Signed-off-by: Avi Kivity --- arch/x86/kernel/kvmclock.c | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c index d02def06ca9..774ac499156 100644 --- a/arch/x86/kernel/kvmclock.c +++ b/arch/x86/kernel/kvmclock.c @@ -78,6 +78,34 @@ static cycle_t kvm_clock_read(void) return ret; } +/* + * If we don't do that, there is the possibility that the guest + * will calibrate under heavy load - thus, getting a lower lpj - + * and execute the delays themselves without load. This is wrong, + * because no delay loop can finish beforehand. + * Any heuristics is subject to fail, because ultimately, a large + * poll of guests can be running and trouble each other. So we preset + * lpj here + */ +static unsigned long kvm_get_tsc_khz(void) +{ + return preset_lpj; +} + +static void kvm_get_preset_lpj(void) +{ + struct pvclock_vcpu_time_info *src; + unsigned long khz; + u64 lpj; + + src = &per_cpu(hv_clock, 0); + khz = pvclock_tsc_khz(src); + + lpj = ((u64)khz * 1000); + do_div(lpj, HZ); + preset_lpj = lpj; +} + static struct clocksource kvm_clock = { .name = "kvm-clock", .read = kvm_clock_read, @@ -153,6 +181,7 @@ void __init kvmclock_init(void) pv_time_ops.get_wallclock = kvm_get_wallclock; pv_time_ops.set_wallclock = kvm_set_wallclock; pv_time_ops.sched_clock = kvm_clock_read; + pv_time_ops.get_tsc_khz = kvm_get_tsc_khz; #ifdef CONFIG_X86_LOCAL_APIC pv_apic_ops.setup_secondary_clock = kvm_setup_secondary_clock; #endif @@ -163,6 +192,7 @@ void __init kvmclock_init(void) #ifdef CONFIG_KEXEC machine_ops.crash_shutdown = kvm_crash_shutdown; #endif + kvm_get_preset_lpj(); clocksource_register(&kvm_clock); } } -- cgit v1.2.3 From a08546001c2b0f584ffc81987340943a7d6d6acb Mon Sep 17 00:00:00 2001 From: Harvey Harrison Date: Tue, 23 Sep 2008 11:01:45 -0700 Subject: x86: pvclock: fix shadowed variable warning arch/x86/kernel/pvclock.c:102:6: warning: symbol 'tsc_khz' shadows an earlier one include/asm/tsc.h:18:21: originally declared here Signed-off-by: Harvey Harrison Signed-off-by: Andrew Morton Signed-off-by: Avi Kivity --- arch/x86/kernel/pvclock.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/pvclock.c b/arch/x86/kernel/pvclock.c index 1c54b5fb7ae..4f9c55f3a7c 100644 --- a/arch/x86/kernel/pvclock.c +++ b/arch/x86/kernel/pvclock.c @@ -99,14 +99,14 @@ static unsigned pvclock_get_time_values(struct pvclock_shadow_time *dst, unsigned long pvclock_tsc_khz(struct pvclock_vcpu_time_info *src) { - u64 tsc_khz = 1000000ULL << 32; + u64 pv_tsc_khz = 1000000ULL << 32; - do_div(tsc_khz, src->tsc_to_system_mul); + do_div(pv_tsc_khz, src->tsc_to_system_mul); if (src->tsc_shift < 0) - tsc_khz <<= -src->tsc_shift; + pv_tsc_khz <<= -src->tsc_shift; else - tsc_khz >>= src->tsc_shift; - return tsc_khz; + pv_tsc_khz >>= src->tsc_shift; + return pv_tsc_khz; } cycle_t pvclock_clocksource_read(struct pvclock_vcpu_time_info *src) -- cgit v1.2.3 From ae87221d3ce49d9de1e43756da834fd0bf05a2ad Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Fri, 24 Aug 2007 16:11:54 -0700 Subject: sysfs: crash debugging Print the name of the last-accessed sysfs file when we oops, to help track down oopses which occur in sysfs store/read handlers. Because these oopses tend to not leave any trace of the offending code in the stack traces. Cc: Kay Sievers Cc: Mathieu Desnoyers Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/dumpstack_32.c | 2 ++ arch/x86/kernel/dumpstack_64.c | 2 ++ 2 files changed, 4 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/dumpstack_32.c b/arch/x86/kernel/dumpstack_32.c index 201ee359a1a..1a78180f08d 100644 --- a/arch/x86/kernel/dumpstack_32.c +++ b/arch/x86/kernel/dumpstack_32.c @@ -13,6 +13,7 @@ #include #include #include +#include #include @@ -343,6 +344,7 @@ int __kprobes __die(const char *str, struct pt_regs *regs, long err) printk("DEBUG_PAGEALLOC"); #endif printk("\n"); + sysfs_printk_last_file(); if (notify_die(DIE_OOPS, str, regs, err, current->thread.trap_no, SIGSEGV) == NOTIFY_STOP) return 1; diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c index 086cc8118e3..96a5db7da8a 100644 --- a/arch/x86/kernel/dumpstack_64.c +++ b/arch/x86/kernel/dumpstack_64.c @@ -13,6 +13,7 @@ #include #include #include +#include #include @@ -489,6 +490,7 @@ int __kprobes __die(const char *str, struct pt_regs *regs, long err) printk("DEBUG_PAGEALLOC"); #endif printk("\n"); + sysfs_printk_last_file(); if (notify_die(DIE_OOPS, str, regs, err, current->thread.trap_no, SIGSEGV) == NOTIFY_STOP) return 1; -- cgit v1.2.3 From a9b12619f7b6f19c871437ec24a088787a04b1de Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Mon, 21 Jul 2008 20:03:34 -0700 Subject: device create: misc: convert device_create_drvdata to device_create Now that device_create() has been audited, rename things back to the original call to be sane. Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpuid.c | 4 ++-- arch/x86/kernel/msr.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpuid.c b/arch/x86/kernel/cpuid.c index 6a44d646599..72cefd1e649 100644 --- a/arch/x86/kernel/cpuid.c +++ b/arch/x86/kernel/cpuid.c @@ -147,8 +147,8 @@ static __cpuinit int cpuid_device_create(int cpu) { struct device *dev; - dev = device_create_drvdata(cpuid_class, NULL, MKDEV(CPUID_MAJOR, cpu), - NULL, "cpu%d", cpu); + dev = device_create(cpuid_class, NULL, MKDEV(CPUID_MAJOR, cpu), NULL, + "cpu%d", cpu); return IS_ERR(dev) ? PTR_ERR(dev) : 0; } diff --git a/arch/x86/kernel/msr.c b/arch/x86/kernel/msr.c index 2e2af5d1819..82a7c7ed6d4 100644 --- a/arch/x86/kernel/msr.c +++ b/arch/x86/kernel/msr.c @@ -163,8 +163,8 @@ static int __cpuinit msr_device_create(int cpu) { struct device *dev; - dev = device_create_drvdata(msr_class, NULL, MKDEV(MSR_MAJOR, cpu), - NULL, "msr%d", cpu); + dev = device_create(msr_class, NULL, MKDEV(MSR_MAJOR, cpu), NULL, + "msr%d", cpu); return IS_ERR(dev) ? PTR_ERR(dev) : 0; } -- cgit v1.2.3 From 25ddbb18aae33ad255eb9f35aacebe3af01e1e9c Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Wed, 15 Oct 2008 22:01:41 -0700 Subject: Make the taint flags reliable It's somewhat unlikely that it happens, but right now a race window between interrupts or machine checks or oopses could corrupt the tainted bitmap because it is modified in a non atomic fashion. Convert the taint variable to an unsigned long and use only atomic bit operations on it. Unfortunately this means the intvec sysctl functions cannot be used on it anymore. It turned out the taint sysctl handler could actually be simplified a bit (since it only increases capabilities) so this patch actually removes code. [akpm@linux-foundation.org: remove unneeded include] Signed-off-by: Andi Kleen Cc: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/kernel/smpboot.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 8c3aca7cb34..7ed9e070a6e 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -282,6 +282,8 @@ static void __cpuinit smp_callin(void) cpu_set(cpuid, cpu_callin_map); } +static int __cpuinitdata unsafe_smp; + /* * Activate a secondary processor. */ @@ -397,7 +399,7 @@ static void __cpuinit smp_apply_quirks(struct cpuinfo_x86 *c) goto valid_k7; /* If we get here, not a certified SMP capable AMD system. */ - add_taint(TAINT_UNSAFE_SMP); + unsafe_smp = 1; } valid_k7: @@ -414,12 +416,10 @@ static void __cpuinit smp_checks(void) * Don't taint if we are running SMP kernel on a single non-MP * approved Athlon */ - if (tainted & TAINT_UNSAFE_SMP) { - if (num_online_cpus()) - printk(KERN_INFO "WARNING: This combination of AMD" - "processors is not suitable for SMP.\n"); - else - tainted &= ~TAINT_UNSAFE_SMP; + if (unsafe_smp && num_online_cpus() > 1) { + printk(KERN_INFO "WARNING: This combination of AMD" + "processors is not suitable for SMP.\n"); + add_taint(TAINT_UNSAFE_SMP); } } -- cgit v1.2.3 From bdab0ba3d9ad8de257ee6236daf314723748fde6 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Wed, 15 Oct 2008 22:02:07 -0700 Subject: x86: rename iommu_num_pages function to iommu_nr_pages This series of patches re-introduces the iommu_num_pages function so that it can be used by each architecture specific IOMMU implementations. The series also changes IOMMU implementations for X86, Alpha, PowerPC and UltraSparc. The other implementations are not yet changed because the modifications required are not obvious and I can't test them on real hardware. This patch: This is a preparation patch for introducing a generic iommu_num_pages function. Signed-off-by: Joerg Roedel Cc: "David S. Miller" Cc: Richard Henderson Cc: Ivan Kokshaysky Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Ingo Molnar Cc: Thomas Gleixner Cc: FUJITA Tomonori Cc: Muli Ben-Yehuda Cc: Dave Airlie Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/kernel/amd_iommu.c | 8 ++++---- arch/x86/kernel/pci-dma.c | 4 ++-- arch/x86/kernel/pci-gart_64.c | 8 ++++---- 3 files changed, 10 insertions(+), 10 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index 34e4d112b1e..10646acba9b 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -295,7 +295,7 @@ static int iommu_flush_pages(struct amd_iommu *iommu, u16 domid, u64 address, size_t size) { int s = 0; - unsigned pages = iommu_num_pages(address, size); + unsigned pages = iommu_nr_pages(address, size); address &= PAGE_MASK; @@ -679,7 +679,7 @@ static struct dma_ops_domain *dma_ops_domain_alloc(struct amd_iommu *iommu, if (iommu->exclusion_start && iommu->exclusion_start < dma_dom->aperture_size) { unsigned long startpage = iommu->exclusion_start >> PAGE_SHIFT; - int pages = iommu_num_pages(iommu->exclusion_start, + int pages = iommu_nr_pages(iommu->exclusion_start, iommu->exclusion_length); dma_ops_reserve_addresses(dma_dom, startpage, pages); } @@ -935,7 +935,7 @@ static dma_addr_t __map_single(struct device *dev, unsigned long align_mask = 0; int i; - pages = iommu_num_pages(paddr, size); + pages = iommu_nr_pages(paddr, size); paddr &= PAGE_MASK; if (align) @@ -980,7 +980,7 @@ static void __unmap_single(struct amd_iommu *iommu, if ((dma_addr == 0) || (dma_addr + size > dma_dom->aperture_size)) return; - pages = iommu_num_pages(dma_addr, size); + pages = iommu_nr_pages(dma_addr, size); dma_addr &= PAGE_MASK; start = dma_addr; diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index 0a3824e837b..19262482021 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c @@ -125,13 +125,13 @@ void __init pci_iommu_alloc(void) pci_swiotlb_init(); } -unsigned long iommu_num_pages(unsigned long addr, unsigned long len) +unsigned long iommu_nr_pages(unsigned long addr, unsigned long len) { unsigned long size = roundup((addr & ~PAGE_MASK) + len, PAGE_SIZE); return size >> PAGE_SHIFT; } -EXPORT_SYMBOL(iommu_num_pages); +EXPORT_SYMBOL(iommu_nr_pages); #endif void *dma_generic_alloc_coherent(struct device *dev, size_t size, diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c index 145f1c83369..14f1b41348f 100644 --- a/arch/x86/kernel/pci-gart_64.c +++ b/arch/x86/kernel/pci-gart_64.c @@ -231,7 +231,7 @@ nonforced_iommu(struct device *dev, unsigned long addr, size_t size) static dma_addr_t dma_map_area(struct device *dev, dma_addr_t phys_mem, size_t size, int dir, unsigned long align_mask) { - unsigned long npages = iommu_num_pages(phys_mem, size); + unsigned long npages = iommu_nr_pages(phys_mem, size); unsigned long iommu_page = alloc_iommu(dev, npages, align_mask); int i; @@ -285,7 +285,7 @@ static void gart_unmap_single(struct device *dev, dma_addr_t dma_addr, return; iommu_page = (dma_addr - iommu_bus_base)>>PAGE_SHIFT; - npages = iommu_num_pages(dma_addr, size); + npages = iommu_nr_pages(dma_addr, size); for (i = 0; i < npages; i++) { iommu_gatt_base[iommu_page + i] = gart_unmapped_entry; CLEAR_LEAK(iommu_page + i); @@ -368,7 +368,7 @@ static int __dma_map_cont(struct device *dev, struct scatterlist *start, } addr = phys_addr; - pages = iommu_num_pages(s->offset, s->length); + pages = iommu_nr_pages(s->offset, s->length); while (pages--) { iommu_gatt_base[iommu_page] = GPTE_ENCODE(addr); SET_LEAK(iommu_page); @@ -451,7 +451,7 @@ gart_map_sg(struct device *dev, struct scatterlist *sg, int nents, int dir) seg_size += s->length; need = nextneed; - pages += iommu_num_pages(s->offset, s->length); + pages += iommu_nr_pages(s->offset, s->length); ps = s; } if (dma_map_cont(dev, start_sg, i - start, sgmap, pages, need) < 0) -- cgit v1.2.3 From 1477b8e5f13266bbf0389baafd39a90ff29c5f61 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Wed, 15 Oct 2008 22:02:11 -0700 Subject: x86: convert GART driver to generic iommu_num_pages function Signed-off-by: Joerg Roedel Cc: Ingo Molnar Cc: Thomas Gleixner Cc: FUJITA Tomonori Cc: Muli Ben-Yehuda Cc: Dave Airlie Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/kernel/pci-gart_64.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c index 14f1b41348f..e3f75bbcede 100644 --- a/arch/x86/kernel/pci-gart_64.c +++ b/arch/x86/kernel/pci-gart_64.c @@ -231,7 +231,7 @@ nonforced_iommu(struct device *dev, unsigned long addr, size_t size) static dma_addr_t dma_map_area(struct device *dev, dma_addr_t phys_mem, size_t size, int dir, unsigned long align_mask) { - unsigned long npages = iommu_nr_pages(phys_mem, size); + unsigned long npages = iommu_num_pages(phys_mem, size, PAGE_SIZE); unsigned long iommu_page = alloc_iommu(dev, npages, align_mask); int i; @@ -285,7 +285,7 @@ static void gart_unmap_single(struct device *dev, dma_addr_t dma_addr, return; iommu_page = (dma_addr - iommu_bus_base)>>PAGE_SHIFT; - npages = iommu_nr_pages(dma_addr, size); + npages = iommu_num_pages(dma_addr, size, PAGE_SIZE); for (i = 0; i < npages; i++) { iommu_gatt_base[iommu_page + i] = gart_unmapped_entry; CLEAR_LEAK(iommu_page + i); @@ -368,7 +368,7 @@ static int __dma_map_cont(struct device *dev, struct scatterlist *start, } addr = phys_addr; - pages = iommu_nr_pages(s->offset, s->length); + pages = iommu_num_pages(s->offset, s->length, PAGE_SIZE); while (pages--) { iommu_gatt_base[iommu_page] = GPTE_ENCODE(addr); SET_LEAK(iommu_page); @@ -451,7 +451,7 @@ gart_map_sg(struct device *dev, struct scatterlist *sg, int nents, int dir) seg_size += s->length; need = nextneed; - pages += iommu_nr_pages(s->offset, s->length); + pages += iommu_num_pages(s->offset, s->length, PAGE_SIZE); ps = s; } if (dma_map_cont(dev, start_sg, i - start, sgmap, pages, need) < 0) -- cgit v1.2.3 From e3c449f526cebb8d287241c7e82faafd9709668b Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Wed, 15 Oct 2008 22:02:11 -0700 Subject: x86, AMD IOMMU: convert driver to generic iommu_num_pages function Signed-off-by: Joerg Roedel Cc: Ingo Molnar Cc: Thomas Gleixner Cc: FUJITA Tomonori Cc: Muli Ben-Yehuda Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/kernel/amd_iommu.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index 10646acba9b..a8fd9ebdc8e 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -295,7 +295,7 @@ static int iommu_flush_pages(struct amd_iommu *iommu, u16 domid, u64 address, size_t size) { int s = 0; - unsigned pages = iommu_nr_pages(address, size); + unsigned pages = iommu_num_pages(address, size, PAGE_SIZE); address &= PAGE_MASK; @@ -679,8 +679,9 @@ static struct dma_ops_domain *dma_ops_domain_alloc(struct amd_iommu *iommu, if (iommu->exclusion_start && iommu->exclusion_start < dma_dom->aperture_size) { unsigned long startpage = iommu->exclusion_start >> PAGE_SHIFT; - int pages = iommu_nr_pages(iommu->exclusion_start, - iommu->exclusion_length); + int pages = iommu_num_pages(iommu->exclusion_start, + iommu->exclusion_length, + PAGE_SIZE); dma_ops_reserve_addresses(dma_dom, startpage, pages); } @@ -935,7 +936,7 @@ static dma_addr_t __map_single(struct device *dev, unsigned long align_mask = 0; int i; - pages = iommu_nr_pages(paddr, size); + pages = iommu_num_pages(paddr, size, PAGE_SIZE); paddr &= PAGE_MASK; if (align) @@ -980,7 +981,7 @@ static void __unmap_single(struct amd_iommu *iommu, if ((dma_addr == 0) || (dma_addr + size > dma_dom->aperture_size)) return; - pages = iommu_nr_pages(dma_addr, size); + pages = iommu_num_pages(dma_addr, size, PAGE_SIZE); dma_addr &= PAGE_MASK; start = dma_addr; -- cgit v1.2.3 From 036b4c50fe99a2f308f36561335b9904ab507972 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Wed, 15 Oct 2008 22:02:12 -0700 Subject: x86: convert Calgary IOMMU driver to generic iommu_num_pages function Signed-off-by: Joerg Roedel Cc: Ingo Molnar Cc: Thomas Gleixner Cc: FUJITA Tomonori Acked-by: Muli Ben-Yehuda Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/kernel/pci-calgary_64.c | 18 ++++-------------- 1 file changed, 4 insertions(+), 14 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/pci-calgary_64.c b/arch/x86/kernel/pci-calgary_64.c index 080d1d27f37..e1e731d78f3 100644 --- a/arch/x86/kernel/pci-calgary_64.c +++ b/arch/x86/kernel/pci-calgary_64.c @@ -217,16 +217,6 @@ static inline unsigned long verify_bit_range(unsigned long* bitmap, #endif /* CONFIG_IOMMU_DEBUG */ -static inline unsigned int num_dma_pages(unsigned long dma, unsigned int dmalen) -{ - unsigned int npages; - - npages = PAGE_ALIGN(dma + dmalen) - (dma & PAGE_MASK); - npages >>= PAGE_SHIFT; - - return npages; -} - static inline int translation_enabled(struct iommu_table *tbl) { /* only PHBs with translation enabled have an IOMMU table */ @@ -408,7 +398,7 @@ static void calgary_unmap_sg(struct device *dev, if (dmalen == 0) break; - npages = num_dma_pages(dma, dmalen); + npages = iommu_num_pages(dma, dmalen, PAGE_SIZE); iommu_free(tbl, dma, npages); } } @@ -427,7 +417,7 @@ static int calgary_map_sg(struct device *dev, struct scatterlist *sg, BUG_ON(!sg_page(s)); vaddr = (unsigned long) sg_virt(s); - npages = num_dma_pages(vaddr, s->length); + npages = iommu_num_pages(vaddr, s->length, PAGE_SIZE); entry = iommu_range_alloc(dev, tbl, npages); if (entry == bad_dma_address) { @@ -464,7 +454,7 @@ static dma_addr_t calgary_map_single(struct device *dev, phys_addr_t paddr, struct iommu_table *tbl = find_iommu_table(dev); uaddr = (unsigned long)vaddr; - npages = num_dma_pages(uaddr, size); + npages = iommu_num_pages(uaddr, size, PAGE_SIZE); return iommu_alloc(dev, tbl, vaddr, npages, direction); } @@ -475,7 +465,7 @@ static void calgary_unmap_single(struct device *dev, dma_addr_t dma_handle, struct iommu_table *tbl = find_iommu_table(dev); unsigned int npages; - npages = num_dma_pages(dma_handle, size); + npages = iommu_num_pages(dma_handle, size, PAGE_SIZE); iommu_free(tbl, dma_handle, npages); } -- cgit v1.2.3