aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--arch/x86/mm/fault_32.c100
-rw-r--r--arch/x86/mm/fault_64.c93
2 files changed, 177 insertions, 16 deletions
diff --git a/arch/x86/mm/fault_32.c b/arch/x86/mm/fault_32.c
index 7d9ecbbba74..4da4625c696 100644
--- a/arch/x86/mm/fault_32.c
+++ b/arch/x86/mm/fault_32.c
@@ -48,7 +48,11 @@ static inline int notify_page_fault(struct pt_regs *regs)
int ret = 0;
/* kprobe_running() needs smp_processor_id() */
+#ifdef CONFIG_X86_32
if (!user_mode_vm(regs)) {
+#else
+ if (!user_mode(regs)) {
+#endif
preempt_disable();
if (kprobe_running() && kprobe_fault_handler(regs, 14))
ret = 1;
@@ -430,11 +434,15 @@ static noinline void pgtable_bad(unsigned long address, struct pt_regs *regs,
#endif
/*
+ * X86_32
* Handle a fault on the vmalloc or module mapping area
*
+ * X86_64
+ * Handle a fault on the vmalloc area
+ *
* This assumes no large pages in there.
*/
-static inline int vmalloc_fault(unsigned long address)
+static int vmalloc_fault(unsigned long address)
{
#ifdef CONFIG_X86_32
unsigned long pgd_paddr;
@@ -509,6 +517,9 @@ int show_unhandled_signals = 1;
* and the problem, and then passes it off to one of the appropriate
* routines.
*/
+#ifdef CONFIG_X86_64
+asmlinkage
+#endif
void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code)
{
struct task_struct *tsk;
@@ -517,6 +528,9 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code)
unsigned long address;
int write, si_code;
int fault;
+#ifdef CONFIG_X86_64
+ unsigned long flags;
+#endif
/*
* We can fault from pretty much anywhere, with unknown IRQ state.
@@ -548,6 +562,7 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code)
* (error_code & 4) == 0, and that the fault was not a
* protection error (error_code & 9) == 0.
*/
+#ifdef CONFIG_X86_32
if (unlikely(address >= TASK_SIZE)) {
if (!(error_code & (PF_RSVD|PF_USER|PF_PROT)) &&
vmalloc_fault(address) >= 0)
@@ -570,7 +585,45 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code)
*/
if (in_atomic() || !mm)
goto bad_area_nosemaphore;
+#else /* CONFIG_X86_64 */
+ if (unlikely(address >= TASK_SIZE64)) {
+ /*
+ * Don't check for the module range here: its PML4
+ * is always initialized because it's shared with the main
+ * kernel text. Only vmalloc may need PML4 syncups.
+ */
+ if (!(error_code & (PF_RSVD|PF_USER|PF_PROT)) &&
+ ((address >= VMALLOC_START && address < VMALLOC_END))) {
+ if (vmalloc_fault(address) >= 0)
+ return;
+ }
+ /*
+ * Don't take the mm semaphore here. If we fixup a prefetch
+ * fault we could otherwise deadlock.
+ */
+ goto bad_area_nosemaphore;
+ }
+ if (likely(regs->flags & X86_EFLAGS_IF))
+ local_irq_enable();
+
+ if (unlikely(error_code & PF_RSVD))
+ pgtable_bad(address, regs, error_code);
+
+ /*
+ * If we're in an interrupt, have no user context or are running in an
+ * atomic region then we must not take the fault.
+ */
+ if (unlikely(in_atomic() || !mm))
+ goto bad_area_nosemaphore;
+ /*
+ * User-mode registers count as a user access even for any
+ * potential system fault or CPU buglet.
+ */
+ if (user_mode_vm(regs))
+ error_code |= PF_USER;
+again:
+#endif
/* When running in the kernel we expect faults to occur only to
* addresses in user space. All other faults represent errors in the
* kernel and should generate an OOPS. Unfortunately, in the case of an
@@ -596,7 +649,11 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code)
vma = find_vma(mm, address);
if (!vma)
goto bad_area;
+#ifdef CONFIG_X86_32
if (vma->vm_start <= address)
+#else
+ if (likely(vma->vm_start <= address))
+#endif
goto good_area;
if (!(vma->vm_flags & VM_GROWSDOWN))
goto bad_area;
@@ -634,7 +691,9 @@ good_area:
goto bad_area;
}
- survive:
+#ifdef CONFIG_X86_32
+survive:
+#endif
/*
* If for any reason at all we couldn't handle the fault,
* make sure we exit gracefully rather than endlessly redo
@@ -705,6 +764,7 @@ bad_area_nosemaphore:
print_vma_addr(" in ", regs->ip);
printk("\n");
}
+
tsk->thread.cr2 = address;
/* Kernel addresses are always protection faults */
tsk->thread.error_code = error_code | (address >= TASK_SIZE);
@@ -722,9 +782,13 @@ no_context:
return;
/*
+ * X86_32
* Valid to do another page fault here, because if this fault
* had been triggered by is_prefetch fixup_exception would have
* handled it.
+ *
+ * X86_64
+ * Hall of shame of CPU/BIOS bugs.
*/
if (is_prefetch(regs, address, error_code))
return;
@@ -736,7 +800,7 @@ no_context:
* Oops. The kernel tried to access some bad page. We'll have to
* terminate things with extreme prejudice.
*/
-
+#ifdef CONFIG_X86_32
bust_spinlocks(1);
show_fault_oops(regs, error_code, address);
@@ -747,6 +811,20 @@ no_context:
die("Oops", regs, error_code);
bust_spinlocks(0);
do_exit(SIGKILL);
+#else /* CONFIG_X86_64 */
+ flags = oops_begin();
+
+ show_fault_oops(regs, error_code, address);
+
+ tsk->thread.cr2 = address;
+ tsk->thread.trap_no = 14;
+ tsk->thread.error_code = error_code;
+ if (__die("Oops", regs, error_code))
+ regs = NULL;
+ /* Executive summary in case the body of the oops scrolled away */
+ printk(KERN_EMERG "CR2: %016lx\n", address);
+ oops_end(flags, regs, SIGKILL);
+#endif
/*
* We ran out of memory, or some other thing happened to us that made
@@ -754,11 +832,18 @@ no_context:
*/
out_of_memory:
up_read(&mm->mmap_sem);
+#ifdef CONFIG_X86_32
if (is_global_init(tsk)) {
yield();
down_read(&mm->mmap_sem);
goto survive;
}
+#else
+ if (is_global_init(current)) {
+ yield();
+ goto again;
+ }
+#endif
printk("VM: killing process %s\n", tsk->comm);
if (error_code & PF_USER)
do_group_exit(SIGKILL);
@@ -770,17 +855,22 @@ do_sigbus:
/* Kernel mode? Handle exceptions or die */
if (!(error_code & PF_USER))
goto no_context;
-
+#ifdef CONFIG_X86_32
/* User space => ok to do another page fault */
if (is_prefetch(regs, address, error_code))
return;
-
+#endif
tsk->thread.cr2 = address;
tsk->thread.error_code = error_code;
tsk->thread.trap_no = 14;
force_sig_info_fault(SIGBUS, BUS_ADRERR, address, tsk);
}
+#ifdef CONFIG_X86_64
+DEFINE_SPINLOCK(pgd_lock);
+LIST_HEAD(pgd_list);
+#endif
+
void vmalloc_sync_all(void)
{
#ifdef CONFIG_X86_32
diff --git a/arch/x86/mm/fault_64.c b/arch/x86/mm/fault_64.c
index edca689c62d..0902719388b 100644
--- a/arch/x86/mm/fault_64.c
+++ b/arch/x86/mm/fault_64.c
@@ -51,7 +51,11 @@ static inline int notify_page_fault(struct pt_regs *regs)
int ret = 0;
/* kprobe_running() needs smp_processor_id() */
+#ifdef CONFIG_X86_32
+ if (!user_mode_vm(regs)) {
+#else
if (!user_mode(regs)) {
+#endif
preempt_disable();
if (kprobe_running() && kprobe_fault_handler(regs, 14))
ret = 1;
@@ -433,6 +437,10 @@ static noinline void pgtable_bad(unsigned long address, struct pt_regs *regs,
#endif
/*
+ * X86_32
+ * Handle a fault on the vmalloc or module mapping area
+ *
+ * X86_64
* Handle a fault on the vmalloc area
*
* This assumes no large pages in there.
@@ -512,16 +520,20 @@ int show_unhandled_signals = 1;
* and the problem, and then passes it off to one of the appropriate
* routines.
*/
-asmlinkage void __kprobes do_page_fault(struct pt_regs *regs,
- unsigned long error_code)
+#ifdef CONFIG_X86_64
+asmlinkage
+#endif
+void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code)
{
struct task_struct *tsk;
struct mm_struct *mm;
struct vm_area_struct *vma;
unsigned long address;
- int write, fault;
+ int write, si_code;
+ int fault;
+#ifdef CONFIG_X86_64
unsigned long flags;
- int si_code;
+#endif
/*
* We can fault from pretty much anywhere, with unknown IRQ state.
@@ -553,6 +565,30 @@ asmlinkage void __kprobes do_page_fault(struct pt_regs *regs,
* (error_code & 4) == 0, and that the fault was not a
* protection error (error_code & 9) == 0.
*/
+#ifdef CONFIG_X86_32
+ if (unlikely(address >= TASK_SIZE)) {
+ if (!(error_code & (PF_RSVD|PF_USER|PF_PROT)) &&
+ vmalloc_fault(address) >= 0)
+ return;
+ /*
+ * Don't take the mm semaphore here. If we fixup a prefetch
+ * fault we could otherwise deadlock.
+ */
+ goto bad_area_nosemaphore;
+ }
+
+ /* It's safe to allow irq's after cr2 has been saved and the vmalloc
+ fault has been handled. */
+ if (regs->flags & (X86_EFLAGS_IF|VM_MASK))
+ local_irq_enable();
+
+ /*
+ * If we're in an interrupt, have no user context or are running in an
+ * atomic region then we must not take the fault.
+ */
+ if (in_atomic() || !mm)
+ goto bad_area_nosemaphore;
+#else /* CONFIG_X86_64 */
if (unlikely(address >= TASK_SIZE64)) {
/*
* Don't check for the module range here: its PML4
@@ -570,7 +606,6 @@ asmlinkage void __kprobes do_page_fault(struct pt_regs *regs,
*/
goto bad_area_nosemaphore;
}
-
if (likely(regs->flags & X86_EFLAGS_IF))
local_irq_enable();
@@ -590,8 +625,8 @@ asmlinkage void __kprobes do_page_fault(struct pt_regs *regs,
*/
if (user_mode_vm(regs))
error_code |= PF_USER;
-
- again:
+again:
+#endif
/* When running in the kernel we expect faults to occur only to
* addresses in user space. All other faults represent errors in the
* kernel and should generate an OOPS. Unfortunately, in the case of an
@@ -617,7 +652,11 @@ asmlinkage void __kprobes do_page_fault(struct pt_regs *regs,
vma = find_vma(mm, address);
if (!vma)
goto bad_area;
+#ifdef CONFIG_X86_32
+ if (vma->vm_start <= address)
+#else
if (likely(vma->vm_start <= address))
+#endif
goto good_area;
if (!(vma->vm_flags & VM_GROWSDOWN))
goto bad_area;
@@ -655,6 +694,9 @@ good_area:
goto bad_area;
}
+#ifdef CONFIG_X86_32
+survive:
+#endif
/*
* If for any reason at all we couldn't handle the fault,
* make sure we exit gracefully rather than endlessly redo
@@ -730,7 +772,6 @@ bad_area_nosemaphore:
/* Kernel addresses are always protection faults */
tsk->thread.error_code = error_code | (address >= TASK_SIZE);
tsk->thread.trap_no = 14;
-
force_sig_info_fault(SIGSEGV, si_code, address, tsk);
return;
}
@@ -744,9 +785,14 @@ no_context:
return;
/*
+ * X86_32
+ * Valid to do another page fault here, because if this fault
+ * had been triggered by is_prefetch fixup_exception would have
+ * handled it.
+ *
+ * X86_64
* Hall of shame of CPU/BIOS bugs.
*/
-
if (is_prefetch(regs, address, error_code))
return;
@@ -757,7 +803,18 @@ no_context:
* Oops. The kernel tried to access some bad page. We'll have to
* terminate things with extreme prejudice.
*/
+#ifdef CONFIG_X86_32
+ bust_spinlocks(1);
+
+ show_fault_oops(regs, error_code, address);
+ tsk->thread.cr2 = address;
+ tsk->thread.trap_no = 14;
+ tsk->thread.error_code = error_code;
+ die("Oops", regs, error_code);
+ bust_spinlocks(0);
+ do_exit(SIGKILL);
+#else /* CONFIG_X86_64 */
flags = oops_begin();
show_fault_oops(regs, error_code, address);
@@ -770,6 +827,7 @@ no_context:
/* Executive summary in case the body of the oops scrolled away */
printk(KERN_EMERG "CR2: %016lx\n", address);
oops_end(flags, regs, SIGKILL);
+#endif
/*
* We ran out of memory, or some other thing happened to us that made
@@ -777,10 +835,18 @@ no_context:
*/
out_of_memory:
up_read(&mm->mmap_sem);
+#ifdef CONFIG_X86_32
+ if (is_global_init(tsk)) {
+ yield();
+ down_read(&mm->mmap_sem);
+ goto survive;
+ }
+#else
if (is_global_init(current)) {
yield();
goto again;
}
+#endif
printk("VM: killing process %s\n", tsk->comm);
if (error_code & PF_USER)
do_group_exit(SIGKILL);
@@ -792,16 +858,21 @@ do_sigbus:
/* Kernel mode? Handle exceptions or die */
if (!(error_code & PF_USER))
goto no_context;
-
+#ifdef CONFIG_X86_32
+ /* User space => ok to do another page fault */
+ if (is_prefetch(regs, address, error_code))
+ return;
+#endif
tsk->thread.cr2 = address;
tsk->thread.error_code = error_code;
tsk->thread.trap_no = 14;
force_sig_info_fault(SIGBUS, BUS_ADRERR, address, tsk);
- return;
}
+#ifdef CONFIG_X86_64
DEFINE_SPINLOCK(pgd_lock);
LIST_HEAD(pgd_list);
+#endif
void vmalloc_sync_all(void)
{