From 22e2c507c301c3dbbcf91b4948b88f78842ee6c9 Mon Sep 17 00:00:00 2001
From: Jens Axboe
Date: Mon, 27 Jun 2005 10:55:12 +0200
Subject: [PATCH] Update cfq io scheduler to time sliced design

This updates the CFQ io scheduler to the new time sliced design (cfq
v3).  It provides full process fairness, while giving excellent
aggregate system throughput even for many competing processes.  It
supports io priorities, either inherited from the cpu nice value or set
directly with the ioprio_get/set syscalls.  The latter closely mimic
set/getpriority.

This import is based on my latest from -mm.

Signed-off-by: Jens Axboe
Signed-off-by: Linus Torvalds
---
 arch/i386/kernel/syscall_table.S | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/arch/i386/kernel/syscall_table.S b/arch/i386/kernel/syscall_table.S
index 442a6e937b1..3db9a04aec6 100644
--- a/arch/i386/kernel/syscall_table.S
+++ b/arch/i386/kernel/syscall_table.S
@@ -289,3 +289,5 @@ ENTRY(sys_call_table)
 	.long sys_add_key
 	.long sys_request_key
 	.long sys_keyctl
+	.long sys_ioprio_set
+	.long sys_ioprio_get		/* 290 */
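These two entries wire the new syscalls into the i386 table as numbers 289 and 290 (the /* 290 */ comment marks sys_ioprio_get). Since no libc wrapper exists yet, the simplest way to exercise them from userspace is through syscall(2). The sketch below is illustrative only: the syscall numbers come from the table above, while the IOPRIO_* constants and the class/data encoding are assumptions about the ioprio interface rather than something this patch defines.

/*
 * Illustrative sketch: put the calling process into the best-effort
 * class at priority 4 and read the value back.  The syscall numbers
 * are the i386 ones added above; the IOPRIO_* encoding (class in the
 * top bits, data in the low 13) is an assumption, not part of this
 * patch.
 */
#include <stdio.h>
#include <unistd.h>
#include <sys/syscall.h>

#ifndef __NR_ioprio_set
#define __NR_ioprio_set         289     /* i386 numbers from the table above */
#define __NR_ioprio_get         290
#endif

#define IOPRIO_WHO_PROCESS      1
#define IOPRIO_CLASS_BE         2
#define IOPRIO_CLASS_SHIFT      13
#define IOPRIO_VALUE(class, data)  (((class) << IOPRIO_CLASS_SHIFT) | (data))

int main(void)
{
        /* who == 0 means "the calling process", mirroring set/getpriority() */
        if (syscall(__NR_ioprio_set, IOPRIO_WHO_PROCESS, 0,
                    IOPRIO_VALUE(IOPRIO_CLASS_BE, 4)) < 0) {
                perror("ioprio_set");
                return 1;
        }
        printf("ioprio now: %ld\n",
               (long)syscall(__NR_ioprio_get, IOPRIO_WHO_PROCESS, 0));
        return 0;
}

The which/who calling convention deliberately mirrors set/getpriority(), as the changelog notes.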
From ffaa8bd6c904d1ab79b677905067349a5ff51d84 Mon Sep 17 00:00:00 2001
From: Andrea Arcangeli
Date: Mon, 27 Jun 2005 14:36:36 -0700
Subject: [PATCH] seccomp: tsc disable

I believe that, at least for seccomp, it's worth turning off the tsc:
not just because of HT, but because of the L2 cache too. So it's up to
you: either you turn it off completely (which isn't very nice IMHO), or
I recommend applying the patch below.

This has been tested successfully on x86-64 against the current cogito
repository (i686 compiles, so I didn't bother testing ;). People
selling their cpu through cpushare may appreciate this bit for peace of
mind.

There's no way to get any timing info anymore with this applied
(gettimeofday is forbidden, of course). The seccomp environment is
completely deterministic, so it can't be allowed to get timing info; it
has to be deterministic so that in the future I can enable a computing
mode that runs each task in parallel, with server-side transparent
checkpointing and verification that the output is the same from all of
the 2/3 seller computers for each task, without the buyer even
noticing. (For now the verification is left to the buyer client side
and there's no checkpointing, since that would require more kernel
changes to track the dirty bits, but it'll be easy to extend once the
basic mode is finished.)

Eliminating a cold-cache read of the cr4 global variable will save one
cacheline during the tlb flush, while making the code per-cpu-safe at
the same time. Thanks to Mikael Pettersson for noticing that the tlb
flush wasn't per-cpu-safe.

The global tlb flush can run from irq context (an IPI calling
do_flush_tlb_all), but it'll be transparent to the switch_to code,
since the IPI won't change the cr4 contents from the point of view of
the interrupted code, and since it's all per-cpu state now it will not
race. So there is no need to disable irqs in the switch_to slow path.

Signed-off-by: Andrea Arcangeli
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 arch/i386/kernel/process.c | 29 +++++++++++++++++++++++++++++
 1 file changed, 29 insertions(+)

diff --git a/arch/i386/kernel/process.c b/arch/i386/kernel/process.c
index 5f8cfa6b794..ba243a4cc11 100644
--- a/arch/i386/kernel/process.c
+++ b/arch/i386/kernel/process.c
@@ -616,6 +616,33 @@ handle_io_bitmap(struct thread_struct *next, struct tss_struct *tss)
 	tss->io_bitmap_base = INVALID_IO_BITMAP_OFFSET_LAZY;
 }
 
+/*
+ * This function selects if the context switch from prev to next
+ * has to tweak the TSC disable bit in the cr4.
+ */
+static inline void disable_tsc(struct task_struct *prev_p,
+			       struct task_struct *next_p)
+{
+	struct thread_info *prev, *next;
+
+	/*
+	 * gcc should eliminate the ->thread_info dereference if
+	 * has_secure_computing returns 0 at compile time (SECCOMP=n).
+	 */
+	prev = prev_p->thread_info;
+	next = next_p->thread_info;
+
+	if (has_secure_computing(prev) || has_secure_computing(next)) {
+		/* slow path here */
+		if (has_secure_computing(prev) &&
+		    !has_secure_computing(next)) {
+			write_cr4(read_cr4() & ~X86_CR4_TSD);
+		} else if (!has_secure_computing(prev) &&
+			   has_secure_computing(next))
+			write_cr4(read_cr4() | X86_CR4_TSD);
+	}
+}
+
 /*
  * switch_to(x,yn) should switch tasks from x to y.
  *
@@ -695,6 +722,8 @@ struct task_struct fastcall * __switch_to(struct task_struct *prev_p, struct tas
 	if (unlikely(prev->io_bitmap_ptr || next->io_bitmap_ptr))
 		handle_io_bitmap(next, tss);
 
+	disable_tsc(prev_p, next_p);
+
 	return prev_p;
 }
 
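Although the patch only touches the context-switch path, its user-visible effect is easy to demonstrate: while a seccomp task is running, CR4.TSD is set, so an rdtsc executed at CPL 3 raises a general protection fault and the task dies with SIGSEGV. The sketch below is a rough illustration; enabling seccomp by writing to /proc/self/seccomp is an assumption about the mode-1 interface of this era and is not something this patch introduces.

/*
 * Rough sketch: read the TSC, enter seccomp mode, then try again.
 * With this patch the second rdtsc should fault (SIGSEGV) because
 * CR4.TSD is now set whenever this task is scheduled.  Writing "1"
 * to /proc/self/seccomp to enter mode 1 is an assumption about the
 * interface of this era, not part of this patch.
 */
#include <stdio.h>
#include <stdint.h>
#include <fcntl.h>
#include <unistd.h>

static inline uint64_t rdtsc(void)
{
        uint32_t lo, hi;

        asm volatile("rdtsc" : "=a" (lo), "=d" (hi));
        return ((uint64_t)hi << 32) | lo;
}

int main(void)
{
        int fd;

        printf("tsc before seccomp: %llu\n", (unsigned long long)rdtsc());
        fflush(stdout);

        fd = open("/proc/self/seccomp", O_WRONLY);
        if (fd < 0 || write(fd, "1", 1) != 1) {
                perror("enable seccomp");
                return 1;
        }

        /* Only read/write/exit/sigreturn are allowed from here on, and
         * with CR4.TSD set the rdtsc below should kill the process with
         * SIGSEGV instead of letting the second message print. */
        write(1, "about to rdtsc\n", 15);
        (void)rdtsc();
        write(1, "rdtsc survived?!\n", 17);
        return 0;
}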
From 4bdbd37f6d01abc4c002bb8de90ea2c3bc7abe7e Mon Sep 17 00:00:00 2001
From: Rusty Lynch
Date: Mon, 27 Jun 2005 15:17:09 -0700
Subject: [PATCH] Return probe redesign: i386 specific changes

The following patch contains the i386 specific changes for the new
return probe design.

Changes include:
 * Removing the architecture specific functions for querying a return
   probe instance off a stack address
 * Complete rework of arch_prepare_kretprobe() and
   trampoline_probe_handler()
 * Removing trampoline_post_handler()
 * Adding arch_init() so that the architecture code now handles
   registering the return probe trampoline instead of kernel/kprobes.c
   doing it

Signed-off-by: Rusty Lynch
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 arch/i386/kernel/kprobes.c | 133 ++++++++++++++++++++++++---------------------
 1 file changed, 70 insertions(+), 63 deletions(-)

diff --git a/arch/i386/kernel/kprobes.c b/arch/i386/kernel/kprobes.c
index 3762f6b35ab..fc8b1752176 100644
--- a/arch/i386/kernel/kprobes.c
+++ b/arch/i386/kernel/kprobes.c
@@ -127,48 +127,23 @@ static inline void prepare_singlestep(struct kprobe *p, struct pt_regs *regs)
 	regs->eip = (unsigned long)&p->ainsn.insn;
 }
 
-struct task_struct *arch_get_kprobe_task(void *ptr)
-{
-	return ((struct thread_info *) (((unsigned long) ptr) &
-					(~(THREAD_SIZE -1))))->task;
-}
-
 void arch_prepare_kretprobe(struct kretprobe *rp, struct pt_regs *regs)
 {
 	unsigned long *sara = (unsigned long *)&regs->esp;
-	struct kretprobe_instance *ri;
-	static void *orig_ret_addr;
+	struct kretprobe_instance *ri;
+
+	if ((ri = get_free_rp_inst(rp)) != NULL) {
+		ri->rp = rp;
+		ri->task = current;
+		ri->ret_addr = (kprobe_opcode_t *) *sara;
 
-	/*
-	 * Save the return address when the return probe hits
-	 * the first time, and use it to populate the (krprobe
-	 * instance)->ret_addr for subsequent return probes at
-	 * the same addrress since stack address would have
-	 * the kretprobe_trampoline by then.
-	 */
-	if (((void*) *sara) != kretprobe_trampoline)
-		orig_ret_addr = (void*) *sara;
-
-	if ((ri = get_free_rp_inst(rp)) != NULL) {
-		ri->rp = rp;
-		ri->stack_addr = sara;
-		ri->ret_addr = orig_ret_addr;
-		add_rp_inst(ri);
 		/* Replace the return addr with trampoline addr */
 		*sara = (unsigned long) &kretprobe_trampoline;
-	} else {
-		rp->nmissed++;
-	}
-}
 
-void arch_kprobe_flush_task(struct task_struct *tk)
-{
-	struct kretprobe_instance *ri;
-	while ((ri = get_rp_inst_tsk(tk)) != NULL) {
-		*((unsigned long *)(ri->stack_addr)) =
-					(unsigned long) ri->ret_addr;
-		recycle_rp_inst(ri);
-	}
+		add_rp_inst(ri);
+	} else {
+		rp->nmissed++;
+	}
 }
 
 /*
@@ -286,36 +261,59 @@ no_kprobe:
  */
 int trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs)
 {
-	struct task_struct *tsk;
-	struct kretprobe_instance *ri;
-	struct hlist_head *head;
-	struct hlist_node *node;
-	unsigned long *sara = ((unsigned long *) &regs->esp) - 1;
-
-	tsk = arch_get_kprobe_task(sara);
-	head = kretprobe_inst_table_head(tsk);
-
-	hlist_for_each_entry(ri, node, head, hlist) {
-		if (ri->stack_addr == sara && ri->rp) {
-			if (ri->rp->handler)
-				ri->rp->handler(ri, regs);
-		}
-	}
-	return 0;
-}
+	struct kretprobe_instance *ri = NULL;
+	struct hlist_head *head;
+	struct hlist_node *node, *tmp;
+	unsigned long orig_ret_address = 0;
+	unsigned long trampoline_address = (unsigned long)&kretprobe_trampoline;
 
-void trampoline_post_handler(struct kprobe *p, struct pt_regs *regs,
-						unsigned long flags)
-{
-	struct kretprobe_instance *ri;
-	/* RA already popped */
-	unsigned long *sara = ((unsigned long *)&regs->esp) - 1;
+	head = kretprobe_inst_table_head(current);
 
-	while ((ri = get_rp_inst(sara))) {
-		regs->eip = (unsigned long)ri->ret_addr;
+	/*
+	 * It is possible to have multiple instances associated with a given
+	 * task either because multiple functions in the call path have a
+	 * return probe installed on them, and/or more than one return probe
+	 * was registered for a target function.
+	 *
+	 * We can handle this because:
+	 *     - instances are always inserted at the head of the list
+	 *     - when multiple return probes are registered for the same
+	 *       function, the first instance's ret_addr will point to the
+	 *       real return address, and all the rest will point to
+	 *       kretprobe_trampoline
+	 */
+	hlist_for_each_entry_safe(ri, node, tmp, head, hlist) {
+		if (ri->task != current)
+			/* another task is sharing our hash bucket */
+			continue;
+
+		if (ri->rp && ri->rp->handler)
+			ri->rp->handler(ri, regs);
+
+		orig_ret_address = (unsigned long)ri->ret_addr;
 		recycle_rp_inst(ri);
+
+		if (orig_ret_address != trampoline_address)
+			/*
+			 * This is the real return address. Any other
+			 * instances associated with this task are for
+			 * other calls deeper on the call stack
+			 */
+			break;
 	}
-	regs->eflags &= ~TF_MASK;
+
+	BUG_ON(!orig_ret_address || (orig_ret_address == trampoline_address));
+	regs->eip = orig_ret_address;
+
+	unlock_kprobes();
+	preempt_enable_no_resched();
+
+	/*
+	 * By returning a non-zero value, we are telling
+	 * kprobe_handler() that we have handled unlocking
+	 * and re-enabling preemption.
+	 */
+	return 1;
 }
 
 /*
@@ -403,8 +401,7 @@ static inline int post_kprobe_handler(struct pt_regs *regs)
 		current_kprobe->post_handler(current_kprobe, regs, 0);
 	}
 
-	if (current_kprobe->post_handler != trampoline_post_handler)
-		resume_execution(current_kprobe, regs);
+	resume_execution(current_kprobe, regs);
 	regs->eflags |= kprobe_saved_eflags;
 
 	/*Restore back the original saved kprobes variables and continue. */
@@ -534,3 +531,13 @@ int longjmp_break_handler(struct kprobe *p, struct pt_regs *regs)
 	}
 	return 0;
 }
+
+static struct kprobe trampoline_p = {
+	.addr = (kprobe_opcode_t *) &kretprobe_trampoline,
+	.pre_handler = trampoline_probe_handler
+};
+
+int __init arch_init(void)
+{
+	return register_kprobe(&trampoline_p);
+}
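For context on how the reworked arch hooks get used: the code above now owns the trampoline registration and all per-task instance bookkeeping, so a return probe user only fills in a struct kretprobe and calls register_kretprobe(). The module sketch below is illustrative, not taken from the patch: the probed function (do_fork) and the use of kallsyms_lookup_name() to resolve its address are assumptions; only the kretprobe structure, the handler signature and the register/unregister calls come from this interface.

/*
 * Illustrative module sketch: report every return from do_fork().
 * The choice of do_fork and resolving it via kallsyms_lookup_name()
 * are assumptions for the example.
 */
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/kprobes.h>
#include <linux/kallsyms.h>

static int ret_handler(struct kretprobe_instance *ri, struct pt_regs *regs)
{
	/* On i386 the function's return value is in eax at this point */
	printk(KERN_INFO "do_fork returned %ld to %s (pid %d)\n",
	       (long)regs->eax, ri->task->comm, ri->task->pid);
	return 0;
}

static struct kretprobe my_rp = {
	.handler   = ret_handler,
	.maxactive = 20,	/* how many calls may be outstanding at once */
};

static int __init rp_init(void)
{
	my_rp.kp.addr = (kprobe_opcode_t *)kallsyms_lookup_name("do_fork");
	if (!my_rp.kp.addr)
		return -ENOENT;
	return register_kretprobe(&my_rp);
}

static void __exit rp_exit(void)
{
	unregister_kretprobe(&my_rp);
	printk(KERN_INFO "missed %d do_fork returns\n", my_rp.nmissed);
}

module_init(rp_init);
module_exit(rp_exit);
MODULE_LICENSE("GPL");

If more probed calls are in flight than maxactive allows, arch_prepare_kretprobe() above simply bumps rp->nmissed instead of planting the trampoline, which is what the module reports on unload.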