diff options
author | Ingo Molnar <mingo@elte.hu> | 2006-03-27 01:16:22 -0800 |
---|---|---|
committer | Linus Torvalds <torvalds@g5.osdl.org> | 2006-03-27 08:44:49 -0800 |
commit | 0771dfefc9e538f077d0b43b6dec19a5a67d0e70 (patch) | |
tree | 696267e69228b7406b337f9651dedc75055a589e /include/linux | |
parent | e9056f13bfcdd054a0c3d730e4e096748d8a363a (diff) |
[PATCH] lightweight robust futexes: core
Add the core infrastructure for robust futexes: structure definitions, the new
syscalls and the do_exit() based cleanup mechanism.
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Arjan van de Ven <arjan@infradead.org>
Acked-by: Ulrich Drepper <drepper@redhat.com>
Cc: Michael Kerrisk <mtk-manpages@gmx.net>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Diffstat (limited to 'include/linux')
-rw-r--r-- | include/linux/futex.h | 95 | ||||
-rw-r--r-- | include/linux/sched.h | 3 | ||||
-rw-r--r-- | include/linux/threads.h | 3 |
3 files changed, 100 insertions, 1 deletions
diff --git a/include/linux/futex.h b/include/linux/futex.h index 10f96c31971..20face6b798 100644 --- a/include/linux/futex.h +++ b/include/linux/futex.h @@ -1,6 +1,8 @@ #ifndef _LINUX_FUTEX_H #define _LINUX_FUTEX_H +#include <linux/sched.h> + /* Second argument to futex syscall */ @@ -11,10 +13,103 @@ #define FUTEX_CMP_REQUEUE 4 #define FUTEX_WAKE_OP 5 +/* + * Support for robust futexes: the kernel cleans up held futexes at + * thread exit time. + */ + +/* + * Per-lock list entry - embedded in user-space locks, somewhere close + * to the futex field. (Note: user-space uses a double-linked list to + * achieve O(1) list add and remove, but the kernel only needs to know + * about the forward link) + * + * NOTE: this structure is part of the syscall ABI, and must not be + * changed. + */ +struct robust_list { + struct robust_list __user *next; +}; + +/* + * Per-thread list head: + * + * NOTE: this structure is part of the syscall ABI, and must only be + * changed if the change is first communicated with the glibc folks. + * (When an incompatible change is done, we'll increase the structure + * size, which glibc will detect) + */ +struct robust_list_head { + /* + * The head of the list. Points back to itself if empty: + */ + struct robust_list list; + + /* + * This relative offset is set by user-space, it gives the kernel + * the relative position of the futex field to examine. This way + * we keep userspace flexible, to freely shape its data-structure, + * without hardcoding any particular offset into the kernel: + */ + long futex_offset; + + /* + * The death of the thread may race with userspace setting + * up a lock's links. So to handle this race, userspace first + * sets this field to the address of the to-be-taken lock, + * then does the lock acquire, and then adds itself to the + * list, and then clears this field. Hence the kernel will + * always have full knowledge of all locks that the thread + * _might_ have taken. We check the owner TID in any case, + * so only truly owned locks will be handled. + */ + struct robust_list __user *list_op_pending; +}; + +/* + * Are there any waiters for this robust futex: + */ +#define FUTEX_WAITERS 0x80000000 + +/* + * The kernel signals via this bit that a thread holding a futex + * has exited without unlocking the futex. The kernel also does + * a FUTEX_WAKE on such futexes, after setting the bit, to wake + * up any possible waiters: + */ +#define FUTEX_OWNER_DIED 0x40000000 + +/* + * Reserved bit: + */ +#define FUTEX_OWNER_PENDING 0x20000000 + +/* + * The rest of the robust-futex field is for the TID: + */ +#define FUTEX_TID_MASK 0x1fffffff + +/* + * A limit of one million locks held per thread (!) ought to be enough + * for some time. This also protects against a deliberately circular + * list. Not worth introducing an rlimit for this: + */ +#define ROBUST_LIST_LIMIT 1048576 + long do_futex(unsigned long uaddr, int op, int val, unsigned long timeout, unsigned long uaddr2, int val2, int val3); +extern int handle_futex_death(unsigned int *uaddr, struct task_struct *curr); + +#ifdef CONFIG_FUTEX +extern void exit_robust_list(struct task_struct *curr); +#else +static inline void exit_robust_list(struct task_struct *curr) +{ +} +#endif + #define FUTEX_OP_SET 0 /* *(int *)UADDR2 = OPARG; */ #define FUTEX_OP_ADD 1 /* *(int *)UADDR2 += OPARG; */ #define FUTEX_OP_OR 2 /* *(int *)UADDR2 |= OPARG; */ diff --git a/include/linux/sched.h b/include/linux/sched.h index 036d14d2bf9..fd4848f2d75 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -35,6 +35,7 @@ #include <linux/topology.h> #include <linux/seccomp.h> #include <linux/rcupdate.h> +#include <linux/futex.h> #include <linux/auxvec.h> /* For AT_VECTOR_SIZE */ @@ -872,6 +873,8 @@ struct task_struct { int cpuset_mems_generation; int cpuset_mem_spread_rotor; #endif + struct robust_list_head __user *robust_list; + atomic_t fs_excl; /* holding fs exclusive resources */ struct rcu_head rcu; }; diff --git a/include/linux/threads.h b/include/linux/threads.h index b59738ac619..e646bcdf261 100644 --- a/include/linux/threads.h +++ b/include/linux/threads.h @@ -28,7 +28,8 @@ #define PID_MAX_DEFAULT (CONFIG_BASE_SMALL ? 0x1000 : 0x8000) /* - * A maximum of 4 million PIDs should be enough for a while: + * A maximum of 4 million PIDs should be enough for a while. + * [NOTE: PID/TIDs are limited to 2^29 ~= 500+ million, see futex.h.] */ #define PID_MAX_LIMIT (CONFIG_BASE_SMALL ? PAGE_SIZE * 8 : \ (sizeof(long) > 4 ? 4 * 1024 * 1024 : PID_MAX_DEFAULT)) |