aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Documentation/kprobes.txt89
-rw-r--r--arch/alpha/kernel/alpha_ksyms.c3
-rw-r--r--arch/alpha/kernel/entry.S10
-rw-r--r--arch/alpha/kernel/osf_sys.c26
-rw-r--r--arch/alpha/kernel/srmcons.c2
-rw-r--r--arch/arm/kernel/setup.c2
-rw-r--r--arch/arm/kernel/smp.c2
-rw-r--r--arch/arm/kernel/sys_arm.c4
-rw-r--r--arch/arm26/kernel/setup.c2
-rw-r--r--arch/arm26/kernel/sys_arm.c4
-rw-r--r--arch/avr32/kernel/sys_avr32.c14
-rw-r--r--arch/cris/arch-v32/kernel/smp.c1
-rw-r--r--arch/cris/kernel/setup.c2
-rw-r--r--arch/frv/kernel/Makefile2
-rw-r--r--arch/frv/kernel/kernel_execve.S33
-rw-r--r--arch/h8300/kernel/sys_h8300.c24
-rw-r--r--arch/i386/Kconfig2
-rw-r--r--arch/i386/kernel/kprobes.c41
-rw-r--r--arch/i386/kernel/process.c6
-rw-r--r--arch/i386/kernel/smpboot.c1
-rw-r--r--arch/i386/kernel/sys_i386.c42
-rw-r--r--arch/i386/kernel/traps.c6
-rw-r--r--arch/ia64/Kconfig2
-rw-r--r--arch/ia64/hp/sim/simserial.c2
-rw-r--r--arch/ia64/kernel/entry.S4
-rw-r--r--arch/ia64/kernel/kprobes.c98
-rw-r--r--arch/ia64/kernel/numa.c1
-rw-r--r--arch/ia64/kernel/process.c2
-rw-r--r--arch/ia64/sn/kernel/sn2/sn_hwperf.c2
-rw-r--r--arch/m32r/kernel/sys_m32r.c22
-rw-r--r--arch/m68k/kernel/sys_m68k.c16
-rw-r--r--arch/m68knommu/kernel/sys_m68k.c15
-rw-r--r--arch/mips/kernel/linux32.c2
-rw-r--r--arch/mips/kernel/signal_n32.c4
-rw-r--r--arch/mips/kernel/syscall.c56
-rw-r--r--arch/mips/kernel/sysirix.c12
-rw-r--r--arch/mips/sgi-ip22/ip22-reset.c3
-rw-r--r--arch/mips/sgi-ip32/ip32-reset.c2
-rw-r--r--arch/parisc/hpux/sys_hpux.c37
-rw-r--r--arch/parisc/kernel/process.c9
-rw-r--r--arch/powerpc/Kconfig2
-rw-r--r--arch/powerpc/kernel/kprobes.c33
-rw-r--r--arch/powerpc/kernel/misc_32.S2
-rw-r--r--arch/powerpc/kernel/misc_64.S2
-rw-r--r--arch/powerpc/kernel/process.c2
-rw-r--r--arch/powerpc/kernel/setup_64.c2
-rw-r--r--arch/powerpc/kernel/syscalls.c14
-rw-r--r--arch/powerpc/platforms/iseries/mf.c2
-rw-r--r--arch/powerpc/platforms/pseries/setup.c2
-rw-r--r--arch/ppc/4xx_io/serial_sicc.c2
-rw-r--r--arch/ppc/kernel/misc.S18
-rw-r--r--arch/s390/kernel/kprobes.c9
-rw-r--r--arch/s390/kernel/sys_s390.c20
-rw-r--r--arch/sh/kernel/setup.c2
-rw-r--r--arch/sh/kernel/smp.c1
-rw-r--r--arch/sh/kernel/sys_sh.c19
-rw-r--r--arch/sh64/kernel/process.c270
-rw-r--r--arch/sh64/kernel/sys_sh64.c21
-rw-r--r--arch/sparc/kernel/sys_sparc.c27
-rw-r--r--arch/sparc/kernel/sys_sunos.c15
-rw-r--r--arch/sparc64/Kconfig2
-rw-r--r--arch/sparc64/kernel/power.c5
-rw-r--r--arch/sparc64/kernel/sys_sparc.c25
-rw-r--r--arch/sparc64/kernel/sys_sunos32.c10
-rw-r--r--arch/sparc64/solaris/misc.c6
-rw-r--r--arch/um/drivers/line.c6
-rw-r--r--arch/um/drivers/mconsole_kern.c6
-rw-r--r--arch/um/include/line.h7
-rw-r--r--arch/um/kernel/syscall.c35
-rw-r--r--arch/um/kernel/um_arch.c6
-rw-r--r--arch/um/os-Linux/process.c5
-rw-r--r--arch/um/os-Linux/sys-i386/tls.c5
-rw-r--r--arch/um/os-Linux/tls.c8
-rw-r--r--arch/um/sys-i386/unmap.c11
-rw-r--r--arch/um/sys-x86_64/syscalls.c2
-rw-r--r--arch/um/sys-x86_64/sysrq.c2
-rw-r--r--arch/um/sys-x86_64/unmap.c11
-rw-r--r--arch/v850/kernel/memcons.c2
-rw-r--r--arch/v850/kernel/simcons.c2
-rw-r--r--arch/v850/kernel/syscalls.c20
-rw-r--r--arch/x86_64/Kconfig2
-rw-r--r--arch/x86_64/ia32/sys_ia32.c38
-rw-r--r--arch/x86_64/kernel/entry.S4
-rw-r--r--arch/x86_64/kernel/kprobes.c48
-rw-r--r--arch/x86_64/kernel/process.c6
-rw-r--r--arch/x86_64/kernel/sys_x86_64.c2
-rw-r--r--arch/xtensa/kernel/syscalls.c22
-rw-r--r--arch/xtensa/platform-iss/console.c2
-rw-r--r--drivers/char/amiserial.c2
-rw-r--r--drivers/char/cyclades.c2
-rw-r--r--drivers/char/epca.c2
-rw-r--r--drivers/char/esp.c2
-rw-r--r--drivers/char/hvc_console.c2
-rw-r--r--drivers/char/hvcs.c2
-rw-r--r--drivers/char/hvsi.c2
-rw-r--r--drivers/char/ip2/ip2main.c2
-rw-r--r--drivers/char/isicom.c2
-rw-r--r--drivers/char/istallion.c2
-rw-r--r--drivers/char/keyboard.c16
-rw-r--r--drivers/char/moxa.c2
-rw-r--r--drivers/char/mxser.c2
-rw-r--r--drivers/char/nwbutton.c5
-rw-r--r--drivers/char/pcmcia/synclink_cs.c2
-rw-r--r--drivers/char/pty.c2
-rw-r--r--drivers/char/random.c4
-rw-r--r--drivers/char/rio/rio_linux.c2
-rw-r--r--drivers/char/riscom8.c2
-rw-r--r--drivers/char/rocket.c2
-rw-r--r--drivers/char/ser_a2232.c2
-rw-r--r--drivers/char/serial167.c2
-rw-r--r--drivers/char/snsc_event.c2
-rw-r--r--drivers/char/specialix.c2
-rw-r--r--drivers/char/stallion.c2
-rw-r--r--drivers/char/sx.c2
-rw-r--r--drivers/char/synclink.c2
-rw-r--r--drivers/char/synclink_gt.c2
-rw-r--r--drivers/char/synclinkmp.c2
-rw-r--r--drivers/char/tty_io.c3
-rw-r--r--drivers/char/viocons.c2
-rw-r--r--drivers/char/vme_scc.c2
-rw-r--r--drivers/char/vt.c3
-rw-r--r--drivers/char/vt_ioctl.c17
-rw-r--r--drivers/infiniband/hw/ipath/ipath_verbs.c2
-rw-r--r--drivers/isdn/capi/capi.c2
-rw-r--r--drivers/isdn/gigaset/interface.c2
-rw-r--r--drivers/isdn/gigaset/proc.c3
-rw-r--r--drivers/isdn/hisax/hisax.h13
-rw-r--r--drivers/isdn/i4l/isdn_tty.c2
-rw-r--r--drivers/media/dvb/dvb-core/dvb_ringbuffer.c1
-rw-r--r--drivers/misc/Makefile3
-rw-r--r--drivers/misc/lkdtm.c342
-rw-r--r--drivers/net/tun.c2
-rw-r--r--drivers/net/wireless/ipw2100.c1
-rw-r--r--drivers/parisc/led.c2
-rw-r--r--drivers/parisc/power.c3
-rw-r--r--drivers/s390/char/con3215.c2
-rw-r--r--drivers/s390/char/fs3270.c11
-rw-r--r--drivers/s390/char/sclp_tty.c2
-rw-r--r--drivers/s390/char/sclp_vt220.c2
-rw-r--r--drivers/s390/char/tty3270.c2
-rw-r--r--drivers/s390/s390mach.c2
-rw-r--r--drivers/sbus/char/aurora.c2
-rw-r--r--drivers/sbus/char/bbc_envctrl.c5
-rw-r--r--drivers/sbus/char/envctrl.c7
-rw-r--r--drivers/scsi/lpfc/lpfc_ct.c8
-rw-r--r--drivers/serial/68328serial.c2
-rw-r--r--drivers/serial/68360serial.c2
-rw-r--r--drivers/serial/crisv10.c2
-rw-r--r--drivers/serial/mcfserial.c2
-rw-r--r--drivers/serial/serial_core.c2
-rw-r--r--drivers/tc/zs.c2
-rw-r--r--drivers/usb/class/cdc-acm.c2
-rw-r--r--drivers/usb/core/devio.c10
-rw-r--r--drivers/usb/core/hcd.c4
-rw-r--r--drivers/usb/core/inode.c2
-rw-r--r--drivers/usb/core/usb.h2
-rw-r--r--drivers/usb/gadget/ether.c2
-rw-r--r--drivers/usb/gadget/file_storage.c2
-rw-r--r--drivers/usb/gadget/gmidi.c2
-rw-r--r--drivers/usb/gadget/serial.c4
-rw-r--r--drivers/usb/gadget/zero.c2
-rw-r--r--drivers/usb/serial/usb-serial.c2
-rw-r--r--fs/cifs/connect.c22
-rw-r--r--fs/cifs/sess.c6
-rw-r--r--fs/compat.c2
-rw-r--r--fs/dnotify.c2
-rw-r--r--fs/exec.c2
-rw-r--r--fs/fcntl.c79
-rw-r--r--fs/file_table.c1
-rw-r--r--fs/inode.c14
-rw-r--r--fs/lockd/clntlock.c2
-rw-r--r--fs/lockd/clntproc.c4
-rw-r--r--fs/lockd/mon.c2
-rw-r--r--fs/lockd/svc.c74
-rw-r--r--fs/lockd/svclock.c2
-rw-r--r--fs/lockd/xdr.c2
-rw-r--r--fs/locks.c2
-rw-r--r--fs/namespace.c22
-rw-r--r--fs/nfs/callback.c7
-rw-r--r--fs/nfs/client.c3
-rw-r--r--fs/nfs/nfsroot.c2
-rw-r--r--fs/nfsd/export.c12
-rw-r--r--fs/nfsd/nfs4callback.c2
-rw-r--r--fs/nfsd/nfs4proc.c2
-rw-r--r--fs/nfsd/nfsctl.c152
-rw-r--r--fs/nfsd/nfssvc.c318
-rw-r--r--fs/nfsd/vfs.c8
-rw-r--r--fs/nls/nls_base.c2
-rw-r--r--fs/proc/array.c85
-rw-r--r--fs/proc/base.c1759
-rw-r--r--fs/proc/proc_misc.c3
-rw-r--r--fs/proc/root.c12
-rw-r--r--include/asm-alpha/unistd.h69
-rw-r--r--include/asm-arm/unistd.h24
-rw-r--r--include/asm-arm26/unistd.h24
-rw-r--r--include/asm-avr32/unistd.h80
-rw-r--r--include/asm-cris/unistd.h61
-rw-r--r--include/asm-frv/unistd.h25
-rw-r--r--include/asm-h8300/unistd.h51
-rw-r--r--include/asm-i386/bugs.h2
-rw-r--r--include/asm-i386/elf.h2
-rw-r--r--include/asm-i386/ptrace.h3
-rw-r--r--include/asm-i386/unistd.h39
-rw-r--r--include/asm-ia64/ptrace.h3
-rw-r--r--include/asm-ia64/unistd.h72
-rw-r--r--include/asm-m32r/unistd.h37
-rw-r--r--include/asm-m68k/unistd.h6
-rw-r--r--include/asm-m68knommu/unistd.h55
-rw-r--r--include/asm-mips/unistd.h39
-rw-r--r--include/asm-parisc/unistd.h86
-rw-r--r--include/asm-powerpc/kprobes.h22
-rw-r--r--include/asm-powerpc/ptrace.h2
-rw-r--r--include/asm-powerpc/unistd.h7
-rw-r--r--include/asm-s390/ptrace.h1
-rw-r--r--include/asm-s390/unistd.h51
-rw-r--r--include/asm-sh/bugs.h2
-rw-r--r--include/asm-sh/unistd.h70
-rw-r--r--include/asm-sh64/unistd.h41
-rw-r--r--include/asm-sparc/unistd.h47
-rw-r--r--include/asm-sparc64/unistd.h42
-rw-r--r--include/asm-um/unistd.h28
-rw-r--r--include/asm-v850/unistd.h51
-rw-r--r--include/asm-x86_64/ptrace.h2
-rw-r--r--include/asm-x86_64/unistd.h93
-rw-r--r--include/asm-xtensa/unistd.h5
-rw-r--r--include/linux/compat.h1
-rw-r--r--include/linux/console_struct.h2
-rw-r--r--include/linux/fs.h5
-rw-r--r--include/linux/genalloc.h1
-rw-r--r--include/linux/init_task.h12
-rw-r--r--include/linux/ipc.h54
-rw-r--r--include/linux/kprobes.h8
-rw-r--r--include/linux/lockd/bind.h2
-rw-r--r--include/linux/lockd/lockd.h2
-rw-r--r--include/linux/module.h2
-rw-r--r--include/linux/namespace.h6
-rw-r--r--include/linux/nfsd/nfsd.h5
-rw-r--r--include/linux/nfsd/nfsfh.h11
-rw-r--r--include/linux/nfsd/syscall.h17
-rw-r--r--include/linux/nodemask.h2
-rw-r--r--include/linux/nsproxy.h52
-rw-r--r--include/linux/pid.h53
-rw-r--r--include/linux/proc_fs.h10
-rw-r--r--include/linux/pspace.h23
-rw-r--r--include/linux/sched.h55
-rw-r--r--include/linux/sunrpc/svc.h69
-rw-r--r--include/linux/sunrpc/svcsock.h15
-rw-r--r--include/linux/syscalls.h2
-rw-r--r--include/linux/tty_driver.h3
-rw-r--r--include/linux/unistd.h6
-rw-r--r--include/linux/utsname.h55
-rw-r--r--include/linux/vt_kern.h7
-rw-r--r--init/Kconfig17
-rw-r--r--init/do_mounts_initrd.c3
-rw-r--r--init/main.c6
-rw-r--r--init/version.c23
-rw-r--r--ipc/mqueue.c27
-rw-r--r--ipc/msg.c157
-rw-r--r--ipc/sem.c206
-rw-r--r--ipc/shm.c254
-rw-r--r--ipc/util.c132
-rw-r--r--ipc/util.h24
-rw-r--r--kernel/Makefile3
-rw-r--r--kernel/compat.c2
-rw-r--r--kernel/exit.c11
-rw-r--r--kernel/fork.c81
-rw-r--r--kernel/futex.c2
-rw-r--r--kernel/kallsyms.c1
-rw-r--r--kernel/kmod.c5
-rw-r--r--kernel/kprobes.c53
-rw-r--r--kernel/lockdep.c6
-rw-r--r--kernel/module.c37
-rw-r--r--kernel/nsproxy.c139
-rw-r--r--kernel/pid.c111
-rw-r--r--kernel/power/snapshot.c10
-rw-r--r--kernel/sched.c3
-rw-r--r--kernel/signal.c65
-rw-r--r--kernel/sys.c22
-rw-r--r--kernel/sysctl.c350
-rw-r--r--kernel/utsname.c95
-rw-r--r--lib/Kconfig.debug14
-rw-r--r--lib/Makefile2
-rw-r--r--lib/cpumask.c16
-rw-r--r--lib/errno.c7
-rw-r--r--lib/genalloc.c63
-rw-r--r--net/bluetooth/rfcomm/tty.c2
-rw-r--r--net/ipv4/ipconfig.c16
-rw-r--r--net/ipv4/tcp_probe.c6
-rw-r--r--net/irda/ircomm/ircomm_tty.c2
-rw-r--r--net/socket.c2
-rw-r--r--net/sunrpc/clnt.c4
-rw-r--r--net/sunrpc/sunrpc_syms.c2
-rw-r--r--net/sunrpc/svc.c457
-rw-r--r--net/sunrpc/svcauth_unix.c5
-rw-r--r--net/sunrpc/svcsock.c375
-rw-r--r--sound/core/info_oss.c10
296 files changed, 5303 insertions, 3695 deletions
diff --git a/Documentation/kprobes.txt b/Documentation/kprobes.txt
index 2c3b1eae428..ba26201d502 100644
--- a/Documentation/kprobes.txt
+++ b/Documentation/kprobes.txt
@@ -151,9 +151,9 @@ So that you can load and unload Kprobes-based instrumentation modules,
make sure "Loadable module support" (CONFIG_MODULES) and "Module
unloading" (CONFIG_MODULE_UNLOAD) are set to "y".
-You may also want to ensure that CONFIG_KALLSYMS and perhaps even
-CONFIG_KALLSYMS_ALL are set to "y", since kallsyms_lookup_name()
-is a handy, version-independent way to find a function's address.
+Also make sure that CONFIG_KALLSYMS and perhaps even CONFIG_KALLSYMS_ALL
+are set to "y", since kallsyms_lookup_name() is used by the in-kernel
+kprobe address resolution code.
If you need to insert a probe in the middle of a function, you may find
it useful to "Compile the kernel with debug info" (CONFIG_DEBUG_INFO),
@@ -179,6 +179,27 @@ occurs during execution of kp->pre_handler or kp->post_handler,
or during single-stepping of the probed instruction, Kprobes calls
kp->fault_handler. Any or all handlers can be NULL.
+NOTE:
+1. With the introduction of the "symbol_name" field to struct kprobe,
+the probepoint address resolution will now be taken care of by the kernel.
+The following will now work:
+
+ kp.symbol_name = "symbol_name";
+
+(64-bit powerpc intricacies such as function descriptors are handled
+transparently)
+
+2. Use the "offset" field of struct kprobe if the offset into the symbol
+to install a probepoint is known. This field is used to calculate the
+probepoint.
+
+3. Specify either the kprobe "symbol_name" OR the "addr". If both are
+specified, kprobe registration will fail with -EINVAL.
+
+4. With CISC architectures (such as i386 and x86_64), the kprobes code
+does not validate if the kprobe.addr is at an instruction boundary.
+Use "offset" with caution.
+
register_kprobe() returns 0 on success, or a negative errno otherwise.
User's pre-handler (kp->pre_handler):
@@ -225,6 +246,12 @@ control to Kprobes.) If the probed function is declared asmlinkage,
fastcall, or anything else that affects how args are passed, the
handler's declaration must match.
+NOTE: A macro JPROBE_ENTRY is provided to handle architecture-specific
+aliasing of jp->entry. In the interest of portability, it is advised
+to use:
+
+ jp->entry = JPROBE_ENTRY(handler);
+
register_jprobe() returns 0 on success, or a negative errno otherwise.
4.3 register_kretprobe
@@ -251,6 +278,11 @@ of interest:
- ret_addr: the return address
- rp: points to the corresponding kretprobe object
- task: points to the corresponding task struct
+
+The regs_return_value(regs) macro provides a simple abstraction to
+extract the return value from the appropriate register as defined by
+the architecture's ABI.
+
The handler's return value is currently ignored.
4.4 unregister_*probe
@@ -369,7 +401,6 @@ stack trace and selected i386 registers when do_fork() is called.
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/kprobes.h>
-#include <linux/kallsyms.h>
#include <linux/sched.h>
/*For each probe you need to allocate a kprobe structure*/
@@ -403,18 +434,14 @@ int handler_fault(struct kprobe *p, struct pt_regs *regs, int trapnr)
return 0;
}
-int init_module(void)
+static int __init kprobe_init(void)
{
int ret;
kp.pre_handler = handler_pre;
kp.post_handler = handler_post;
kp.fault_handler = handler_fault;
- kp.addr = (kprobe_opcode_t*) kallsyms_lookup_name("do_fork");
- /* register the kprobe now */
- if (!kp.addr) {
- printk("Couldn't find %s to plant kprobe\n", "do_fork");
- return -1;
- }
+ kp.symbol_name = "do_fork";
+
if ((ret = register_kprobe(&kp) < 0)) {
printk("register_kprobe failed, returned %d\n", ret);
return -1;
@@ -423,12 +450,14 @@ int init_module(void)
return 0;
}
-void cleanup_module(void)
+static void __exit kprobe_exit(void)
{
unregister_kprobe(&kp);
printk("kprobe unregistered\n");
}
+module_init(kprobe_init)
+module_exit(kprobe_exit)
MODULE_LICENSE("GPL");
----- cut here -----
@@ -463,7 +492,6 @@ the arguments of do_fork().
#include <linux/fs.h>
#include <linux/uio.h>
#include <linux/kprobes.h>
-#include <linux/kallsyms.h>
/*
* Jumper probe for do_fork.
@@ -485,17 +513,13 @@ long jdo_fork(unsigned long clone_flags, unsigned long stack_start,
}
static struct jprobe my_jprobe = {
- .entry = (kprobe_opcode_t *) jdo_fork
+ .entry = JPROBE_ENTRY(jdo_fork)
};
-int init_module(void)
+static int __init jprobe_init(void)
{
int ret;
- my_jprobe.kp.addr = (kprobe_opcode_t *) kallsyms_lookup_name("do_fork");
- if (!my_jprobe.kp.addr) {
- printk("Couldn't find %s to plant jprobe\n", "do_fork");
- return -1;
- }
+ my_jprobe.kp.symbol_name = "do_fork";
if ((ret = register_jprobe(&my_jprobe)) <0) {
printk("register_jprobe failed, returned %d\n", ret);
@@ -506,12 +530,14 @@ int init_module(void)
return 0;
}
-void cleanup_module(void)
+static void __exit jprobe_exit(void)
{
unregister_jprobe(&my_jprobe);
printk("jprobe unregistered\n");
}
+module_init(jprobe_init)
+module_exit(jprobe_exit)
MODULE_LICENSE("GPL");
----- cut here -----
@@ -530,16 +556,13 @@ report failed calls to sys_open().
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/kprobes.h>
-#include <linux/kallsyms.h>
static const char *probed_func = "sys_open";
/* Return-probe handler: If the probed function fails, log the return value. */
static int ret_handler(struct kretprobe_instance *ri, struct pt_regs *regs)
{
- // Substitute the appropriate register name for your architecture --
- // e.g., regs->rax for x86_64, regs->gpr[3] for ppc64.
- int retval = (int) regs->eax;
+ int retval = regs_return_value(regs);
if (retval < 0) {
printk("%s returns %d\n", probed_func, retval);
}
@@ -552,15 +575,11 @@ static struct kretprobe my_kretprobe = {
.maxactive = 20
};
-int init_module(void)
+static int __init kretprobe_init(void)
{
int ret;
- my_kretprobe.kp.addr =
- (kprobe_opcode_t *) kallsyms_lookup_name(probed_func);
- if (!my_kretprobe.kp.addr) {
- printk("Couldn't find %s to plant return probe\n", probed_func);
- return -1;
- }
+ my_kretprobe.kp.symbol_name = (char *)probed_func;
+
if ((ret = register_kretprobe(&my_kretprobe)) < 0) {
printk("register_kretprobe failed, returned %d\n", ret);
return -1;
@@ -569,7 +588,7 @@ int init_module(void)
return 0;
}
-void cleanup_module(void)
+static void __exit kretprobe_exit(void)
{
unregister_kretprobe(&my_kretprobe);
printk("kretprobe unregistered\n");
@@ -578,6 +597,8 @@ void cleanup_module(void)
my_kretprobe.nmissed, probed_func);
}
+module_init(kretprobe_init)
+module_exit(kretprobe_exit)
MODULE_LICENSE("GPL");
----- cut here -----
@@ -590,3 +611,5 @@ messages.)
For additional information on Kprobes, refer to the following URLs:
http://www-106.ibm.com/developerworks/library/l-kprobes.html?ca=dgr-lnxw42Kprobe
http://www.redhat.com/magazine/005mar05/features/kprobes/
+http://www-users.cs.umn.edu/~boutcher/kprobes/
+http://www.linuxsymposium.org/2006/linuxsymposium_procv2.pdf (pages 101-115)
diff --git a/arch/alpha/kernel/alpha_ksyms.c b/arch/alpha/kernel/alpha_ksyms.c
index f042cc42b00..dbe327d32b6 100644
--- a/arch/alpha/kernel/alpha_ksyms.c
+++ b/arch/alpha/kernel/alpha_ksyms.c
@@ -36,7 +36,6 @@
#include <asm/cacheflush.h>
#include <asm/vga.h>
-#define __KERNEL_SYSCALLS__
#include <asm/unistd.h>
extern struct hwrpb_struct *hwrpb;
@@ -116,7 +115,7 @@ EXPORT_SYMBOL(sys_dup);
EXPORT_SYMBOL(sys_exit);
EXPORT_SYMBOL(sys_write);
EXPORT_SYMBOL(sys_lseek);
-EXPORT_SYMBOL(execve);
+EXPORT_SYMBOL(kernel_execve);
EXPORT_SYMBOL(sys_setsid);
EXPORT_SYMBOL(sys_wait4);
diff --git a/arch/alpha/kernel/entry.S b/arch/alpha/kernel/entry.S
index 01ecd09d4a6..c95e95e1ab0 100644
--- a/arch/alpha/kernel/entry.S
+++ b/arch/alpha/kernel/entry.S
@@ -655,12 +655,12 @@ kernel_thread:
.end kernel_thread
/*
- * execve(path, argv, envp)
+ * kernel_execve(path, argv, envp)
*/
.align 4
- .globl execve
- .ent execve
-execve:
+ .globl kernel_execve
+ .ent kernel_execve
+kernel_execve:
/* We can be called from a module. */
ldgp $gp, 0($27)
lda $sp, -(32+SIZEOF_PT_REGS+8)($sp)
@@ -704,7 +704,7 @@ execve:
1: lda $sp, 32+SIZEOF_PT_REGS+8($sp)
ret
-.end execve
+.end kernel_execve
/*
diff --git a/arch/alpha/kernel/osf_sys.c b/arch/alpha/kernel/osf_sys.c
index 73c7622b529..8a31fc1bfb1 100644
--- a/arch/alpha/kernel/osf_sys.c
+++ b/arch/alpha/kernel/osf_sys.c
@@ -402,15 +402,15 @@ osf_utsname(char __user *name)
down_read(&uts_sem);
error = -EFAULT;
- if (copy_to_user(name + 0, system_utsname.sysname, 32))
+ if (copy_to_user(name + 0, utsname()->sysname, 32))
goto out;
- if (copy_to_user(name + 32, system_utsname.nodename, 32))
+ if (copy_to_user(name + 32, utsname()->nodename, 32))
goto out;
- if (copy_to_user(name + 64, system_utsname.release, 32))
+ if (copy_to_user(name + 64, utsname()->release, 32))
goto out;
- if (copy_to_user(name + 96, system_utsname.version, 32))
+ if (copy_to_user(name + 96, utsname()->version, 32))
goto out;
- if (copy_to_user(name + 128, system_utsname.machine, 32))
+ if (copy_to_user(name + 128, utsname()->machine, 32))
goto out;
error = 0;
@@ -449,8 +449,8 @@ osf_getdomainname(char __user *name, int namelen)
down_read(&uts_sem);
for (i = 0; i < len; ++i) {
- __put_user(system_utsname.domainname[i], name + i);
- if (system_utsname.domainname[i] == '\0')
+ __put_user(utsname()->domainname[i], name + i);
+ if (utsname()->domainname[i] == '\0')
break;
}
up_read(&uts_sem);
@@ -607,12 +607,12 @@ osf_sigstack(struct sigstack __user *uss, struct sigstack __user *uoss)
asmlinkage long
osf_sysinfo(int command, char __user *buf, long count)
{
- static char * sysinfo_table[] = {
- system_utsname.sysname,
- system_utsname.nodename,
- system_utsname.release,
- system_utsname.version,
- system_utsname.machine,
+ char *sysinfo_table[] = {
+ utsname()->sysname,
+ utsname()->nodename,
+ utsname()->release,
+ utsname()->version,
+ utsname()->machine,
"alpha", /* instruction set architecture */
"dummy", /* hardware serial number */
"dummy", /* hardware manufacturer */
diff --git a/arch/alpha/kernel/srmcons.c b/arch/alpha/kernel/srmcons.c
index 9d7dff27f81..75692320386 100644
--- a/arch/alpha/kernel/srmcons.c
+++ b/arch/alpha/kernel/srmcons.c
@@ -229,7 +229,7 @@ srmcons_close(struct tty_struct *tty, struct file *filp)
static struct tty_driver *srmcons_driver;
-static struct tty_operations srmcons_ops = {
+static const struct tty_operations srmcons_ops = {
.open = srmcons_open,
.close = srmcons_close,
.write = srmcons_write,
diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c
index 0a722e77c14..6bbd93dd186 100644
--- a/arch/arm/kernel/setup.c
+++ b/arch/arm/kernel/setup.c
@@ -348,7 +348,7 @@ static void __init setup_processor(void)
cpu_name, processor_id, (int)processor_id & 15,
proc_arch[cpu_architecture()], cr_alignment);
- sprintf(system_utsname.machine, "%s%c", list->arch_name, ENDIANNESS);
+ sprintf(init_utsname()->machine, "%s%c", list->arch_name, ENDIANNESS);
sprintf(elf_platform, "%s%c", list->elf_name, ENDIANNESS);
elf_hwcap = list->elf_hwcap;
#ifndef CONFIG_ARM_THUMB
diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c
index 68e9634d260..421329f5e18 100644
--- a/arch/arm/kernel/smp.c
+++ b/arch/arm/kernel/smp.c
@@ -36,7 +36,9 @@
* The online bitmask indicates that the CPU is up and running.
*/
cpumask_t cpu_possible_map;
+EXPORT_SYMBOL(cpu_possible_map);
cpumask_t cpu_online_map;
+EXPORT_SYMBOL(cpu_online_map);
/*
* as from 2.5, kernels no longer have an init_tasks structure
diff --git a/arch/arm/kernel/sys_arm.c b/arch/arm/kernel/sys_arm.c
index 8170af47143..00c18d35913 100644
--- a/arch/arm/kernel/sys_arm.c
+++ b/arch/arm/kernel/sys_arm.c
@@ -279,7 +279,7 @@ out:
return error;
}
-long execve(const char *filename, char **argv, char **envp)
+int kernel_execve(const char *filename, char *const argv[], char *const envp[])
{
struct pt_regs regs;
int ret;
@@ -317,7 +317,7 @@ long execve(const char *filename, char **argv, char **envp)
out:
return ret;
}
-EXPORT_SYMBOL(execve);
+EXPORT_SYMBOL(kernel_execve);
/*
* Since loff_t is a 64 bit type we avoid a lot of ABI hastle
diff --git a/arch/arm26/kernel/setup.c b/arch/arm26/kernel/setup.c
index e7eb070f794..466ddb54b44 100644
--- a/arch/arm26/kernel/setup.c
+++ b/arch/arm26/kernel/setup.c
@@ -143,7 +143,7 @@ static void __init setup_processor(void)
dump_cpu_info();
- sprintf(system_utsname.machine, "%s", list->arch_name);
+ sprintf(init_utsname()->machine, "%s", list->arch_name);
sprintf(elf_platform, "%s", list->elf_name);
elf_hwcap = list->elf_hwcap;
diff --git a/arch/arm26/kernel/sys_arm.c b/arch/arm26/kernel/sys_arm.c
index 85457897b8a..dc05aba58ba 100644
--- a/arch/arm26/kernel/sys_arm.c
+++ b/arch/arm26/kernel/sys_arm.c
@@ -283,7 +283,7 @@ out:
}
/* FIXME - see if this is correct for arm26 */
-long execve(const char *filename, char **argv, char **envp)
+int kernel_execve(const char *filename, char *const argv[], char *const envp[])
{
struct pt_regs regs;
int ret;
@@ -320,4 +320,4 @@ long execve(const char *filename, char **argv, char **envp)
return ret;
}
-EXPORT_SYMBOL(execve);
+EXPORT_SYMBOL(kernel_execve);
diff --git a/arch/avr32/kernel/sys_avr32.c b/arch/avr32/kernel/sys_avr32.c
index 6ec5693da44..8deb6003ee6 100644
--- a/arch/avr32/kernel/sys_avr32.c
+++ b/arch/avr32/kernel/sys_avr32.c
@@ -49,3 +49,17 @@ asmlinkage long sys_mmap2(unsigned long addr, unsigned long len,
fput(file);
return error;
}
+
+int kernel_execve(const char *file, char **argv, char **envp)
+{
+ register long scno asm("r8") = __NR_execve;
+ register long sc1 asm("r12") = (long)file;
+ register long sc2 asm("r11") = (long)argv;
+ register long sc3 asm("r10") = (long)envp;
+
+ asm volatile("scall"
+ : "=r"(sc1)
+ : "r"(scno), "0"(sc1), "r"(sc2), "r"(sc3)
+ : "cc", "memory");
+ return sc1;
+}
diff --git a/arch/cris/arch-v32/kernel/smp.c b/arch/cris/arch-v32/kernel/smp.c
index 464ecaec3bc..2d0023f2d49 100644
--- a/arch/cris/arch-v32/kernel/smp.c
+++ b/arch/cris/arch-v32/kernel/smp.c
@@ -28,6 +28,7 @@ spinlock_t cris_atomic_locks[] = { [0 ... LOCK_COUNT - 1] = SPIN_LOCK_UNLOCKED};
/* CPU masks */
cpumask_t cpu_online_map = CPU_MASK_NONE;
+EXPORT_SYMBOL(cpu_online_map);
cpumask_t phys_cpu_present_map = CPU_MASK_NONE;
EXPORT_SYMBOL(phys_cpu_present_map);
diff --git a/arch/cris/kernel/setup.c b/arch/cris/kernel/setup.c
index 7af3d5d43e4..ca8b45a0fe2 100644
--- a/arch/cris/kernel/setup.c
+++ b/arch/cris/kernel/setup.c
@@ -160,7 +160,7 @@ setup_arch(char **cmdline_p)
show_etrax_copyright();
/* Setup utsname */
- strcpy(system_utsname.machine, cris_machine_name);
+ strcpy(init_utsname()->machine, cris_machine_name);
}
static void *c_start(struct seq_file *m, loff_t *pos)
diff --git a/arch/frv/kernel/Makefile b/arch/frv/kernel/Makefile
index 32db3499c46..e8f73ed28b5 100644
--- a/arch/frv/kernel/Makefile
+++ b/arch/frv/kernel/Makefile
@@ -8,7 +8,7 @@ heads-$(CONFIG_MMU) := head-mmu-fr451.o
extra-y:= head.o init_task.o vmlinux.lds
obj-y := $(heads-y) entry.o entry-table.o break.o switch_to.o kernel_thread.o \
- process.o traps.o ptrace.o signal.o dma.o \
+ kernel_execve.o process.o traps.o ptrace.o signal.o dma.o \
sys_frv.o time.o semaphore.o setup.o frv_ksyms.o \
debug-stub.o irq.o sleep.o uaccess.o
diff --git a/arch/frv/kernel/kernel_execve.S b/arch/frv/kernel/kernel_execve.S
new file mode 100644
index 00000000000..9b074a16a05
--- /dev/null
+++ b/arch/frv/kernel/kernel_execve.S
@@ -0,0 +1,33 @@
+/* in-kernel program execution
+ *
+ * Copyright (C) 2006 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/linkage.h>
+#include <asm/unistd.h>
+
+###############################################################################
+#
+# Do a system call from kernel instead of calling sys_execve so we end up with
+# proper pt_regs.
+#
+# int kernel_execve(const char *filename, char *const argv[], char *const envp[])
+#
+# On entry: GR8/GR9/GR10: arguments to function
+# On return: GR8: syscall return.
+#
+###############################################################################
+ .globl kernel_execve
+ .type kernel_execve,@function
+kernel_execve:
+ setlos __NR_execve,gr7
+ tira gr0,#0
+ bralr
+
+ .size kernel_execve,.-kernel_execve
diff --git a/arch/h8300/kernel/sys_h8300.c b/arch/h8300/kernel/sys_h8300.c
index 0f61b7ad69a..302a2dfe634 100644
--- a/arch/h8300/kernel/sys_h8300.c
+++ b/arch/h8300/kernel/sys_h8300.c
@@ -25,6 +25,7 @@
#include <asm/cachectl.h>
#include <asm/traps.h>
#include <asm/ipc.h>
+#include <asm/unistd.h>
/*
* sys_pipe() is the normal C calling standard for creating
@@ -280,3 +281,26 @@ asmlinkage void syscall_print(void *dummy,...)
((regs->pc)&0xffffff)-2,regs->orig_er0,regs->er1,regs->er2,regs->er3,regs->er0);
}
#endif
+
+/*
+ * Do a system call from kernel instead of calling sys_execve so we
+ * end up with proper pt_regs.
+ */
+int kernel_execve(const char *filename, char *const argv[], char *const envp[])
+{
+ register long res __asm__("er0");
+ register const char * _a __asm__("er1") = filename;
+ register void *_b __asm__("er2") = argv;
+ register void *_c __asm__("er3") = envp;
+ __asm__ __volatile__ ("mov.l %1,er0\n\t"
+ "trapa #0\n\t"
+ : "=r" (res)
+ : "g" (__NR_execve),
+ "g" (_a),
+ "g" (_b),
+ "g" (_c)
+ : "cc", "memory");
+ return res;
+}
+
+
diff --git a/arch/i386/Kconfig b/arch/i386/Kconfig
index 3fd2f256f2b..af219e51734 100644
--- a/arch/i386/Kconfig
+++ b/arch/i386/Kconfig
@@ -1142,7 +1142,7 @@ source "arch/i386/oprofile/Kconfig"
config KPROBES
bool "Kprobes (EXPERIMENTAL)"
- depends on EXPERIMENTAL && MODULES
+ depends on KALLSYMS && EXPERIMENTAL && MODULES
help
Kprobes allows you to trap at almost any kernel address and
execute a callback function. register_kprobe() establishes
diff --git a/arch/i386/kernel/kprobes.c b/arch/i386/kernel/kprobes.c
index afe6505ca0b..d98e44b16fe 100644
--- a/arch/i386/kernel/kprobes.c
+++ b/arch/i386/kernel/kprobes.c
@@ -230,20 +230,20 @@ void __kprobes arch_prepare_kretprobe(struct kretprobe *rp,
struct pt_regs *regs)
{
unsigned long *sara = (unsigned long *)&regs->esp;
- struct kretprobe_instance *ri;
- if ((ri = get_free_rp_inst(rp)) != NULL) {
- ri->rp = rp;
- ri->task = current;
+ struct kretprobe_instance *ri;
+
+ if ((ri = get_free_rp_inst(rp)) != NULL) {
+ ri->rp = rp;
+ ri->task = current;
ri->ret_addr = (kprobe_opcode_t *) *sara;
/* Replace the return addr with trampoline addr */
*sara = (unsigned long) &kretprobe_trampoline;
-
- add_rp_inst(ri);
- } else {
- rp->nmissed++;
- }
+ add_rp_inst(ri);
+ } else {
+ rp->nmissed++;
+ }
}
/*
@@ -359,7 +359,7 @@ no_kprobe:
void __kprobes kretprobe_trampoline_holder(void)
{
asm volatile ( ".global kretprobe_trampoline\n"
- "kretprobe_trampoline: \n"
+ "kretprobe_trampoline: \n"
" pushf\n"
/* skip cs, eip, orig_eax, es, ds */
" subl $20, %esp\n"
@@ -395,14 +395,15 @@ no_kprobe:
*/
fastcall void *__kprobes trampoline_handler(struct pt_regs *regs)
{
- struct kretprobe_instance *ri = NULL;
- struct hlist_head *head;
- struct hlist_node *node, *tmp;
+ struct kretprobe_instance *ri = NULL;
+ struct hlist_head *head, empty_rp;
+ struct hlist_node *node, *tmp;
unsigned long flags, orig_ret_address = 0;
unsigned long trampoline_address =(unsigned long)&kretprobe_trampoline;
+ INIT_HLIST_HEAD(&empty_rp);
spin_lock_irqsave(&kretprobe_lock, flags);
- head = kretprobe_inst_table_head(current);
+ head = kretprobe_inst_table_head(current);
/*
* It is possible to have multiple instances associated with a given
@@ -413,14 +414,14 @@ fastcall void *__kprobes trampoline_handler(struct pt_regs *regs)
* We can handle this because:
* - instances are always inserted at the head of the list
* - when multiple return probes are registered for the same
- * function, the first instance's ret_addr will point to the
+ * function, the first instance's ret_addr will point to the
* real return address, and all the rest will point to
* kretprobe_trampoline
*/
hlist_for_each_entry_safe(ri, node, tmp, head, hlist) {
- if (ri->task != current)
+ if (ri->task != current)
/* another task is sharing our hash bucket */
- continue;
+ continue;
if (ri->rp && ri->rp->handler){
__get_cpu_var(current_kprobe) = &ri->rp->kp;
@@ -429,7 +430,7 @@ fastcall void *__kprobes trampoline_handler(struct pt_regs *regs)
}
orig_ret_address = (unsigned long)ri->ret_addr;
- recycle_rp_inst(ri);
+ recycle_rp_inst(ri, &empty_rp);
if (orig_ret_address != trampoline_address)
/*
@@ -444,6 +445,10 @@ fastcall void *__kprobes trampoline_handler(struct pt_regs *regs)
spin_unlock_irqrestore(&kretprobe_lock, flags);
+ hlist_for_each_entry_safe(ri, node, tmp, &empty_rp, hlist) {
+ hlist_del(&ri->hlist);
+ kfree(ri);
+ }
return (void*)orig_ret_address;
}
diff --git a/arch/i386/kernel/process.c b/arch/i386/kernel/process.c
index 96cd0232e1e..dad02a960e0 100644
--- a/arch/i386/kernel/process.c
+++ b/arch/i386/kernel/process.c
@@ -297,9 +297,9 @@ void show_regs(struct pt_regs * regs)
if (user_mode_vm(regs))
printk(" ESP: %04x:%08lx",0xffff & regs->xss,regs->esp);
printk(" EFLAGS: %08lx %s (%s %.*s)\n",
- regs->eflags, print_tainted(), system_utsname.release,
- (int)strcspn(system_utsname.version, " "),
- system_utsname.version);
+ regs->eflags, print_tainted(), init_utsname()->release,
+ (int)strcspn(init_utsname()->version, " "),
+ init_utsname()->version);
printk("EAX: %08lx EBX: %08lx ECX: %08lx EDX: %08lx\n",
regs->eax,regs->ebx,regs->ecx,regs->edx);
printk("ESI: %08lx EDI: %08lx EBP: %08lx",
diff --git a/arch/i386/kernel/smpboot.c b/arch/i386/kernel/smpboot.c
index 0831f709f77..9d93ecf6d99 100644
--- a/arch/i386/kernel/smpboot.c
+++ b/arch/i386/kernel/smpboot.c
@@ -612,6 +612,7 @@ extern struct {
/* which logical CPUs are on which nodes */
cpumask_t node_2_cpu_mask[MAX_NUMNODES] __read_mostly =
{ [0 ... MAX_NUMNODES-1] = CPU_MASK_NONE };
+EXPORT_SYMBOL(node_2_cpu_mask);
/* which node each logical CPU is on */
int cpu_2_node[NR_CPUS] __read_mostly = { [0 ... NR_CPUS-1] = 0 };
EXPORT_SYMBOL(cpu_2_node);
diff --git a/arch/i386/kernel/sys_i386.c b/arch/i386/kernel/sys_i386.c
index 8fdb1fb17a5..4048397f174 100644
--- a/arch/i386/kernel/sys_i386.c
+++ b/arch/i386/kernel/sys_i386.c
@@ -21,6 +21,7 @@
#include <linux/utsname.h>
#include <asm/uaccess.h>
+#include <asm/unistd.h>
#include <asm/ipc.h>
/*
@@ -210,7 +211,7 @@ asmlinkage int sys_uname(struct old_utsname __user * name)
if (!name)
return -EFAULT;
down_read(&uts_sem);
- err=copy_to_user(name, &system_utsname, sizeof (*name));
+ err = copy_to_user(name, utsname(), sizeof (*name));
up_read(&uts_sem);
return err?-EFAULT:0;
}
@@ -226,16 +227,21 @@ asmlinkage int sys_olduname(struct oldold_utsname __user * name)
down_read(&uts_sem);
- error = __copy_to_user(&name->sysname,&system_utsname.sysname,__OLD_UTS_LEN);
- error |= __put_user(0,name->sysname+__OLD_UTS_LEN);
- error |= __copy_to_user(&name->nodename,&system_utsname.nodename,__OLD_UTS_LEN);
- error |= __put_user(0,name->nodename+__OLD_UTS_LEN);
- error |= __copy_to_user(&name->release,&system_utsname.release,__OLD_UTS_LEN);
- error |= __put_user(0,name->release+__OLD_UTS_LEN);
- error |= __copy_to_user(&name->version,&system_utsname.version,__OLD_UTS_LEN);
- error |= __put_user(0,name->version+__OLD_UTS_LEN);
- error |= __copy_to_user(&name->machine,&system_utsname.machine,__OLD_UTS_LEN);
- error |= __put_user(0,name->machine+__OLD_UTS_LEN);
+ error = __copy_to_user(&name->sysname, &utsname()->sysname,
+ __OLD_UTS_LEN);
+ error |= __put_user(0, name->sysname + __OLD_UTS_LEN);
+ error |= __copy_to_user(&name->nodename, &utsname()->nodename,
+ __OLD_UTS_LEN);
+ error |= __put_user(0, name->nodename + __OLD_UTS_LEN);
+ error |= __copy_to_user(&name->release, &utsname()->release,
+ __OLD_UTS_LEN);
+ error |= __put_user(0, name->release + __OLD_UTS_LEN);
+ error |= __copy_to_user(&name->version, &utsname()->version,
+ __OLD_UTS_LEN);
+ error |= __put_user(0, name->version + __OLD_UTS_LEN);
+ error |= __copy_to_user(&name->machine, &utsname()->machine,
+ __OLD_UTS_LEN);
+ error |= __put_user(0, name->machine + __OLD_UTS_LEN);
up_read(&uts_sem);
@@ -243,3 +249,17 @@ asmlinkage int sys_olduname(struct oldold_utsname __user * name)
return error;
}
+
+
+/*
+ * Do a system call from kernel instead of calling sys_execve so we
+ * end up with proper pt_regs.
+ */
+int kernel_execve(const char *filename, char *const argv[], char *const envp[])
+{
+ long __res;
+ asm volatile ("push %%ebx ; movl %2,%%ebx ; int $0x80 ; pop %%ebx"
+ : "=a" (__res)
+ : "0" (__NR_execve),"ri" (filename),"c" (argv), "d" (envp) : "memory");
+ return __res;
+}
diff --git a/arch/i386/kernel/traps.c b/arch/i386/kernel/traps.c
index 6820b8d643c..00489b706d2 100644
--- a/arch/i386/kernel/traps.c
+++ b/arch/i386/kernel/traps.c
@@ -357,9 +357,9 @@ void show_registers(struct pt_regs *regs)
KERN_EMERG "EIP: %04x:[<%08lx>] %s VLI\n"
KERN_EMERG "EFLAGS: %08lx (%s %.*s)\n",
smp_processor_id(), 0xffff & regs->xcs, regs->eip,
- print_tainted(), regs->eflags, system_utsname.release,
- (int)strcspn(system_utsname.version, " "),
- system_utsname.version);
+ print_tainted(), regs->eflags, init_utsname()->release,
+ (int)strcspn(init_utsname()->version, " "),
+ init_utsname()->version);
print_symbol(KERN_EMERG "EIP is at %s\n", regs->eip);
printk(KERN_EMERG "eax: %08lx ebx: %08lx ecx: %08lx edx: %08lx\n",
regs->eax, regs->ebx, regs->ecx, regs->edx);
diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig
index 0b7f701d5cf..70f7eb9fed3 100644
--- a/arch/ia64/Kconfig
+++ b/arch/ia64/Kconfig
@@ -516,7 +516,7 @@ source "arch/ia64/oprofile/Kconfig"
config KPROBES
bool "Kprobes (EXPERIMENTAL)"
- depends on EXPERIMENTAL && MODULES
+ depends on KALLSYMS && EXPERIMENTAL && MODULES
help
Kprobes allows you to trap at almost any kernel address and
execute a callback function. register_kprobe() establishes
diff --git a/arch/ia64/hp/sim/simserial.c b/arch/ia64/hp/sim/simserial.c
index 0daacc20ed3..246eb3d3757 100644
--- a/arch/ia64/hp/sim/simserial.c
+++ b/arch/ia64/hp/sim/simserial.c
@@ -940,7 +940,7 @@ static inline void show_serial_version(void)
printk(KERN_INFO " no serial options enabled\n");
}
-static struct tty_operations hp_ops = {
+static const struct tty_operations hp_ops = {
.open = rs_open,
.close = rs_close,
.write = rs_write,
diff --git a/arch/ia64/kernel/entry.S b/arch/ia64/kernel/entry.S
index 12701cf32d9..e5b1be51b19 100644
--- a/arch/ia64/kernel/entry.S
+++ b/arch/ia64/kernel/entry.S
@@ -492,11 +492,11 @@ GLOBAL_ENTRY(prefetch_stack)
br.ret.sptk.many rp
END(prefetch_stack)
-GLOBAL_ENTRY(execve)
+GLOBAL_ENTRY(kernel_execve)
mov r15=__NR_execve // put syscall number in place
break __BREAK_SYSCALL
br.ret.sptk.many rp
-END(execve)
+END(kernel_execve)
GLOBAL_ENTRY(clone)
mov r15=__NR_clone // put syscall number in place
diff --git a/arch/ia64/kernel/kprobes.c b/arch/ia64/kernel/kprobes.c
index 169ec3a7156..51217d63285 100644
--- a/arch/ia64/kernel/kprobes.c
+++ b/arch/ia64/kernel/kprobes.c
@@ -90,7 +90,7 @@ static void __kprobes update_kprobe_inst_flag(uint template, uint slot,
p->ainsn.target_br_reg = 0;
/* Check for Break instruction
- * Bits 37:40 Major opcode to be zero
+ * Bits 37:40 Major opcode to be zero
* Bits 27:32 X6 to be zero
* Bits 32:35 X3 to be zero
*/
@@ -104,19 +104,19 @@ static void __kprobes update_kprobe_inst_flag(uint template, uint slot,
switch (major_opcode) {
case INDIRECT_CALL_OPCODE:
p->ainsn.inst_flag |= INST_FLAG_FIX_BRANCH_REG;
- p->ainsn.target_br_reg = ((kprobe_inst >> 6) & 0x7);
- break;
+ p->ainsn.target_br_reg = ((kprobe_inst >> 6) & 0x7);
+ break;
case IP_RELATIVE_PREDICT_OPCODE:
case IP_RELATIVE_BRANCH_OPCODE:
p->ainsn.inst_flag |= INST_FLAG_FIX_RELATIVE_IP_ADDR;
- break;
+ break;
case IP_RELATIVE_CALL_OPCODE:
- p->ainsn.inst_flag |= INST_FLAG_FIX_RELATIVE_IP_ADDR;
- p->ainsn.inst_flag |= INST_FLAG_FIX_BRANCH_REG;
- p->ainsn.target_br_reg = ((kprobe_inst >> 6) & 0x7);
- break;
+ p->ainsn.inst_flag |= INST_FLAG_FIX_RELATIVE_IP_ADDR;
+ p->ainsn.inst_flag |= INST_FLAG_FIX_BRANCH_REG;
+ p->ainsn.target_br_reg = ((kprobe_inst >> 6) & 0x7);
+ break;
}
- } else if (bundle_encoding[template][slot] == X) {
+ } else if (bundle_encoding[template][slot] == X) {
switch (major_opcode) {
case LONG_CALL_OPCODE:
p->ainsn.inst_flag |= INST_FLAG_FIX_BRANCH_REG;
@@ -258,18 +258,18 @@ static void __kprobes get_kprobe_inst(bundle_t *bundle, uint slot,
switch (slot) {
case 0:
- *major_opcode = (bundle->quad0.slot0 >> SLOT0_OPCODE_SHIFT);
- *kprobe_inst = bundle->quad0.slot0;
- break;
+ *major_opcode = (bundle->quad0.slot0 >> SLOT0_OPCODE_SHIFT);
+ *kprobe_inst = bundle->quad0.slot0;
+ break;
case 1:
- *major_opcode = (bundle->quad1.slot1_p1 >> SLOT1_p1_OPCODE_SHIFT);
- kprobe_inst_p0 = bundle->quad0.slot1_p0;
- kprobe_inst_p1 = bundle->quad1.slot1_p1;
- *kprobe_inst = kprobe_inst_p0 | (kprobe_inst_p1 << (64-46));
+ *major_opcode = (bundle->quad1.slot1_p1 >> SLOT1_p1_OPCODE_SHIFT);
+ kprobe_inst_p0 = bundle->quad0.slot1_p0;
+ kprobe_inst_p1 = bundle->quad1.slot1_p1;
+ *kprobe_inst = kprobe_inst_p0 | (kprobe_inst_p1 << (64-46));
break;
case 2:
- *major_opcode = (bundle->quad1.slot2 >> SLOT2_OPCODE_SHIFT);
- *kprobe_inst = bundle->quad1.slot2;
+ *major_opcode = (bundle->quad1.slot2 >> SLOT2_OPCODE_SHIFT);
+ *kprobe_inst = bundle->quad1.slot2;
break;
}
}
@@ -290,11 +290,11 @@ static int __kprobes valid_kprobe_addr(int template, int slot,
return -EINVAL;
}
- if (in_ivt_functions(addr)) {
- printk(KERN_WARNING "Kprobes can't be inserted inside "
+ if (in_ivt_functions(addr)) {
+ printk(KERN_WARNING "Kprobes can't be inserted inside "
"IVT functions at 0x%lx\n", addr);
- return -EINVAL;
- }
+ return -EINVAL;
+ }
if (slot == 1 && bundle_encoding[template][1] != L) {
printk(KERN_WARNING "Inserting kprobes on slot #1 "
@@ -338,12 +338,13 @@ static void kretprobe_trampoline(void)
int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs)
{
struct kretprobe_instance *ri = NULL;
- struct hlist_head *head;
+ struct hlist_head *head, empty_rp;
struct hlist_node *node, *tmp;
unsigned long flags, orig_ret_address = 0;
unsigned long trampoline_address =
((struct fnptr *)kretprobe_trampoline)->ip;
+ INIT_HLIST_HEAD(&empty_rp);
spin_lock_irqsave(&kretprobe_lock, flags);
head = kretprobe_inst_table_head(current);
@@ -369,7 +370,7 @@ int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs)
ri->rp->handler(ri, regs);
orig_ret_address = (unsigned long)ri->ret_addr;
- recycle_rp_inst(ri);
+ recycle_rp_inst(ri, &empty_rp);
if (orig_ret_address != trampoline_address)
/*
@@ -387,6 +388,10 @@ int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs)
spin_unlock_irqrestore(&kretprobe_lock, flags);
preempt_enable_no_resched();
+ hlist_for_each_entry_safe(ri, node, tmp, &empty_rp, hlist) {
+ hlist_del(&ri->hlist);
+ kfree(ri);
+ }
/*
* By returning a non-zero value, we are telling
* kprobe_handler() that we don't want the post_handler
@@ -424,14 +429,14 @@ int __kprobes arch_prepare_kprobe(struct kprobe *p)
bundle_t *bundle;
bundle = &((kprobe_opcode_t *)kprobe_addr)->bundle;
- template = bundle->quad0.template;
+ template = bundle->quad0.template;
if(valid_kprobe_addr(template, slot, addr))
return -EINVAL;
/* Move to slot 2, if bundle is MLX type and kprobe slot is 1 */
- if (slot == 1 && bundle_encoding[template][1] == L)
- slot++;
+ if (slot == 1 && bundle_encoding[template][1] == L)
+ slot++;
/* Get kprobe_inst and major_opcode from the bundle */
get_kprobe_inst(bundle, slot, &kprobe_inst, &major_opcode);
@@ -489,21 +494,22 @@ void __kprobes arch_remove_kprobe(struct kprobe *p)
*/
static void __kprobes resume_execution(struct kprobe *p, struct pt_regs *regs)
{
- unsigned long bundle_addr = (unsigned long) (&p->ainsn.insn->bundle);
- unsigned long resume_addr = (unsigned long)p->addr & ~0xFULL;
- unsigned long template;
- int slot = ((unsigned long)p->addr & 0xf);
+ unsigned long bundle_addr = (unsigned long) (&p->ainsn.insn->bundle);
+ unsigned long resume_addr = (unsigned long)p->addr & ~0xFULL;
+ unsigned long template;
+ int slot = ((unsigned long)p->addr & 0xf);
template = p->ainsn.insn->bundle.quad0.template;
- if (slot == 1 && bundle_encoding[template][1] == L)
- slot = 2;
+ if (slot == 1 && bundle_encoding[template][1] == L)
+ slot = 2;
if (p->ainsn.inst_flag) {
if (p->ainsn.inst_flag & INST_FLAG_FIX_RELATIVE_IP_ADDR) {
/* Fix relative IP address */
- regs->cr_iip = (regs->cr_iip - bundle_addr) + resume_addr;
+ regs->cr_iip = (regs->cr_iip - bundle_addr) +
+ resume_addr;
}
if (p->ainsn.inst_flag & INST_FLAG_FIX_BRANCH_REG) {
@@ -540,18 +546,18 @@ static void __kprobes resume_execution(struct kprobe *p, struct pt_regs *regs)
}
if (slot == 2) {
- if (regs->cr_iip == bundle_addr + 0x10) {
- regs->cr_iip = resume_addr + 0x10;
- }
- } else {
- if (regs->cr_iip == bundle_addr) {
- regs->cr_iip = resume_addr;
- }
+ if (regs->cr_iip == bundle_addr + 0x10) {
+ regs->cr_iip = resume_addr + 0x10;
+ }
+ } else {
+ if (regs->cr_iip == bundle_addr) {
+ regs->cr_iip = resume_addr;
+ }
}
turn_ss_off:
- /* Turn off Single Step bit */
- ia64_psr(regs)->ss = 0;
+ /* Turn off Single Step bit */
+ ia64_psr(regs)->ss = 0;
}
static void __kprobes prepare_ss(struct kprobe *p, struct pt_regs *regs)
@@ -587,7 +593,7 @@ static int __kprobes is_ia64_break_inst(struct pt_regs *regs)
/* Move to slot 2, if bundle is MLX type and kprobe slot is 1 */
if (slot == 1 && bundle_encoding[template][1] == L)
- slot++;
+ slot++;
/* Get Kprobe probe instruction at given slot*/
get_kprobe_inst(&bundle, slot, &kprobe_inst, &major_opcode);
@@ -627,7 +633,7 @@ static int __kprobes pre_kprobes_handler(struct die_args *args)
if (p) {
if ((kcb->kprobe_status == KPROBE_HIT_SS) &&
(p->ainsn.inst_flag == INST_FLAG_BREAK_INST)) {
- ia64_psr(regs)->ss = 0;
+ ia64_psr(regs)->ss = 0;
goto no_kprobe;
}
/* We have reentered the pre_kprobe_handler(), since
@@ -887,7 +893,7 @@ int __kprobes setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs)
* fix the return address to our jprobe_inst_return() function
* in the jprobes.S file
*/
- regs->b0 = ((struct fnptr *)(jprobe_inst_return))->ip;
+ regs->b0 = ((struct fnptr *)(jprobe_inst_return))->ip;
return 1;
}
diff --git a/arch/ia64/kernel/numa.c b/arch/ia64/kernel/numa.c
index 20340631179..a78b45f5fe2 100644
--- a/arch/ia64/kernel/numa.c
+++ b/arch/ia64/kernel/numa.c
@@ -28,6 +28,7 @@ u16 cpu_to_node_map[NR_CPUS] __cacheline_aligned;
EXPORT_SYMBOL(cpu_to_node_map);
cpumask_t node_to_cpu_mask[MAX_NUMNODES] __cacheline_aligned;
+EXPORT_SYMBOL(node_to_cpu_mask);
void __cpuinit map_cpu_to_node(int cpu, int nid)
{
diff --git a/arch/ia64/kernel/process.c b/arch/ia64/kernel/process.c
index ea914cc6812..51922b98086 100644
--- a/arch/ia64/kernel/process.c
+++ b/arch/ia64/kernel/process.c
@@ -8,8 +8,6 @@
* 2005-10-07 Keith Owens <kaos@sgi.com>
* Add notify_die() hooks.
*/
-#define __KERNEL_SYSCALLS__ /* see <asm/unistd.h> */
-
#include <linux/cpu.h>
#include <linux/pm.h>
#include <linux/elf.h>
diff --git a/arch/ia64/sn/kernel/sn2/sn_hwperf.c b/arch/ia64/sn/kernel/sn2/sn_hwperf.c
index b632b9c1e3b..462ea178f49 100644
--- a/arch/ia64/sn/kernel/sn2/sn_hwperf.c
+++ b/arch/ia64/sn/kernel/sn2/sn_hwperf.c
@@ -423,7 +423,7 @@ static int sn_topology_show(struct seq_file *s, void *d)
"coherency_domain %d, "
"region_size %d\n",
- partid, system_utsname.nodename,
+ partid, utsname()->nodename,
shubtype ? "shub2" : "shub1",
(u64)nasid_mask << nasid_shift, nasid_msb, nasid_shift,
system_size, sharing_size, coher, region_size);
diff --git a/arch/m32r/kernel/sys_m32r.c b/arch/m32r/kernel/sys_m32r.c
index a9cea32eb82..b567351f3c5 100644
--- a/arch/m32r/kernel/sys_m32r.c
+++ b/arch/m32r/kernel/sys_m32r.c
@@ -25,6 +25,8 @@
#include <asm/cachectl.h>
#include <asm/cacheflush.h>
#include <asm/ipc.h>
+#include <asm/syscall.h>
+#include <asm/unistd.h>
/*
* sys_tas() - test-and-set
@@ -205,7 +207,7 @@ asmlinkage int sys_uname(struct old_utsname * name)
if (!name)
return -EFAULT;
down_read(&uts_sem);
- err=copy_to_user(name, &system_utsname, sizeof (*name));
+ err = copy_to_user(name, utsname(), sizeof (*name));
up_read(&uts_sem);
return err?-EFAULT:0;
}
@@ -223,3 +225,21 @@ asmlinkage int sys_cachectl(char *addr, int nbytes, int op)
return -ENOSYS;
}
+/*
+ * Do a system call from kernel instead of calling sys_execve so we
+ * end up with proper pt_regs.
+ */
+int kernel_execve(const char *filename, char *const argv[], char *const envp[])
+{
+ register long __scno __asm__ ("r7") = __NR_execve;
+ register long __arg3 __asm__ ("r2") = (long)(envp);
+ register long __arg2 __asm__ ("r1") = (long)(argv);
+ register long __res __asm__ ("r0") = (long)(filename);
+ __asm__ __volatile__ (
+ "trap #" SYSCALL_VECTOR "|| nop"
+ : "=r" (__res)
+ : "r" (__scno), "0" (__res), "r" (__arg2),
+ "r" (__arg3)
+ : "memory");
+ return __res;
+}
diff --git a/arch/m68k/kernel/sys_m68k.c b/arch/m68k/kernel/sys_m68k.c
index 143c552d38f..90238a8c9e1 100644
--- a/arch/m68k/kernel/sys_m68k.c
+++ b/arch/m68k/kernel/sys_m68k.c
@@ -27,6 +27,7 @@
#include <asm/traps.h>
#include <asm/ipc.h>
#include <asm/page.h>
+#include <asm/unistd.h>
/*
* sys_pipe() is the normal C calling standard for creating
@@ -663,3 +664,18 @@ asmlinkage int sys_getpagesize(void)
{
return PAGE_SIZE;
}
+
+/*
+ * Do a system call from kernel instead of calling sys_execve so we
+ * end up with proper pt_regs.
+ */
+int kernel_execve(const char *filename, char *const argv[], char *const envp[])
+{
+ register long __res asm ("%d0") = __NR_execve;
+ register long __a asm ("%d1") = (long)(filename);
+ register long __b asm ("%d2") = (long)(argv);
+ register long __c asm ("%d3") = (long)(envp);
+ asm volatile ("trap #0" : "+d" (__res)
+ : "d" (__a), "d" (__b), "d" (__c));
+ return __res;
+}
diff --git a/arch/m68knommu/kernel/sys_m68k.c b/arch/m68knommu/kernel/sys_m68k.c
index d87e1e0a133..c3494b8447d 100644
--- a/arch/m68knommu/kernel/sys_m68k.c
+++ b/arch/m68knommu/kernel/sys_m68k.c
@@ -26,6 +26,7 @@
#include <asm/traps.h>
#include <asm/ipc.h>
#include <asm/cacheflush.h>
+#include <asm/unistd.h>
/*
* sys_pipe() is the normal C calling standard for creating
@@ -206,3 +207,17 @@ asmlinkage int sys_getpagesize(void)
return PAGE_SIZE;
}
+/*
+ * Do a system call from kernel instead of calling sys_execve so we
+ * end up with proper pt_regs.
+ */
+int kernel_execve(const char *filename, char *const argv[], char *const envp[])
+{
+ register long __res asm ("%d0") = __NR_execve;
+ register long __a asm ("%d1") = (long)(filename);
+ register long __b asm ("%d2") = (long)(argv);
+ register long __c asm ("%d3") = (long)(envp);
+ asm volatile ("trap #0" : "+d" (__res)
+ : "d" (__a), "d" (__b), "d" (__c));
+ return __res;
+}
diff --git a/arch/mips/kernel/linux32.c b/arch/mips/kernel/linux32.c
index 43b1162d714..52cada45b35 100644
--- a/arch/mips/kernel/linux32.c
+++ b/arch/mips/kernel/linux32.c
@@ -1039,7 +1039,7 @@ asmlinkage long sys32_newuname(struct new_utsname __user * name)
int ret = 0;
down_read(&uts_sem);
- if (copy_to_user(name,&system_utsname,sizeof *name))
+ if (copy_to_user(name, utsname(), sizeof *name))
ret = -EFAULT;
up_read(&uts_sem);
diff --git a/arch/mips/kernel/signal_n32.c b/arch/mips/kernel/signal_n32.c
index 50c17eaa7f2..477c5334ec1 100644
--- a/arch/mips/kernel/signal_n32.c
+++ b/arch/mips/kernel/signal_n32.c
@@ -42,8 +42,6 @@
#include "signal-common.h"
-extern void sigset_from_compat(sigset_t *set, compat_sigset_t *compat);
-
/*
* Including <asm/unistd.h> would give use the 64-bit syscall numbers ...
*/
@@ -83,6 +81,8 @@ struct rt_sigframe_n32 {
#endif
};
+extern void sigset_from_compat (sigset_t *set, compat_sigset_t *compat);
+
save_static_function(sysn32_rt_sigsuspend);
__attribute_used__ noinline static int
_sysn32_rt_sigsuspend(nabi_no_regargs struct pt_regs regs)
diff --git a/arch/mips/kernel/syscall.c b/arch/mips/kernel/syscall.c
index 9951240cc3f..26e1a7e78d1 100644
--- a/arch/mips/kernel/syscall.c
+++ b/arch/mips/kernel/syscall.c
@@ -231,7 +231,7 @@ out:
*/
asmlinkage int sys_uname(struct old_utsname __user * name)
{
- if (name && !copy_to_user(name, &system_utsname, sizeof (*name)))
+ if (name && !copy_to_user(name, utsname(), sizeof (*name)))
return 0;
return -EFAULT;
}
@@ -248,16 +248,21 @@ asmlinkage int sys_olduname(struct oldold_utsname __user * name)
if (!access_ok(VERIFY_WRITE,name,sizeof(struct oldold_utsname)))
return -EFAULT;
- error = __copy_to_user(&name->sysname,&system_utsname.sysname,__OLD_UTS_LEN);
- error -= __put_user(0,name->sysname+__OLD_UTS_LEN);
- error -= __copy_to_user(&name->nodename,&system_utsname.nodename,__OLD_UTS_LEN);
- error -= __put_user(0,name->nodename+__OLD_UTS_LEN);
- error -= __copy_to_user(&name->release,&system_utsname.release,__OLD_UTS_LEN);
- error -= __put_user(0,name->release+__OLD_UTS_LEN);
- error -= __copy_to_user(&name->version,&system_utsname.version,__OLD_UTS_LEN);
- error -= __put_user(0,name->version+__OLD_UTS_LEN);
- error -= __copy_to_user(&name->machine,&system_utsname.machine,__OLD_UTS_LEN);
- error = __put_user(0,name->machine+__OLD_UTS_LEN);
+ error = __copy_to_user(&name->sysname, &utsname()->sysname,
+ __OLD_UTS_LEN);
+ error -= __put_user(0, name->sysname + __OLD_UTS_LEN);
+ error -= __copy_to_user(&name->nodename, &utsname()->nodename,
+ __OLD_UTS_LEN);
+ error -= __put_user(0, name->nodename + __OLD_UTS_LEN);
+ error -= __copy_to_user(&name->release, &utsname()->release,
+ __OLD_UTS_LEN);
+ error -= __put_user(0, name->release + __OLD_UTS_LEN);
+ error -= __copy_to_user(&name->version, &utsname()->version,
+ __OLD_UTS_LEN);
+ error -= __put_user(0, name->version + __OLD_UTS_LEN);
+ error -= __copy_to_user(&name->machine, &utsname()->machine,
+ __OLD_UTS_LEN);
+ error = __put_user(0, name->machine + __OLD_UTS_LEN);
error = error ? -EFAULT : 0;
return error;
@@ -401,3 +406,32 @@ asmlinkage void bad_stack(void)
{
do_exit(SIGSEGV);
}
+
+/*
+ * Do a system call from kernel instead of calling sys_execve so we
+ * end up with proper pt_regs.
+ */
+int kernel_execve(const char *filename, char *const argv[], char *const envp[])
+{
+ register unsigned long __a0 asm("$4") = (unsigned long) filename;
+ register unsigned long __a1 asm("$5") = (unsigned long) argv;
+ register unsigned long __a2 asm("$6") = (unsigned long) envp;
+ register unsigned long __a3 asm("$7");
+ unsigned long __v0;
+
+ __asm__ volatile (" \n"
+ " .set noreorder \n"
+ " li $2, %5 # __NR_execve \n"
+ " syscall \n"
+ " move %0, $2 \n"
+ " .set reorder \n"
+ : "=&r" (__v0), "=r" (__a3)
+ : "r" (__a0), "r" (__a1), "r" (__a2), "i" (__NR_execve)
+ : "$2", "$8", "$9", "$10", "$11", "$12", "$13", "$14", "$15", "$24",
+ "memory");
+
+ if (__a3 == 0)
+ return __v0;
+
+ return -__v0;
+}
diff --git a/arch/mips/kernel/sysirix.c b/arch/mips/kernel/sysirix.c
index 1137dd6ea7a..11bb9717497 100644
--- a/arch/mips/kernel/sysirix.c
+++ b/arch/mips/kernel/sysirix.c
@@ -884,7 +884,7 @@ asmlinkage int irix_getdomainname(char __user *name, int len)
down_read(&uts_sem);
if (len > __NEW_UTS_LEN)
len = __NEW_UTS_LEN;
- err = copy_to_user(name, system_utsname.domainname, len) ? -EFAULT : 0;
+ err = copy_to_user(name, utsname()->domainname, len) ? -EFAULT : 0;
up_read(&uts_sem);
return err;
@@ -1127,11 +1127,11 @@ struct iuname {
asmlinkage int irix_uname(struct iuname __user *buf)
{
down_read(&uts_sem);
- if (copy_from_user(system_utsname.sysname, buf->sysname, 65)
- || copy_from_user(system_utsname.nodename, buf->nodename, 65)
- || copy_from_user(system_utsname.release, buf->release, 65)
- || copy_from_user(system_utsname.version, buf->version, 65)
- || copy_from_user(system_utsname.machine, buf->machine, 65)) {
+ if (copy_from_user(utsname()->sysname, buf->sysname, 65)
+ || copy_from_user(utsname()->nodename, buf->nodename, 65)
+ || copy_from_user(utsname()->release, buf->release, 65)
+ || copy_from_user(utsname()->version, buf->version, 65)
+ || copy_from_user(utsname()->machine, buf->machine, 65)) {
return -EFAULT;
}
up_read(&uts_sem);
diff --git a/arch/mips/sgi-ip22/ip22-reset.c b/arch/mips/sgi-ip22/ip22-reset.c
index 8134220ed60..7a941ecff3b 100644
--- a/arch/mips/sgi-ip22/ip22-reset.c
+++ b/arch/mips/sgi-ip22/ip22-reset.c
@@ -123,7 +123,8 @@ static inline void power_button(void)
if (machine_state & MACHINE_PANICED)
return;
- if ((machine_state & MACHINE_SHUTTING_DOWN) || kill_proc(1,SIGINT,1)) {
+ if ((machine_state & MACHINE_SHUTTING_DOWN) ||
+ kill_cad_pid(SIGINT, 1)) {
/* No init process or button pressed twice. */
sgi_machine_power_off();
}
diff --git a/arch/mips/sgi-ip32/ip32-reset.c b/arch/mips/sgi-ip32/ip32-reset.c
index 79ddb460565..fd0932b2d52 100644
--- a/arch/mips/sgi-ip32/ip32-reset.c
+++ b/arch/mips/sgi-ip32/ip32-reset.c
@@ -120,7 +120,7 @@ static inline void ip32_power_button(void)
if (has_panicked)
return;
- if (shuting_down || kill_proc(1, SIGINT, 1)) {
+ if (shuting_down || kill_cad_pid(SIGINT, 1)) {
/* No init process or button pressed twice. */
ip32_machine_power_off();
}
diff --git a/arch/parisc/hpux/sys_hpux.c b/arch/parisc/hpux/sys_hpux.c
index cb69727027a..2e2dc4f2c85 100644
--- a/arch/parisc/hpux/sys_hpux.c
+++ b/arch/parisc/hpux/sys_hpux.c
@@ -266,16 +266,21 @@ static int hpux_uname(struct hpux_utsname *name)
down_read(&uts_sem);
- error = __copy_to_user(&name->sysname,&system_utsname.sysname,HPUX_UTSLEN-1);
- error |= __put_user(0,name->sysname+HPUX_UTSLEN-1);
- error |= __copy_to_user(&name->nodename,&system_utsname.nodename,HPUX_UTSLEN-1);
- error |= __put_user(0,name->nodename+HPUX_UTSLEN-1);
- error |= __copy_to_user(&name->release,&system_utsname.release,HPUX_UTSLEN-1);
- error |= __put_user(0,name->release+HPUX_UTSLEN-1);
- error |= __copy_to_user(&name->version,&system_utsname.version,HPUX_UTSLEN-1);
- error |= __put_user(0,name->version+HPUX_UTSLEN-1);
- error |= __copy_to_user(&name->machine,&system_utsname.machine,HPUX_UTSLEN-1);
- error |= __put_user(0,name->machine+HPUX_UTSLEN-1);
+ error = __copy_to_user(&name->sysname, &utsname()->sysname,
+ HPUX_UTSLEN - 1);
+ error |= __put_user(0, name->sysname + HPUX_UTSLEN - 1);
+ error |= __copy_to_user(&name->nodename, &utsname()->nodename,
+ HPUX_UTSLEN - 1);
+ error |= __put_user(0, name->nodename + HPUX_UTSLEN - 1);
+ error |= __copy_to_user(&name->release, &utsname()->release,
+ HPUX_UTSLEN - 1);
+ error |= __put_user(0, name->release + HPUX_UTSLEN - 1);
+ error |= __copy_to_user(&name->version, &utsname()->version,
+ HPUX_UTSLEN - 1);
+ error |= __put_user(0, name->version + HPUX_UTSLEN - 1);
+ error |= __copy_to_user(&name->machine, &utsname()->machine,
+ HPUX_UTSLEN - 1);
+ error |= __put_user(0, name->machine + HPUX_UTSLEN - 1);
up_read(&uts_sem);
@@ -373,8 +378,8 @@ int hpux_utssys(char *ubuf, int n, int type)
/* TODO: print a warning about using this? */
down_write(&uts_sem);
error = -EFAULT;
- if (!copy_from_user(system_utsname.sysname, ubuf, len)) {
- system_utsname.sysname[len] = 0;
+ if (!copy_from_user(utsname()->sysname, ubuf, len)) {
+ utsname()->sysname[len] = 0;
error = 0;
}
up_write(&uts_sem);
@@ -400,8 +405,8 @@ int hpux_utssys(char *ubuf, int n, int type)
/* TODO: print a warning about this? */
down_write(&uts_sem);
error = -EFAULT;
- if (!copy_from_user(system_utsname.release, ubuf, len)) {
- system_utsname.release[len] = 0;
+ if (!copy_from_user(utsname()->release, ubuf, len)) {
+ utsname()->release[len] = 0;
error = 0;
}
up_write(&uts_sem);
@@ -422,13 +427,13 @@ int hpux_getdomainname(char *name, int len)
down_read(&uts_sem);
- nlen = strlen(system_utsname.domainname) + 1;
+ nlen = strlen(utsname()->domainname) + 1;
if (nlen < len)
len = nlen;
if(len > __NEW_UTS_LEN)
goto done;
- if(copy_to_user(name, system_utsname.domainname, len))
+ if(copy_to_user(name, utsname()->domainname, len))
goto done;
err = 0;
done:
diff --git a/arch/parisc/kernel/process.c b/arch/parisc/kernel/process.c
index 0b485ef4be8..2f9f9dfa66f 100644
--- a/arch/parisc/kernel/process.c
+++ b/arch/parisc/kernel/process.c
@@ -368,7 +368,14 @@ out:
return error;
}
-unsigned long
+extern int __execve(const char *filename, char *const argv[],
+ char *const envp[], struct task_struct *task);
+int kernel_execve(const char *filename, char *const argv[], char *const envp[])
+{
+ return __execve(filename, argv, envp, current);
+}
+
+unsigned long
get_wchan(struct task_struct *p)
{
struct unwind_frame_info info;
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index a0dd1b0ee48..032e6ab5d3c 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -1069,7 +1069,7 @@ source "arch/powerpc/oprofile/Kconfig"
config KPROBES
bool "Kprobes (EXPERIMENTAL)"
- depends on PPC64 && EXPERIMENTAL && MODULES
+ depends on PPC64 && KALLSYMS && EXPERIMENTAL && MODULES
help
Kprobes allows you to trap at almost any kernel address and
execute a callback function. register_kprobe() establishes
diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c
index cd65c367b8b..7b8d12b9026 100644
--- a/arch/powerpc/kernel/kprobes.c
+++ b/arch/powerpc/kernel/kprobes.c
@@ -259,14 +259,15 @@ void kretprobe_trampoline_holder(void)
*/
int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs)
{
- struct kretprobe_instance *ri = NULL;
- struct hlist_head *head;
- struct hlist_node *node, *tmp;
+ struct kretprobe_instance *ri = NULL;
+ struct hlist_head *head, empty_rp;
+ struct hlist_node *node, *tmp;
unsigned long flags, orig_ret_address = 0;
unsigned long trampoline_address =(unsigned long)&kretprobe_trampoline;
+ INIT_HLIST_HEAD(&empty_rp);
spin_lock_irqsave(&kretprobe_lock, flags);
- head = kretprobe_inst_table_head(current);
+ head = kretprobe_inst_table_head(current);
/*
* It is possible to have multiple instances associated with a given
@@ -277,20 +278,20 @@ int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs)
* We can handle this because:
* - instances are always inserted at the head of the list
* - when multiple return probes are registered for the same
- * function, the first instance's ret_addr will point to the
+ * function, the first instance's ret_addr will point to the
* real return address, and all the rest will point to
* kretprobe_trampoline
*/
hlist_for_each_entry_safe(ri, node, tmp, head, hlist) {
- if (ri->task != current)
+ if (ri->task != current)
/* another task is sharing our hash bucket */
- continue;
+ continue;
if (ri->rp && ri->rp->handler)
ri->rp->handler(ri, regs);
orig_ret_address = (unsigned long)ri->ret_addr;
- recycle_rp_inst(ri);
+ recycle_rp_inst(ri, &empty_rp);
if (orig_ret_address != trampoline_address)
/*
@@ -308,12 +309,16 @@ int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs)
spin_unlock_irqrestore(&kretprobe_lock, flags);
preempt_enable_no_resched();
- /*
- * By returning a non-zero value, we are telling
- * kprobe_handler() that we don't want the post_handler
- * to run (and have re-enabled preemption)
- */
- return 1;
+ hlist_for_each_entry_safe(ri, node, tmp, &empty_rp, hlist) {
+ hlist_del(&ri->hlist);
+ kfree(ri);
+ }
+ /*
+ * By returning a non-zero value, we are telling
+ * kprobe_handler() that we don't want the post_handler
+ * to run (and have re-enabled preemption)
+ */
+ return 1;
}
/*
diff --git a/arch/powerpc/kernel/misc_32.S b/arch/powerpc/kernel/misc_32.S
index 58758d88336..88fd73fdf04 100644
--- a/arch/powerpc/kernel/misc_32.S
+++ b/arch/powerpc/kernel/misc_32.S
@@ -843,7 +843,7 @@ _GLOBAL(kernel_thread)
addi r1,r1,16
blr
-_GLOBAL(execve)
+_GLOBAL(kernel_execve)
li r0,__NR_execve
sc
bnslr
diff --git a/arch/powerpc/kernel/misc_64.S b/arch/powerpc/kernel/misc_64.S
index e3ed21cd3d9..9c54eccad99 100644
--- a/arch/powerpc/kernel/misc_64.S
+++ b/arch/powerpc/kernel/misc_64.S
@@ -556,7 +556,7 @@ _GLOBAL(giveup_altivec)
#endif /* CONFIG_ALTIVEC */
-_GLOBAL(execve)
+_GLOBAL(kernel_execve)
li r0,__NR_execve
sc
bnslr
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index a127a1e3c09..7b2f6452ba7 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -424,7 +424,7 @@ void show_regs(struct pt_regs * regs)
printk("NIP: "REG" LR: "REG" CTR: "REG"\n",
regs->nip, regs->link, regs->ctr);
printk("REGS: %p TRAP: %04lx %s (%s)\n",
- regs, regs->trap, print_tainted(), system_utsname.release);
+ regs, regs->trap, print_tainted(), init_utsname()->release);
printk("MSR: "REG" ", regs->msr);
printbits(regs->msr, msr_bits);
printk(" CR: %08lX XER: %08lX\n", regs->ccr, regs->xer);
diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
index 962ad5ebc76..cda2dbe70a7 100644
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c
@@ -414,7 +414,7 @@ void __init setup_system(void)
smp_release_cpus();
#endif
- printk("Starting Linux PPC64 %s\n", system_utsname.version);
+ printk("Starting Linux PPC64 %s\n", init_utsname()->version);
printk("-----------------------------------------------------\n");
printk("ppc64_pft_size = 0x%lx\n", ppc64_pft_size);
diff --git a/arch/powerpc/kernel/syscalls.c b/arch/powerpc/kernel/syscalls.c
index 9b69d99a910..d358866b880 100644
--- a/arch/powerpc/kernel/syscalls.c
+++ b/arch/powerpc/kernel/syscalls.c
@@ -260,7 +260,7 @@ long ppc_newuname(struct new_utsname __user * name)
int err = 0;
down_read(&uts_sem);
- if (copy_to_user(name, &system_utsname, sizeof(*name)))
+ if (copy_to_user(name, utsname(), sizeof(*name)))
err = -EFAULT;
up_read(&uts_sem);
if (!err)
@@ -273,7 +273,7 @@ int sys_uname(struct old_utsname __user *name)
int err = 0;
down_read(&uts_sem);
- if (copy_to_user(name, &system_utsname, sizeof(*name)))
+ if (copy_to_user(name, utsname(), sizeof(*name)))
err = -EFAULT;
up_read(&uts_sem);
if (!err)
@@ -289,19 +289,19 @@ int sys_olduname(struct oldold_utsname __user *name)
return -EFAULT;
down_read(&uts_sem);
- error = __copy_to_user(&name->sysname, &system_utsname.sysname,
+ error = __copy_to_user(&name->sysname, &utsname()->sysname,
__OLD_UTS_LEN);
error |= __put_user(0, name->sysname + __OLD_UTS_LEN);
- error |= __copy_to_user(&name->nodename, &system_utsname.nodename,
+ error |= __copy_to_user(&name->nodename, &utsname()->nodename,
__OLD_UTS_LEN);
error |= __put_user(0, name->nodename + __OLD_UTS_LEN);
- error |= __copy_to_user(&name->release, &system_utsname.release,
+ error |= __copy_to_user(&name->release, &utsname()->release,
__OLD_UTS_LEN);
error |= __put_user(0, name->release + __OLD_UTS_LEN);
- error |= __copy_to_user(&name->version, &system_utsname.version,
+ error |= __copy_to_user(&name->version, &utsname()->version,
__OLD_UTS_LEN);
error |= __put_user(0, name->version + __OLD_UTS_LEN);
- error |= __copy_to_user(&name->machine, &system_utsname.machine,
+ error |= __copy_to_user(&name->machine, &utsname()->machine,
__OLD_UTS_LEN);
error |= override_machine(name->machine);
up_read(&uts_sem);
diff --git a/arch/powerpc/platforms/iseries/mf.c b/arch/powerpc/platforms/iseries/mf.c
index 1a2c2a50f92..1983b640bac 100644
--- a/arch/powerpc/platforms/iseries/mf.c
+++ b/arch/powerpc/platforms/iseries/mf.c
@@ -357,7 +357,7 @@ static int dma_and_signal_ce_msg(char *ce_msg,
*/
static int shutdown(void)
{
- int rc = kill_proc(1, SIGINT, 1);
+ int rc = kill_cad_pid(SIGINT, 1);
if (rc) {
printk(KERN_ALERT "mf.c: SIGINT to init failed (%d), "
diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c
index 8ed36214045..98189d8efac 100644
--- a/arch/powerpc/platforms/pseries/setup.c
+++ b/arch/powerpc/platforms/pseries/setup.c
@@ -342,7 +342,7 @@ static int __init pSeries_init_panel(void)
{
/* Manually leave the kernel version on the panel. */
ppc_md.progress("Linux ppc64\n", 0);
- ppc_md.progress(system_utsname.release, 0);
+ ppc_md.progress(init_utsname()->version, 0);
return 0;
}
diff --git a/arch/ppc/4xx_io/serial_sicc.c b/arch/ppc/4xx_io/serial_sicc.c
index b81a367dc27..87fe9a89dba 100644
--- a/arch/ppc/4xx_io/serial_sicc.c
+++ b/arch/ppc/4xx_io/serial_sicc.c
@@ -1720,7 +1720,7 @@ static int siccuart_open(struct tty_struct *tty, struct file *filp)
return 0;
}
-static struct tty_operations sicc_ops = {
+static const struct tty_operations sicc_ops = {
.open = siccuart_open,
.close = siccuart_close,
.write = siccuart_write,
diff --git a/arch/ppc/kernel/misc.S b/arch/ppc/kernel/misc.S
index 50b4bbd0680..5f6684012de 100644
--- a/arch/ppc/kernel/misc.S
+++ b/arch/ppc/kernel/misc.S
@@ -942,20 +942,16 @@ _GLOBAL(kernel_thread)
addi r1,r1,16
blr
+_GLOBAL(kernel_execve)
+ li r0,__NR_execve
+ sc
+ bnslr
+ neg r3,r3
+ blr
+
/*
* This routine is just here to keep GCC happy - sigh...
*/
_GLOBAL(__main)
blr
-#define SYSCALL(name) \
-_GLOBAL(name) \
- li r0,__NR_##name; \
- sc; \
- bnslr; \
- lis r4,errno@ha; \
- stw r3,errno@l(r4); \
- li r3,-1; \
- blr
-
-SYSCALL(execve)
diff --git a/arch/s390/kernel/kprobes.c b/arch/s390/kernel/kprobes.c
index ca28fb0b379..4d9ff5ce4cb 100644
--- a/arch/s390/kernel/kprobes.c
+++ b/arch/s390/kernel/kprobes.c
@@ -369,11 +369,12 @@ void __kprobes kretprobe_trampoline_holder(void)
int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs)
{
struct kretprobe_instance *ri = NULL;
- struct hlist_head *head;
+ struct hlist_head *head, empty_rp;
struct hlist_node *node, *tmp;
unsigned long flags, orig_ret_address = 0;
unsigned long trampoline_address = (unsigned long)&kretprobe_trampoline;
+ INIT_HLIST_HEAD(&empty_rp);
spin_lock_irqsave(&kretprobe_lock, flags);
head = kretprobe_inst_table_head(current);
@@ -399,7 +400,7 @@ int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs)
ri->rp->handler(ri, regs);
orig_ret_address = (unsigned long)ri->ret_addr;
- recycle_rp_inst(ri);
+ recycle_rp_inst(ri, &empty_rp);
if (orig_ret_address != trampoline_address) {
/*
@@ -417,6 +418,10 @@ int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs)
spin_unlock_irqrestore(&kretprobe_lock, flags);
preempt_enable_no_resched();
+ hlist_for_each_entry_safe(ri, node, tmp, &empty_rp, hlist) {
+ hlist_del(&ri->hlist);
+ kfree(ri);
+ }
/*
* By returning a non-zero value, we are telling
* kprobe_handler() that we don't want the post_handler
diff --git a/arch/s390/kernel/sys_s390.c b/arch/s390/kernel/sys_s390.c
index e351780bb66..584ed95f338 100644
--- a/arch/s390/kernel/sys_s390.c
+++ b/arch/s390/kernel/sys_s390.c
@@ -27,6 +27,7 @@
#include <linux/file.h>
#include <linux/utsname.h>
#include <linux/personality.h>
+#include <linux/unistd.h>
#include <asm/uaccess.h>
#include <asm/ipc.h>
@@ -266,3 +267,22 @@ s390_fadvise64_64(struct fadvise64_64_args __user *args)
return sys_fadvise64_64(a.fd, a.offset, a.len, a.advice);
}
+/*
+ * Do a system call from kernel instead of calling sys_execve so we
+ * end up with proper pt_regs.
+ */
+int kernel_execve(const char *filename, char *const argv[], char *const envp[])
+{
+ register const char *__arg1 asm("2") = filename;
+ register char *const*__arg2 asm("3") = argv;
+ register char *const*__arg3 asm("4") = envp;
+ register long __svcres asm("2");
+ asm volatile(
+ "svc %b1"
+ : "=d" (__svcres)
+ : "i" (__NR_execve),
+ "0" (__arg1),
+ "d" (__arg2),
+ "d" (__arg3) : "memory");
+ return __svcres;
+}
diff --git a/arch/sh/kernel/setup.c b/arch/sh/kernel/setup.c
index 5f587332234..77491cf9b25 100644
--- a/arch/sh/kernel/setup.c
+++ b/arch/sh/kernel/setup.c
@@ -459,7 +459,7 @@ static int show_cpuinfo(struct seq_file *m, void *v)
seq_printf(m, "machine\t\t: %s\n", get_system_type());
seq_printf(m, "processor\t: %d\n", cpu);
- seq_printf(m, "cpu family\t: %s\n", system_utsname.machine);
+ seq_printf(m, "cpu family\t: %s\n", init_utsname()->machine);
seq_printf(m, "cpu type\t: %s\n", get_cpu_subtype());
show_cpuflags(m);
diff --git a/arch/sh/kernel/smp.c b/arch/sh/kernel/smp.c
index 6c0fb7c4af1..dbebaddcfe3 100644
--- a/arch/sh/kernel/smp.c
+++ b/arch/sh/kernel/smp.c
@@ -42,6 +42,7 @@ cpumask_t cpu_possible_map;
EXPORT_SYMBOL(cpu_possible_map);
cpumask_t cpu_online_map;
+EXPORT_SYMBOL(cpu_online_map);
static atomic_t cpus_booted = ATOMIC_INIT(0);
/* These are defined by the board-specific code. */
diff --git a/arch/sh/kernel/sys_sh.c b/arch/sh/kernel/sys_sh.c
index b68ff705f06..8fde95001c3 100644
--- a/arch/sh/kernel/sys_sh.c
+++ b/arch/sh/kernel/sys_sh.c
@@ -25,6 +25,7 @@
#include <asm/cacheflush.h>
#include <asm/uaccess.h>
#include <asm/ipc.h>
+#include <asm/unistd.h>
/*
* sys_pipe() is the normal C calling standard for creating
@@ -281,7 +282,7 @@ asmlinkage int sys_uname(struct old_utsname * name)
if (!name)
return -EFAULT;
down_read(&uts_sem);
- err=copy_to_user(name, &system_utsname, sizeof (*name));
+ err = copy_to_user(name, utsname(), sizeof (*name));
up_read(&uts_sem);
return err?-EFAULT:0;
}
@@ -309,3 +310,19 @@ asmlinkage int sys_fadvise64_64_wrapper(int fd, u32 offset0, u32 offset1,
(u64)len0 << 32 | len1, advice);
#endif
}
+
+/*
+ * Do a system call from kernel instead of calling sys_execve so we
+ * end up with proper pt_regs.
+ */
+int kernel_execve(const char *filename, char *const argv[], char *const envp[])
+{
+ register long __sc0 __asm__ ("r3") = __NR_execve;
+ register long __sc4 __asm__ ("r4") = (long) filename;
+ register long __sc5 __asm__ ("r5") = (long) argv;
+ register long __sc6 __asm__ ("r6") = (long) envp;
+ __asm__ __volatile__ ("trapa #0x13" : "=z" (__sc0)
+ : "0" (__sc0), "r" (__sc4), "r" (__sc5), "r" (__sc6)
+ : "memory");
+ return __sc0;
+}
diff --git a/arch/sh64/kernel/process.c b/arch/sh64/kernel/process.c
index db475b7833f..525d0ec19b7 100644
--- a/arch/sh64/kernel/process.c
+++ b/arch/sh64/kernel/process.c
@@ -20,261 +20,16 @@
/*
* This file handles the architecture-dependent parts of process handling..
*/
-
-/* Temporary flags/tests. All to be removed/undefined. BEGIN */
-#define IDLE_TRACE
-#define VM_SHOW_TABLES
-#define VM_TEST_FAULT
-#define VM_TEST_RTLBMISS
-#define VM_TEST_WTLBMISS
-
-#undef VM_SHOW_TABLES
-#undef IDLE_TRACE
-/* Temporary flags/tests. All to be removed/undefined. END */
-
-#define __KERNEL_SYSCALLS__
-#include <stdarg.h>
-
-#include <linux/kernel.h>
-#include <linux/rwsem.h>
#include <linux/mm.h>
-#include <linux/smp.h>
-#include <linux/smp_lock.h>
#include <linux/ptrace.h>
-#include <linux/slab.h>
-#include <linux/vmalloc.h>
-#include <linux/user.h>
-#include <linux/a.out.h>
-#include <linux/interrupt.h>
-#include <linux/unistd.h>
-#include <linux/delay.h>
#include <linux/reboot.h>
#include <linux/init.h>
-
+#include <linux/module.h>
#include <asm/uaccess.h>
#include <asm/pgtable.h>
-#include <asm/system.h>
-#include <asm/io.h>
-#include <asm/processor.h> /* includes also <asm/registers.h> */
-#include <asm/mmu_context.h>
-#include <asm/elf.h>
-#include <asm/page.h>
-
-#include <linux/irq.h>
struct task_struct *last_task_used_math = NULL;
-#ifdef IDLE_TRACE
-#ifdef VM_SHOW_TABLES
-/* For testing */
-static void print_PTE(long base)
-{
- int i, skip=0;
- long long x, y, *p = (long long *) base;
-
- for (i=0; i< 512; i++, p++){
- if (*p == 0) {
- if (!skip) {
- skip++;
- printk("(0s) ");
- }
- } else {
- skip=0;
- x = (*p) >> 32;
- y = (*p) & 0xffffffff;
- printk("%08Lx%08Lx ", x, y);
- if (!((i+1)&0x3)) printk("\n");
- }
- }
-}
-
-/* For testing */
-static void print_DIR(long base)
-{
- int i, skip=0;
- long *p = (long *) base;
-
- for (i=0; i< 512; i++, p++){
- if (*p == 0) {
- if (!skip) {
- skip++;
- printk("(0s) ");
- }
- } else {
- skip=0;
- printk("%08lx ", *p);
- if (!((i+1)&0x7)) printk("\n");
- }
- }
-}
-
-/* For testing */
-static void print_vmalloc_first_tables(void)
-{
-
-#define PRESENT 0x800 /* Bit 11 */
-
- /*
- * Do it really dirty by looking at raw addresses,
- * raw offsets, no types. If we used pgtable/pgalloc
- * macros/definitions we could hide potential bugs.
- *
- * Note that pointers are 32-bit for CDC.
- */
- long pgdt, pmdt, ptet;
-
- pgdt = (long) &swapper_pg_dir;
- printk("-->PGD (0x%08lx):\n", pgdt);
- print_DIR(pgdt);
- printk("\n");
-
- /* VMALLOC pool is mapped at 0xc0000000, second (pointer) entry in PGD */
- pgdt += 4;
- pmdt = (long) (* (long *) pgdt);
- if (!(pmdt & PRESENT)) {
- printk("No PMD\n");
- return;
- } else pmdt &= 0xfffff000;
-
- printk("-->PMD (0x%08lx):\n", pmdt);
- print_DIR(pmdt);
- printk("\n");
-
- /* Get the pmdt displacement for 0xc0000000 */
- pmdt += 2048;
-
- /* just look at first two address ranges ... */
- /* ... 0xc0000000 ... */
- ptet = (long) (* (long *) pmdt);
- if (!(ptet & PRESENT)) {
- printk("No PTE0\n");
- return;
- } else ptet &= 0xfffff000;
-
- printk("-->PTE0 (0x%08lx):\n", ptet);
- print_PTE(ptet);
- printk("\n");
-
- /* ... 0xc0001000 ... */
- ptet += 4;
- if (!(ptet & PRESENT)) {
- printk("No PTE1\n");
- return;
- } else ptet &= 0xfffff000;
- printk("-->PTE1 (0x%08lx):\n", ptet);
- print_PTE(ptet);
- printk("\n");
-}
-#else
-#define print_vmalloc_first_tables()
-#endif /* VM_SHOW_TABLES */
-
-static void test_VM(void)
-{
- void *a, *b, *c;
-
-#ifdef VM_SHOW_TABLES
- printk("Initial PGD/PMD/PTE\n");
-#endif
- print_vmalloc_first_tables();
-
- printk("Allocating 2 bytes\n");
- a = vmalloc(2);
- print_vmalloc_first_tables();
-
- printk("Allocating 4100 bytes\n");
- b = vmalloc(4100);
- print_vmalloc_first_tables();
-
- printk("Allocating 20234 bytes\n");
- c = vmalloc(20234);
- print_vmalloc_first_tables();
-
-#ifdef VM_TEST_FAULT
- /* Here you may want to fault ! */
-
-#ifdef VM_TEST_RTLBMISS
- printk("Ready to fault upon read.\n");
- if (* (char *) a) {
- printk("RTLBMISSed on area a !\n");
- }
- printk("RTLBMISSed on area a !\n");
-#endif
-
-#ifdef VM_TEST_WTLBMISS
- printk("Ready to fault upon write.\n");
- *((char *) b) = 'L';
- printk("WTLBMISSed on area b !\n");
-#endif
-
-#endif /* VM_TEST_FAULT */
-
- printk("Deallocating the 4100 byte chunk\n");
- vfree(b);
- print_vmalloc_first_tables();
-
- printk("Deallocating the 2 byte chunk\n");
- vfree(a);
- print_vmalloc_first_tables();
-
- printk("Deallocating the last chunk\n");
- vfree(c);
- print_vmalloc_first_tables();
-}
-
-extern unsigned long volatile jiffies;
-int once = 0;
-unsigned long old_jiffies;
-int pid = -1, pgid = -1;
-
-void idle_trace(void)
-{
-
- _syscall0(int, getpid)
- _syscall1(int, getpgid, int, pid)
-
- if (!once) {
- /* VM allocation/deallocation simple test */
- test_VM();
- pid = getpid();
-
- printk("Got all through to Idle !!\n");
- printk("I'm now going to loop forever ...\n");
- printk("Any ! below is a timer tick.\n");
- printk("Any . below is a getpgid system call from pid = %d.\n", pid);
-
-
- old_jiffies = jiffies;
- once++;
- }
-
- if (old_jiffies != jiffies) {
- old_jiffies = jiffies - old_jiffies;
- switch (old_jiffies) {
- case 1:
- printk("!");
- break;
- case 2:
- printk("!!");
- break;
- case 3:
- printk("!!!");
- break;
- case 4:
- printk("!!!!");
- break;
- default:
- printk("(%d!)", (int) old_jiffies);
- }
- old_jiffies = jiffies;
- }
- pgid = getpgid(pid);
- printk(".");
-}
-#else
-#define idle_trace() do { } while (0)
-#endif /* IDLE_TRACE */
-
static int hlt_counter = 1;
#define HARD_IDLE_TIMEOUT (HZ / 3)
@@ -323,7 +78,6 @@ void cpu_idle(void)
local_irq_disable();
while (!need_resched()) {
local_irq_enable();
- idle_trace();
hlt();
local_irq_disable();
}
@@ -622,6 +376,10 @@ void free_task_struct(struct task_struct *p)
/*
* Create a kernel thread
*/
+ATTRIB_NORET void kernel_thread_helper(void *arg, int (*fn)(void *))
+{
+ do_exit(fn(arg));
+}
/*
* This is the mechanism for creating a new kernel thread.
@@ -633,19 +391,17 @@ void free_task_struct(struct task_struct *p)
*/
int kernel_thread(int (*fn)(void *), void * arg, unsigned long flags)
{
- /* A bit less processor dependent than older sh ... */
- unsigned int reply;
+ struct pt_regs regs;
-static __inline__ _syscall2(int,clone,unsigned long,flags,unsigned long,newsp)
-static __inline__ _syscall1(int,exit,int,ret)
+ memset(&regs, 0, sizeof(regs));
+ regs.regs[2] = (unsigned long)arg;
+ regs.regs[3] = (unsigned long)fn;
- reply = clone(flags | CLONE_VM, 0);
- if (!reply) {
- /* Child */
- reply = exit(fn(arg));
- }
+ regs.pc = (unsigned long)kernel_thread_helper;
+ regs.sr = (1 << 30);
- return reply;
+ return do_fork(flags | CLONE_VM | CLONE_UNTRACED, 0,
+ &regs, 0, NULL, NULL);
}
/*
diff --git a/arch/sh64/kernel/sys_sh64.c b/arch/sh64/kernel/sys_sh64.c
index 58ff7d522d8..ad0fa4e003e 100644
--- a/arch/sh64/kernel/sys_sh64.c
+++ b/arch/sh64/kernel/sys_sh64.c
@@ -32,6 +32,7 @@
#include <asm/uaccess.h>
#include <asm/ipc.h>
#include <asm/ptrace.h>
+#include <asm/unistd.h>
#define REG_3 3
@@ -279,7 +280,25 @@ asmlinkage int sys_uname(struct old_utsname * name)
if (!name)
return -EFAULT;
down_read(&uts_sem);
- err=copy_to_user(name, &system_utsname, sizeof (*name));
+ err = copy_to_user(name, utsname(), sizeof (*name));
up_read(&uts_sem);
return err?-EFAULT:0;
}
+
+/*
+ * Do a system call from kernel instead of calling sys_execve so we
+ * end up with proper pt_regs.
+ */
+int kernel_execve(const char *filename, char *const argv[], char *const envp[])
+{
+ register unsigned long __sc0 __asm__ ("r9") = ((0x13 << 16) | __NR_execve);
+ register unsigned long __sc2 __asm__ ("r2") = (unsigned long) filename;
+ register unsigned long __sc3 __asm__ ("r3") = (unsigned long) argv;
+ register unsigned long __sc4 __asm__ ("r4") = (unsigned long) envp;
+ __asm__ __volatile__ ("trapa %1 !\t\t\t execve(%2,%3,%4)"
+ : "=r" (__sc0)
+ : "r" (__sc0), "r" (__sc2), "r" (__sc3), "r" (__sc4) );
+ __asm__ __volatile__ ("!dummy %0 %1 %2 %3"
+ : : "r" (__sc0), "r" (__sc2), "r" (__sc3), "r" (__sc4) : "memory");
+ return __sc0;
+}
diff --git a/arch/sparc/kernel/sys_sparc.c b/arch/sparc/kernel/sys_sparc.c
index 896863fb208..a954a0c0000 100644
--- a/arch/sparc/kernel/sys_sparc.c
+++ b/arch/sparc/kernel/sys_sparc.c
@@ -24,6 +24,7 @@
#include <asm/uaccess.h>
#include <asm/ipc.h>
+#include <asm/unistd.h>
/* #define DEBUG_UNIMP_SYSCALL */
@@ -475,16 +476,38 @@ asmlinkage int sys_getdomainname(char __user *name, int len)
down_read(&uts_sem);
- nlen = strlen(system_utsname.domainname) + 1;
+ nlen = strlen(utsname()->domainname) + 1;
err = -EINVAL;
if (nlen > len)
goto out;
err = -EFAULT;
- if (!copy_to_user(name, system_utsname.domainname, nlen))
+ if (!copy_to_user(name, utsname()->domainname, nlen))
err = 0;
out:
up_read(&uts_sem);
return err;
}
+
+/*
+ * Do a system call from kernel instead of calling sys_execve so we
+ * end up with proper pt_regs.
+ */
+int kernel_execve(const char *filename, char *const argv[], char *const envp[])
+{
+ long __res;
+ register long __g1 __asm__ ("g1") = __NR_execve;
+ register long __o0 __asm__ ("o0") = (long)(filename);
+ register long __o1 __asm__ ("o1") = (long)(argv);
+ register long __o2 __asm__ ("o2") = (long)(envp);
+ asm volatile ("t 0x10\n\t"
+ "bcc 1f\n\t"
+ "mov %%o0, %0\n\t"
+ "sub %%g0, %%o0, %0\n\t"
+ "1:\n\t"
+ : "=r" (__res), "=&r" (__o0)
+ : "1" (__o0), "r" (__o1), "r" (__o2), "r" (__g1)
+ : "cc");
+ return __res;
+}
diff --git a/arch/sparc/kernel/sys_sunos.c b/arch/sparc/kernel/sys_sunos.c
index aa0fb2efb61..9d2cd97d1c3 100644
--- a/arch/sparc/kernel/sys_sunos.c
+++ b/arch/sparc/kernel/sys_sunos.c
@@ -483,13 +483,18 @@ asmlinkage int sunos_uname(struct sunos_utsname __user *name)
{
int ret;
down_read(&uts_sem);
- ret = copy_to_user(&name->sname[0], &system_utsname.sysname[0], sizeof(name->sname) - 1);
+ ret = copy_to_user(&name->sname[0], &utsname()->sysname[0],
+ sizeof(name->sname) - 1);
if (!ret) {
- ret |= __copy_to_user(&name->nname[0], &system_utsname.nodename[0], sizeof(name->nname) - 1);
+ ret |= __copy_to_user(&name->nname[0], &utsname()->nodename[0],
+ sizeof(name->nname) - 1);
ret |= __put_user('\0', &name->nname[8]);
- ret |= __copy_to_user(&name->rel[0], &system_utsname.release[0], sizeof(name->rel) - 1);
- ret |= __copy_to_user(&name->ver[0], &system_utsname.version[0], sizeof(name->ver) - 1);
- ret |= __copy_to_user(&name->mach[0], &system_utsname.machine[0], sizeof(name->mach) - 1);
+ ret |= __copy_to_user(&name->rel[0], &utsname()->release[0],
+ sizeof(name->rel) - 1);
+ ret |= __copy_to_user(&name->ver[0], &utsname()->version[0],
+ sizeof(name->ver) - 1);
+ ret |= __copy_to_user(&name->mach[0], &utsname()->machine[0],
+ sizeof(name->mach) - 1);
}
up_read(&uts_sem);
return ret ? -EFAULT : 0;
diff --git a/arch/sparc64/Kconfig b/arch/sparc64/Kconfig
index 8d8ca716f7a..b627f8dbcaa 100644
--- a/arch/sparc64/Kconfig
+++ b/arch/sparc64/Kconfig
@@ -420,7 +420,7 @@ source "arch/sparc64/oprofile/Kconfig"
config KPROBES
bool "Kprobes (EXPERIMENTAL)"
- depends on EXPERIMENTAL && MODULES
+ depends on KALLSYMS && EXPERIMENTAL && MODULES
help
Kprobes allows you to trap at almost any kernel address and
execute a callback function. register_kprobe() establishes
diff --git a/arch/sparc64/kernel/power.c b/arch/sparc64/kernel/power.c
index e55466c77b6..0b9c70627ce 100644
--- a/arch/sparc64/kernel/power.c
+++ b/arch/sparc64/kernel/power.c
@@ -4,8 +4,6 @@
* Copyright (C) 1999 David S. Miller (davem@redhat.com)
*/
-#define __KERNEL_SYSCALLS__
-
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/init.h>
@@ -14,6 +12,7 @@
#include <linux/delay.h>
#include <linux/interrupt.h>
#include <linux/pm.h>
+#include <linux/syscalls.h>
#include <asm/system.h>
#include <asm/auxio.h>
@@ -98,7 +97,7 @@ again:
/* Ok, down we go... */
button_pressed = 0;
- if (execve("/sbin/shutdown", argv, envp) < 0) {
+ if (kernel_execve("/sbin/shutdown", argv, envp) < 0) {
printk("powerd: shutdown execution failed\n");
add_wait_queue(&powerd_wait, &wait);
goto again;
diff --git a/arch/sparc64/kernel/sys_sparc.c b/arch/sparc64/kernel/sys_sparc.c
index c608c947e6c..a53d4abb4b4 100644
--- a/arch/sparc64/kernel/sys_sparc.c
+++ b/arch/sparc64/kernel/sys_sparc.c
@@ -31,6 +31,7 @@
#include <asm/utrap.h>
#include <asm/perfctr.h>
#include <asm/a.out.h>
+#include <asm/unistd.h>
/* #define DEBUG_UNIMP_SYSCALL */
@@ -712,13 +713,13 @@ asmlinkage long sys_getdomainname(char __user *name, int len)
down_read(&uts_sem);
- nlen = strlen(system_utsname.domainname) + 1;
+ nlen = strlen(utsname()->domainname) + 1;
err = -EINVAL;
if (nlen > len)
goto out;
err = -EFAULT;
- if (!copy_to_user(name, system_utsname.domainname, nlen))
+ if (!copy_to_user(name, utsname()->domainname, nlen))
err = 0;
out:
@@ -963,3 +964,23 @@ asmlinkage long sys_perfctr(int opcode, unsigned long arg0, unsigned long arg1,
};
return err;
}
+
+/*
+ * Do a system call from kernel instead of calling sys_execve so we
+ * end up with proper pt_regs.
+ */
+int kernel_execve(const char *filename, char *const argv[], char *const envp[])
+{
+ long __res;
+ register long __g1 __asm__ ("g1") = __NR_execve;
+ register long __o0 __asm__ ("o0") = (long)(filename);
+ register long __o1 __asm__ ("o1") = (long)(argv);
+ register long __o2 __asm__ ("o2") = (long)(envp);
+ asm volatile ("t 0x6d\n\t"
+ "sub %%g0, %%o0, %0\n\t"
+ "movcc %%xcc, %%o0, %0\n\t"
+ : "=r" (__res), "=&r" (__o0)
+ : "1" (__o0), "r" (__o1), "r" (__o2), "r" (__g1)
+ : "cc");
+ return __res;
+}
diff --git a/arch/sparc64/kernel/sys_sunos32.c b/arch/sparc64/kernel/sys_sunos32.c
index 87ebdf858a3..953296b73f3 100644
--- a/arch/sparc64/kernel/sys_sunos32.c
+++ b/arch/sparc64/kernel/sys_sunos32.c
@@ -439,16 +439,16 @@ asmlinkage int sunos_uname(struct sunos_utsname __user *name)
int ret;
down_read(&uts_sem);
- ret = copy_to_user(&name->sname[0], &system_utsname.sysname[0],
+ ret = copy_to_user(&name->sname[0], &utsname()->sysname[0],
sizeof(name->sname) - 1);
- ret |= copy_to_user(&name->nname[0], &system_utsname.nodename[0],
+ ret |= copy_to_user(&name->nname[0], &utsname()->nodename[0],
sizeof(name->nname) - 1);
ret |= put_user('\0', &name->nname[8]);
- ret |= copy_to_user(&name->rel[0], &system_utsname.release[0],
+ ret |= copy_to_user(&name->rel[0], &utsname()->release[0],
sizeof(name->rel) - 1);
- ret |= copy_to_user(&name->ver[0], &system_utsname.version[0],
+ ret |= copy_to_user(&name->ver[0], &utsname()->version[0],
sizeof(name->ver) - 1);
- ret |= copy_to_user(&name->mach[0], &system_utsname.machine[0],
+ ret |= copy_to_user(&name->mach[0], &utsname()->machine[0],
sizeof(name->mach) - 1);
up_read(&uts_sem);
return (ret ? -EFAULT : 0);
diff --git a/arch/sparc64/solaris/misc.c b/arch/sparc64/solaris/misc.c
index 9c581328e76..9ed997982f8 100644
--- a/arch/sparc64/solaris/misc.c
+++ b/arch/sparc64/solaris/misc.c
@@ -249,7 +249,7 @@ asmlinkage int solaris_utssys(u32 buf, u32 flags, int which, u32 buf2)
/* Let's cheat */
err = set_utsfield(v->sysname, "SunOS", 1, 0);
down_read(&uts_sem);
- err |= set_utsfield(v->nodename, system_utsname.nodename,
+ err |= set_utsfield(v->nodename, utsname()->nodename,
1, 1);
up_read(&uts_sem);
err |= set_utsfield(v->release, "2.6", 0, 0);
@@ -273,7 +273,7 @@ asmlinkage int solaris_utsname(u32 buf)
/* Why should we not lie a bit? */
down_read(&uts_sem);
err = set_utsfield(v->sysname, "SunOS", 0, 0);
- err |= set_utsfield(v->nodename, system_utsname.nodename, 1, 1);
+ err |= set_utsfield(v->nodename, utsname()->nodename, 1, 1);
err |= set_utsfield(v->release, "5.6", 0, 0);
err |= set_utsfield(v->version, "Generic", 0, 0);
err |= set_utsfield(v->machine, machine(), 0, 0);
@@ -305,7 +305,7 @@ asmlinkage int solaris_sysinfo(int cmd, u32 buf, s32 count)
case SI_HOSTNAME:
r = buffer + 256;
down_read(&uts_sem);
- for (p = system_utsname.nodename, q = buffer;
+ for (p = utsname()->nodename, q = buffer;
q < r && *p && *p != '.'; *q++ = *p++);
up_read(&uts_sem);
*q = 0;
diff --git a/arch/um/drivers/line.c b/arch/um/drivers/line.c
index 563ce7690a1..24747a41378 100644
--- a/arch/um/drivers/line.c
+++ b/arch/um/drivers/line.c
@@ -642,9 +642,9 @@ int line_remove(struct line *lines, unsigned int num, int n)
}
struct tty_driver *line_register_devfs(struct lines *set,
- struct line_driver *line_driver,
- struct tty_operations *ops, struct line *lines,
- int nlines)
+ struct line_driver *line_driver,
+ const struct tty_operations *ops,
+ struct line *lines, int nlines)
{
int i;
struct tty_driver *driver = alloc_tty_driver(nlines);
diff --git a/arch/um/drivers/mconsole_kern.c b/arch/um/drivers/mconsole_kern.c
index 773a134e7fd..a67dcbd78de 100644
--- a/arch/um/drivers/mconsole_kern.c
+++ b/arch/um/drivers/mconsole_kern.c
@@ -106,9 +106,9 @@ void mconsole_version(struct mc_request *req)
{
char version[256];
- sprintf(version, "%s %s %s %s %s", system_utsname.sysname,
- system_utsname.nodename, system_utsname.release,
- system_utsname.version, system_utsname.machine);
+ sprintf(version, "%s %s %s %s %s", utsname()->sysname,
+ utsname()->nodename, utsname()->release,
+ utsname()->version, utsname()->machine);
mconsole_reply(req, version, 0, 0);
}
diff --git a/arch/um/include/line.h b/arch/um/include/line.h
index 642c9a0320f..7be24811bb3 100644
--- a/arch/um/include/line.h
+++ b/arch/um/include/line.h
@@ -91,10 +91,9 @@ extern int line_setup_irq(int fd, int input, int output, struct line *line,
void *data);
extern void line_close_chan(struct line *line);
extern struct tty_driver * line_register_devfs(struct lines *set,
- struct line_driver *line_driver,
- struct tty_operations *driver,
- struct line *lines,
- int nlines);
+ struct line_driver *line_driver,
+ const struct tty_operations *driver,
+ struct line *lines, int nlines);
extern void lines_init(struct line *lines, int nlines, struct chan_opts *opts);
extern void close_lines(struct line *lines, int nlines);
diff --git a/arch/um/kernel/syscall.c b/arch/um/kernel/syscall.c
index 48cf88dd02d..f5ed8624648 100644
--- a/arch/um/kernel/syscall.c
+++ b/arch/um/kernel/syscall.c
@@ -110,7 +110,7 @@ long sys_uname(struct old_utsname __user * name)
if (!name)
return -EFAULT;
down_read(&uts_sem);
- err = copy_to_user(name, &system_utsname, sizeof (*name));
+ err = copy_to_user(name, utsname(), sizeof (*name));
up_read(&uts_sem);
return err?-EFAULT:0;
}
@@ -126,21 +126,21 @@ long sys_olduname(struct oldold_utsname __user * name)
down_read(&uts_sem);
- error = __copy_to_user(&name->sysname,&system_utsname.sysname,
+ error = __copy_to_user(&name->sysname, &utsname()->sysname,
__OLD_UTS_LEN);
- error |= __put_user(0,name->sysname+__OLD_UTS_LEN);
- error |= __copy_to_user(&name->nodename,&system_utsname.nodename,
+ error |= __put_user(0, name->sysname + __OLD_UTS_LEN);
+ error |= __copy_to_user(&name->nodename, &utsname()->nodename,
__OLD_UTS_LEN);
- error |= __put_user(0,name->nodename+__OLD_UTS_LEN);
- error |= __copy_to_user(&name->release,&system_utsname.release,
+ error |= __put_user(0, name->nodename + __OLD_UTS_LEN);
+ error |= __copy_to_user(&name->release, &utsname()->release,
__OLD_UTS_LEN);
- error |= __put_user(0,name->release+__OLD_UTS_LEN);
- error |= __copy_to_user(&name->version,&system_utsname.version,
+ error |= __put_user(0, name->release + __OLD_UTS_LEN);
+ error |= __copy_to_user(&name->version, &utsname()->version,
__OLD_UTS_LEN);
- error |= __put_user(0,name->version+__OLD_UTS_LEN);
- error |= __copy_to_user(&name->machine,&system_utsname.machine,
+ error |= __put_user(0, name->version + __OLD_UTS_LEN);
+ error |= __copy_to_user(&name->machine, &utsname()->machine,
__OLD_UTS_LEN);
- error |= __put_user(0,name->machine+__OLD_UTS_LEN);
+ error |= __put_user(0, name->machine + __OLD_UTS_LEN);
up_read(&uts_sem);
@@ -164,3 +164,16 @@ int next_syscall_index(int limit)
spin_unlock(&syscall_lock);
return(ret);
}
+
+int kernel_execve(const char *filename, char *const argv[], char *const envp[])
+{
+ mm_segment_t fs;
+ int ret;
+
+ fs = get_fs();
+ set_fs(KERNEL_DS);
+ ret = um_execve(filename, argv, envp);
+ set_fs(fs);
+
+ return ret;
+}
diff --git a/arch/um/kernel/um_arch.c b/arch/um/kernel/um_arch.c
index 55005710dcb..97d88e7902f 100644
--- a/arch/um/kernel/um_arch.c
+++ b/arch/um/kernel/um_arch.c
@@ -167,7 +167,7 @@ static char *usage_string =
static int __init uml_version_setup(char *line, int *add)
{
- printf("%s\n", system_utsname.release);
+ printf("%s\n", init_utsname()->release);
exit(0);
return 0;
@@ -278,7 +278,7 @@ static int __init Usage(char *line, int *add)
{
const char **p;
- printf(usage_string, system_utsname.release);
+ printf(usage_string, init_utsname()->release);
p = &__uml_help_start;
while (p < &__uml_help_end) {
printf("%s", *p);
@@ -403,7 +403,7 @@ int linux_main(int argc, char **argv)
/* Reserve up to 4M after the current brk */
uml_reserved = ROUND_4M(brk_start) + (1 << 22);
- setup_machinename(system_utsname.machine);
+ setup_machinename(init_utsname()->machine);
#ifdef CONFIG_CMDLINE_ON_HOST
argv1_begin = argv[1];
diff --git a/arch/um/os-Linux/process.c b/arch/um/os-Linux/process.c
index ff203625a4b..51f0893640a 100644
--- a/arch/um/os-Linux/process.c
+++ b/arch/um/os-Linux/process.c
@@ -11,6 +11,7 @@
#include <sys/mman.h>
#include <sys/wait.h>
#include <sys/mman.h>
+#include <sys/syscall.h>
#include "ptrace_user.h"
#include "os.h"
#include "user.h"
@@ -140,11 +141,9 @@ void os_usr1_process(int pid)
* syscalls, and also breaks with clone(), which does not unshare the TLS.
*/
-inline _syscall0(pid_t, getpid)
-
int os_getpid(void)
{
- return(getpid());
+ return(syscall(__NR_getpid));
}
int os_getpgrp(void)
diff --git a/arch/um/os-Linux/sys-i386/tls.c b/arch/um/os-Linux/sys-i386/tls.c
index 120abbe4e3c..6e945ab4584 100644
--- a/arch/um/os-Linux/sys-i386/tls.c
+++ b/arch/um/os-Linux/sys-i386/tls.c
@@ -1,10 +1,9 @@
#include <errno.h>
#include <linux/unistd.h>
+#include <sys/syscall.h>
#include "sysdep/tls.h"
#include "user_util.h"
-static _syscall1(int, get_thread_area, user_desc_t *, u_info);
-
/* Checks whether host supports TLS, and sets *tls_min according to the value
* valid on the host.
* i386 host have it == 6; x86_64 host have it == 12, for i386 emulation. */
@@ -17,7 +16,7 @@ void check_host_supports_tls(int *supports_tls, int *tls_min) {
user_desc_t info;
info.entry_number = val[i];
- if (get_thread_area(&info) == 0) {
+ if (syscall(__NR_get_thread_area, &info) == 0) {
*tls_min = val[i];
*supports_tls = 1;
return;
diff --git a/arch/um/os-Linux/tls.c b/arch/um/os-Linux/tls.c
index 9cb09a45546..a2de2580b8a 100644
--- a/arch/um/os-Linux/tls.c
+++ b/arch/um/os-Linux/tls.c
@@ -1,5 +1,6 @@
#include <errno.h>
#include <sys/ptrace.h>
+#include <sys/syscall.h>
#include <asm/ldt.h>
#include "sysdep/tls.h"
#include "uml-config.h"
@@ -48,14 +49,11 @@ int os_get_thread_area(user_desc_t *info, int pid)
#ifdef UML_CONFIG_MODE_TT
#include "linux/unistd.h"
-static _syscall1(int, get_thread_area, user_desc_t *, u_info);
-static _syscall1(int, set_thread_area, user_desc_t *, u_info);
-
int do_set_thread_area_tt(user_desc_t *info)
{
int ret;
- ret = set_thread_area(info);
+ ret = syscall(__NR_set_thread_area,info);
if (ret < 0) {
ret = -errno;
}
@@ -66,7 +64,7 @@ int do_get_thread_area_tt(user_desc_t *info)
{
int ret;
- ret = get_thread_area(info);
+ ret = syscall(__NR_get_thread_area,info);
if (ret < 0) {
ret = -errno;
}
diff --git a/arch/um/sys-i386/unmap.c b/arch/um/sys-i386/unmap.c
index 1b0ad0e4adc..8e55cd5d3d0 100644
--- a/arch/um/sys-i386/unmap.c
+++ b/arch/um/sys-i386/unmap.c
@@ -5,20 +5,17 @@
#include <linux/mman.h>
#include <asm/unistd.h>
+#include <sys/syscall.h>
-static int errno;
-
-static inline _syscall2(int,munmap,void *,start,size_t,len)
-static inline _syscall6(void *,mmap2,void *,addr,size_t,len,int,prot,int,flags,int,fd,off_t,offset)
int switcheroo(int fd, int prot, void *from, void *to, int size)
{
- if(munmap(to, size) < 0){
+ if (syscall(__NR_munmap, to, size) < 0){
return(-1);
}
- if(mmap2(to, size, prot, MAP_SHARED | MAP_FIXED, fd, 0) == (void*) -1 ){
+ if (syscall(__NR_mmap2, to, size, prot, MAP_SHARED | MAP_FIXED, fd, 0) == (void*) -1 ){
return(-1);
}
- if(munmap(from, size) < 0){
+ if (syscall(__NR_munmap, from, size) < 0){
return(-1);
}
return(0);
diff --git a/arch/um/sys-x86_64/syscalls.c b/arch/um/sys-x86_64/syscalls.c
index 6fce9f45dfd..73ce4463f70 100644
--- a/arch/um/sys-x86_64/syscalls.c
+++ b/arch/um/sys-x86_64/syscalls.c
@@ -21,7 +21,7 @@ asmlinkage long sys_uname64(struct new_utsname __user * name)
{
int err;
down_read(&uts_sem);
- err = copy_to_user(name, &system_utsname, sizeof (*name));
+ err = copy_to_user(name, utsname(), sizeof (*name));
up_read(&uts_sem);
if (personality(current->personality) == PER_LINUX32)
err |= copy_to_user(&name->machine, "i686", 5);
diff --git a/arch/um/sys-x86_64/sysrq.c b/arch/um/sys-x86_64/sysrq.c
index d0a25af19a5..ce3e07fcf28 100644
--- a/arch/um/sys-x86_64/sysrq.c
+++ b/arch/um/sys-x86_64/sysrq.c
@@ -16,7 +16,7 @@ void __show_regs(struct pt_regs * regs)
printk("\n");
print_modules();
printk("Pid: %d, comm: %.20s %s %s\n",
- current->pid, current->comm, print_tainted(), system_utsname.release);
+ current->pid, current->comm, print_tainted(), init_utsname()->release);
printk("RIP: %04lx:[<%016lx>] ", PT_REGS_CS(regs) & 0xffff,
PT_REGS_RIP(regs));
printk("\nRSP: %016lx EFLAGS: %08lx\n", PT_REGS_RSP(regs),
diff --git a/arch/um/sys-x86_64/unmap.c b/arch/um/sys-x86_64/unmap.c
index f4a4bffd8a1..57c9286a701 100644
--- a/arch/um/sys-x86_64/unmap.c
+++ b/arch/um/sys-x86_64/unmap.c
@@ -5,20 +5,17 @@
#include <linux/mman.h>
#include <asm/unistd.h>
+#include <sys/syscall.h>
-static int errno;
-
-static inline _syscall2(int,munmap,void *,start,size_t,len)
-static inline _syscall6(void *,mmap,void *,addr,size_t,len,int,prot,int,flags,int,fd,off_t,offset)
int switcheroo(int fd, int prot, void *from, void *to, int size)
{
- if(munmap(to, size) < 0){
+ if (syscall(__NR_munmap, to, size) < 0){
return(-1);
}
- if(mmap(to, size, prot, MAP_SHARED | MAP_FIXED, fd, 0) == (void*) -1){
+ if (syscall(__NR_mmap, to, size, prot, MAP_SHARED | MAP_FIXED, fd, 0) == (void*) -1){
return(-1);
}
- if(munmap(from, size) < 0){
+ if (syscall(__NR_munmap, from, size) < 0){
return(-1);
}
return(0);
diff --git a/arch/v850/kernel/memcons.c b/arch/v850/kernel/memcons.c
index 815f8a43926..92f514fdcc7 100644
--- a/arch/v850/kernel/memcons.c
+++ b/arch/v850/kernel/memcons.c
@@ -104,7 +104,7 @@ int memcons_tty_chars_in_buffer (struct tty_struct *tty)
return 0;
}
-static struct tty_operations ops = {
+static const struct tty_operations ops = {
.open = memcons_tty_open,
.write = memcons_tty_write,
.write_room = memcons_tty_write_room,
diff --git a/arch/v850/kernel/simcons.c b/arch/v850/kernel/simcons.c
index 3975aa02cef..9973596ae30 100644
--- a/arch/v850/kernel/simcons.c
+++ b/arch/v850/kernel/simcons.c
@@ -77,7 +77,7 @@ int simcons_tty_chars_in_buffer (struct tty_struct *tty)
return 0;
}
-static struct tty_operations ops = {
+static const struct tty_operations ops = {
.open = simcons_tty_open,
.write = simcons_tty_write,
.write_room = simcons_tty_write_room,
diff --git a/arch/v850/kernel/syscalls.c b/arch/v850/kernel/syscalls.c
index 2ec0700fc46..d2b1fb19d24 100644
--- a/arch/v850/kernel/syscalls.c
+++ b/arch/v850/kernel/syscalls.c
@@ -33,6 +33,7 @@
#include <asm/uaccess.h>
#include <asm/ipc.h>
#include <asm/semaphore.h>
+#include <asm/unistd.h>
/*
* sys_ipc() is the de-multiplexer for the SysV IPC calls..
@@ -194,3 +195,22 @@ unsigned long sys_mmap (unsigned long addr, size_t len,
out:
return err;
}
+
+/*
+ * Do a system call from kernel instead of calling sys_execve so we
+ * end up with proper pt_regs.
+ */
+int kernel_execve(const char *filename, char *const argv[], char *const envp[])
+{
+ register char *__a __asm__ ("r6") = filename;
+ register void *__b __asm__ ("r7") = argv;
+ register void *__c __asm__ ("r8") = envp;
+ register unsigned long __syscall __asm__ ("r12") = __NR_execve;
+ register unsigned long __ret __asm__ ("r10");
+ __asm__ __volatile__ ("trap 0"
+ : "=r" (__ret), "=r" (__syscall)
+ : "1" (__syscall), "r" (__a), "r" (__b), "r" (__c)
+ : "r1", "r5", "r11", "r13", "r14",
+ "r15", "r16", "r17", "r18", "r19");
+ return __ret;
+}
diff --git a/arch/x86_64/Kconfig b/arch/x86_64/Kconfig
index b87a19f0d58..0a5d8e659aa 100644
--- a/arch/x86_64/Kconfig
+++ b/arch/x86_64/Kconfig
@@ -690,7 +690,7 @@ source "arch/x86_64/oprofile/Kconfig"
config KPROBES
bool "Kprobes (EXPERIMENTAL)"
- depends on EXPERIMENTAL && MODULES
+ depends on KALLSYMS && EXPERIMENTAL && MODULES
help
Kprobes allows you to trap at almost any kernel address and
execute a callback function. register_kprobe() establishes
diff --git a/arch/x86_64/ia32/sys_ia32.c b/arch/x86_64/ia32/sys_ia32.c
index f280d3665f4..26a01717cc1 100644
--- a/arch/x86_64/ia32/sys_ia32.c
+++ b/arch/x86_64/ia32/sys_ia32.c
@@ -784,36 +784,36 @@ asmlinkage long sys32_olduname(struct oldold_utsname __user * name)
if (!name)
return -EFAULT;
- if (!access_ok(VERIFY_WRITE,name,sizeof(struct oldold_utsname)))
+ if (!access_ok(VERIFY_WRITE, name, sizeof(struct oldold_utsname)))
return -EFAULT;
down_read(&uts_sem);
-
- err = __copy_to_user(&name->sysname,&system_utsname.sysname,
+
+ err = __copy_to_user(&name->sysname,&utsname()->sysname,
__OLD_UTS_LEN);
err |= __put_user(0,name->sysname+__OLD_UTS_LEN);
- err |= __copy_to_user(&name->nodename,&system_utsname.nodename,
+ err |= __copy_to_user(&name->nodename,&utsname()->nodename,
__OLD_UTS_LEN);
err |= __put_user(0,name->nodename+__OLD_UTS_LEN);
- err |= __copy_to_user(&name->release,&system_utsname.release,
+ err |= __copy_to_user(&name->release,&utsname()->release,
__OLD_UTS_LEN);
err |= __put_user(0,name->release+__OLD_UTS_LEN);
- err |= __copy_to_user(&name->version,&system_utsname.version,
+ err |= __copy_to_user(&name->version,&utsname()->version,
__OLD_UTS_LEN);
err |= __put_user(0,name->version+__OLD_UTS_LEN);
- {
- char *arch = "x86_64";
- if (personality(current->personality) == PER_LINUX32)
- arch = "i686";
+ {
+ char *arch = "x86_64";
+ if (personality(current->personality) == PER_LINUX32)
+ arch = "i686";
- err |= __copy_to_user(&name->machine,arch,strlen(arch)+1);
- }
-
- up_read(&uts_sem);
-
- err = err ? -EFAULT : 0;
-
- return err;
+ err |= __copy_to_user(&name->machine, arch, strlen(arch)+1);
+ }
+
+ up_read(&uts_sem);
+
+ err = err ? -EFAULT : 0;
+
+ return err;
}
long sys32_uname(struct old_utsname __user * name)
@@ -822,7 +822,7 @@ long sys32_uname(struct old_utsname __user * name)
if (!name)
return -EFAULT;
down_read(&uts_sem);
- err=copy_to_user(name, &system_utsname, sizeof (*name));
+ err = copy_to_user(name, utsname(), sizeof (*name));
up_read(&uts_sem);
if (personality(current->personality) == PER_LINUX32)
err |= copy_to_user(&name->machine, "i686", 5);
diff --git a/arch/x86_64/kernel/entry.S b/arch/x86_64/kernel/entry.S
index 2802524104f..b8285cf1a9c 100644
--- a/arch/x86_64/kernel/entry.S
+++ b/arch/x86_64/kernel/entry.S
@@ -1023,7 +1023,7 @@ ENDPROC(child_rip)
* do_sys_execve asm fallback arguments:
* rdi: name, rsi: argv, rdx: envp, fake frame on the stack
*/
-ENTRY(execve)
+ENTRY(kernel_execve)
CFI_STARTPROC
FAKE_STACK_FRAME $0
SAVE_ALL
@@ -1036,7 +1036,7 @@ ENTRY(execve)
UNFAKE_STACK_FRAME
ret
CFI_ENDPROC
-ENDPROC(execve)
+ENDPROC(kernel_execve)
KPROBE_ENTRY(page_fault)
errorentry do_page_fault
diff --git a/arch/x86_64/kernel/kprobes.c b/arch/x86_64/kernel/kprobes.c
index ffc73ac7248..ac241567e68 100644
--- a/arch/x86_64/kernel/kprobes.c
+++ b/arch/x86_64/kernel/kprobes.c
@@ -270,20 +270,19 @@ void __kprobes arch_prepare_kretprobe(struct kretprobe *rp,
struct pt_regs *regs)
{
unsigned long *sara = (unsigned long *)regs->rsp;
- struct kretprobe_instance *ri;
+ struct kretprobe_instance *ri;
- if ((ri = get_free_rp_inst(rp)) != NULL) {
- ri->rp = rp;
- ri->task = current;
+ if ((ri = get_free_rp_inst(rp)) != NULL) {
+ ri->rp = rp;
+ ri->task = current;
ri->ret_addr = (kprobe_opcode_t *) *sara;
/* Replace the return addr with trampoline addr */
*sara = (unsigned long) &kretprobe_trampoline;
-
- add_rp_inst(ri);
- } else {
- rp->nmissed++;
- }
+ add_rp_inst(ri);
+ } else {
+ rp->nmissed++;
+ }
}
int __kprobes kprobe_handler(struct pt_regs *regs)
@@ -405,14 +404,15 @@ no_kprobe:
*/
int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs)
{
- struct kretprobe_instance *ri = NULL;
- struct hlist_head *head;
- struct hlist_node *node, *tmp;
+ struct kretprobe_instance *ri = NULL;
+ struct hlist_head *head, empty_rp;
+ struct hlist_node *node, *tmp;
unsigned long flags, orig_ret_address = 0;
unsigned long trampoline_address =(unsigned long)&kretprobe_trampoline;
+ INIT_HLIST_HEAD(&empty_rp);
spin_lock_irqsave(&kretprobe_lock, flags);
- head = kretprobe_inst_table_head(current);
+ head = kretprobe_inst_table_head(current);
/*
* It is possible to have multiple instances associated with a given
@@ -423,20 +423,20 @@ int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs)
* We can handle this because:
* - instances are always inserted at the head of the list
* - when multiple return probes are registered for the same
- * function, the first instance's ret_addr will point to the
+ * function, the first instance's ret_addr will point to the
* real return address, and all the rest will point to
* kretprobe_trampoline
*/
hlist_for_each_entry_safe(ri, node, tmp, head, hlist) {
- if (ri->task != current)
+ if (ri->task != current)
/* another task is sharing our hash bucket */
- continue;
+ continue;
if (ri->rp && ri->rp->handler)
ri->rp->handler(ri, regs);
orig_ret_address = (unsigned long)ri->ret_addr;
- recycle_rp_inst(ri);
+ recycle_rp_inst(ri, &empty_rp);
if (orig_ret_address != trampoline_address)
/*
@@ -454,12 +454,16 @@ int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs)
spin_unlock_irqrestore(&kretprobe_lock, flags);
preempt_enable_no_resched();
- /*
- * By returning a non-zero value, we are telling
- * kprobe_handler() that we don't want the post_handler
+ hlist_for_each_entry_safe(ri, node, tmp, &empty_rp, hlist) {
+ hlist_del(&ri->hlist);
+ kfree(ri);
+ }
+ /*
+ * By returning a non-zero value, we are telling
+ * kprobe_handler() that we don't want the post_handler
* to run (and have re-enabled preemption)
- */
- return 1;
+ */
+ return 1;
}
/*
diff --git a/arch/x86_64/kernel/process.c b/arch/x86_64/kernel/process.c
index 458006ae19f..de10cb8a2c9 100644
--- a/arch/x86_64/kernel/process.c
+++ b/arch/x86_64/kernel/process.c
@@ -294,9 +294,9 @@ void __show_regs(struct pt_regs * regs)
print_modules();
printk("Pid: %d, comm: %.20s %s %s %.*s\n",
current->pid, current->comm, print_tainted(),
- system_utsname.release,
- (int)strcspn(system_utsname.version, " "),
- system_utsname.version);
+ init_utsname()->release,
+ (int)strcspn(init_utsname()->version, " "),
+ init_utsname()->version);
printk("RIP: %04lx:[<%016lx>] ", regs->cs & 0xffff, regs->rip);
printk_address(regs->rip);
printk("RSP: %04lx:%016lx EFLAGS: %08lx\n", regs->ss, regs->rsp,
diff --git a/arch/x86_64/kernel/sys_x86_64.c b/arch/x86_64/kernel/sys_x86_64.c
index 6449ea8fe75..76bf7c241fe 100644
--- a/arch/x86_64/kernel/sys_x86_64.c
+++ b/arch/x86_64/kernel/sys_x86_64.c
@@ -148,7 +148,7 @@ asmlinkage long sys_uname(struct new_utsname __user * name)
{
int err;
down_read(&uts_sem);
- err = copy_to_user(name, &system_utsname, sizeof (*name));
+ err = copy_to_user(name, utsname(), sizeof (*name));
up_read(&uts_sem);
if (personality(current->personality) == PER_LINUX32)
err |= copy_to_user(&name->machine, "i686", 5);
diff --git a/arch/xtensa/kernel/syscalls.c b/arch/xtensa/kernel/syscalls.c
index 4688ba2db84..d9285d4d556 100644
--- a/arch/xtensa/kernel/syscalls.c
+++ b/arch/xtensa/kernel/syscalls.c
@@ -128,7 +128,7 @@ out:
int sys_uname(struct old_utsname * name)
{
- if (name && !copy_to_user(name, &system_utsname, sizeof (*name)))
+ if (name && !copy_to_user(name, utsname(), sizeof (*name)))
return 0;
return -EFAULT;
}
@@ -266,3 +266,23 @@ void system_call (struct pt_regs *regs)
regs->areg[2] = res;
do_syscall_trace();
}
+
+/*
+ * Do a system call from kernel instead of calling sys_execve so we
+ * end up with proper pt_regs.
+ */
+int kernel_execve(const char *filename, char *const argv[], char *const envp[])
+{
+ long __res;
+ asm volatile (
+ " mov a5, %2 \n"
+ " mov a4, %4 \n"
+ " mov a3, %3 \n"
+ " movi a2, %1 \n"
+ " syscall \n"
+ " mov %0, a2 \n"
+ : "=a" (__res)
+ : "i" (__NR_execve), "a" (filename), "a" (argv), "a" (envp)
+ : "a2", "a3", "a4", "a5");
+ return __res;
+}
diff --git a/arch/xtensa/platform-iss/console.c b/arch/xtensa/platform-iss/console.c
index 22d3c571a7b..5c947cae752 100644
--- a/arch/xtensa/platform-iss/console.c
+++ b/arch/xtensa/platform-iss/console.c
@@ -191,7 +191,7 @@ static int rs_read_proc(char *page, char **start, off_t off, int count,
}
-static struct tty_operations serial_ops = {
+static const struct tty_operations serial_ops = {
.open = rs_open,
.close = rs_close,
.write = rs_write,
diff --git a/drivers/char/amiserial.c b/drivers/char/amiserial.c
index 9d6713a93ed..d0e92ed0a36 100644
--- a/drivers/char/amiserial.c
+++ b/drivers/char/amiserial.c
@@ -1958,7 +1958,7 @@ static void show_serial_version(void)
}
-static struct tty_operations serial_ops = {
+static const struct tty_operations serial_ops = {
.open = rs_open,
.close = rs_close,
.write = rs_write,
diff --git a/drivers/char/cyclades.c b/drivers/char/cyclades.c
index c1c67281750..f85b4eb1661 100644
--- a/drivers/char/cyclades.c
+++ b/drivers/char/cyclades.c
@@ -5205,7 +5205,7 @@ done:
extra ports are ignored.
*/
-static struct tty_operations cy_ops = {
+static const struct tty_operations cy_ops = {
.open = cy_open,
.close = cy_close,
.write = cy_write,
diff --git a/drivers/char/epca.c b/drivers/char/epca.c
index 86d290e9f30..3baa2ab8cbd 100644
--- a/drivers/char/epca.c
+++ b/drivers/char/epca.c
@@ -1125,7 +1125,7 @@ static void __exit epca_module_exit(void)
module_exit(epca_module_exit);
-static struct tty_operations pc_ops = {
+static const struct tty_operations pc_ops = {
.open = pc_open,
.close = pc_close,
.write = pc_write,
diff --git a/drivers/char/esp.c b/drivers/char/esp.c
index afcd83d9984..05788c75d7f 100644
--- a/drivers/char/esp.c
+++ b/drivers/char/esp.c
@@ -2376,7 +2376,7 @@ static inline int autoconfig(struct esp_struct * info)
return (port_detected);
}
-static struct tty_operations esp_ops = {
+static const struct tty_operations esp_ops = {
.open = esp_open,
.close = rs_close,
.write = rs_write,
diff --git a/drivers/char/hvc_console.c b/drivers/char/hvc_console.c
index a76d2c40dd5..4053d1cd393 100644
--- a/drivers/char/hvc_console.c
+++ b/drivers/char/hvc_console.c
@@ -696,7 +696,7 @@ int khvcd(void *unused)
return 0;
}
-static struct tty_operations hvc_ops = {
+static const struct tty_operations hvc_ops = {
.open = hvc_open,
.close = hvc_close,
.write = hvc_write,
diff --git a/drivers/char/hvcs.c b/drivers/char/hvcs.c
index 4589ff302b0..0b89bcde8c5 100644
--- a/drivers/char/hvcs.c
+++ b/drivers/char/hvcs.c
@@ -1306,7 +1306,7 @@ static int hvcs_chars_in_buffer(struct tty_struct *tty)
return hvcsd->chars_in_buffer;
}
-static struct tty_operations hvcs_ops = {
+static const struct tty_operations hvcs_ops = {
.open = hvcs_open,
.close = hvcs_close,
.hangup = hvcs_hangup,
diff --git a/drivers/char/hvsi.c b/drivers/char/hvsi.c
index a89a95fb5e4..c07dc58d5c1 100644
--- a/drivers/char/hvsi.c
+++ b/drivers/char/hvsi.c
@@ -1130,7 +1130,7 @@ static int hvsi_tiocmset(struct tty_struct *tty, struct file *file,
}
-static struct tty_operations hvsi_ops = {
+static const struct tty_operations hvsi_ops = {
.open = hvsi_open,
.close = hvsi_close,
.write = hvsi_write,
diff --git a/drivers/char/ip2/ip2main.c b/drivers/char/ip2/ip2main.c
index 331f447e622..4828bc914ce 100644
--- a/drivers/char/ip2/ip2main.c
+++ b/drivers/char/ip2/ip2main.c
@@ -458,7 +458,7 @@ cleanup_module(void)
}
#endif /* MODULE */
-static struct tty_operations ip2_ops = {
+static const struct tty_operations ip2_ops = {
.open = ip2_open,
.close = ip2_close,
.write = ip2_write,
diff --git a/drivers/char/isicom.c b/drivers/char/isicom.c
index 2e1da632aee..ea2bbf80ad3 100644
--- a/drivers/char/isicom.c
+++ b/drivers/char/isicom.c
@@ -1550,7 +1550,7 @@ static void isicom_unregister_ioregion(struct pci_dev *pdev)
board->base = 0;
}
-static struct tty_operations isicom_ops = {
+static const struct tty_operations isicom_ops = {
.open = isicom_open,
.close = isicom_close,
.write = isicom_write,
diff --git a/drivers/char/istallion.c b/drivers/char/istallion.c
index 6b4d82a4565..d6e031542c6 100644
--- a/drivers/char/istallion.c
+++ b/drivers/char/istallion.c
@@ -4636,7 +4636,7 @@ static int stli_memioctl(struct inode *ip, struct file *fp, unsigned int cmd, un
return rc;
}
-static struct tty_operations stli_ops = {
+static const struct tty_operations stli_ops = {
.open = stli_open,
.close = stli_close,
.write = stli_write,
diff --git a/drivers/char/keyboard.c b/drivers/char/keyboard.c
index 3e90aac3751..99fb070fdbf 100644
--- a/drivers/char/keyboard.c
+++ b/drivers/char/keyboard.c
@@ -108,7 +108,11 @@ const int NR_TYPES = ARRAY_SIZE(max_vals);
struct kbd_struct kbd_table[MAX_NR_CONSOLES];
static struct kbd_struct *kbd = kbd_table;
-int spawnpid, spawnsig;
+struct vt_spawn_console vt_spawn_con = {
+ .lock = SPIN_LOCK_UNLOCKED,
+ .pid = NULL,
+ .sig = 0,
+};
/*
* Variables exported for vt.c
@@ -578,9 +582,13 @@ static void fn_compose(struct vc_data *vc, struct pt_regs *regs)
static void fn_spawn_con(struct vc_data *vc, struct pt_regs *regs)
{
- if (spawnpid)
- if (kill_proc(spawnpid, spawnsig, 1))
- spawnpid = 0;
+ spin_lock(&vt_spawn_con.lock);
+ if (vt_spawn_con.pid)
+ if (kill_pid(vt_spawn_con.pid, vt_spawn_con.sig, 1)) {
+ put_pid(vt_spawn_con.pid);
+ vt_spawn_con.pid = NULL;
+ }
+ spin_unlock(&vt_spawn_con.lock);
}
static void fn_SAK(struct vc_data *vc, struct pt_regs *regs)
diff --git a/drivers/char/moxa.c b/drivers/char/moxa.c
index c1a6d3c48da..b401383808c 100644
--- a/drivers/char/moxa.c
+++ b/drivers/char/moxa.c
@@ -281,7 +281,7 @@ static int moxa_get_serial_info(struct moxa_str *, struct serial_struct __user *
static int moxa_set_serial_info(struct moxa_str *, struct serial_struct __user *);
static void MoxaSetFifo(int port, int enable);
-static struct tty_operations moxa_ops = {
+static const struct tty_operations moxa_ops = {
.open = moxa_open,
.close = moxa_close,
.write = moxa_write,
diff --git a/drivers/char/mxser.c b/drivers/char/mxser.c
index 27a65377204..8253fca8efd 100644
--- a/drivers/char/mxser.c
+++ b/drivers/char/mxser.c
@@ -453,7 +453,7 @@ static int CheckIsMoxaMust(int io)
/* above is modified by Victor Yu. 08-15-2002 */
-static struct tty_operations mxser_ops = {
+static const struct tty_operations mxser_ops = {
.open = mxser_open,
.close = mxser_close,
.write = mxser_write,
diff --git a/drivers/char/nwbutton.c b/drivers/char/nwbutton.c
index 7c57ebfa864..ea1aa7764f8 100644
--- a/drivers/char/nwbutton.c
+++ b/drivers/char/nwbutton.c
@@ -127,9 +127,8 @@ static void button_consume_callbacks (int bpcount)
static void button_sequence_finished (unsigned long parameters)
{
#ifdef CONFIG_NWBUTTON_REBOOT /* Reboot using button is enabled */
- if (button_press_count == reboot_count) {
- kill_proc (1, SIGINT, 1); /* Ask init to reboot us */
- }
+ if (button_press_count == reboot_count)
+ kill_cad_pid(SIGINT, 1); /* Ask init to reboot us */
#endif /* CONFIG_NWBUTTON_REBOOT */
button_consume_callbacks (button_press_count);
bcount = sprintf (button_output_buffer, "%d\n", button_press_count);
diff --git a/drivers/char/pcmcia/synclink_cs.c b/drivers/char/pcmcia/synclink_cs.c
index 00f574cbb0d..dd845cbefe9 100644
--- a/drivers/char/pcmcia/synclink_cs.c
+++ b/drivers/char/pcmcia/synclink_cs.c
@@ -3010,7 +3010,7 @@ static struct pcmcia_driver mgslpc_driver = {
.resume = mgslpc_resume,
};
-static struct tty_operations mgslpc_ops = {
+static const struct tty_operations mgslpc_ops = {
.open = mgslpc_open,
.close = mgslpc_close,
.write = mgslpc_write,
diff --git a/drivers/char/pty.c b/drivers/char/pty.c
index 34dd4c38110..80d3eedd7f9 100644
--- a/drivers/char/pty.c
+++ b/drivers/char/pty.c
@@ -224,7 +224,7 @@ static void pty_set_termios(struct tty_struct *tty, struct termios *old_termios)
tty->termios->c_cflag |= (CS8 | CREAD);
}
-static struct tty_operations pty_ops = {
+static const struct tty_operations pty_ops = {
.open = pty_open,
.close = pty_close,
.write = pty_write,
diff --git a/drivers/char/random.c b/drivers/char/random.c
index b430a12eb81..07f47a0208a 100644
--- a/drivers/char/random.c
+++ b/drivers/char/random.c
@@ -889,8 +889,8 @@ static void init_std_data(struct entropy_store *r)
do_gettimeofday(&tv);
add_entropy_words(r, (__u32 *)&tv, sizeof(tv)/4);
- add_entropy_words(r, (__u32 *)&system_utsname,
- sizeof(system_utsname)/4);
+ add_entropy_words(r, (__u32 *)utsname(),
+ sizeof(*(utsname()))/4);
}
static int __init rand_initialize(void)
diff --git a/drivers/char/rio/rio_linux.c b/drivers/char/rio/rio_linux.c
index 3fa80aaf452..202a3b0945b 100644
--- a/drivers/char/rio/rio_linux.c
+++ b/drivers/char/rio/rio_linux.c
@@ -727,7 +727,7 @@ static struct vpd_prom *get_VPD_PROM(struct Host *hp)
return &vpdp;
}
-static struct tty_operations rio_ops = {
+static const struct tty_operations rio_ops = {
.open = riotopen,
.close = gs_close,
.write = gs_write,
diff --git a/drivers/char/riscom8.c b/drivers/char/riscom8.c
index 06b9f78a95d..214d850112f 100644
--- a/drivers/char/riscom8.c
+++ b/drivers/char/riscom8.c
@@ -1583,7 +1583,7 @@ static void do_softint(void *private_)
}
}
-static struct tty_operations riscom_ops = {
+static const struct tty_operations riscom_ops = {
.open = rc_open,
.close = rc_close,
.write = rc_write,
diff --git a/drivers/char/rocket.c b/drivers/char/rocket.c
index 0ac13188132..bac80056f7e 100644
--- a/drivers/char/rocket.c
+++ b/drivers/char/rocket.c
@@ -2334,7 +2334,7 @@ static int __init init_ISA(int i)
return (1);
}
-static struct tty_operations rocket_ops = {
+static const struct tty_operations rocket_ops = {
.open = rp_open,
.close = rp_close,
.write = rp_write,
diff --git a/drivers/char/ser_a2232.c b/drivers/char/ser_a2232.c
index 510bd3e0e88..65c751d0d64 100644
--- a/drivers/char/ser_a2232.c
+++ b/drivers/char/ser_a2232.c
@@ -661,7 +661,7 @@ static void a2232_init_portstructs(void)
}
}
-static struct tty_operations a2232_ops = {
+static const struct tty_operations a2232_ops = {
.open = a2232_open,
.close = gs_close,
.write = gs_write,
diff --git a/drivers/char/serial167.c b/drivers/char/serial167.c
index 21a710cb4bb..b4ea1266b66 100644
--- a/drivers/char/serial167.c
+++ b/drivers/char/serial167.c
@@ -2158,7 +2158,7 @@ mvme167_serial_console_setup(int cflag)
rcor >> 5, rbpr);
} /* serial_console_init */
-static struct tty_operations cy_ops = {
+static const struct tty_operations cy_ops = {
.open = cy_open,
.close = cy_close,
.write = cy_write,
diff --git a/drivers/char/snsc_event.c b/drivers/char/snsc_event.c
index d12d4f629ce..864854c5886 100644
--- a/drivers/char/snsc_event.c
+++ b/drivers/char/snsc_event.c
@@ -220,7 +220,7 @@ scdrv_dispatch_event(char *event, int len)
" Sending SIGPWR to init...\n");
/* give a SIGPWR signal to init proc */
- kill_proc(1, SIGPWR, 0);
+ kill_cad_pid(SIGPWR, 0);
} else {
/* print to system log */
printk("%s|$(0x%x)%s\n", severity, esp_code, desc);
diff --git a/drivers/char/specialix.c b/drivers/char/specialix.c
index f52c7c31bad..902c48dca3b 100644
--- a/drivers/char/specialix.c
+++ b/drivers/char/specialix.c
@@ -2363,7 +2363,7 @@ static void do_softint(void *private_)
func_exit();
}
-static struct tty_operations sx_ops = {
+static const struct tty_operations sx_ops = {
.open = sx_open,
.close = sx_close,
.write = sx_write,
diff --git a/drivers/char/stallion.c b/drivers/char/stallion.c
index 3beb2203d24..bd711537ec4 100644
--- a/drivers/char/stallion.c
+++ b/drivers/char/stallion.c
@@ -2993,7 +2993,7 @@ static int stl_memioctl(struct inode *ip, struct file *fp, unsigned int cmd, uns
return(rc);
}
-static struct tty_operations stl_ops = {
+static const struct tty_operations stl_ops = {
.open = stl_open,
.close = stl_close,
.write = stl_write,
diff --git a/drivers/char/sx.c b/drivers/char/sx.c
index e1cd2bc4b1e..57e31e5eaed 100644
--- a/drivers/char/sx.c
+++ b/drivers/char/sx.c
@@ -2226,7 +2226,7 @@ static int probe_si (struct sx_board *board)
return 1;
}
-static struct tty_operations sx_ops = {
+static const struct tty_operations sx_ops = {
.break_ctl = sx_break,
.open = sx_open,
.close = gs_close,
diff --git a/drivers/char/synclink.c b/drivers/char/synclink.c
index 78b1b1a2732..244dc308c77 100644
--- a/drivers/char/synclink.c
+++ b/drivers/char/synclink.c
@@ -4360,7 +4360,7 @@ static struct mgsl_struct* mgsl_allocate_device(void)
} /* end of mgsl_allocate_device()*/
-static struct tty_operations mgsl_ops = {
+static const struct tty_operations mgsl_ops = {
.open = mgsl_open,
.close = mgsl_close,
.write = mgsl_write,
diff --git a/drivers/char/synclink_gt.c b/drivers/char/synclink_gt.c
index 78bc85180c8..bdc7cb248b8 100644
--- a/drivers/char/synclink_gt.c
+++ b/drivers/char/synclink_gt.c
@@ -3441,7 +3441,7 @@ static void __devexit remove_one(struct pci_dev *dev)
{
}
-static struct tty_operations ops = {
+static const struct tty_operations ops = {
.open = open,
.close = close,
.write = write,
diff --git a/drivers/char/synclinkmp.c b/drivers/char/synclinkmp.c
index 66f3754fbbd..6eb75dcd796 100644
--- a/drivers/char/synclinkmp.c
+++ b/drivers/char/synclinkmp.c
@@ -3929,7 +3929,7 @@ void device_init(int adapter_num, struct pci_dev *pdev)
}
}
-static struct tty_operations ops = {
+static const struct tty_operations ops = {
.open = open,
.close = close,
.write = write,
diff --git a/drivers/char/tty_io.c b/drivers/char/tty_io.c
index 333741770f1..e90ea39c7c4 100644
--- a/drivers/char/tty_io.c
+++ b/drivers/char/tty_io.c
@@ -3680,7 +3680,8 @@ void put_tty_driver(struct tty_driver *driver)
kfree(driver);
}
-void tty_set_operations(struct tty_driver *driver, struct tty_operations *op)
+void tty_set_operations(struct tty_driver *driver,
+ const struct tty_operations *op)
{
driver->open = op->open;
driver->close = op->close;
diff --git a/drivers/char/viocons.c b/drivers/char/viocons.c
index f3efeaf2826..a362ee9c92d 100644
--- a/drivers/char/viocons.c
+++ b/drivers/char/viocons.c
@@ -1047,7 +1047,7 @@ static int send_open(HvLpIndex remoteLp, void *sem)
0, 0, 0, 0);
}
-static struct tty_operations serial_ops = {
+static const struct tty_operations serial_ops = {
.open = viotty_open,
.close = viotty_close,
.write = viotty_write,
diff --git a/drivers/char/vme_scc.c b/drivers/char/vme_scc.c
index bfe5ea948f6..c2ca31eb850 100644
--- a/drivers/char/vme_scc.c
+++ b/drivers/char/vme_scc.c
@@ -113,7 +113,7 @@ static struct real_driver scc_real_driver = {
};
-static struct tty_operations scc_ops = {
+static const struct tty_operations scc_ops = {
.open = scc_open,
.close = gs_close,
.write = gs_write,
diff --git a/drivers/char/vt.c b/drivers/char/vt.c
index fb75da940b5..ec0c070bf15 100644
--- a/drivers/char/vt.c
+++ b/drivers/char/vt.c
@@ -903,6 +903,7 @@ void vc_deallocate(unsigned int currcons)
if (vc_cons_allocated(currcons)) {
struct vc_data *vc = vc_cons[currcons].d;
vc->vc_sw->con_deinit(vc);
+ put_pid(vc->vt_pid);
module_put(vc->vc_sw->owner);
if (vc->vc_kmalloced)
kfree(vc->vc_screenbuf);
@@ -2674,7 +2675,7 @@ static int __init con_init(void)
}
console_initcall(con_init);
-static struct tty_operations con_ops = {
+static const struct tty_operations con_ops = {
.open = con_open,
.close = con_close,
.write = con_write,
diff --git a/drivers/char/vt_ioctl.c b/drivers/char/vt_ioctl.c
index a53e382cc10..ac5d60edbaf 100644
--- a/drivers/char/vt_ioctl.c
+++ b/drivers/char/vt_ioctl.c
@@ -645,13 +645,16 @@ int vt_ioctl(struct tty_struct *tty, struct file * file,
*/
case KDSIGACCEPT:
{
- extern int spawnpid, spawnsig;
if (!perm || !capable(CAP_KILL))
return -EPERM;
if (!valid_signal(arg) || arg < 1 || arg == SIGKILL)
return -EINVAL;
- spawnpid = current->pid;
- spawnsig = arg;
+
+ spin_lock_irq(&vt_spawn_con.lock);
+ put_pid(vt_spawn_con.pid);
+ vt_spawn_con.pid = get_pid(task_pid(current));
+ vt_spawn_con.sig = arg;
+ spin_unlock_irq(&vt_spawn_con.lock);
return 0;
}
@@ -669,7 +672,7 @@ int vt_ioctl(struct tty_struct *tty, struct file * file,
vc->vt_mode = tmp;
/* the frsig is ignored, so we set it to 0 */
vc->vt_mode.frsig = 0;
- vc->vt_pid = current->pid;
+ put_pid(xchg(&vc->vt_pid, get_pid(task_pid(current))));
/* no switch is required -- saw@shade.msu.ru */
vc->vt_newvt = -1;
release_console_sem();
@@ -1060,7 +1063,7 @@ void reset_vc(struct vc_data *vc)
vc->vt_mode.relsig = 0;
vc->vt_mode.acqsig = 0;
vc->vt_mode.frsig = 0;
- vc->vt_pid = -1;
+ put_pid(xchg(&vc->vt_pid, NULL));
vc->vt_newvt = -1;
if (!in_interrupt()) /* Via keyboard.c:SAK() - akpm */
reset_palette(vc);
@@ -1111,7 +1114,7 @@ static void complete_change_console(struct vc_data *vc)
* tell us if the process has gone or something else
* is awry
*/
- if (kill_proc(vc->vt_pid, vc->vt_mode.acqsig, 1) != 0) {
+ if (kill_pid(vc->vt_pid, vc->vt_mode.acqsig, 1) != 0) {
/*
* The controlling process has died, so we revert back to
* normal operation. In this case, we'll also change back
@@ -1171,7 +1174,7 @@ void change_console(struct vc_data *new_vc)
* tell us if the process has gone or something else
* is awry
*/
- if (kill_proc(vc->vt_pid, vc->vt_mode.relsig, 1) == 0) {
+ if (kill_pid(vc->vt_pid, vc->vt_mode.relsig, 1) == 0) {
/*
* It worked. Mark the vt to switch to and
* return. The process needs to send us a
diff --git a/drivers/infiniband/hw/ipath/ipath_verbs.c b/drivers/infiniband/hw/ipath/ipath_verbs.c
index 42eaed88c28..a5456108dba 100644
--- a/drivers/infiniband/hw/ipath/ipath_verbs.c
+++ b/drivers/infiniband/hw/ipath/ipath_verbs.c
@@ -1601,7 +1601,7 @@ int ipath_register_ib_device(struct ipath_devdata *dd)
dev->mmap = ipath_mmap;
snprintf(dev->node_desc, sizeof(dev->node_desc),
- IPATH_IDSTR " %s", system_utsname.nodename);
+ IPATH_IDSTR " %s", init_utsname()->nodename);
ret = ib_register_device(dev);
if (ret)
diff --git a/drivers/isdn/capi/capi.c b/drivers/isdn/capi/capi.c
index 669f76393b5..11844bbfe93 100644
--- a/drivers/isdn/capi/capi.c
+++ b/drivers/isdn/capi/capi.c
@@ -1298,7 +1298,7 @@ static int capinc_tty_read_proc(char *page, char **start, off_t off,
static struct tty_driver *capinc_tty_driver;
-static struct tty_operations capinc_ops = {
+static const struct tty_operations capinc_ops = {
.open = capinc_tty_open,
.close = capinc_tty_close,
.write = capinc_tty_write,
diff --git a/drivers/isdn/gigaset/interface.c b/drivers/isdn/gigaset/interface.c
index bd2e4267528..596f3aebe2f 100644
--- a/drivers/isdn/gigaset/interface.c
+++ b/drivers/isdn/gigaset/interface.c
@@ -134,7 +134,7 @@ static int if_tiocmset(struct tty_struct *tty, struct file *file,
static int if_write(struct tty_struct *tty,
const unsigned char *buf, int count);
-static struct tty_operations if_ops = {
+static const struct tty_operations if_ops = {
.open = if_open,
.close = if_close,
.ioctl = if_ioctl,
diff --git a/drivers/isdn/gigaset/proc.c b/drivers/isdn/gigaset/proc.c
index 9ae3a7f3e7b..9ad840e95db 100644
--- a/drivers/isdn/gigaset/proc.c
+++ b/drivers/isdn/gigaset/proc.c
@@ -83,5 +83,6 @@ void gigaset_init_dev_sysfs(struct cardstate *cs)
return;
gig_dbg(DEBUG_INIT, "setting up sysfs");
- class_device_create_file(cs->class, &class_device_attr_cidmode);
+ if (class_device_create_file(cs->class, &class_device_attr_cidmode))
+ dev_err(cs->dev, "could not create sysfs attribute\n");
}
diff --git a/drivers/isdn/hisax/hisax.h b/drivers/isdn/hisax/hisax.h
index 75920aa0a3c..2f9d5118cea 100644
--- a/drivers/isdn/hisax/hisax.h
+++ b/drivers/isdn/hisax/hisax.h
@@ -1316,7 +1316,18 @@ void dlogframe(struct IsdnCardState *cs, struct sk_buff *skb, int dir);
void iecpy(u_char * dest, u_char * iestart, int ieoffset);
#endif /* __KERNEL__ */
-#define HZDELAY(jiffs) {int tout = jiffs; while (tout--) udelay(1000000/HZ);}
+/*
+ * Busywait delay for `jiffs' jiffies
+ */
+#define HZDELAY(jiffs) do { \
+ int tout = jiffs; \
+ \
+ while (tout--) { \
+ int loops = USEC_PER_SEC / HZ; \
+ while (loops--) \
+ udelay(1); \
+ } \
+ } while (0)
int ll_run(struct IsdnCardState *cs, int addfeatures);
int CallcNew(void);
diff --git a/drivers/isdn/i4l/isdn_tty.c b/drivers/isdn/i4l/isdn_tty.c
index 9ab66e8960d..2b91bb07fc7 100644
--- a/drivers/isdn/i4l/isdn_tty.c
+++ b/drivers/isdn/i4l/isdn_tty.c
@@ -1860,7 +1860,7 @@ modem_write_profile(atemu * m)
send_sig(SIGIO, dev->profd, 1);
}
-static struct tty_operations modem_ops = {
+static const struct tty_operations modem_ops = {
.open = isdn_tty_open,
.close = isdn_tty_close,
.write = isdn_tty_write,
diff --git a/drivers/media/dvb/dvb-core/dvb_ringbuffer.c b/drivers/media/dvb/dvb-core/dvb_ringbuffer.c
index c972fe014c5..9878183ba3f 100644
--- a/drivers/media/dvb/dvb-core/dvb_ringbuffer.c
+++ b/drivers/media/dvb/dvb-core/dvb_ringbuffer.c
@@ -26,7 +26,6 @@
-#define __KERNEL_SYSCALLS__
#include <linux/errno.h>
#include <linux/kernel.h>
#include <linux/module.h>
diff --git a/drivers/misc/Makefile b/drivers/misc/Makefile
index 19c2b85249c..c1bf1fb04c5 100644
--- a/drivers/misc/Makefile
+++ b/drivers/misc/Makefile
@@ -3,5 +3,6 @@
#
obj- := misc.o # Dummy rule to force built-in.o to be made
-obj-$(CONFIG_IBM_ASM) += ibmasm/
+obj-$(CONFIG_IBM_ASM) += ibmasm/
obj-$(CONFIG_HDPU_FEATURES) += hdpuftrs/
+obj-$(CONFIG_LKDTM) += lkdtm.o
diff --git a/drivers/misc/lkdtm.c b/drivers/misc/lkdtm.c
new file mode 100644
index 00000000000..e689ee94ac3
--- /dev/null
+++ b/drivers/misc/lkdtm.c
@@ -0,0 +1,342 @@
+/*
+ * Kprobe module for testing crash dumps
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright (C) IBM Corporation, 2006
+ *
+ * Author: Ankita Garg <ankita@in.ibm.com>
+ *
+ * This module induces system failures at predefined crashpoints to
+ * evaluate the reliability of crash dumps obtained using different dumping
+ * solutions.
+ *
+ * It is adapted from the Linux Kernel Dump Test Tool by
+ * Fernando Luis Vazquez Cao <http://lkdtt.sourceforge.net>
+ *
+ * Usage : insmod lkdtm.ko [recur_count={>0}] cpoint_name=<> cpoint_type=<>
+ * [cpoint_count={>0}]
+ *
+ * recur_count : Recursion level for the stack overflow test. Default is 10.
+ *
+ * cpoint_name : Crash point where the kernel is to be crashed. It can be
+ * one of INT_HARDWARE_ENTRY, INT_HW_IRQ_EN, INT_TASKLET_ENTRY,
+ * FS_DEVRW, MEM_SWAPOUT, TIMERADD, SCSI_DISPATCH_CMD,
+ * IDE_CORE_CP
+ *
+ * cpoint_type : Indicates the action to be taken on hitting the crash point.
+ * It can be one of PANIC, BUG, EXCEPTION, LOOP, OVERFLOW
+ *
+ * cpoint_count : Indicates the number of times the crash point is to be hit
+ * to trigger an action. The default is 10.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/kprobes.h>
+#include <linux/kallsyms.h>
+#include <linux/init.h>
+#include <linux/irq.h>
+#include <linux/interrupt.h>
+#include <scsi/scsi_cmnd.h>
+
+#ifdef CONFIG_IDE
+#include <linux/ide.h>
+#endif
+
+#define NUM_CPOINTS 8
+#define NUM_CPOINT_TYPES 5
+#define DEFAULT_COUNT 10
+#define REC_NUM_DEFAULT 10
+
+enum cname {
+ INVALID,
+ INT_HARDWARE_ENTRY,
+ INT_HW_IRQ_EN,
+ INT_TASKLET_ENTRY,
+ FS_DEVRW,
+ MEM_SWAPOUT,
+ TIMERADD,
+ SCSI_DISPATCH_CMD,
+ IDE_CORE_CP
+};
+
+enum ctype {
+ NONE,
+ PANIC,
+ BUG,
+ EXCEPTION,
+ LOOP,
+ OVERFLOW
+};
+
+static char* cp_name[] = {
+ "INT_HARDWARE_ENTRY",
+ "INT_HW_IRQ_EN",
+ "INT_TASKLET_ENTRY",
+ "FS_DEVRW",
+ "MEM_SWAPOUT",
+ "TIMERADD",
+ "SCSI_DISPATCH_CMD",
+ "IDE_CORE_CP"
+};
+
+static char* cp_type[] = {
+ "PANIC",
+ "BUG",
+ "EXCEPTION",
+ "LOOP",
+ "OVERFLOW"
+};
+
+static struct jprobe lkdtm;
+
+static int lkdtm_parse_commandline(void);
+static void lkdtm_handler(void);
+
+static char* cpoint_name = INVALID;
+static char* cpoint_type = NONE;
+static int cpoint_count = DEFAULT_COUNT;
+static int recur_count = REC_NUM_DEFAULT;
+
+static enum cname cpoint = INVALID;
+static enum ctype cptype = NONE;
+static int count = DEFAULT_COUNT;
+
+module_param(recur_count, int, 0644);
+MODULE_PARM_DESC(recur_count, "Recurcion level for the stack overflow test,\
+ default is 10");
+module_param(cpoint_name, charp, 0644);
+MODULE_PARM_DESC(cpoint_name, "Crash Point, where kernel is to be crashed");
+module_param(cpoint_type, charp, 06444);
+MODULE_PARM_DESC(cpoint_type, "Crash Point Type, action to be taken on\
+ hitting the crash point");
+module_param(cpoint_count, int, 06444);
+MODULE_PARM_DESC(cpoint_count, "Crash Point Count, number of times the \
+ crash point is to be hit to trigger action");
+
+unsigned int jp_do_irq(unsigned int irq, struct pt_regs *regs)
+{
+ lkdtm_handler();
+ jprobe_return();
+ return 0;
+}
+
+irqreturn_t jp_handle_irq_event(unsigned int irq, struct pt_regs *regs,
+ struct irqaction *action)
+{
+ lkdtm_handler();
+ jprobe_return();
+ return 0;
+}
+
+void jp_tasklet_action(struct softirq_action *a)
+{
+ lkdtm_handler();
+ jprobe_return();
+}
+
+void jp_ll_rw_block(int rw, int nr, struct buffer_head *bhs[])
+{
+ lkdtm_handler();
+ jprobe_return();
+}
+
+struct scan_control;
+
+unsigned long jp_shrink_page_list(struct list_head *page_list,
+ struct scan_control *sc)
+{
+ lkdtm_handler();
+ jprobe_return();
+ return 0;
+}
+
+int jp_hrtimer_start(struct hrtimer *timer, ktime_t tim,
+ const enum hrtimer_mode mode)
+{
+ lkdtm_handler();
+ jprobe_return();
+ return 0;
+}
+
+int jp_scsi_dispatch_cmd(struct scsi_cmnd *cmd)
+{
+ lkdtm_handler();
+ jprobe_return();
+ return 0;
+}
+
+#ifdef CONFIG_IDE
+int jp_generic_ide_ioctl(ide_drive_t *drive, struct file *file,
+ struct block_device *bdev, unsigned int cmd,
+ unsigned long arg)
+{
+ lkdtm_handler();
+ jprobe_return();
+ return 0;
+}
+#endif
+
+static int lkdtm_parse_commandline(void)
+{
+ int i;
+
+ if (cpoint_name == INVALID || cpoint_type == NONE ||
+ cpoint_count < 1 || recur_count < 1)
+ return -EINVAL;
+
+ for (i = 0; i < NUM_CPOINTS; ++i) {
+ if (!strcmp(cpoint_name, cp_name[i])) {
+ cpoint = i + 1;
+ break;
+ }
+ }
+
+ for (i = 0; i < NUM_CPOINT_TYPES; ++i) {
+ if (!strcmp(cpoint_type, cp_type[i])) {
+ cptype = i + 1;
+ break;
+ }
+ }
+
+ if (cpoint == INVALID || cptype == NONE)
+ return -EINVAL;
+
+ count = cpoint_count;
+
+ return 0;
+}
+
+static int recursive_loop(int a)
+{
+ char buf[1024];
+
+ memset(buf,0xFF,1024);
+ recur_count--;
+ if (!recur_count)
+ return 0;
+ else
+ return recursive_loop(a);
+}
+
+void lkdtm_handler(void)
+{
+ printk(KERN_INFO "lkdtm : Crash point %s of type %s hit\n",
+ cpoint_name, cpoint_type);
+ --count;
+
+ if (count == 0) {
+ switch (cptype) {
+ case NONE:
+ break;
+ case PANIC:
+ printk(KERN_INFO "lkdtm : PANIC\n");
+ panic("dumptest");
+ break;
+ case BUG:
+ printk(KERN_INFO "lkdtm : BUG\n");
+ BUG();
+ break;
+ case EXCEPTION:
+ printk(KERN_INFO "lkdtm : EXCEPTION\n");
+ *((int *) 0) = 0;
+ break;
+ case LOOP:
+ printk(KERN_INFO "lkdtm : LOOP\n");
+ for (;;);
+ break;
+ case OVERFLOW:
+ printk(KERN_INFO "lkdtm : OVERFLOW\n");
+ (void) recursive_loop(0);
+ break;
+ default:
+ break;
+ }
+ count = cpoint_count;
+ }
+}
+
+int lkdtm_module_init(void)
+{
+ int ret;
+
+ if (lkdtm_parse_commandline() == -EINVAL) {
+ printk(KERN_INFO "lkdtm : Invalid command\n");
+ return -EINVAL;
+ }
+
+ switch (cpoint) {
+ case INT_HARDWARE_ENTRY:
+ lkdtm.kp.symbol_name = "__do_IRQ";
+ lkdtm.entry = (kprobe_opcode_t*) jp_do_irq;
+ break;
+ case INT_HW_IRQ_EN:
+ lkdtm.kp.symbol_name = "handle_IRQ_event";
+ lkdtm.entry = (kprobe_opcode_t*) jp_handle_irq_event;
+ break;
+ case INT_TASKLET_ENTRY:
+ lkdtm.kp.symbol_name = "tasklet_action";
+ lkdtm.entry = (kprobe_opcode_t*) jp_tasklet_action;
+ break;
+ case FS_DEVRW:
+ lkdtm.kp.symbol_name = "ll_rw_block";
+ lkdtm.entry = (kprobe_opcode_t*) jp_ll_rw_block;
+ break;
+ case MEM_SWAPOUT:
+ lkdtm.kp.symbol_name = "shrink_page_list";
+ lkdtm.entry = (kprobe_opcode_t*) jp_shrink_page_list;
+ break;
+ case TIMERADD:
+ lkdtm.kp.symbol_name = "hrtimer_start";
+ lkdtm.entry = (kprobe_opcode_t*) jp_hrtimer_start;
+ break;
+ case SCSI_DISPATCH_CMD:
+ lkdtm.kp.symbol_name = "scsi_dispatch_cmd";
+ lkdtm.entry = (kprobe_opcode_t*) jp_scsi_dispatch_cmd;
+ break;
+ case IDE_CORE_CP:
+#ifdef CONFIG_IDE
+ lkdtm.kp.symbol_name = "generic_ide_ioctl";
+ lkdtm.entry = (kprobe_opcode_t*) jp_generic_ide_ioctl;
+#else
+ printk(KERN_INFO "lkdtm : Crash point not available\n");
+#endif
+ break;
+ default:
+ printk(KERN_INFO "lkdtm : Invalid Crash Point\n");
+ break;
+ }
+
+ if ((ret = register_jprobe(&lkdtm)) < 0) {
+ printk(KERN_INFO "lkdtm : Couldn't register jprobe\n");
+ return ret;
+ }
+
+ printk(KERN_INFO "lkdtm : Crash point %s of type %s registered\n",
+ cpoint_name, cpoint_type);
+ return 0;
+}
+
+void lkdtm_module_exit(void)
+{
+ unregister_jprobe(&lkdtm);
+ printk(KERN_INFO "lkdtm : Crash point unregistered\n");
+}
+
+module_init(lkdtm_module_init);
+module_exit(lkdtm_module_exit);
+
+MODULE_LICENSE("GPL");
diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index bc0face01d2..151a2e10e4f 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -697,7 +697,7 @@ static int tun_chr_fasync(int fd, struct file *file, int on)
return ret;
if (on) {
- ret = f_setown(file, current->pid, 0);
+ ret = __f_setown(file, task_pid(current), PIDTYPE_PID, 0);
if (ret)
return ret;
tun->flags |= TUN_FASYNC;
diff --git a/drivers/net/wireless/ipw2100.c b/drivers/net/wireless/ipw2100.c
index 97937809de0..eddfa8786e8 100644
--- a/drivers/net/wireless/ipw2100.c
+++ b/drivers/net/wireless/ipw2100.c
@@ -150,7 +150,6 @@ that only one external action is invoked at a time.
#include <linux/skbuff.h>
#include <asm/uaccess.h>
#include <asm/io.h>
-#define __KERNEL_SYSCALLS__
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/slab.h>
diff --git a/drivers/parisc/led.c b/drivers/parisc/led.c
index bf00fa2537b..8dac2ba82bb 100644
--- a/drivers/parisc/led.c
+++ b/drivers/parisc/led.c
@@ -684,7 +684,7 @@ int __init led_init(void)
int ret;
snprintf(lcd_text_default, sizeof(lcd_text_default),
- "Linux %s", system_utsname.release);
+ "Linux %s", init_utsname()->release);
/* Work around the buggy PDC of KittyHawk-machines */
switch (CPU_HVERSION) {
diff --git a/drivers/parisc/power.c b/drivers/parisc/power.c
index fad5a33bf0f..4a9f025a6b5 100644
--- a/drivers/parisc/power.c
+++ b/drivers/parisc/power.c
@@ -84,8 +84,7 @@
static void deferred_poweroff(void *dummy)
{
- extern int cad_pid; /* from kernel/sys.c */
- if (kill_proc(cad_pid, SIGINT, 1)) {
+ if (kill_cad_pid(SIGINT, 1)) {
/* just in case killing init process failed */
machine_power_off();
}
diff --git a/drivers/s390/char/con3215.c b/drivers/s390/char/con3215.c
index 2fa566fa6da..d7de175d53f 100644
--- a/drivers/s390/char/con3215.c
+++ b/drivers/s390/char/con3215.c
@@ -1103,7 +1103,7 @@ tty3215_start(struct tty_struct *tty)
}
}
-static struct tty_operations tty3215_ops = {
+static const struct tty_operations tty3215_ops = {
.open = tty3215_open,
.close = tty3215_close,
.write = tty3215_write,
diff --git a/drivers/s390/char/fs3270.c b/drivers/s390/char/fs3270.c
index b4557fa3085..78f8bda81da 100644
--- a/drivers/s390/char/fs3270.c
+++ b/drivers/s390/char/fs3270.c
@@ -27,7 +27,7 @@ struct raw3270_fn fs3270_fn;
struct fs3270 {
struct raw3270_view view;
- pid_t fs_pid; /* Pid of controlling program. */
+ struct pid *fs_pid; /* Pid of controlling program. */
int read_command; /* ccw command to use for reads. */
int write_command; /* ccw command to use for writes. */
int attention; /* Got attention. */
@@ -102,7 +102,7 @@ fs3270_restore_callback(struct raw3270_request *rq, void *data)
fp = (struct fs3270 *) rq->view;
if (rq->rc != 0 || rq->rescnt != 0) {
if (fp->fs_pid)
- kill_proc(fp->fs_pid, SIGHUP, 1);
+ kill_pid(fp->fs_pid, SIGHUP, 1);
}
fp->rdbuf_size = 0;
raw3270_request_reset(rq);
@@ -173,7 +173,7 @@ fs3270_save_callback(struct raw3270_request *rq, void *data)
*/
if (rq->rc != 0 || rq->rescnt == 0) {
if (fp->fs_pid)
- kill_proc(fp->fs_pid, SIGHUP, 1);
+ kill_pid(fp->fs_pid, SIGHUP, 1);
fp->rdbuf_size = 0;
} else
fp->rdbuf_size = fp->rdbuf->size - rq->rescnt;
@@ -442,7 +442,7 @@ fs3270_open(struct inode *inode, struct file *filp)
return PTR_ERR(fp);
init_waitqueue_head(&fp->wait);
- fp->fs_pid = current->pid;
+ fp->fs_pid = get_pid(task_pid(current));
rc = raw3270_add_view(&fp->view, &fs3270_fn, minor);
if (rc) {
fs3270_free_view(&fp->view);
@@ -480,7 +480,8 @@ fs3270_close(struct inode *inode, struct file *filp)
fp = filp->private_data;
filp->private_data = NULL;
if (fp) {
- fp->fs_pid = 0;
+ put_pid(fp->fs_pid);
+ fp->fs_pid = NULL;
raw3270_reset(&fp->view);
raw3270_put_view(&fp->view);
raw3270_del_view(&fp->view);
diff --git a/drivers/s390/char/sclp_tty.c b/drivers/s390/char/sclp_tty.c
index f6cf9023039..6f43e04dbef 100644
--- a/drivers/s390/char/sclp_tty.c
+++ b/drivers/s390/char/sclp_tty.c
@@ -711,7 +711,7 @@ static struct sclp_register sclp_input_event =
.receiver_fn = sclp_tty_receiver
};
-static struct tty_operations sclp_ops = {
+static const struct tty_operations sclp_ops = {
.open = sclp_tty_open,
.close = sclp_tty_close,
.write = sclp_tty_write,
diff --git a/drivers/s390/char/sclp_vt220.c b/drivers/s390/char/sclp_vt220.c
index 54fba6f1718..723bf4191bf 100644
--- a/drivers/s390/char/sclp_vt220.c
+++ b/drivers/s390/char/sclp_vt220.c
@@ -655,7 +655,7 @@ __sclp_vt220_init(int early)
return 0;
}
-static struct tty_operations sclp_vt220_ops = {
+static const struct tty_operations sclp_vt220_ops = {
.open = sclp_vt220_open,
.close = sclp_vt220_close,
.write = sclp_vt220_write,
diff --git a/drivers/s390/char/tty3270.c b/drivers/s390/char/tty3270.c
index 06e2eeec847..4717c361160 100644
--- a/drivers/s390/char/tty3270.c
+++ b/drivers/s390/char/tty3270.c
@@ -1737,7 +1737,7 @@ tty3270_ioctl(struct tty_struct *tty, struct file *file,
return kbd_ioctl(tp->kbd, file, cmd, arg);
}
-static struct tty_operations tty3270_ops = {
+static const struct tty_operations tty3270_ops = {
.open = tty3270_open,
.close = tty3270_close,
.write = tty3270_write,
diff --git a/drivers/s390/s390mach.c b/drivers/s390/s390mach.c
index 479364d0332..e088b5e2871 100644
--- a/drivers/s390/s390mach.c
+++ b/drivers/s390/s390mach.c
@@ -208,7 +208,7 @@ s390_handle_mcck(void)
*/
__ctl_clear_bit(14, 24); /* Disable WARNING MCH */
if (xchg(&mchchk_wng_posted, 1) == 0)
- kill_proc(1, SIGPWR, 1);
+ kill_cad_pid(SIGPWR, 1);
}
#endif
diff --git a/drivers/sbus/char/aurora.c b/drivers/sbus/char/aurora.c
index 4fdb2c93221..a305d409154 100644
--- a/drivers/sbus/char/aurora.c
+++ b/drivers/sbus/char/aurora.c
@@ -2187,7 +2187,7 @@ static void do_softint(void *private_)
#endif
}
-static struct tty_operations aurora_ops = {
+static const struct tty_operations aurora_ops = {
.open = aurora_open,
.close = aurora_close,
.write = aurora_write,
diff --git a/drivers/sbus/char/bbc_envctrl.c b/drivers/sbus/char/bbc_envctrl.c
index 1cc706e1111..d27e4f6d704 100644
--- a/drivers/sbus/char/bbc_envctrl.c
+++ b/drivers/sbus/char/bbc_envctrl.c
@@ -4,9 +4,6 @@
* Copyright (C) 2001 David S. Miller (davem@redhat.com)
*/
-#define __KERNEL_SYSCALLS__
-static int errno;
-
#include <linux/kernel.h>
#include <linux/kthread.h>
#include <linux/sched.h>
@@ -200,7 +197,7 @@ static void do_envctrl_shutdown(struct bbc_cpu_temperature *tp)
printk(KERN_CRIT "kenvctrld: Shutting down the system now.\n");
shutting_down = 1;
- if (execve("/sbin/shutdown", argv, envp) < 0)
+ if (kernel_execve("/sbin/shutdown", argv, envp) < 0)
printk(KERN_CRIT "envctrl: shutdown execution failed\n");
}
diff --git a/drivers/sbus/char/envctrl.c b/drivers/sbus/char/envctrl.c
index 063e676a3ac..728a133d0fc 100644
--- a/drivers/sbus/char/envctrl.c
+++ b/drivers/sbus/char/envctrl.c
@@ -19,9 +19,6 @@
* Daniele Bellucci <bellucda@tiscali.it>
*/
-#define __KERNEL_SYSCALLS__
-static int errno;
-
#include <linux/module.h>
#include <linux/sched.h>
#include <linux/kthread.h>
@@ -976,13 +973,15 @@ static void envctrl_do_shutdown(void)
"HOME=/", "TERM=linux", "PATH=/sbin:/usr/sbin:/bin:/usr/bin", NULL };
char *argv[] = {
"/sbin/shutdown", "-h", "now", NULL };
+ int ret;
if (inprog != 0)
return;
inprog = 1;
printk(KERN_CRIT "kenvctrld: WARNING: Shutting down the system now.\n");
- if (0 > execve("/sbin/shutdown", argv, envp)) {
+ ret = kernel_execve("/sbin/shutdown", argv, envp);
+ if (ret < 0) {
printk(KERN_CRIT "kenvctrld: WARNING: system shutdown failed!\n");
inprog = 0; /* unlikely to succeed, but we could try again */
}
diff --git a/drivers/scsi/lpfc/lpfc_ct.c b/drivers/scsi/lpfc/lpfc_ct.c
index ae410645899..1b53afb1cb5 100644
--- a/drivers/scsi/lpfc/lpfc_ct.c
+++ b/drivers/scsi/lpfc/lpfc_ct.c
@@ -933,8 +933,8 @@ lpfc_fdmi_cmd(struct lpfc_hba * phba, struct lpfc_nodelist * ndlp, int cmdcode)
ae = (ATTRIBUTE_ENTRY *) ((uint8_t *) rh + size);
ae->ad.bits.AttrType = be16_to_cpu(OS_NAME_VERSION);
sprintf(ae->un.OsNameVersion, "%s %s %s",
- system_utsname.sysname, system_utsname.release,
- system_utsname.version);
+ init_utsname()->sysname, init_utsname()->release,
+ init_utsname()->version);
len = strlen(ae->un.OsNameVersion);
len += (len & 3) ? (4 - (len & 3)) : 4;
ae->ad.bits.AttrLen = be16_to_cpu(FOURBYTES + len);
@@ -1052,7 +1052,7 @@ lpfc_fdmi_cmd(struct lpfc_hba * phba, struct lpfc_nodelist * ndlp, int cmdcode)
size);
ae->ad.bits.AttrType = be16_to_cpu(HOST_NAME);
sprintf(ae->un.HostName, "%s",
- system_utsname.nodename);
+ init_utsname()->nodename);
len = strlen(ae->un.HostName);
len += (len & 3) ? (4 - (len & 3)) : 4;
ae->ad.bits.AttrLen =
@@ -1140,7 +1140,7 @@ lpfc_fdmi_tmo_handler(struct lpfc_hba *phba)
ndlp = lpfc_findnode_did(phba, NLP_SEARCH_ALL, FDMI_DID);
if (ndlp) {
- if (system_utsname.nodename[0] != '\0') {
+ if (init_utsname()->nodename[0] != '\0') {
lpfc_fdmi_cmd(phba, ndlp, SLI_MGMT_DHBA);
} else {
mod_timer(&phba->fc_fdmitmo, jiffies + HZ * 60);
diff --git a/drivers/serial/68328serial.c b/drivers/serial/68328serial.c
index 993a702422e..bac853c5abb 100644
--- a/drivers/serial/68328serial.c
+++ b/drivers/serial/68328serial.c
@@ -1378,7 +1378,7 @@ void startup_console(void)
#endif /* CONFIG_PM_LEGACY */
-static struct tty_operations rs_ops = {
+static const struct tty_operations rs_ops = {
.open = rs_open,
.close = rs_close,
.write = rs_write,
diff --git a/drivers/serial/68360serial.c b/drivers/serial/68360serial.c
index e80e70e9b12..1b299e8c57c 100644
--- a/drivers/serial/68360serial.c
+++ b/drivers/serial/68360serial.c
@@ -2424,7 +2424,7 @@ long console_360_init(long kmem_start, long kmem_end)
*/
static int baud_idx;
-static struct tty_operations rs_360_ops = {
+static const struct tty_operations rs_360_ops = {
.owner = THIS_MODULE,
.open = rs_360_open,
.close = rs_360_close,
diff --git a/drivers/serial/crisv10.c b/drivers/serial/crisv10.c
index cabd048c863..9851d9eff02 100644
--- a/drivers/serial/crisv10.c
+++ b/drivers/serial/crisv10.c
@@ -4825,7 +4825,7 @@ show_serial_version(void)
/* rs_init inits the driver at boot (using the module_init chain) */
-static struct tty_operations rs_ops = {
+static const struct tty_operations rs_ops = {
.open = rs_open,
.close = rs_close,
.write = rs_write,
diff --git a/drivers/serial/mcfserial.c b/drivers/serial/mcfserial.c
index 832abd3c470..00d7859c167 100644
--- a/drivers/serial/mcfserial.c
+++ b/drivers/serial/mcfserial.c
@@ -1666,7 +1666,7 @@ static void show_serial_version(void)
printk(mcfrs_drivername);
}
-static struct tty_operations mcfrs_ops = {
+static const struct tty_operations mcfrs_ops = {
.open = mcfrs_open,
.close = mcfrs_close,
.write = mcfrs_write,
diff --git a/drivers/serial/serial_core.c b/drivers/serial/serial_core.c
index 5f7ba1adb30..de5e8930a6f 100644
--- a/drivers/serial/serial_core.c
+++ b/drivers/serial/serial_core.c
@@ -2111,7 +2111,7 @@ uart_configure_port(struct uart_driver *drv, struct uart_state *state,
}
}
-static struct tty_operations uart_ops = {
+static const struct tty_operations uart_ops = {
.open = uart_open,
.close = uart_close,
.write = uart_write,
diff --git a/drivers/tc/zs.c b/drivers/tc/zs.c
index 5e8a27620f6..622881f2676 100644
--- a/drivers/tc/zs.c
+++ b/drivers/tc/zs.c
@@ -1701,7 +1701,7 @@ static void __init probe_sccs(void)
spin_unlock_irqrestore(&zs_lock, flags);
}
-static struct tty_operations serial_ops = {
+static const struct tty_operations serial_ops = {
.open = rs_open,
.close = rs_close,
.write = rs_write,
diff --git a/drivers/usb/class/cdc-acm.c b/drivers/usb/class/cdc-acm.c
index ca90326f2f5..71288295df2 100644
--- a/drivers/usb/class/cdc-acm.c
+++ b/drivers/usb/class/cdc-acm.c
@@ -1120,7 +1120,7 @@ static struct usb_driver acm_driver = {
* TTY driver structures.
*/
-static struct tty_operations acm_ops = {
+static const struct tty_operations acm_ops = {
.open = acm_tty_open,
.close = acm_tty_close,
.write = acm_tty_write,
diff --git a/drivers/usb/core/devio.c b/drivers/usb/core/devio.c
index a94c63bef63..3f509beb88e 100644
--- a/drivers/usb/core/devio.c
+++ b/drivers/usb/core/devio.c
@@ -65,7 +65,7 @@ DEFINE_MUTEX(usbfs_mutex);
struct async {
struct list_head asynclist;
struct dev_state *ps;
- pid_t pid;
+ struct pid *pid;
uid_t uid, euid;
unsigned int signr;
unsigned int ifnum;
@@ -225,6 +225,7 @@ static struct async *alloc_async(unsigned int numisoframes)
static void free_async(struct async *as)
{
+ put_pid(as->pid);
kfree(as->urb->transfer_buffer);
kfree(as->urb->setup_packet);
usb_free_urb(as->urb);
@@ -317,7 +318,7 @@ static void async_completed(struct urb *urb, struct pt_regs *regs)
sinfo.si_errno = as->urb->status;
sinfo.si_code = SI_ASYNCIO;
sinfo.si_addr = as->userurb;
- kill_proc_info_as_uid(as->signr, &sinfo, as->pid, as->uid,
+ kill_pid_info_as_uid(as->signr, &sinfo, as->pid, as->uid,
as->euid, as->secid);
}
snoop(&urb->dev->dev, "urb complete\n");
@@ -573,7 +574,7 @@ static int usbdev_open(struct inode *inode, struct file *file)
INIT_LIST_HEAD(&ps->async_completed);
init_waitqueue_head(&ps->wait);
ps->discsignr = 0;
- ps->disc_pid = current->pid;
+ ps->disc_pid = get_pid(task_pid(current));
ps->disc_uid = current->uid;
ps->disc_euid = current->euid;
ps->disccontext = NULL;
@@ -611,6 +612,7 @@ static int usbdev_release(struct inode *inode, struct file *file)
usb_autosuspend_device(dev, 1);
usb_unlock_device(dev);
usb_put_dev(dev);
+ put_pid(ps->disc_pid);
kfree(ps);
return 0;
}
@@ -1063,7 +1065,7 @@ static int proc_do_submiturb(struct dev_state *ps, struct usbdevfs_urb *uurb,
as->userbuffer = NULL;
as->signr = uurb->signr;
as->ifnum = ifnum;
- as->pid = current->pid;
+ as->pid = get_pid(task_pid(current));
as->uid = current->uid;
as->euid = current->euid;
security_task_getsecid(current, &as->secid);
diff --git a/drivers/usb/core/hcd.c b/drivers/usb/core/hcd.c
index 37f9f5e7425..e658089f7b5 100644
--- a/drivers/usb/core/hcd.c
+++ b/drivers/usb/core/hcd.c
@@ -318,8 +318,8 @@ static int rh_string (
// id 3 == vendor description
} else if (id == 3) {
- snprintf (buf, sizeof buf, "%s %s %s", system_utsname.sysname,
- system_utsname.release, hcd->driver->description);
+ snprintf (buf, sizeof buf, "%s %s %s", init_utsname()->sysname,
+ init_utsname()->release, hcd->driver->description);
// unsupported IDs --> "protocol stall"
} else
diff --git a/drivers/usb/core/inode.c b/drivers/usb/core/inode.c
index 7c77c2d8d30..b5d6a79af0b 100644
--- a/drivers/usb/core/inode.c
+++ b/drivers/usb/core/inode.c
@@ -699,7 +699,7 @@ static void usbfs_remove_device(struct usb_device *dev)
sinfo.si_errno = EPIPE;
sinfo.si_code = SI_ASYNCIO;
sinfo.si_addr = ds->disccontext;
- kill_proc_info_as_uid(ds->discsignr, &sinfo, ds->disc_pid, ds->disc_uid, ds->disc_euid, ds->secid);
+ kill_pid_info_as_uid(ds->discsignr, &sinfo, ds->disc_pid, ds->disc_uid, ds->disc_euid, ds->secid);
}
}
}
diff --git a/drivers/usb/core/usb.h b/drivers/usb/core/usb.h
index f69df137ec0..13322e33f91 100644
--- a/drivers/usb/core/usb.h
+++ b/drivers/usb/core/usb.h
@@ -139,7 +139,7 @@ struct dev_state {
struct list_head async_completed;
wait_queue_head_t wait; /* wake up if a request completed */
unsigned int discsignr;
- pid_t disc_pid;
+ struct pid *disc_pid;
uid_t disc_uid, disc_euid;
void __user *disccontext;
unsigned long ifclaimed;
diff --git a/drivers/usb/gadget/ether.c b/drivers/usb/gadget/ether.c
index 366dc0a9e52..1c17d26d03b 100644
--- a/drivers/usb/gadget/ether.c
+++ b/drivers/usb/gadget/ether.c
@@ -2260,7 +2260,7 @@ eth_bind (struct usb_gadget *gadget)
return -ENODEV;
}
snprintf (manufacturer, sizeof manufacturer, "%s %s/%s",
- system_utsname.sysname, system_utsname.release,
+ init_utsname()->sysname, init_utsname()->release,
gadget->name);
/* If there's an RNDIS configuration, that's what Windows wants to
diff --git a/drivers/usb/gadget/file_storage.c b/drivers/usb/gadget/file_storage.c
index c83d3b6c68f..8b975d15538 100644
--- a/drivers/usb/gadget/file_storage.c
+++ b/drivers/usb/gadget/file_storage.c
@@ -4001,7 +4001,7 @@ static int __init fsg_bind(struct usb_gadget *gadget)
usb_gadget_set_selfpowered(gadget);
snprintf(manufacturer, sizeof manufacturer, "%s %s with %s",
- system_utsname.sysname, system_utsname.release,
+ init_utsname()->sysname, init_utsname()->release,
gadget->name);
/* On a real device, serial[] would be loaded from permanent
diff --git a/drivers/usb/gadget/gmidi.c b/drivers/usb/gadget/gmidi.c
index b68cecd5741..83601d4009e 100644
--- a/drivers/usb/gadget/gmidi.c
+++ b/drivers/usb/gadget/gmidi.c
@@ -1189,7 +1189,7 @@ static int __devinit gmidi_bind(struct usb_gadget *gadget)
strlcpy(manufacturer, iManufacturer, sizeof(manufacturer));
} else {
snprintf(manufacturer, sizeof(manufacturer), "%s %s with %s",
- system_utsname.sysname, system_utsname.release,
+ init_utsname()->sysname, init_utsname()->release,
gadget->name);
}
if (iProduct) {
diff --git a/drivers/usb/gadget/serial.c b/drivers/usb/gadget/serial.c
index b893e3118e1..208e55a667a 100644
--- a/drivers/usb/gadget/serial.c
+++ b/drivers/usb/gadget/serial.c
@@ -271,7 +271,7 @@ static unsigned int use_acm = GS_DEFAULT_USE_ACM;
/* tty driver struct */
-static struct tty_operations gs_tty_ops = {
+static const struct tty_operations gs_tty_ops = {
.open = gs_open,
.close = gs_close,
.write = gs_write,
@@ -1434,7 +1434,7 @@ static int __init gs_bind(struct usb_gadget *gadget)
return -ENOMEM;
snprintf(manufacturer, sizeof(manufacturer), "%s %s with %s",
- system_utsname.sysname, system_utsname.release,
+ init_utsname()->sysname, init_utsname()->release,
gadget->name);
memset(dev, 0, sizeof(struct gs_dev));
diff --git a/drivers/usb/gadget/zero.c b/drivers/usb/gadget/zero.c
index b7018ee487e..0f809dd6849 100644
--- a/drivers/usb/gadget/zero.c
+++ b/drivers/usb/gadget/zero.c
@@ -1242,7 +1242,7 @@ autoconf_fail:
EP_OUT_NAME, EP_IN_NAME);
snprintf (manufacturer, sizeof manufacturer, "%s %s with %s",
- system_utsname.sysname, system_utsname.release,
+ init_utsname()->sysname, init_utsname()->release,
gadget->name);
return 0;
diff --git a/drivers/usb/serial/usb-serial.c b/drivers/usb/serial/usb-serial.c
index 0222d92842b..8006e51c34b 100644
--- a/drivers/usb/serial/usb-serial.c
+++ b/drivers/usb/serial/usb-serial.c
@@ -1015,7 +1015,7 @@ void usb_serial_disconnect(struct usb_interface *interface)
dev_info(dev, "device disconnected\n");
}
-static struct tty_operations serial_ops = {
+static const struct tty_operations serial_ops = {
.open = serial_open,
.close = serial_close,
.write = serial_write,
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 0e9ba0b9d71..c78762051da 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -772,12 +772,12 @@ cifs_parse_mount_options(char *options, const char *devname,struct smb_vol *vol)
separator[1] = 0;
memset(vol->source_rfc1001_name,0x20,15);
- for(i=0;i < strnlen(system_utsname.nodename,15);i++) {
+ for(i=0;i < strnlen(utsname()->nodename,15);i++) {
/* does not have to be a perfect mapping since the field is
informational, only used for servers that do not support
port 445 and it can be overridden at mount time */
vol->source_rfc1001_name[i] =
- toupper(system_utsname.nodename[i]);
+ toupper(utsname()->nodename[i]);
}
vol->source_rfc1001_name[15] = 0;
/* null target name indicates to use *SMBSERVR default called name
@@ -2153,7 +2153,7 @@ CIFSSessSetup(unsigned int xid, struct cifsSesInfo *ses,
32, nls_codepage);
bcc_ptr += 2 * bytes_returned;
bytes_returned =
- cifs_strtoUCS((__le16 *) bcc_ptr, system_utsname.release,
+ cifs_strtoUCS((__le16 *) bcc_ptr, utsname()->release,
32, nls_codepage);
bcc_ptr += 2 * bytes_returned;
bcc_ptr += 2;
@@ -2180,8 +2180,8 @@ CIFSSessSetup(unsigned int xid, struct cifsSesInfo *ses,
}
strcpy(bcc_ptr, "Linux version ");
bcc_ptr += strlen("Linux version ");
- strcpy(bcc_ptr, system_utsname.release);
- bcc_ptr += strlen(system_utsname.release) + 1;
+ strcpy(bcc_ptr, utsname()->release);
+ bcc_ptr += strlen(utsname()->release) + 1;
strcpy(bcc_ptr, CIFS_NETWORK_OPSYS);
bcc_ptr += strlen(CIFS_NETWORK_OPSYS) + 1;
}
@@ -2445,7 +2445,7 @@ CIFSNTLMSSPNegotiateSessSetup(unsigned int xid,
32, nls_codepage);
bcc_ptr += 2 * bytes_returned;
bytes_returned =
- cifs_strtoUCS((__le16 *) bcc_ptr, system_utsname.release, 32,
+ cifs_strtoUCS((__le16 *) bcc_ptr, utsname()->release, 32,
nls_codepage);
bcc_ptr += 2 * bytes_returned;
bcc_ptr += 2; /* null terminate Linux version */
@@ -2462,8 +2462,8 @@ CIFSNTLMSSPNegotiateSessSetup(unsigned int xid,
} else { /* ASCII */
strcpy(bcc_ptr, "Linux version ");
bcc_ptr += strlen("Linux version ");
- strcpy(bcc_ptr, system_utsname.release);
- bcc_ptr += strlen(system_utsname.release) + 1;
+ strcpy(bcc_ptr, utsname()->release);
+ bcc_ptr += strlen(utsname()->release) + 1;
strcpy(bcc_ptr, CIFS_NETWORK_OPSYS);
bcc_ptr += strlen(CIFS_NETWORK_OPSYS) + 1;
bcc_ptr++; /* empty domain field */
@@ -2836,7 +2836,7 @@ CIFSNTLMSSPAuthSessSetup(unsigned int xid, struct cifsSesInfo *ses,
32, nls_codepage);
bcc_ptr += 2 * bytes_returned;
bytes_returned =
- cifs_strtoUCS((__le16 *) bcc_ptr, system_utsname.release, 32,
+ cifs_strtoUCS((__le16 *) bcc_ptr, utsname()->release, 32,
nls_codepage);
bcc_ptr += 2 * bytes_returned;
bcc_ptr += 2; /* null term version string */
@@ -2888,8 +2888,8 @@ CIFSNTLMSSPAuthSessSetup(unsigned int xid, struct cifsSesInfo *ses,
strcpy(bcc_ptr, "Linux version ");
bcc_ptr += strlen("Linux version ");
- strcpy(bcc_ptr, system_utsname.release);
- bcc_ptr += strlen(system_utsname.release) + 1;
+ strcpy(bcc_ptr, utsname()->release);
+ bcc_ptr += strlen(utsname()->release) + 1;
strcpy(bcc_ptr, CIFS_NETWORK_OPSYS);
bcc_ptr += strlen(CIFS_NETWORK_OPSYS) + 1;
bcc_ptr++; /* null domain */
diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c
index d1705ab8136..22b4c35dcfe 100644
--- a/fs/cifs/sess.c
+++ b/fs/cifs/sess.c
@@ -111,7 +111,7 @@ static void unicode_ssetup_strings(char ** pbcc_area, struct cifsSesInfo *ses,
bytes_ret = cifs_strtoUCS((__le16 *)bcc_ptr, "Linux version ", 32,
nls_cp);
bcc_ptr += 2 * bytes_ret;
- bytes_ret = cifs_strtoUCS((__le16 *) bcc_ptr, system_utsname.release,
+ bytes_ret = cifs_strtoUCS((__le16 *) bcc_ptr, init_utsname()->release,
32, nls_cp);
bcc_ptr += 2 * bytes_ret;
bcc_ptr += 2; /* trailing null */
@@ -158,8 +158,8 @@ static void ascii_ssetup_strings(char ** pbcc_area, struct cifsSesInfo *ses,
strcpy(bcc_ptr, "Linux version ");
bcc_ptr += strlen("Linux version ");
- strcpy(bcc_ptr, system_utsname.release);
- bcc_ptr += strlen(system_utsname.release) + 1;
+ strcpy(bcc_ptr, init_utsname()->release);
+ bcc_ptr += strlen(init_utsname()->release) + 1;
strcpy(bcc_ptr, CIFS_NETWORK_OPSYS);
bcc_ptr += strlen(CIFS_NETWORK_OPSYS) + 1;
diff --git a/fs/compat.c b/fs/compat.c
index 13fb08d096c..d98c96f4a44 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -56,8 +56,6 @@
int compat_log = 1;
-extern void sigset_from_compat(sigset_t *set, compat_sigset_t *compat);
-
int compat_printk(const char *fmt, ...)
{
va_list ap;
diff --git a/fs/dnotify.c b/fs/dnotify.c
index f932591df5a..2b0442db67e 100644
--- a/fs/dnotify.c
+++ b/fs/dnotify.c
@@ -92,7 +92,7 @@ int fcntl_dirnotify(int fd, struct file *filp, unsigned long arg)
prev = &odn->dn_next;
}
- error = f_setown(filp, current->pid, 0);
+ error = __f_setown(filp, task_pid(current), PIDTYPE_PID, 0);
if (error)
goto out_free;
diff --git a/fs/exec.c b/fs/exec.c
index 6270f8f20a6..d993ea1a81a 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1318,7 +1318,7 @@ static void format_corename(char *corename, const char *pattern, long signr)
case 'h':
down_read(&uts_sem);
rc = snprintf(out_ptr, out_end - out_ptr,
- "%s", system_utsname.nodename);
+ "%s", utsname()->nodename);
up_read(&uts_sem);
if (rc > out_end - out_ptr)
goto out;
diff --git a/fs/fcntl.c b/fs/fcntl.c
index d35cbc6bc11..e4f26165f12 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -250,19 +250,22 @@ static int setfl(int fd, struct file * filp, unsigned long arg)
return error;
}
-static void f_modown(struct file *filp, unsigned long pid,
+static void f_modown(struct file *filp, struct pid *pid, enum pid_type type,
uid_t uid, uid_t euid, int force)
{
write_lock_irq(&filp->f_owner.lock);
if (force || !filp->f_owner.pid) {
- filp->f_owner.pid = pid;
+ put_pid(filp->f_owner.pid);
+ filp->f_owner.pid = get_pid(pid);
+ filp->f_owner.pid_type = type;
filp->f_owner.uid = uid;
filp->f_owner.euid = euid;
}
write_unlock_irq(&filp->f_owner.lock);
}
-int f_setown(struct file *filp, unsigned long arg, int force)
+int __f_setown(struct file *filp, struct pid *pid, enum pid_type type,
+ int force)
{
int err;
@@ -270,15 +273,44 @@ int f_setown(struct file *filp, unsigned long arg, int force)
if (err)
return err;
- f_modown(filp, arg, current->uid, current->euid, force);
+ f_modown(filp, pid, type, current->uid, current->euid, force);
return 0;
}
+EXPORT_SYMBOL(__f_setown);
+int f_setown(struct file *filp, unsigned long arg, int force)
+{
+ enum pid_type type;
+ struct pid *pid;
+ int who = arg;
+ int result;
+ type = PIDTYPE_PID;
+ if (who < 0) {
+ type = PIDTYPE_PGID;
+ who = -who;
+ }
+ rcu_read_lock();
+ pid = find_pid(who);
+ result = __f_setown(filp, pid, type, force);
+ rcu_read_unlock();
+ return result;
+}
EXPORT_SYMBOL(f_setown);
void f_delown(struct file *filp)
{
- f_modown(filp, 0, 0, 0, 1);
+ f_modown(filp, NULL, PIDTYPE_PID, 0, 0, 1);
+}
+
+pid_t f_getown(struct file *filp)
+{
+ pid_t pid;
+ read_lock(&filp->f_owner.lock);
+ pid = pid_nr(filp->f_owner.pid);
+ if (filp->f_owner.pid_type == PIDTYPE_PGID)
+ pid = -pid;
+ read_unlock(&filp->f_owner.lock);
+ return pid;
}
static long do_fcntl(int fd, unsigned int cmd, unsigned long arg,
@@ -319,7 +351,7 @@ static long do_fcntl(int fd, unsigned int cmd, unsigned long arg,
* current syscall conventions, the only way
* to fix this will be in libc.
*/
- err = filp->f_owner.pid;
+ err = f_getown(filp);
force_successful_syscall_return();
break;
case F_SETOWN:
@@ -470,24 +502,19 @@ static void send_sigio_to_task(struct task_struct *p,
void send_sigio(struct fown_struct *fown, int fd, int band)
{
struct task_struct *p;
- int pid;
+ enum pid_type type;
+ struct pid *pid;
read_lock(&fown->lock);
+ type = fown->pid_type;
pid = fown->pid;
if (!pid)
goto out_unlock_fown;
read_lock(&tasklist_lock);
- if (pid > 0) {
- p = find_task_by_pid(pid);
- if (p) {
- send_sigio_to_task(p, fown, fd, band);
- }
- } else {
- do_each_task_pid(-pid, PIDTYPE_PGID, p) {
- send_sigio_to_task(p, fown, fd, band);
- } while_each_task_pid(-pid, PIDTYPE_PGID, p);
- }
+ do_each_pid_task(pid, type, p) {
+ send_sigio_to_task(p, fown, fd, band);
+ } while_each_pid_task(pid, type, p);
read_unlock(&tasklist_lock);
out_unlock_fown:
read_unlock(&fown->lock);
@@ -503,9 +530,12 @@ static void send_sigurg_to_task(struct task_struct *p,
int send_sigurg(struct fown_struct *fown)
{
struct task_struct *p;
- int pid, ret = 0;
+ enum pid_type type;
+ struct pid *pid;
+ int ret = 0;
read_lock(&fown->lock);
+ type = fown->pid_type;
pid = fown->pid;
if (!pid)
goto out_unlock_fown;
@@ -513,16 +543,9 @@ int send_sigurg(struct fown_struct *fown)
ret = 1;
read_lock(&tasklist_lock);
- if (pid > 0) {
- p = find_task_by_pid(pid);
- if (p) {
- send_sigurg_to_task(p, fown);
- }
- } else {
- do_each_task_pid(-pid, PIDTYPE_PGID, p) {
- send_sigurg_to_task(p, fown);
- } while_each_task_pid(-pid, PIDTYPE_PGID, p);
- }
+ do_each_pid_task(pid, type, p) {
+ send_sigurg_to_task(p, fown);
+ } while_each_pid_task(pid, type, p);
read_unlock(&tasklist_lock);
out_unlock_fown:
read_unlock(&fown->lock);
diff --git a/fs/file_table.c b/fs/file_table.c
index bc35a40417d..24f25a057d9 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -174,6 +174,7 @@ void fastcall __fput(struct file *file)
fops_put(file->f_op);
if (file->f_mode & FMODE_WRITE)
put_write_access(inode);
+ put_pid(file->f_owner.pid);
file_kill(file);
file->f_dentry = NULL;
file->f_vfsmnt = NULL;
diff --git a/fs/inode.c b/fs/inode.c
index ada7643104e..bf6bec4e54f 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -657,7 +657,7 @@ static struct inode * get_new_inode_fast(struct super_block *sb, struct hlist_he
return inode;
}
-static inline unsigned long hash(struct super_block *sb, unsigned long hashval)
+static unsigned long hash(struct super_block *sb, unsigned long hashval)
{
unsigned long tmp;
@@ -1003,7 +1003,7 @@ void generic_delete_inode(struct inode *inode)
list_del_init(&inode->i_list);
list_del_init(&inode->i_sb_list);
- inode->i_state|=I_FREEING;
+ inode->i_state |= I_FREEING;
inodes_stat.nr_inodes--;
spin_unlock(&inode_lock);
@@ -1210,13 +1210,15 @@ void file_update_time(struct file *file)
return;
now = current_fs_time(inode->i_sb);
- if (!timespec_equal(&inode->i_mtime, &now))
+ if (!timespec_equal(&inode->i_mtime, &now)) {
+ inode->i_mtime = now;
sync_it = 1;
- inode->i_mtime = now;
+ }
- if (!timespec_equal(&inode->i_ctime, &now))
+ if (!timespec_equal(&inode->i_ctime, &now)) {
+ inode->i_ctime = now;
sync_it = 1;
- inode->i_ctime = now;
+ }
if (sync_it)
mark_inode_dirty_sync(inode);
diff --git a/fs/lockd/clntlock.c b/fs/lockd/clntlock.c
index f95cc3f3c42..87e1d03e826 100644
--- a/fs/lockd/clntlock.c
+++ b/fs/lockd/clntlock.c
@@ -202,7 +202,7 @@ reclaimer(void *ptr)
/* This one ensures that our parent doesn't terminate while the
* reclaim is in progress */
lock_kernel();
- lockd_up();
+ lockd_up(0); /* note: this cannot fail as lockd is already running */
nlmclnt_prepare_reclaim(host);
/* First, reclaim all locks that have been marked. */
diff --git a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c
index 271e2165fff..0116729cec5 100644
--- a/fs/lockd/clntproc.c
+++ b/fs/lockd/clntproc.c
@@ -129,11 +129,11 @@ static void nlmclnt_setlockargs(struct nlm_rqst *req, struct file_lock *fl)
nlmclnt_next_cookie(&argp->cookie);
argp->state = nsm_local_state;
memcpy(&lock->fh, NFS_FH(fl->fl_file->f_dentry->d_inode), sizeof(struct nfs_fh));
- lock->caller = system_utsname.nodename;
+ lock->caller = utsname()->nodename;
lock->oh.data = req->a_owner;
lock->oh.len = snprintf(req->a_owner, sizeof(req->a_owner), "%u@%s",
(unsigned int)fl->fl_u.nfs_fl.owner->pid,
- system_utsname.nodename);
+ utsname()->nodename);
lock->svid = fl->fl_u.nfs_fl.owner->pid;
lock->fl.fl_start = fl->fl_start;
lock->fl.fl_end = fl->fl_end;
diff --git a/fs/lockd/mon.c b/fs/lockd/mon.c
index 5954dcb497e..a816b920d43 100644
--- a/fs/lockd/mon.c
+++ b/fs/lockd/mon.c
@@ -145,7 +145,7 @@ xdr_encode_common(struct rpc_rqst *rqstp, u32 *p, struct nsm_args *argp)
*/
sprintf(buffer, "%u.%u.%u.%u", NIPQUAD(argp->addr));
if (!(p = xdr_encode_string(p, buffer))
- || !(p = xdr_encode_string(p, system_utsname.nodename)))
+ || !(p = xdr_encode_string(p, utsname()->nodename)))
return ERR_PTR(-EIO);
*p++ = htonl(argp->prog);
*p++ = htonl(argp->vers);
diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c
index 9a991b52c64..3cc369e5693 100644
--- a/fs/lockd/svc.c
+++ b/fs/lockd/svc.c
@@ -31,6 +31,7 @@
#include <linux/sunrpc/clnt.h>
#include <linux/sunrpc/svc.h>
#include <linux/sunrpc/svcsock.h>
+#include <net/ip.h>
#include <linux/lockd/lockd.h>
#include <linux/nfs.h>
@@ -46,6 +47,7 @@ EXPORT_SYMBOL(nlmsvc_ops);
static DEFINE_MUTEX(nlmsvc_mutex);
static unsigned int nlmsvc_users;
static pid_t nlmsvc_pid;
+static struct svc_serv *nlmsvc_serv;
int nlmsvc_grace_period;
unsigned long nlmsvc_timeout;
@@ -96,7 +98,6 @@ static inline void clear_grace_period(void)
static void
lockd(struct svc_rqst *rqstp)
{
- struct svc_serv *serv = rqstp->rq_server;
int err = 0;
unsigned long grace_period_expire;
@@ -112,6 +113,7 @@ lockd(struct svc_rqst *rqstp)
* Let our maker know we're running.
*/
nlmsvc_pid = current->pid;
+ nlmsvc_serv = rqstp->rq_server;
complete(&lockd_start_done);
daemonize("lockd");
@@ -161,7 +163,7 @@ lockd(struct svc_rqst *rqstp)
* Find a socket with data available and call its
* recvfrom routine.
*/
- err = svc_recv(serv, rqstp, timeout);
+ err = svc_recv(rqstp, timeout);
if (err == -EAGAIN || err == -EINTR)
continue;
if (err < 0) {
@@ -174,7 +176,7 @@ lockd(struct svc_rqst *rqstp)
dprintk("lockd: request from %08x\n",
(unsigned)ntohl(rqstp->rq_addr.sin_addr.s_addr));
- svc_process(serv, rqstp);
+ svc_process(rqstp);
}
@@ -189,6 +191,7 @@ lockd(struct svc_rqst *rqstp)
nlmsvc_invalidate_all();
nlm_shutdown_hosts();
nlmsvc_pid = 0;
+ nlmsvc_serv = NULL;
} else
printk(KERN_DEBUG
"lockd: new process, skipping host shutdown\n");
@@ -205,54 +208,77 @@ lockd(struct svc_rqst *rqstp)
module_put_and_exit(0);
}
+
+static int find_socket(struct svc_serv *serv, int proto)
+{
+ struct svc_sock *svsk;
+ int found = 0;
+ list_for_each_entry(svsk, &serv->sv_permsocks, sk_list)
+ if (svsk->sk_sk->sk_protocol == proto) {
+ found = 1;
+ break;
+ }
+ return found;
+}
+
+static int make_socks(struct svc_serv *serv, int proto)
+{
+ /* Make any sockets that are needed but not present.
+ * If nlm_udpport or nlm_tcpport were set as module
+ * options, make those sockets unconditionally
+ */
+ static int warned;
+ int err = 0;
+ if (proto == IPPROTO_UDP || nlm_udpport)
+ if (!find_socket(serv, IPPROTO_UDP))
+ err = svc_makesock(serv, IPPROTO_UDP, nlm_udpport);
+ if (err == 0 && (proto == IPPROTO_TCP || nlm_tcpport))
+ if (!find_socket(serv, IPPROTO_TCP))
+ err= svc_makesock(serv, IPPROTO_TCP, nlm_tcpport);
+ if (!err)
+ warned = 0;
+ else if (warned++ == 0)
+ printk(KERN_WARNING
+ "lockd_up: makesock failed, error=%d\n", err);
+ return err;
+}
+
/*
* Bring up the lockd process if it's not already up.
*/
int
-lockd_up(void)
+lockd_up(int proto) /* Maybe add a 'family' option when IPv6 is supported ?? */
{
- static int warned;
struct svc_serv * serv;
int error = 0;
mutex_lock(&nlmsvc_mutex);
/*
- * Unconditionally increment the user count ... this is
- * the number of clients who _want_ a lockd process.
- */
- nlmsvc_users++;
- /*
* Check whether we're already up and running.
*/
- if (nlmsvc_pid)
+ if (nlmsvc_pid) {
+ if (proto)
+ error = make_socks(nlmsvc_serv, proto);
goto out;
+ }
/*
* Sanity check: if there's no pid,
* we should be the first user ...
*/
- if (nlmsvc_users > 1)
+ if (nlmsvc_users)
printk(KERN_WARNING
"lockd_up: no pid, %d users??\n", nlmsvc_users);
error = -ENOMEM;
- serv = svc_create(&nlmsvc_program, LOCKD_BUFSIZE);
+ serv = svc_create(&nlmsvc_program, LOCKD_BUFSIZE, NULL);
if (!serv) {
printk(KERN_WARNING "lockd_up: create service failed\n");
goto out;
}
- if ((error = svc_makesock(serv, IPPROTO_UDP, nlm_udpport)) < 0
-#ifdef CONFIG_NFSD_TCP
- || (error = svc_makesock(serv, IPPROTO_TCP, nlm_tcpport)) < 0
-#endif
- ) {
- if (warned++ == 0)
- printk(KERN_WARNING
- "lockd_up: makesock failed, error=%d\n", error);
+ if ((error = make_socks(serv, proto)) < 0)
goto destroy_and_out;
- }
- warned = 0;
/*
* Create the kernel thread and wait for it to start.
@@ -272,6 +298,8 @@ lockd_up(void)
destroy_and_out:
svc_destroy(serv);
out:
+ if (!error)
+ nlmsvc_users++;
mutex_unlock(&nlmsvc_mutex);
return error;
}
diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c
index c9d419703cf..93c00ee7189 100644
--- a/fs/lockd/svclock.c
+++ b/fs/lockd/svclock.c
@@ -325,7 +325,7 @@ static int nlmsvc_setgrantargs(struct nlm_rqst *call, struct nlm_lock *lock)
{
locks_copy_lock(&call->a_args.lock.fl, &lock->fl);
memcpy(&call->a_args.lock.fh, &lock->fh, sizeof(call->a_args.lock.fh));
- call->a_args.lock.caller = system_utsname.nodename;
+ call->a_args.lock.caller = utsname()->nodename;
call->a_args.lock.oh.len = lock->oh.len;
/* set default data area */
diff --git a/fs/lockd/xdr.c b/fs/lockd/xdr.c
index 033ea4ac2c3..61c46facf25 100644
--- a/fs/lockd/xdr.c
+++ b/fs/lockd/xdr.c
@@ -515,7 +515,7 @@ nlmclt_decode_res(struct rpc_rqst *req, u32 *p, struct nlm_res *resp)
*/
#define NLM_void_sz 0
#define NLM_cookie_sz 1+XDR_QUADLEN(NLM_MAXCOOKIELEN)
-#define NLM_caller_sz 1+XDR_QUADLEN(sizeof(system_utsname.nodename))
+#define NLM_caller_sz 1+XDR_QUADLEN(sizeof(utsname()->nodename))
#define NLM_netobj_sz 1+XDR_QUADLEN(XDR_MAX_NETOBJ)
/* #define NLM_owner_sz 1+XDR_QUADLEN(NLM_MAXOWNER) */
#define NLM_fhandle_sz 1+XDR_QUADLEN(NFS2_FHSIZE)
diff --git a/fs/locks.c b/fs/locks.c
index 21dfadfca2b..e0b6a80649a 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -1514,7 +1514,7 @@ int fcntl_setlease(unsigned int fd, struct file *filp, long arg)
goto out_unlock;
}
- error = f_setown(filp, current->pid, 0);
+ error = __f_setown(filp, task_pid(current), PIDTYPE_PID, 0);
out_unlock:
unlock_kernel();
return error;
diff --git a/fs/namespace.c b/fs/namespace.c
index 66d921e14fe..55442a6cf22 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -133,7 +133,7 @@ struct vfsmount *lookup_mnt(struct vfsmount *mnt, struct dentry *dentry)
static inline int check_mnt(struct vfsmount *mnt)
{
- return mnt->mnt_namespace == current->namespace;
+ return mnt->mnt_namespace == current->nsproxy->namespace;
}
static void touch_namespace(struct namespace *ns)
@@ -830,7 +830,7 @@ static int attach_recursive_mnt(struct vfsmount *source_mnt,
if (parent_nd) {
detach_mnt(source_mnt, parent_nd);
attach_mnt(source_mnt, nd);
- touch_namespace(current->namespace);
+ touch_namespace(current->nsproxy->namespace);
} else {
mnt_set_mountpoint(dest_mnt, dest_dentry, source_mnt);
commit_tree(source_mnt);
@@ -1441,7 +1441,7 @@ dput_out:
*/
struct namespace *dup_namespace(struct task_struct *tsk, struct fs_struct *fs)
{
- struct namespace *namespace = tsk->namespace;
+ struct namespace *namespace = tsk->nsproxy->namespace;
struct namespace *new_ns;
struct vfsmount *rootmnt = NULL, *pwdmnt = NULL, *altrootmnt = NULL;
struct vfsmount *p, *q;
@@ -1508,7 +1508,7 @@ struct namespace *dup_namespace(struct task_struct *tsk, struct fs_struct *fs)
int copy_namespace(int flags, struct task_struct *tsk)
{
- struct namespace *namespace = tsk->namespace;
+ struct namespace *namespace = tsk->nsproxy->namespace;
struct namespace *new_ns;
int err = 0;
@@ -1531,7 +1531,7 @@ int copy_namespace(int flags, struct task_struct *tsk)
goto out;
}
- tsk->namespace = new_ns;
+ tsk->nsproxy->namespace = new_ns;
out:
put_namespace(namespace);
@@ -1754,7 +1754,7 @@ asmlinkage long sys_pivot_root(const char __user * new_root,
detach_mnt(user_nd.mnt, &root_parent);
attach_mnt(user_nd.mnt, &old_nd); /* mount old root on put_old */
attach_mnt(new_nd.mnt, &root_parent); /* mount new_root on / */
- touch_namespace(current->namespace);
+ touch_namespace(current->nsproxy->namespace);
spin_unlock(&vfsmount_lock);
chroot_fs_refs(&user_nd, &new_nd);
security_sb_post_pivotroot(&user_nd, &new_nd);
@@ -1780,7 +1780,6 @@ static void __init init_mount_tree(void)
{
struct vfsmount *mnt;
struct namespace *namespace;
- struct task_struct *g, *p;
mnt = do_kern_mount("rootfs", 0, "rootfs", NULL);
if (IS_ERR(mnt))
@@ -1796,13 +1795,8 @@ static void __init init_mount_tree(void)
namespace->root = mnt;
mnt->mnt_namespace = namespace;
- init_task.namespace = namespace;
- read_lock(&tasklist_lock);
- do_each_thread(g, p) {
- get_namespace(namespace);
- p->namespace = namespace;
- } while_each_thread(g, p);
- read_unlock(&tasklist_lock);
+ init_task.nsproxy->namespace = namespace;
+ get_namespace(namespace);
set_fs_pwd(current->fs, namespace->root, namespace->root->mnt_root);
set_fs_root(current->fs, namespace->root, namespace->root->mnt_root);
diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c
index a3ee11364db..7933e2e99db 100644
--- a/fs/nfs/callback.c
+++ b/fs/nfs/callback.c
@@ -58,7 +58,6 @@ module_param_call(callback_tcpport, param_set_port, param_get_int,
*/
static void nfs_callback_svc(struct svc_rqst *rqstp)
{
- struct svc_serv *serv = rqstp->rq_server;
int err;
__module_get(THIS_MODULE);
@@ -80,7 +79,7 @@ static void nfs_callback_svc(struct svc_rqst *rqstp)
/*
* Listen for a request on the socket
*/
- err = svc_recv(serv, rqstp, MAX_SCHEDULE_TIMEOUT);
+ err = svc_recv(rqstp, MAX_SCHEDULE_TIMEOUT);
if (err == -EAGAIN || err == -EINTR)
continue;
if (err < 0) {
@@ -91,7 +90,7 @@ static void nfs_callback_svc(struct svc_rqst *rqstp)
}
dprintk("%s: request from %u.%u.%u.%u\n", __FUNCTION__,
NIPQUAD(rqstp->rq_addr.sin_addr.s_addr));
- svc_process(serv, rqstp);
+ svc_process(rqstp);
}
svc_exit_thread(rqstp);
@@ -116,7 +115,7 @@ int nfs_callback_up(void)
goto out;
init_completion(&nfs_callback_info.started);
init_completion(&nfs_callback_info.stopped);
- serv = svc_create(&nfs4_callback_program, NFS4_CALLBACK_BUFSIZE);
+ serv = svc_create(&nfs4_callback_program, NFS4_CALLBACK_BUFSIZE, NULL);
ret = -ENOMEM;
if (!serv)
goto out_err;
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index ec1938d4b81..8106f3b29e4 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -460,7 +460,8 @@ static int nfs_start_lockd(struct nfs_server *server)
goto out;
if (server->flags & NFS_MOUNT_NONLM)
goto out;
- error = lockd_up();
+ error = lockd_up((server->flags & NFS_MOUNT_TCP) ?
+ IPPROTO_TCP : IPPROTO_UDP);
if (error < 0)
server->flags |= NFS_MOUNT_NONLM;
else
diff --git a/fs/nfs/nfsroot.c b/fs/nfs/nfsroot.c
index c0a754ecdee..1d656a64519 100644
--- a/fs/nfs/nfsroot.c
+++ b/fs/nfs/nfsroot.c
@@ -312,7 +312,7 @@ static int __init root_nfs_name(char *name)
/* Override them by options set on kernel command-line */
root_nfs_parse(name, buf);
- cp = system_utsname.nodename;
+ cp = utsname()->nodename;
if (strlen(buf) + strlen(cp) > NFS_MAXPATHLEN) {
printk(KERN_ERR "Root-NFS: Pathname for remote directory too long.\n");
return -1;
diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c
index 01bc68c628a..cfe141e5d75 100644
--- a/fs/nfsd/export.c
+++ b/fs/nfsd/export.c
@@ -370,7 +370,7 @@ static int check_export(struct inode *inode, int flags)
*/
if (!(inode->i_sb->s_type->fs_flags & FS_REQUIRES_DEV) &&
!(flags & NFSEXP_FSID)) {
- dprintk("exp_export: export of non-dev fs without fsid");
+ dprintk("exp_export: export of non-dev fs without fsid\n");
return -EINVAL;
}
if (!inode->i_sb->s_export_op) {
@@ -1078,6 +1078,7 @@ exp_pseudoroot(struct auth_domain *clp, struct svc_fh *fhp,
/* Iterator */
static void *e_start(struct seq_file *m, loff_t *pos)
+ __acquires(svc_export_cache.hash_lock)
{
loff_t n = *pos;
unsigned hash, export;
@@ -1086,7 +1087,7 @@ static void *e_start(struct seq_file *m, loff_t *pos)
exp_readlock();
read_lock(&svc_export_cache.hash_lock);
if (!n--)
- return (void *)1;
+ return SEQ_START_TOKEN;
hash = n >> 32;
export = n & ((1LL<<32) - 1);
@@ -1110,7 +1111,7 @@ static void *e_next(struct seq_file *m, void *p, loff_t *pos)
struct cache_head *ch = p;
int hash = (*pos >> 32);
- if (p == (void *)1)
+ if (p == SEQ_START_TOKEN)
hash = 0;
else if (ch->next == NULL) {
hash++;
@@ -1131,6 +1132,7 @@ static void *e_next(struct seq_file *m, void *p, loff_t *pos)
}
static void e_stop(struct seq_file *m, void *p)
+ __releases(svc_export_cache.hash_lock)
{
read_unlock(&svc_export_cache.hash_lock);
exp_readunlock();
@@ -1178,15 +1180,13 @@ static int e_show(struct seq_file *m, void *p)
{
struct cache_head *cp = p;
struct svc_export *exp = container_of(cp, struct svc_export, h);
- svc_client *clp;
- if (p == (void *)1) {
+ if (p == SEQ_START_TOKEN) {
seq_puts(m, "# Version 1.1\n");
seq_puts(m, "# Path Client(Flags) # IPs\n");
return 0;
}
- clp = exp->ex_client;
cache_get(&exp->h);
if (cache_check(&svc_export_cache, &exp->h, NULL))
return 0;
diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index 8583d99ee74..f6ca9fb3fc6 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -131,7 +131,7 @@ xdr_error: \
#define READ_BUF(nbytes) do { \
p = xdr_inline_decode(xdr, nbytes); \
if (!p) { \
- dprintk("NFSD: %s: reply buffer overflowed in line %d.", \
+ dprintk("NFSD: %s: reply buffer overflowed in line %d.\n", \
__FUNCTION__, __LINE__); \
return -EIO; \
} \
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index ee4eff27aed..15ded7a30a7 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -600,7 +600,7 @@ nfsd4_setattr(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_se
&setattr->sa_stateid, CHECK_FH | WR_STATE, NULL);
nfs4_unlock_state();
if (status) {
- dprintk("NFSD: nfsd4_setattr: couldn't process stateid!");
+ dprintk("NFSD: nfsd4_setattr: couldn't process stateid!\n");
return status;
}
}
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index 7046ac9cf97..5c6a477c20e 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -23,10 +23,14 @@
#include <linux/pagemap.h>
#include <linux/init.h>
#include <linux/string.h>
+#include <linux/smp_lock.h>
+#include <linux/ctype.h>
#include <linux/nfs.h>
#include <linux/nfsd_idmap.h>
+#include <linux/lockd/bind.h>
#include <linux/sunrpc/svc.h>
+#include <linux/sunrpc/svcsock.h>
#include <linux/nfsd/nfsd.h>
#include <linux/nfsd/cache.h>
#include <linux/nfsd/xdr.h>
@@ -35,8 +39,6 @@
#include <asm/uaccess.h>
-unsigned int nfsd_versbits = ~0;
-
/*
* We have a single directory with 9 nodes in it.
*/
@@ -52,7 +54,9 @@ enum {
NFSD_List,
NFSD_Fh,
NFSD_Threads,
+ NFSD_Pool_Threads,
NFSD_Versions,
+ NFSD_Ports,
/*
* The below MUST come last. Otherwise we leave a hole in nfsd_files[]
* with !CONFIG_NFSD_V4 and simple_fill_super() goes oops
@@ -75,7 +79,9 @@ static ssize_t write_getfd(struct file *file, char *buf, size_t size);
static ssize_t write_getfs(struct file *file, char *buf, size_t size);
static ssize_t write_filehandle(struct file *file, char *buf, size_t size);
static ssize_t write_threads(struct file *file, char *buf, size_t size);
+static ssize_t write_pool_threads(struct file *file, char *buf, size_t size);
static ssize_t write_versions(struct file *file, char *buf, size_t size);
+static ssize_t write_ports(struct file *file, char *buf, size_t size);
#ifdef CONFIG_NFSD_V4
static ssize_t write_leasetime(struct file *file, char *buf, size_t size);
static ssize_t write_recoverydir(struct file *file, char *buf, size_t size);
@@ -91,7 +97,9 @@ static ssize_t (*write_op[])(struct file *, char *, size_t) = {
[NFSD_Getfs] = write_getfs,
[NFSD_Fh] = write_filehandle,
[NFSD_Threads] = write_threads,
+ [NFSD_Pool_Threads] = write_pool_threads,
[NFSD_Versions] = write_versions,
+ [NFSD_Ports] = write_ports,
#ifdef CONFIG_NFSD_V4
[NFSD_Leasetime] = write_leasetime,
[NFSD_RecoveryDir] = write_recoverydir,
@@ -358,6 +366,72 @@ static ssize_t write_threads(struct file *file, char *buf, size_t size)
return strlen(buf);
}
+extern int nfsd_nrpools(void);
+extern int nfsd_get_nrthreads(int n, int *);
+extern int nfsd_set_nrthreads(int n, int *);
+
+static ssize_t write_pool_threads(struct file *file, char *buf, size_t size)
+{
+ /* if size > 0, look for an array of number of threads per node
+ * and apply them then write out number of threads per node as reply
+ */
+ char *mesg = buf;
+ int i;
+ int rv;
+ int len;
+ int npools = nfsd_nrpools();
+ int *nthreads;
+
+ if (npools == 0) {
+ /*
+ * NFS is shut down. The admin can start it by
+ * writing to the threads file but NOT the pool_threads
+ * file, sorry. Report zero threads.
+ */
+ strcpy(buf, "0\n");
+ return strlen(buf);
+ }
+
+ nthreads = kcalloc(npools, sizeof(int), GFP_KERNEL);
+ if (nthreads == NULL)
+ return -ENOMEM;
+
+ if (size > 0) {
+ for (i = 0; i < npools; i++) {
+ rv = get_int(&mesg, &nthreads[i]);
+ if (rv == -ENOENT)
+ break; /* fewer numbers than pools */
+ if (rv)
+ goto out_free; /* syntax error */
+ rv = -EINVAL;
+ if (nthreads[i] < 0)
+ goto out_free;
+ }
+ rv = nfsd_set_nrthreads(i, nthreads);
+ if (rv)
+ goto out_free;
+ }
+
+ rv = nfsd_get_nrthreads(npools, nthreads);
+ if (rv)
+ goto out_free;
+
+ mesg = buf;
+ size = SIMPLE_TRANSACTION_LIMIT;
+ for (i = 0; i < npools && size > 0; i++) {
+ snprintf(mesg, size, "%d%c", nthreads[i], (i == npools-1 ? '\n' : ' '));
+ len = strlen(mesg);
+ size -= len;
+ mesg += len;
+ }
+
+ return (mesg-buf);
+
+out_free:
+ kfree(nthreads);
+ return rv;
+}
+
static ssize_t write_versions(struct file *file, char *buf, size_t size)
{
/*
@@ -372,6 +446,10 @@ static ssize_t write_versions(struct file *file, char *buf, size_t size)
if (size>0) {
if (nfsd_serv)
+ /* Cannot change versions without updating
+ * nfsd_serv->sv_xdrsize, and reallocing
+ * rq_argp and rq_resp
+ */
return -EBUSY;
if (buf[size-1] != '\n')
return -EINVAL;
@@ -390,10 +468,7 @@ static ssize_t write_versions(struct file *file, char *buf, size_t size)
case 2:
case 3:
case 4:
- if (sign != '-')
- NFSCTL_VERSET(nfsd_versbits, num);
- else
- NFSCTL_VERUNSET(nfsd_versbits, num);
+ nfsd_vers(num, sign == '-' ? NFSD_CLEAR : NFSD_SET);
break;
default:
return -EINVAL;
@@ -404,16 +479,15 @@ static ssize_t write_versions(struct file *file, char *buf, size_t size)
/* If all get turned off, turn them back on, as
* having no versions is BAD
*/
- if ((nfsd_versbits & NFSCTL_VERALL)==0)
- nfsd_versbits = NFSCTL_VERALL;
+ nfsd_reset_versions();
}
/* Now write current state into reply buffer */
len = 0;
sep = "";
for (num=2 ; num <= 4 ; num++)
- if (NFSCTL_VERISSET(NFSCTL_VERALL, num)) {
+ if (nfsd_vers(num, NFSD_AVAIL)) {
len += sprintf(buf+len, "%s%c%d", sep,
- NFSCTL_VERISSET(nfsd_versbits, num)?'+':'-',
+ nfsd_vers(num, NFSD_TEST)?'+':'-',
num);
sep = " ";
}
@@ -421,6 +495,62 @@ static ssize_t write_versions(struct file *file, char *buf, size_t size)
return len;
}
+static ssize_t write_ports(struct file *file, char *buf, size_t size)
+{
+ if (size == 0) {
+ int len = 0;
+ lock_kernel();
+ if (nfsd_serv)
+ len = svc_sock_names(buf, nfsd_serv, NULL);
+ unlock_kernel();
+ return len;
+ }
+ /* Either a single 'fd' number is written, in which
+ * case it must be for a socket of a supported family/protocol,
+ * and we use it as an nfsd socket, or
+ * A '-' followed by the 'name' of a socket in which case
+ * we close the socket.
+ */
+ if (isdigit(buf[0])) {
+ char *mesg = buf;
+ int fd;
+ int err;
+ err = get_int(&mesg, &fd);
+ if (err)
+ return -EINVAL;
+ if (fd < 0)
+ return -EINVAL;
+ err = nfsd_create_serv();
+ if (!err) {
+ int proto = 0;
+ err = lockd_up(proto);
+ if (!err) {
+ err = svc_addsock(nfsd_serv, fd, buf, &proto);
+ if (err)
+ lockd_down();
+ }
+ /* Decrease the count, but don't shutdown the
+ * the service
+ */
+ nfsd_serv->sv_nrthreads--;
+ }
+ return err;
+ }
+ if (buf[0] == '-') {
+ char *toclose = kstrdup(buf+1, GFP_KERNEL);
+ int len = 0;
+ if (!toclose)
+ return -ENOMEM;
+ lock_kernel();
+ if (nfsd_serv)
+ len = svc_sock_names(buf, nfsd_serv, toclose);
+ unlock_kernel();
+ kfree(toclose);
+ return len;
+ }
+ return -EINVAL;
+}
+
#ifdef CONFIG_NFSD_V4
extern time_t nfs4_leasetime(void);
@@ -483,7 +613,9 @@ static int nfsd_fill_super(struct super_block * sb, void * data, int silent)
[NFSD_List] = {"exports", &exports_operations, S_IRUGO},
[NFSD_Fh] = {"filehandle", &transaction_ops, S_IWUSR|S_IRUSR},
[NFSD_Threads] = {"threads", &transaction_ops, S_IWUSR|S_IRUSR},
+ [NFSD_Pool_Threads] = {"pool_threads", &transaction_ops, S_IWUSR|S_IRUSR},
[NFSD_Versions] = {"versions", &transaction_ops, S_IWUSR|S_IRUSR},
+ [NFSD_Ports] = {"portlist", &transaction_ops, S_IWUSR|S_IRUGO},
#ifdef CONFIG_NFSD_V4
[NFSD_Leasetime] = {"nfsv4leasetime", &transaction_ops, S_IWUSR|S_IRUSR},
[NFSD_RecoveryDir] = {"nfsv4recoverydir", &transaction_ops, S_IWUSR|S_IRUSR},
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index ec1decf29ba..19443056ec3 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -57,12 +57,6 @@ static atomic_t nfsd_busy;
static unsigned long nfsd_last_call;
static DEFINE_SPINLOCK(nfsd_call_lock);
-struct nfsd_list {
- struct list_head list;
- struct task_struct *task;
-};
-static struct list_head nfsd_list = LIST_HEAD_INIT(nfsd_list);
-
#if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL)
static struct svc_stat nfsd_acl_svcstats;
static struct svc_version * nfsd_acl_version[] = {
@@ -117,6 +111,32 @@ struct svc_program nfsd_program = {
};
+int nfsd_vers(int vers, enum vers_op change)
+{
+ if (vers < NFSD_MINVERS || vers >= NFSD_NRVERS)
+ return -1;
+ switch(change) {
+ case NFSD_SET:
+ nfsd_versions[vers] = nfsd_version[vers];
+ break;
+#if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL)
+ if (vers < NFSD_ACL_NRVERS)
+ nfsd_acl_version[vers] = nfsd_acl_version[vers];
+#endif
+ case NFSD_CLEAR:
+ nfsd_versions[vers] = NULL;
+#if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL)
+ if (vers < NFSD_ACL_NRVERS)
+ nfsd_acl_version[vers] = NULL;
+#endif
+ break;
+ case NFSD_TEST:
+ return nfsd_versions[vers] != NULL;
+ case NFSD_AVAIL:
+ return nfsd_version[vers] != NULL;
+ }
+ return 0;
+}
/*
* Maximum number of nfsd processes
*/
@@ -130,16 +150,175 @@ int nfsd_nrthreads(void)
return nfsd_serv->sv_nrthreads;
}
+static int killsig; /* signal that was used to kill last nfsd */
+static void nfsd_last_thread(struct svc_serv *serv)
+{
+ /* When last nfsd thread exits we need to do some clean-up */
+ struct svc_sock *svsk;
+ list_for_each_entry(svsk, &serv->sv_permsocks, sk_list)
+ lockd_down();
+ nfsd_serv = NULL;
+ nfsd_racache_shutdown();
+ nfs4_state_shutdown();
+
+ printk(KERN_WARNING "nfsd: last server has exited\n");
+ if (killsig != SIG_NOCLEAN) {
+ printk(KERN_WARNING "nfsd: unexporting all filesystems\n");
+ nfsd_export_flush();
+ }
+}
+
+void nfsd_reset_versions(void)
+{
+ int found_one = 0;
+ int i;
+
+ for (i = NFSD_MINVERS; i < NFSD_NRVERS; i++) {
+ if (nfsd_program.pg_vers[i])
+ found_one = 1;
+ }
+
+ if (!found_one) {
+ for (i = NFSD_MINVERS; i < NFSD_NRVERS; i++)
+ nfsd_program.pg_vers[i] = nfsd_version[i];
+#if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL)
+ for (i = NFSD_ACL_MINVERS; i < NFSD_ACL_NRVERS; i++)
+ nfsd_acl_program.pg_vers[i] =
+ nfsd_acl_version[i];
+#endif
+ }
+}
+
+int nfsd_create_serv(void)
+{
+ int err = 0;
+ lock_kernel();
+ if (nfsd_serv) {
+ svc_get(nfsd_serv);
+ unlock_kernel();
+ return 0;
+ }
+
+ atomic_set(&nfsd_busy, 0);
+ nfsd_serv = svc_create_pooled(&nfsd_program, NFSD_BUFSIZE,
+ nfsd_last_thread,
+ nfsd, SIG_NOCLEAN, THIS_MODULE);
+ if (nfsd_serv == NULL)
+ err = -ENOMEM;
+ unlock_kernel();
+ do_gettimeofday(&nfssvc_boot); /* record boot time */
+ return err;
+}
+
+static int nfsd_init_socks(int port)
+{
+ int error;
+ if (!list_empty(&nfsd_serv->sv_permsocks))
+ return 0;
+
+ error = lockd_up(IPPROTO_UDP);
+ if (error >= 0) {
+ error = svc_makesock(nfsd_serv, IPPROTO_UDP, port);
+ if (error < 0)
+ lockd_down();
+ }
+ if (error < 0)
+ return error;
+
+#ifdef CONFIG_NFSD_TCP
+ error = lockd_up(IPPROTO_TCP);
+ if (error >= 0) {
+ error = svc_makesock(nfsd_serv, IPPROTO_TCP, port);
+ if (error < 0)
+ lockd_down();
+ }
+ if (error < 0)
+ return error;
+#endif
+ return 0;
+}
+
+int nfsd_nrpools(void)
+{
+ if (nfsd_serv == NULL)
+ return 0;
+ else
+ return nfsd_serv->sv_nrpools;
+}
+
+int nfsd_get_nrthreads(int n, int *nthreads)
+{
+ int i = 0;
+
+ if (nfsd_serv != NULL) {
+ for (i = 0; i < nfsd_serv->sv_nrpools && i < n; i++)
+ nthreads[i] = nfsd_serv->sv_pools[i].sp_nrthreads;
+ }
+
+ return 0;
+}
+
+int nfsd_set_nrthreads(int n, int *nthreads)
+{
+ int i = 0;
+ int tot = 0;
+ int err = 0;
+
+ if (nfsd_serv == NULL || n <= 0)
+ return 0;
+
+ if (n > nfsd_serv->sv_nrpools)
+ n = nfsd_serv->sv_nrpools;
+
+ /* enforce a global maximum number of threads */
+ tot = 0;
+ for (i = 0; i < n; i++) {
+ if (nthreads[i] > NFSD_MAXSERVS)
+ nthreads[i] = NFSD_MAXSERVS;
+ tot += nthreads[i];
+ }
+ if (tot > NFSD_MAXSERVS) {
+ /* total too large: scale down requested numbers */
+ for (i = 0; i < n && tot > 0; i++) {
+ int new = nthreads[i] * NFSD_MAXSERVS / tot;
+ tot -= (nthreads[i] - new);
+ nthreads[i] = new;
+ }
+ for (i = 0; i < n && tot > 0; i++) {
+ nthreads[i]--;
+ tot--;
+ }
+ }
+
+ /*
+ * There must always be a thread in pool 0; the admin
+ * can't shut down NFS completely using pool_threads.
+ */
+ if (nthreads[0] == 0)
+ nthreads[0] = 1;
+
+ /* apply the new numbers */
+ lock_kernel();
+ svc_get(nfsd_serv);
+ for (i = 0; i < n; i++) {
+ err = svc_set_num_threads(nfsd_serv, &nfsd_serv->sv_pools[i],
+ nthreads[i]);
+ if (err)
+ break;
+ }
+ svc_destroy(nfsd_serv);
+ unlock_kernel();
+
+ return err;
+}
+
int
nfsd_svc(unsigned short port, int nrservs)
{
int error;
- int none_left, found_one, i;
- struct list_head *victim;
lock_kernel();
- dprintk("nfsd: creating service: vers 0x%x\n",
- nfsd_versbits);
+ dprintk("nfsd: creating service\n");
error = -EINVAL;
if (nrservs <= 0)
nrservs = 0;
@@ -153,91 +332,20 @@ nfsd_svc(unsigned short port, int nrservs)
error = nfs4_state_start();
if (error<0)
goto out;
- if (!nfsd_serv) {
- /*
- * Use the nfsd_ctlbits to define which
- * versions that will be advertised.
- * If nfsd_ctlbits doesn't list any version,
- * export them all.
- */
- found_one = 0;
-
- for (i = NFSD_MINVERS; i < NFSD_NRVERS; i++) {
- if (NFSCTL_VERISSET(nfsd_versbits, i)) {
- nfsd_program.pg_vers[i] = nfsd_version[i];
- found_one = 1;
- } else
- nfsd_program.pg_vers[i] = NULL;
- }
- if (!found_one) {
- for (i = NFSD_MINVERS; i < NFSD_NRVERS; i++)
- nfsd_program.pg_vers[i] = nfsd_version[i];
- }
+ nfsd_reset_versions();
+ error = nfsd_create_serv();
-#if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL)
- found_one = 0;
-
- for (i = NFSD_ACL_MINVERS; i < NFSD_ACL_NRVERS; i++) {
- if (NFSCTL_VERISSET(nfsd_versbits, i)) {
- nfsd_acl_program.pg_vers[i] =
- nfsd_acl_version[i];
- found_one = 1;
- } else
- nfsd_acl_program.pg_vers[i] = NULL;
- }
-
- if (!found_one) {
- for (i = NFSD_ACL_MINVERS; i < NFSD_ACL_NRVERS; i++)
- nfsd_acl_program.pg_vers[i] =
- nfsd_acl_version[i];
- }
-#endif
-
- atomic_set(&nfsd_busy, 0);
- error = -ENOMEM;
- nfsd_serv = svc_create(&nfsd_program, NFSD_BUFSIZE);
- if (nfsd_serv == NULL)
- goto out;
- error = svc_makesock(nfsd_serv, IPPROTO_UDP, port);
- if (error < 0)
- goto failure;
+ if (error)
+ goto out;
+ error = nfsd_init_socks(port);
+ if (error)
+ goto failure;
-#ifdef CONFIG_NFSD_TCP
- error = svc_makesock(nfsd_serv, IPPROTO_TCP, port);
- if (error < 0)
- goto failure;
-#endif
- do_gettimeofday(&nfssvc_boot); /* record boot time */
- } else
- nfsd_serv->sv_nrthreads++;
- nrservs -= (nfsd_serv->sv_nrthreads-1);
- while (nrservs > 0) {
- nrservs--;
- __module_get(THIS_MODULE);
- error = svc_create_thread(nfsd, nfsd_serv);
- if (error < 0) {
- module_put(THIS_MODULE);
- break;
- }
- }
- victim = nfsd_list.next;
- while (nrservs < 0 && victim != &nfsd_list) {
- struct nfsd_list *nl =
- list_entry(victim,struct nfsd_list, list);
- victim = victim->next;
- send_sig(SIG_NOCLEAN, nl->task, 1);
- nrservs++;
- }
+ error = svc_set_num_threads(nfsd_serv, NULL, nrservs);
failure:
- none_left = (nfsd_serv->sv_nrthreads == 1);
svc_destroy(nfsd_serv); /* Release server */
- if (none_left) {
- nfsd_serv = NULL;
- nfsd_racache_shutdown();
- nfs4_state_shutdown();
- }
out:
unlock_kernel();
return error;
@@ -270,10 +378,8 @@ update_thread_usage(int busy_threads)
static void
nfsd(struct svc_rqst *rqstp)
{
- struct svc_serv *serv = rqstp->rq_server;
struct fs_struct *fsp;
int err;
- struct nfsd_list me;
sigset_t shutdown_mask, allowed_mask;
/* Lock module and set up kernel thread */
@@ -297,10 +403,7 @@ nfsd(struct svc_rqst *rqstp)
nfsdstats.th_cnt++;
- lockd_up(); /* start lockd */
-
- me.task = current;
- list_add(&me.list, &nfsd_list);
+ rqstp->rq_task = current;
unlock_kernel();
@@ -322,8 +425,7 @@ nfsd(struct svc_rqst *rqstp)
* Find a socket with data available and call its
* recvfrom routine.
*/
- while ((err = svc_recv(serv, rqstp,
- 60*60*HZ)) == -EAGAIN)
+ while ((err = svc_recv(rqstp, 60*60*HZ)) == -EAGAIN)
;
if (err < 0)
break;
@@ -336,7 +438,7 @@ nfsd(struct svc_rqst *rqstp)
/* Process request with signals blocked. */
sigprocmask(SIG_SETMASK, &allowed_mask, NULL);
- svc_process(serv, rqstp);
+ svc_process(rqstp);
/* Unlock export hash tables */
exp_readunlock();
@@ -353,29 +455,13 @@ nfsd(struct svc_rqst *rqstp)
if (sigismember(&current->pending.signal, signo) &&
!sigismember(&current->blocked, signo))
break;
- err = signo;
+ killsig = signo;
}
- /* Clear signals before calling lockd_down() and svc_exit_thread() */
+ /* Clear signals before calling svc_exit_thread() */
flush_signals(current);
lock_kernel();
- /* Release lockd */
- lockd_down();
-
- /* Check if this is last thread */
- if (serv->sv_nrthreads==1) {
-
- printk(KERN_WARNING "nfsd: last server has exited\n");
- if (err != SIG_NOCLEAN) {
- printk(KERN_WARNING "nfsd: unexporting all filesystems\n");
- nfsd_export_flush();
- }
- nfsd_serv = NULL;
- nfsd_racache_shutdown(); /* release read-ahead cache */
- nfs4_state_shutdown();
- }
- list_del(&me.list);
nfsdstats.th_cnt --;
out:
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index c9e3b5a8fe0..443ebc52e38 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -1114,7 +1114,7 @@ nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
*/
if (!resfhp->fh_dentry) {
/* called from nfsd_proc_mkdir, or possibly nfsd3_proc_create */
- fh_lock(fhp);
+ fh_lock_nested(fhp, I_MUTEX_PARENT);
dchild = lookup_one_len(fname, dentry, flen);
err = PTR_ERR(dchild);
if (IS_ERR(dchild))
@@ -1240,7 +1240,7 @@ nfsd_create_v3(struct svc_rqst *rqstp, struct svc_fh *fhp,
err = nfserr_notdir;
if(!dirp->i_op || !dirp->i_op->lookup)
goto out;
- fh_lock(fhp);
+ fh_lock_nested(fhp, I_MUTEX_PARENT);
/*
* Compose the response file handle.
@@ -1494,7 +1494,7 @@ nfsd_link(struct svc_rqst *rqstp, struct svc_fh *ffhp,
if (isdotent(name, len))
goto out;
- fh_lock(ffhp);
+ fh_lock_nested(ffhp, I_MUTEX_PARENT);
ddir = ffhp->fh_dentry;
dirp = ddir->d_inode;
@@ -1644,7 +1644,7 @@ nfsd_unlink(struct svc_rqst *rqstp, struct svc_fh *fhp, int type,
if (err)
goto out;
- fh_lock(fhp);
+ fh_lock_nested(fhp, I_MUTEX_PARENT);
dentry = fhp->fh_dentry;
dirp = dentry->d_inode;
diff --git a/fs/nls/nls_base.c b/fs/nls/nls_base.c
index 9de6b495f11..b1317ad5ca1 100644
--- a/fs/nls/nls_base.c
+++ b/fs/nls/nls_base.c
@@ -163,8 +163,6 @@ int register_nls(struct nls_table * nls)
{
struct nls_table ** tmp = &tables;
- if (!nls)
- return -EINVAL;
if (nls->next)
return -EBUSY;
diff --git a/fs/proc/array.c b/fs/proc/array.c
index c0e554971df..25e917fb473 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -162,7 +162,7 @@ static inline char * task_state(struct task_struct *p, char *buffer)
int g;
struct fdtable *fdt = NULL;
- read_lock(&tasklist_lock);
+ rcu_read_lock();
buffer += sprintf(buffer,
"State:\t%s\n"
"SleepAVG:\t%lu%%\n"
@@ -174,14 +174,13 @@ static inline char * task_state(struct task_struct *p, char *buffer)
"Gid:\t%d\t%d\t%d\t%d\n",
get_task_state(p),
(p->sleep_avg/1024)*100/(1020000000/1024),
- p->tgid,
- p->pid, pid_alive(p) ? p->group_leader->real_parent->tgid : 0,
- pid_alive(p) && p->ptrace ? p->parent->pid : 0,
+ p->tgid, p->pid,
+ pid_alive(p) ? rcu_dereference(p->real_parent)->tgid : 0,
+ pid_alive(p) && p->ptrace ? rcu_dereference(p->parent)->pid : 0,
p->uid, p->euid, p->suid, p->fsuid,
p->gid, p->egid, p->sgid, p->fsgid);
- read_unlock(&tasklist_lock);
+
task_lock(p);
- rcu_read_lock();
if (p->files)
fdt = files_fdtable(p->files);
buffer += sprintf(buffer,
@@ -244,6 +243,7 @@ static void collect_sigign_sigcatch(struct task_struct *p, sigset_t *ign,
static inline char * task_sig(struct task_struct *p, char *buffer)
{
+ unsigned long flags;
sigset_t pending, shpending, blocked, ignored, caught;
int num_threads = 0;
unsigned long qsize = 0;
@@ -255,10 +255,8 @@ static inline char * task_sig(struct task_struct *p, char *buffer)
sigemptyset(&ignored);
sigemptyset(&caught);
- /* Gather all the data with the appropriate locks held */
- read_lock(&tasklist_lock);
- if (p->sighand) {
- spin_lock_irq(&p->sighand->siglock);
+ rcu_read_lock();
+ if (lock_task_sighand(p, &flags)) {
pending = p->pending.signal;
shpending = p->signal->shared_pending.signal;
blocked = p->blocked;
@@ -266,9 +264,9 @@ static inline char * task_sig(struct task_struct *p, char *buffer)
num_threads = atomic_read(&p->signal->count);
qsize = atomic_read(&p->user->sigpending);
qlim = p->signal->rlim[RLIMIT_SIGPENDING].rlim_cur;
- spin_unlock_irq(&p->sighand->siglock);
+ unlock_task_sighand(p, &flags);
}
- read_unlock(&tasklist_lock);
+ rcu_read_unlock();
buffer += sprintf(buffer, "Threads:\t%d\n", num_threads);
buffer += sprintf(buffer, "SigQ:\t%lu/%lu\n", qsize, qlim);
@@ -322,7 +320,7 @@ static int do_task_stat(struct task_struct *task, char * buffer, int whole)
sigset_t sigign, sigcatch;
char state;
int res;
- pid_t ppid, pgid = -1, sid = -1;
+ pid_t ppid = 0, pgid = -1, sid = -1;
int num_threads = 0;
struct mm_struct *mm;
unsigned long long start_time;
@@ -330,8 +328,8 @@ static int do_task_stat(struct task_struct *task, char * buffer, int whole)
unsigned long min_flt = 0, maj_flt = 0;
cputime_t cutime, cstime, utime, stime;
unsigned long rsslim = 0;
- struct task_struct *t;
char tcomm[sizeof(task->comm)];
+ unsigned long flags;
state = *get_task_state(task);
vsize = eip = esp = 0;
@@ -349,15 +347,33 @@ static int do_task_stat(struct task_struct *task, char * buffer, int whole)
cutime = cstime = utime = stime = cputime_zero;
mutex_lock(&tty_mutex);
- read_lock(&tasklist_lock);
- if (task->sighand) {
- spin_lock_irq(&task->sighand->siglock);
- num_threads = atomic_read(&task->signal->count);
+ rcu_read_lock();
+ if (lock_task_sighand(task, &flags)) {
+ struct signal_struct *sig = task->signal;
+ struct tty_struct *tty = sig->tty;
+
+ if (tty) {
+ /*
+ * sig->tty is not stable, but tty_mutex
+ * protects us from release_dev(tty)
+ */
+ barrier();
+ tty_pgrp = tty->pgrp;
+ tty_nr = new_encode_dev(tty_devnum(tty));
+ }
+
+ num_threads = atomic_read(&sig->count);
collect_sigign_sigcatch(task, &sigign, &sigcatch);
+ cmin_flt = sig->cmin_flt;
+ cmaj_flt = sig->cmaj_flt;
+ cutime = sig->cutime;
+ cstime = sig->cstime;
+ rsslim = sig->rlim[RLIMIT_RSS].rlim_cur;
+
/* add up live thread stats at the group level */
if (whole) {
- t = task;
+ struct task_struct *t = task;
do {
min_flt += t->min_flt;
maj_flt += t->maj_flt;
@@ -365,31 +381,20 @@ static int do_task_stat(struct task_struct *task, char * buffer, int whole)
stime = cputime_add(stime, t->stime);
t = next_thread(t);
} while (t != task);
- }
- spin_unlock_irq(&task->sighand->siglock);
- }
- if (task->signal) {
- if (task->signal->tty) {
- tty_pgrp = task->signal->tty->pgrp;
- tty_nr = new_encode_dev(tty_devnum(task->signal->tty));
+ min_flt += sig->min_flt;
+ maj_flt += sig->maj_flt;
+ utime = cputime_add(utime, sig->utime);
+ stime = cputime_add(stime, sig->stime);
}
+
+ sid = sig->session;
pgid = process_group(task);
- sid = task->signal->session;
- cmin_flt = task->signal->cmin_flt;
- cmaj_flt = task->signal->cmaj_flt;
- cutime = task->signal->cutime;
- cstime = task->signal->cstime;
- rsslim = task->signal->rlim[RLIMIT_RSS].rlim_cur;
- if (whole) {
- min_flt += task->signal->min_flt;
- maj_flt += task->signal->maj_flt;
- utime = cputime_add(utime, task->signal->utime);
- stime = cputime_add(stime, task->signal->stime);
- }
+ ppid = rcu_dereference(task->real_parent)->tgid;
+
+ unlock_task_sighand(task, &flags);
}
- ppid = pid_alive(task) ? task->group_leader->real_parent->tgid : 0;
- read_unlock(&tasklist_lock);
+ rcu_read_unlock();
mutex_unlock(&tty_mutex);
if (!whole || num_threads<2)
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 89c20d9d50b..82da55b5cff 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -71,6 +71,7 @@
#include <linux/cpuset.h>
#include <linux/audit.h>
#include <linux/poll.h>
+#include <linux/nsproxy.h>
#include "internal.h"
/* NOTE:
@@ -83,262 +84,44 @@
* in /proc for a task before it execs a suid executable.
*/
-/*
- * For hysterical raisins we keep the same inumbers as in the old procfs.
- * Feel free to change the macro below - just keep the range distinct from
- * inumbers of the rest of procfs (currently those are in 0x0000--0xffff).
- * As soon as we'll get a separate superblock we will be able to forget
- * about magical ranges too.
- */
-
-#define fake_ino(pid,ino) (((pid)<<16)|(ino))
-
-enum pid_directory_inos {
- PROC_TGID_INO = 2,
- PROC_TGID_TASK,
- PROC_TGID_STATUS,
- PROC_TGID_MEM,
-#ifdef CONFIG_SECCOMP
- PROC_TGID_SECCOMP,
-#endif
- PROC_TGID_CWD,
- PROC_TGID_ROOT,
- PROC_TGID_EXE,
- PROC_TGID_FD,
- PROC_TGID_ENVIRON,
- PROC_TGID_AUXV,
- PROC_TGID_CMDLINE,
- PROC_TGID_STAT,
- PROC_TGID_STATM,
- PROC_TGID_MAPS,
- PROC_TGID_NUMA_MAPS,
- PROC_TGID_MOUNTS,
- PROC_TGID_MOUNTSTATS,
- PROC_TGID_WCHAN,
-#ifdef CONFIG_MMU
- PROC_TGID_SMAPS,
-#endif
-#ifdef CONFIG_SCHEDSTATS
- PROC_TGID_SCHEDSTAT,
-#endif
-#ifdef CONFIG_CPUSETS
- PROC_TGID_CPUSET,
-#endif
-#ifdef CONFIG_SECURITY
- PROC_TGID_ATTR,
- PROC_TGID_ATTR_CURRENT,
- PROC_TGID_ATTR_PREV,
- PROC_TGID_ATTR_EXEC,
- PROC_TGID_ATTR_FSCREATE,
- PROC_TGID_ATTR_KEYCREATE,
- PROC_TGID_ATTR_SOCKCREATE,
-#endif
-#ifdef CONFIG_AUDITSYSCALL
- PROC_TGID_LOGINUID,
-#endif
- PROC_TGID_OOM_SCORE,
- PROC_TGID_OOM_ADJUST,
- PROC_TID_INO,
- PROC_TID_STATUS,
- PROC_TID_MEM,
-#ifdef CONFIG_SECCOMP
- PROC_TID_SECCOMP,
-#endif
- PROC_TID_CWD,
- PROC_TID_ROOT,
- PROC_TID_EXE,
- PROC_TID_FD,
- PROC_TID_ENVIRON,
- PROC_TID_AUXV,
- PROC_TID_CMDLINE,
- PROC_TID_STAT,
- PROC_TID_STATM,
- PROC_TID_MAPS,
- PROC_TID_NUMA_MAPS,
- PROC_TID_MOUNTS,
- PROC_TID_MOUNTSTATS,
- PROC_TID_WCHAN,
-#ifdef CONFIG_MMU
- PROC_TID_SMAPS,
-#endif
-#ifdef CONFIG_SCHEDSTATS
- PROC_TID_SCHEDSTAT,
-#endif
-#ifdef CONFIG_CPUSETS
- PROC_TID_CPUSET,
-#endif
-#ifdef CONFIG_SECURITY
- PROC_TID_ATTR,
- PROC_TID_ATTR_CURRENT,
- PROC_TID_ATTR_PREV,
- PROC_TID_ATTR_EXEC,
- PROC_TID_ATTR_FSCREATE,
- PROC_TID_ATTR_KEYCREATE,
- PROC_TID_ATTR_SOCKCREATE,
-#endif
-#ifdef CONFIG_AUDITSYSCALL
- PROC_TID_LOGINUID,
-#endif
- PROC_TID_OOM_SCORE,
- PROC_TID_OOM_ADJUST,
-
- /* Add new entries before this */
- PROC_TID_FD_DIR = 0x8000, /* 0x8000-0xffff */
-};
/* Worst case buffer size needed for holding an integer. */
#define PROC_NUMBUF 10
struct pid_entry {
- int type;
int len;
char *name;
mode_t mode;
+ struct inode_operations *iop;
+ struct file_operations *fop;
+ union proc_op op;
};
-#define E(type,name,mode) {(type),sizeof(name)-1,(name),(mode)}
-
-static struct pid_entry tgid_base_stuff[] = {
- E(PROC_TGID_TASK, "task", S_IFDIR|S_IRUGO|S_IXUGO),
- E(PROC_TGID_FD, "fd", S_IFDIR|S_IRUSR|S_IXUSR),
- E(PROC_TGID_ENVIRON, "environ", S_IFREG|S_IRUSR),
- E(PROC_TGID_AUXV, "auxv", S_IFREG|S_IRUSR),
- E(PROC_TGID_STATUS, "status", S_IFREG|S_IRUGO),
- E(PROC_TGID_CMDLINE, "cmdline", S_IFREG|S_IRUGO),
- E(PROC_TGID_STAT, "stat", S_IFREG|S_IRUGO),
- E(PROC_TGID_STATM, "statm", S_IFREG|S_IRUGO),
- E(PROC_TGID_MAPS, "maps", S_IFREG|S_IRUGO),
-#ifdef CONFIG_NUMA
- E(PROC_TGID_NUMA_MAPS, "numa_maps", S_IFREG|S_IRUGO),
-#endif
- E(PROC_TGID_MEM, "mem", S_IFREG|S_IRUSR|S_IWUSR),
-#ifdef CONFIG_SECCOMP
- E(PROC_TGID_SECCOMP, "seccomp", S_IFREG|S_IRUSR|S_IWUSR),
-#endif
- E(PROC_TGID_CWD, "cwd", S_IFLNK|S_IRWXUGO),
- E(PROC_TGID_ROOT, "root", S_IFLNK|S_IRWXUGO),
- E(PROC_TGID_EXE, "exe", S_IFLNK|S_IRWXUGO),
- E(PROC_TGID_MOUNTS, "mounts", S_IFREG|S_IRUGO),
- E(PROC_TGID_MOUNTSTATS, "mountstats", S_IFREG|S_IRUSR),
-#ifdef CONFIG_MMU
- E(PROC_TGID_SMAPS, "smaps", S_IFREG|S_IRUGO),
-#endif
-#ifdef CONFIG_SECURITY
- E(PROC_TGID_ATTR, "attr", S_IFDIR|S_IRUGO|S_IXUGO),
-#endif
-#ifdef CONFIG_KALLSYMS
- E(PROC_TGID_WCHAN, "wchan", S_IFREG|S_IRUGO),
-#endif
-#ifdef CONFIG_SCHEDSTATS
- E(PROC_TGID_SCHEDSTAT, "schedstat", S_IFREG|S_IRUGO),
-#endif
-#ifdef CONFIG_CPUSETS
- E(PROC_TGID_CPUSET, "cpuset", S_IFREG|S_IRUGO),
-#endif
- E(PROC_TGID_OOM_SCORE, "oom_score",S_IFREG|S_IRUGO),
- E(PROC_TGID_OOM_ADJUST,"oom_adj", S_IFREG|S_IRUGO|S_IWUSR),
-#ifdef CONFIG_AUDITSYSCALL
- E(PROC_TGID_LOGINUID, "loginuid", S_IFREG|S_IWUSR|S_IRUGO),
-#endif
- {0,0,NULL,0}
-};
-static struct pid_entry tid_base_stuff[] = {
- E(PROC_TID_FD, "fd", S_IFDIR|S_IRUSR|S_IXUSR),
- E(PROC_TID_ENVIRON, "environ", S_IFREG|S_IRUSR),
- E(PROC_TID_AUXV, "auxv", S_IFREG|S_IRUSR),
- E(PROC_TID_STATUS, "status", S_IFREG|S_IRUGO),
- E(PROC_TID_CMDLINE, "cmdline", S_IFREG|S_IRUGO),
- E(PROC_TID_STAT, "stat", S_IFREG|S_IRUGO),
- E(PROC_TID_STATM, "statm", S_IFREG|S_IRUGO),
- E(PROC_TID_MAPS, "maps", S_IFREG|S_IRUGO),
-#ifdef CONFIG_NUMA
- E(PROC_TID_NUMA_MAPS, "numa_maps", S_IFREG|S_IRUGO),
-#endif
- E(PROC_TID_MEM, "mem", S_IFREG|S_IRUSR|S_IWUSR),
-#ifdef CONFIG_SECCOMP
- E(PROC_TID_SECCOMP, "seccomp", S_IFREG|S_IRUSR|S_IWUSR),
-#endif
- E(PROC_TID_CWD, "cwd", S_IFLNK|S_IRWXUGO),
- E(PROC_TID_ROOT, "root", S_IFLNK|S_IRWXUGO),
- E(PROC_TID_EXE, "exe", S_IFLNK|S_IRWXUGO),
- E(PROC_TID_MOUNTS, "mounts", S_IFREG|S_IRUGO),
-#ifdef CONFIG_MMU
- E(PROC_TID_SMAPS, "smaps", S_IFREG|S_IRUGO),
-#endif
-#ifdef CONFIG_SECURITY
- E(PROC_TID_ATTR, "attr", S_IFDIR|S_IRUGO|S_IXUGO),
-#endif
-#ifdef CONFIG_KALLSYMS
- E(PROC_TID_WCHAN, "wchan", S_IFREG|S_IRUGO),
-#endif
-#ifdef CONFIG_SCHEDSTATS
- E(PROC_TID_SCHEDSTAT, "schedstat",S_IFREG|S_IRUGO),
-#endif
-#ifdef CONFIG_CPUSETS
- E(PROC_TID_CPUSET, "cpuset", S_IFREG|S_IRUGO),
-#endif
- E(PROC_TID_OOM_SCORE, "oom_score",S_IFREG|S_IRUGO),
- E(PROC_TID_OOM_ADJUST, "oom_adj", S_IFREG|S_IRUGO|S_IWUSR),
-#ifdef CONFIG_AUDITSYSCALL
- E(PROC_TID_LOGINUID, "loginuid", S_IFREG|S_IWUSR|S_IRUGO),
-#endif
- {0,0,NULL,0}
-};
-
-#ifdef CONFIG_SECURITY
-static struct pid_entry tgid_attr_stuff[] = {
- E(PROC_TGID_ATTR_CURRENT, "current", S_IFREG|S_IRUGO|S_IWUGO),
- E(PROC_TGID_ATTR_PREV, "prev", S_IFREG|S_IRUGO),
- E(PROC_TGID_ATTR_EXEC, "exec", S_IFREG|S_IRUGO|S_IWUGO),
- E(PROC_TGID_ATTR_FSCREATE, "fscreate", S_IFREG|S_IRUGO|S_IWUGO),
- E(PROC_TGID_ATTR_KEYCREATE, "keycreate", S_IFREG|S_IRUGO|S_IWUGO),
- E(PROC_TGID_ATTR_SOCKCREATE, "sockcreate", S_IFREG|S_IRUGO|S_IWUGO),
- {0,0,NULL,0}
-};
-static struct pid_entry tid_attr_stuff[] = {
- E(PROC_TID_ATTR_CURRENT, "current", S_IFREG|S_IRUGO|S_IWUGO),
- E(PROC_TID_ATTR_PREV, "prev", S_IFREG|S_IRUGO),
- E(PROC_TID_ATTR_EXEC, "exec", S_IFREG|S_IRUGO|S_IWUGO),
- E(PROC_TID_ATTR_FSCREATE, "fscreate", S_IFREG|S_IRUGO|S_IWUGO),
- E(PROC_TID_ATTR_KEYCREATE, "keycreate", S_IFREG|S_IRUGO|S_IWUGO),
- E(PROC_TID_ATTR_SOCKCREATE, "sockcreate", S_IFREG|S_IRUGO|S_IWUGO),
- {0,0,NULL,0}
-};
-#endif
-
-#undef E
-
-static int proc_fd_link(struct inode *inode, struct dentry **dentry, struct vfsmount **mnt)
-{
- struct task_struct *task = get_proc_task(inode);
- struct files_struct *files = NULL;
- struct file *file;
- int fd = proc_fd(inode);
-
- if (task) {
- files = get_files_struct(task);
- put_task_struct(task);
- }
- if (files) {
- /*
- * We are not taking a ref to the file structure, so we must
- * hold ->file_lock.
- */
- spin_lock(&files->file_lock);
- file = fcheck_files(files, fd);
- if (file) {
- *mnt = mntget(file->f_vfsmnt);
- *dentry = dget(file->f_dentry);
- spin_unlock(&files->file_lock);
- put_files_struct(files);
- return 0;
- }
- spin_unlock(&files->file_lock);
- put_files_struct(files);
- }
- return -ENOENT;
+#define NOD(NAME, MODE, IOP, FOP, OP) { \
+ .len = sizeof(NAME) - 1, \
+ .name = (NAME), \
+ .mode = MODE, \
+ .iop = IOP, \
+ .fop = FOP, \
+ .op = OP, \
}
+#define DIR(NAME, MODE, OTYPE) \
+ NOD(NAME, (S_IFDIR|(MODE)), \
+ &proc_##OTYPE##_inode_operations, &proc_##OTYPE##_operations, \
+ {} )
+#define LNK(NAME, OTYPE) \
+ NOD(NAME, (S_IFLNK|S_IRWXUGO), \
+ &proc_pid_link_inode_operations, NULL, \
+ { .proc_get_link = &proc_##OTYPE##_link } )
+#define REG(NAME, MODE, OTYPE) \
+ NOD(NAME, (S_IFREG|(MODE)), NULL, \
+ &proc_##OTYPE##_operations, {})
+#define INF(NAME, MODE, OTYPE) \
+ NOD(NAME, (S_IFREG|(MODE)), \
+ NULL, &proc_info_file_operations, \
+ { .proc_read = &proc_##OTYPE } )
+
static struct fs_struct *get_fs_struct(struct task_struct *task)
{
struct fs_struct *fs;
@@ -587,7 +370,7 @@ static int mounts_open(struct inode *inode, struct file *file)
if (task) {
task_lock(task);
- namespace = task->namespace;
+ namespace = task->nsproxy->namespace;
if (namespace)
get_namespace(namespace);
task_unlock(task);
@@ -658,7 +441,7 @@ static int mountstats_open(struct inode *inode, struct file *file)
if (task) {
task_lock(task);
- namespace = task->namespace;
+ namespace = task->nsproxy->namespace;
if (namespace)
get_namespace(namespace);
task_unlock(task);
@@ -1137,143 +920,6 @@ static struct inode_operations proc_pid_link_inode_operations = {
.setattr = proc_setattr,
};
-static int proc_readfd(struct file * filp, void * dirent, filldir_t filldir)
-{
- struct dentry *dentry = filp->f_dentry;
- struct inode *inode = dentry->d_inode;
- struct task_struct *p = get_proc_task(inode);
- unsigned int fd, tid, ino;
- int retval;
- char buf[PROC_NUMBUF];
- struct files_struct * files;
- struct fdtable *fdt;
-
- retval = -ENOENT;
- if (!p)
- goto out_no_task;
- retval = 0;
- tid = p->pid;
-
- fd = filp->f_pos;
- switch (fd) {
- case 0:
- if (filldir(dirent, ".", 1, 0, inode->i_ino, DT_DIR) < 0)
- goto out;
- filp->f_pos++;
- case 1:
- ino = parent_ino(dentry);
- if (filldir(dirent, "..", 2, 1, ino, DT_DIR) < 0)
- goto out;
- filp->f_pos++;
- default:
- files = get_files_struct(p);
- if (!files)
- goto out;
- rcu_read_lock();
- fdt = files_fdtable(files);
- for (fd = filp->f_pos-2;
- fd < fdt->max_fds;
- fd++, filp->f_pos++) {
- unsigned int i,j;
-
- if (!fcheck_files(files, fd))
- continue;
- rcu_read_unlock();
-
- j = PROC_NUMBUF;
- i = fd;
- do {
- j--;
- buf[j] = '0' + (i % 10);
- i /= 10;
- } while (i);
-
- ino = fake_ino(tid, PROC_TID_FD_DIR + fd);
- if (filldir(dirent, buf+j, PROC_NUMBUF-j, fd+2, ino, DT_LNK) < 0) {
- rcu_read_lock();
- break;
- }
- rcu_read_lock();
- }
- rcu_read_unlock();
- put_files_struct(files);
- }
-out:
- put_task_struct(p);
-out_no_task:
- return retval;
-}
-
-static int proc_pident_readdir(struct file *filp,
- void *dirent, filldir_t filldir,
- struct pid_entry *ents, unsigned int nents)
-{
- int i;
- int pid;
- struct dentry *dentry = filp->f_dentry;
- struct inode *inode = dentry->d_inode;
- struct task_struct *task = get_proc_task(inode);
- struct pid_entry *p;
- ino_t ino;
- int ret;
-
- ret = -ENOENT;
- if (!task)
- goto out;
-
- ret = 0;
- pid = task->pid;
- put_task_struct(task);
- i = filp->f_pos;
- switch (i) {
- case 0:
- ino = inode->i_ino;
- if (filldir(dirent, ".", 1, i, ino, DT_DIR) < 0)
- goto out;
- i++;
- filp->f_pos++;
- /* fall through */
- case 1:
- ino = parent_ino(dentry);
- if (filldir(dirent, "..", 2, i, ino, DT_DIR) < 0)
- goto out;
- i++;
- filp->f_pos++;
- /* fall through */
- default:
- i -= 2;
- if (i >= nents) {
- ret = 1;
- goto out;
- }
- p = ents + i;
- while (p->name) {
- if (filldir(dirent, p->name, p->len, filp->f_pos,
- fake_ino(pid, p->type), p->mode >> 12) < 0)
- goto out;
- filp->f_pos++;
- p++;
- }
- }
-
- ret = 1;
-out:
- return ret;
-}
-
-static int proc_tgid_base_readdir(struct file * filp,
- void * dirent, filldir_t filldir)
-{
- return proc_pident_readdir(filp,dirent,filldir,
- tgid_base_stuff,ARRAY_SIZE(tgid_base_stuff));
-}
-
-static int proc_tid_base_readdir(struct file * filp,
- void * dirent, filldir_t filldir)
-{
- return proc_pident_readdir(filp,dirent,filldir,
- tid_base_stuff,ARRAY_SIZE(tid_base_stuff));
-}
/* building an inode */
@@ -1293,13 +939,13 @@ static int task_dumpable(struct task_struct *task)
}
-static struct inode *proc_pid_make_inode(struct super_block * sb, struct task_struct *task, int ino)
+static struct inode *proc_pid_make_inode(struct super_block * sb, struct task_struct *task)
{
struct inode * inode;
struct proc_inode *ei;
/* We need a new inode */
-
+
inode = new_inode(sb);
if (!inode)
goto out;
@@ -1307,13 +953,12 @@ static struct inode *proc_pid_make_inode(struct super_block * sb, struct task_st
/* Common stuff */
ei = PROC_I(inode);
inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
- inode->i_ino = fake_ino(task->pid, ino);
inode->i_op = &proc_def_inode_operations;
/*
* grab the reference to task.
*/
- ei->pid = get_pid(task->pids[PIDTYPE_PID].pid);
+ ei->pid = get_task_pid(task, PIDTYPE_PID);
if (!ei->pid)
goto out_unlock;
@@ -1333,6 +978,27 @@ out_unlock:
return NULL;
}
+static int pid_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
+{
+ struct inode *inode = dentry->d_inode;
+ struct task_struct *task;
+ generic_fillattr(inode, stat);
+
+ rcu_read_lock();
+ stat->uid = 0;
+ stat->gid = 0;
+ task = pid_task(proc_pid(inode), PIDTYPE_PID);
+ if (task) {
+ if ((inode->i_mode == (S_IFDIR|S_IRUGO|S_IXUGO)) ||
+ task_dumpable(task)) {
+ stat->uid = task->euid;
+ stat->gid = task->egid;
+ }
+ }
+ rcu_read_unlock();
+ return 0;
+}
+
/* dentry stuff */
/*
@@ -1372,25 +1038,130 @@ static int pid_revalidate(struct dentry *dentry, struct nameidata *nd)
return 0;
}
-static int pid_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
+static int pid_delete_dentry(struct dentry * dentry)
{
- struct inode *inode = dentry->d_inode;
- struct task_struct *task;
- generic_fillattr(inode, stat);
+ /* Is the task we represent dead?
+ * If so, then don't put the dentry on the lru list,
+ * kill it immediately.
+ */
+ return !proc_pid(dentry->d_inode)->tasks[PIDTYPE_PID].first;
+}
+
+static struct dentry_operations pid_dentry_operations =
+{
+ .d_revalidate = pid_revalidate,
+ .d_delete = pid_delete_dentry,
+};
+
+/* Lookups */
+
+typedef struct dentry *instantiate_t(struct inode *, struct dentry *, struct task_struct *, void *);
+
+/*
+ * Fill a directory entry.
+ *
+ * If possible create the dcache entry and derive our inode number and
+ * file type from dcache entry.
+ *
+ * Since all of the proc inode numbers are dynamically generated, the inode
+ * numbers do not exist until the inode is cache. This means creating the
+ * the dcache entry in readdir is necessary to keep the inode numbers
+ * reported by readdir in sync with the inode numbers reported
+ * by stat.
+ */
+static int proc_fill_cache(struct file *filp, void *dirent, filldir_t filldir,
+ char *name, int len,
+ instantiate_t instantiate, struct task_struct *task, void *ptr)
+{
+ struct dentry *child, *dir = filp->f_dentry;
+ struct inode *inode;
+ struct qstr qname;
+ ino_t ino = 0;
+ unsigned type = DT_UNKNOWN;
+
+ qname.name = name;
+ qname.len = len;
+ qname.hash = full_name_hash(name, len);
+
+ child = d_lookup(dir, &qname);
+ if (!child) {
+ struct dentry *new;
+ new = d_alloc(dir, &qname);
+ if (new) {
+ child = instantiate(dir->d_inode, new, task, ptr);
+ if (child)
+ dput(new);
+ else
+ child = new;
+ }
+ }
+ if (!child || IS_ERR(child) || !child->d_inode)
+ goto end_instantiate;
+ inode = child->d_inode;
+ if (inode) {
+ ino = inode->i_ino;
+ type = inode->i_mode >> 12;
+ }
+ dput(child);
+end_instantiate:
+ if (!ino)
+ ino = find_inode_number(dir, &qname);
+ if (!ino)
+ ino = 1;
+ return filldir(dirent, name, len, filp->f_pos, ino, type);
+}
+
+static unsigned name_to_int(struct dentry *dentry)
+{
+ const char *name = dentry->d_name.name;
+ int len = dentry->d_name.len;
+ unsigned n = 0;
+
+ if (len > 1 && *name == '0')
+ goto out;
+ while (len-- > 0) {
+ unsigned c = *name++ - '0';
+ if (c > 9)
+ goto out;
+ if (n >= (~0U-9)/10)
+ goto out;
+ n *= 10;
+ n += c;
+ }
+ return n;
+out:
+ return ~0U;
+}
+
+static int proc_fd_link(struct inode *inode, struct dentry **dentry, struct vfsmount **mnt)
+{
+ struct task_struct *task = get_proc_task(inode);
+ struct files_struct *files = NULL;
+ struct file *file;
+ int fd = proc_fd(inode);
- rcu_read_lock();
- stat->uid = 0;
- stat->gid = 0;
- task = pid_task(proc_pid(inode), PIDTYPE_PID);
if (task) {
- if ((inode->i_mode == (S_IFDIR|S_IRUGO|S_IXUGO)) ||
- task_dumpable(task)) {
- stat->uid = task->euid;
- stat->gid = task->egid;
+ files = get_files_struct(task);
+ put_task_struct(task);
+ }
+ if (files) {
+ /*
+ * We are not taking a ref to the file structure, so we must
+ * hold ->file_lock.
+ */
+ spin_lock(&files->file_lock);
+ file = fcheck_files(files, fd);
+ if (file) {
+ *mnt = mntget(file->f_vfsmnt);
+ *dentry = dget(file->f_dentry);
+ spin_unlock(&files->file_lock);
+ put_files_struct(files);
+ return 0;
}
+ spin_unlock(&files->file_lock);
+ put_files_struct(files);
}
- rcu_read_unlock();
- return 0;
+ return -ENOENT;
}
static int tid_fd_revalidate(struct dentry *dentry, struct nameidata *nd)
@@ -1428,75 +1199,30 @@ static int tid_fd_revalidate(struct dentry *dentry, struct nameidata *nd)
return 0;
}
-static int pid_delete_dentry(struct dentry * dentry)
-{
- /* Is the task we represent dead?
- * If so, then don't put the dentry on the lru list,
- * kill it immediately.
- */
- return !proc_pid(dentry->d_inode)->tasks[PIDTYPE_PID].first;
-}
-
static struct dentry_operations tid_fd_dentry_operations =
{
.d_revalidate = tid_fd_revalidate,
.d_delete = pid_delete_dentry,
};
-static struct dentry_operations pid_dentry_operations =
-{
- .d_revalidate = pid_revalidate,
- .d_delete = pid_delete_dentry,
-};
-
-/* Lookups */
-
-static unsigned name_to_int(struct dentry *dentry)
-{
- const char *name = dentry->d_name.name;
- int len = dentry->d_name.len;
- unsigned n = 0;
-
- if (len > 1 && *name == '0')
- goto out;
- while (len-- > 0) {
- unsigned c = *name++ - '0';
- if (c > 9)
- goto out;
- if (n >= (~0U-9)/10)
- goto out;
- n *= 10;
- n += c;
- }
- return n;
-out:
- return ~0U;
-}
-
-/* SMP-safe */
-static struct dentry *proc_lookupfd(struct inode * dir, struct dentry * dentry, struct nameidata *nd)
+static struct dentry *proc_fd_instantiate(struct inode *dir,
+ struct dentry *dentry, struct task_struct *task, void *ptr)
{
- struct task_struct *task = get_proc_task(dir);
- unsigned fd = name_to_int(dentry);
- struct dentry *result = ERR_PTR(-ENOENT);
- struct file * file;
- struct files_struct * files;
- struct inode *inode;
- struct proc_inode *ei;
-
- if (!task)
- goto out_no_task;
- if (fd == ~0U)
- goto out;
+ unsigned fd = *(unsigned *)ptr;
+ struct file *file;
+ struct files_struct *files;
+ struct inode *inode;
+ struct proc_inode *ei;
+ struct dentry *error = ERR_PTR(-ENOENT);
- inode = proc_pid_make_inode(dir->i_sb, task, PROC_TID_FD_DIR+fd);
+ inode = proc_pid_make_inode(dir->i_sb, task);
if (!inode)
goto out;
ei = PROC_I(inode);
ei->fd = fd;
files = get_files_struct(task);
if (!files)
- goto out_unlock;
+ goto out_iput;
inode->i_mode = S_IFLNK;
/*
@@ -1506,13 +1232,14 @@ static struct dentry *proc_lookupfd(struct inode * dir, struct dentry * dentry,
spin_lock(&files->file_lock);
file = fcheck_files(files, fd);
if (!file)
- goto out_unlock2;
+ goto out_unlock;
if (file->f_mode & 1)
inode->i_mode |= S_IRUSR | S_IXUSR;
if (file->f_mode & 2)
inode->i_mode |= S_IWUSR | S_IXUSR;
spin_unlock(&files->file_lock);
put_files_struct(files);
+
inode->i_op = &proc_pid_link_inode_operations;
inode->i_size = 64;
ei->op.proc_get_link = proc_fd_link;
@@ -1520,34 +1247,106 @@ static struct dentry *proc_lookupfd(struct inode * dir, struct dentry * dentry,
d_add(dentry, inode);
/* Close the race of the process dying before we return the dentry */
if (tid_fd_revalidate(dentry, NULL))
- result = NULL;
-out:
- put_task_struct(task);
-out_no_task:
- return result;
+ error = NULL;
-out_unlock2:
+ out:
+ return error;
+out_unlock:
spin_unlock(&files->file_lock);
put_files_struct(files);
-out_unlock:
+out_iput:
iput(inode);
goto out;
}
-static int proc_task_readdir(struct file * filp, void * dirent, filldir_t filldir);
-static struct dentry *proc_task_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *nd);
-static int proc_task_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat);
+static struct dentry *proc_lookupfd(struct inode * dir, struct dentry * dentry, struct nameidata *nd)
+{
+ struct task_struct *task = get_proc_task(dir);
+ unsigned fd = name_to_int(dentry);
+ struct dentry *result = ERR_PTR(-ENOENT);
+
+ if (!task)
+ goto out_no_task;
+ if (fd == ~0U)
+ goto out;
+
+ result = proc_fd_instantiate(dir, dentry, task, &fd);
+out:
+ put_task_struct(task);
+out_no_task:
+ return result;
+}
+
+static int proc_fd_fill_cache(struct file *filp, void *dirent, filldir_t filldir,
+ struct task_struct *task, int fd)
+{
+ char name[PROC_NUMBUF];
+ int len = snprintf(name, sizeof(name), "%d", fd);
+ return proc_fill_cache(filp, dirent, filldir, name, len,
+ proc_fd_instantiate, task, &fd);
+}
+
+static int proc_readfd(struct file * filp, void * dirent, filldir_t filldir)
+{
+ struct dentry *dentry = filp->f_dentry;
+ struct inode *inode = dentry->d_inode;
+ struct task_struct *p = get_proc_task(inode);
+ unsigned int fd, tid, ino;
+ int retval;
+ struct files_struct * files;
+ struct fdtable *fdt;
+
+ retval = -ENOENT;
+ if (!p)
+ goto out_no_task;
+ retval = 0;
+ tid = p->pid;
+
+ fd = filp->f_pos;
+ switch (fd) {
+ case 0:
+ if (filldir(dirent, ".", 1, 0, inode->i_ino, DT_DIR) < 0)
+ goto out;
+ filp->f_pos++;
+ case 1:
+ ino = parent_ino(dentry);
+ if (filldir(dirent, "..", 2, 1, ino, DT_DIR) < 0)
+ goto out;
+ filp->f_pos++;
+ default:
+ files = get_files_struct(p);
+ if (!files)
+ goto out;
+ rcu_read_lock();
+ fdt = files_fdtable(files);
+ for (fd = filp->f_pos-2;
+ fd < fdt->max_fds;
+ fd++, filp->f_pos++) {
+
+ if (!fcheck_files(files, fd))
+ continue;
+ rcu_read_unlock();
+
+ if (proc_fd_fill_cache(filp, dirent, filldir, p, fd) < 0) {
+ rcu_read_lock();
+ break;
+ }
+ rcu_read_lock();
+ }
+ rcu_read_unlock();
+ put_files_struct(files);
+ }
+out:
+ put_task_struct(p);
+out_no_task:
+ return retval;
+}
static struct file_operations proc_fd_operations = {
.read = generic_read_dir,
.readdir = proc_readfd,
};
-static struct file_operations proc_task_operations = {
- .read = generic_read_dir,
- .readdir = proc_task_readdir,
-};
-
/*
* proc directories can do almost nothing..
*/
@@ -1556,11 +1355,137 @@ static struct inode_operations proc_fd_inode_operations = {
.setattr = proc_setattr,
};
-static struct inode_operations proc_task_inode_operations = {
- .lookup = proc_task_lookup,
- .getattr = proc_task_getattr,
- .setattr = proc_setattr,
-};
+static struct dentry *proc_pident_instantiate(struct inode *dir,
+ struct dentry *dentry, struct task_struct *task, void *ptr)
+{
+ struct pid_entry *p = ptr;
+ struct inode *inode;
+ struct proc_inode *ei;
+ struct dentry *error = ERR_PTR(-EINVAL);
+
+ inode = proc_pid_make_inode(dir->i_sb, task);
+ if (!inode)
+ goto out;
+
+ ei = PROC_I(inode);
+ inode->i_mode = p->mode;
+ if (S_ISDIR(inode->i_mode))
+ inode->i_nlink = 2; /* Use getattr to fix if necessary */
+ if (p->iop)
+ inode->i_op = p->iop;
+ if (p->fop)
+ inode->i_fop = p->fop;
+ ei->op = p->op;
+ dentry->d_op = &pid_dentry_operations;
+ d_add(dentry, inode);
+ /* Close the race of the process dying before we return the dentry */
+ if (pid_revalidate(dentry, NULL))
+ error = NULL;
+out:
+ return error;
+}
+
+static struct dentry *proc_pident_lookup(struct inode *dir,
+ struct dentry *dentry,
+ struct pid_entry *ents,
+ unsigned int nents)
+{
+ struct inode *inode;
+ struct dentry *error;
+ struct task_struct *task = get_proc_task(dir);
+ struct pid_entry *p, *last;
+
+ error = ERR_PTR(-ENOENT);
+ inode = NULL;
+
+ if (!task)
+ goto out_no_task;
+
+ /*
+ * Yes, it does not scale. And it should not. Don't add
+ * new entries into /proc/<tgid>/ without very good reasons.
+ */
+ last = &ents[nents - 1];
+ for (p = ents; p <= last; p++) {
+ if (p->len != dentry->d_name.len)
+ continue;
+ if (!memcmp(dentry->d_name.name, p->name, p->len))
+ break;
+ }
+ if (p > last)
+ goto out;
+
+ error = proc_pident_instantiate(dir, dentry, task, p);
+out:
+ put_task_struct(task);
+out_no_task:
+ return error;
+}
+
+static int proc_pident_fill_cache(struct file *filp, void *dirent, filldir_t filldir,
+ struct task_struct *task, struct pid_entry *p)
+{
+ return proc_fill_cache(filp, dirent, filldir, p->name, p->len,
+ proc_pident_instantiate, task, p);
+}
+
+static int proc_pident_readdir(struct file *filp,
+ void *dirent, filldir_t filldir,
+ struct pid_entry *ents, unsigned int nents)
+{
+ int i;
+ int pid;
+ struct dentry *dentry = filp->f_dentry;
+ struct inode *inode = dentry->d_inode;
+ struct task_struct *task = get_proc_task(inode);
+ struct pid_entry *p, *last;
+ ino_t ino;
+ int ret;
+
+ ret = -ENOENT;
+ if (!task)
+ goto out_no_task;
+
+ ret = 0;
+ pid = task->pid;
+ i = filp->f_pos;
+ switch (i) {
+ case 0:
+ ino = inode->i_ino;
+ if (filldir(dirent, ".", 1, i, ino, DT_DIR) < 0)
+ goto out;
+ i++;
+ filp->f_pos++;
+ /* fall through */
+ case 1:
+ ino = parent_ino(dentry);
+ if (filldir(dirent, "..", 2, i, ino, DT_DIR) < 0)
+ goto out;
+ i++;
+ filp->f_pos++;
+ /* fall through */
+ default:
+ i -= 2;
+ if (i >= nents) {
+ ret = 1;
+ goto out;
+ }
+ p = ents + i;
+ last = &ents[nents - 1];
+ while (p <= last) {
+ if (proc_pident_fill_cache(filp, dirent, filldir, task, p) < 0)
+ goto out;
+ filp->f_pos++;
+ p++;
+ }
+ }
+
+ ret = 1;
+out:
+ put_task_struct(task);
+out_no_task:
+ return ret;
+}
#ifdef CONFIG_SECURITY
static ssize_t proc_pid_attr_read(struct file * file, char __user * buf,
@@ -1581,8 +1506,8 @@ static ssize_t proc_pid_attr_read(struct file * file, char __user * buf,
if (!(page = __get_free_page(GFP_KERNEL)))
goto out;
- length = security_getprocattr(task,
- (char*)file->f_dentry->d_name.name,
+ length = security_getprocattr(task,
+ (char*)file->f_dentry->d_name.name,
(void*)page, count);
if (length >= 0)
length = simple_read_from_buffer(buf, count, ppos, (char *)page, length);
@@ -1595,17 +1520,17 @@ out_no_task:
static ssize_t proc_pid_attr_write(struct file * file, const char __user * buf,
size_t count, loff_t *ppos)
-{
+{
struct inode * inode = file->f_dentry->d_inode;
- char *page;
- ssize_t length;
+ char *page;
+ ssize_t length;
struct task_struct *task = get_proc_task(inode);
length = -ESRCH;
if (!task)
goto out_no_task;
- if (count > PAGE_SIZE)
- count = PAGE_SIZE;
+ if (count > PAGE_SIZE)
+ count = PAGE_SIZE;
/* No partial writes. */
length = -EINVAL;
@@ -1613,16 +1538,16 @@ static ssize_t proc_pid_attr_write(struct file * file, const char __user * buf,
goto out;
length = -ENOMEM;
- page = (char*)__get_free_page(GFP_USER);
- if (!page)
+ page = (char*)__get_free_page(GFP_USER);
+ if (!page)
goto out;
- length = -EFAULT;
- if (copy_from_user(page, buf, count))
+ length = -EFAULT;
+ if (copy_from_user(page, buf, count))
goto out_free;
- length = security_setprocattr(task,
- (char*)file->f_dentry->d_name.name,
+ length = security_setprocattr(task,
+ (char*)file->f_dentry->d_name.name,
(void*)page, count);
out_free:
free_page((unsigned long) page);
@@ -1630,330 +1555,263 @@ out:
put_task_struct(task);
out_no_task:
return length;
-}
+}
static struct file_operations proc_pid_attr_operations = {
.read = proc_pid_attr_read,
.write = proc_pid_attr_write,
};
-static struct file_operations proc_tid_attr_operations;
-static struct inode_operations proc_tid_attr_inode_operations;
-static struct file_operations proc_tgid_attr_operations;
-static struct inode_operations proc_tgid_attr_inode_operations;
+static struct pid_entry attr_dir_stuff[] = {
+ REG("current", S_IRUGO|S_IWUGO, pid_attr),
+ REG("prev", S_IRUGO, pid_attr),
+ REG("exec", S_IRUGO|S_IWUGO, pid_attr),
+ REG("fscreate", S_IRUGO|S_IWUGO, pid_attr),
+ REG("keycreate", S_IRUGO|S_IWUGO, pid_attr),
+ REG("sockcreate", S_IRUGO|S_IWUGO, pid_attr),
+};
+
+static int proc_attr_dir_readdir(struct file * filp,
+ void * dirent, filldir_t filldir)
+{
+ return proc_pident_readdir(filp,dirent,filldir,
+ attr_dir_stuff,ARRAY_SIZE(attr_dir_stuff));
+}
+
+static struct file_operations proc_attr_dir_operations = {
+ .read = generic_read_dir,
+ .readdir = proc_attr_dir_readdir,
+};
+
+static struct dentry *proc_attr_dir_lookup(struct inode *dir,
+ struct dentry *dentry, struct nameidata *nd)
+{
+ return proc_pident_lookup(dir, dentry,
+ attr_dir_stuff, ARRAY_SIZE(attr_dir_stuff));
+}
+
+static struct inode_operations proc_attr_dir_inode_operations = {
+ .lookup = proc_attr_dir_lookup,
+ .getattr = pid_getattr,
+ .setattr = proc_setattr,
+};
+
#endif
-/* SMP-safe */
-static struct dentry *proc_pident_lookup(struct inode *dir,
- struct dentry *dentry,
- struct pid_entry *ents)
+/*
+ * /proc/self:
+ */
+static int proc_self_readlink(struct dentry *dentry, char __user *buffer,
+ int buflen)
+{
+ char tmp[PROC_NUMBUF];
+ sprintf(tmp, "%d", current->tgid);
+ return vfs_readlink(dentry,buffer,buflen,tmp);
+}
+
+static void *proc_self_follow_link(struct dentry *dentry, struct nameidata *nd)
{
+ char tmp[PROC_NUMBUF];
+ sprintf(tmp, "%d", current->tgid);
+ return ERR_PTR(vfs_follow_link(nd,tmp));
+}
+
+static struct inode_operations proc_self_inode_operations = {
+ .readlink = proc_self_readlink,
+ .follow_link = proc_self_follow_link,
+};
+
+/*
+ * proc base
+ *
+ * These are the directory entries in the root directory of /proc
+ * that properly belong to the /proc filesystem, as they describe
+ * describe something that is process related.
+ */
+static struct pid_entry proc_base_stuff[] = {
+ NOD("self", S_IFLNK|S_IRWXUGO,
+ &proc_self_inode_operations, NULL, {}),
+};
+
+/*
+ * Exceptional case: normally we are not allowed to unhash a busy
+ * directory. In this case, however, we can do it - no aliasing problems
+ * due to the way we treat inodes.
+ */
+static int proc_base_revalidate(struct dentry *dentry, struct nameidata *nd)
+{
+ struct inode *inode = dentry->d_inode;
+ struct task_struct *task = get_proc_task(inode);
+ if (task) {
+ put_task_struct(task);
+ return 1;
+ }
+ d_drop(dentry);
+ return 0;
+}
+
+static struct dentry_operations proc_base_dentry_operations =
+{
+ .d_revalidate = proc_base_revalidate,
+ .d_delete = pid_delete_dentry,
+};
+
+static struct dentry *proc_base_instantiate(struct inode *dir,
+ struct dentry *dentry, struct task_struct *task, void *ptr)
+{
+ struct pid_entry *p = ptr;
struct inode *inode;
+ struct proc_inode *ei;
+ struct dentry *error = ERR_PTR(-EINVAL);
+
+ /* Allocate the inode */
+ error = ERR_PTR(-ENOMEM);
+ inode = new_inode(dir->i_sb);
+ if (!inode)
+ goto out;
+
+ /* Initialize the inode */
+ ei = PROC_I(inode);
+ inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
+
+ /*
+ * grab the reference to the task.
+ */
+ ei->pid = get_task_pid(task, PIDTYPE_PID);
+ if (!ei->pid)
+ goto out_iput;
+
+ inode->i_uid = 0;
+ inode->i_gid = 0;
+ inode->i_mode = p->mode;
+ if (S_ISDIR(inode->i_mode))
+ inode->i_nlink = 2;
+ if (S_ISLNK(inode->i_mode))
+ inode->i_size = 64;
+ if (p->iop)
+ inode->i_op = p->iop;
+ if (p->fop)
+ inode->i_fop = p->fop;
+ ei->op = p->op;
+ dentry->d_op = &proc_base_dentry_operations;
+ d_add(dentry, inode);
+ error = NULL;
+out:
+ return error;
+out_iput:
+ iput(inode);
+ goto out;
+}
+
+static struct dentry *proc_base_lookup(struct inode *dir, struct dentry *dentry)
+{
struct dentry *error;
struct task_struct *task = get_proc_task(dir);
- struct pid_entry *p;
- struct proc_inode *ei;
+ struct pid_entry *p, *last;
error = ERR_PTR(-ENOENT);
- inode = NULL;
if (!task)
goto out_no_task;
- for (p = ents; p->name; p++) {
+ /* Lookup the directory entry */
+ last = &proc_base_stuff[ARRAY_SIZE(proc_base_stuff) - 1];
+ for (p = proc_base_stuff; p <= last; p++) {
if (p->len != dentry->d_name.len)
continue;
if (!memcmp(dentry->d_name.name, p->name, p->len))
break;
}
- if (!p->name)
+ if (p > last)
goto out;
- error = ERR_PTR(-EINVAL);
- inode = proc_pid_make_inode(dir->i_sb, task, p->type);
- if (!inode)
- goto out;
+ error = proc_base_instantiate(dir, dentry, task, p);
- ei = PROC_I(inode);
- inode->i_mode = p->mode;
- /*
- * Yes, it does not scale. And it should not. Don't add
- * new entries into /proc/<tgid>/ without very good reasons.
- */
- switch(p->type) {
- case PROC_TGID_TASK:
- inode->i_nlink = 2;
- inode->i_op = &proc_task_inode_operations;
- inode->i_fop = &proc_task_operations;
- break;
- case PROC_TID_FD:
- case PROC_TGID_FD:
- inode->i_nlink = 2;
- inode->i_op = &proc_fd_inode_operations;
- inode->i_fop = &proc_fd_operations;
- break;
- case PROC_TID_EXE:
- case PROC_TGID_EXE:
- inode->i_op = &proc_pid_link_inode_operations;
- ei->op.proc_get_link = proc_exe_link;
- break;
- case PROC_TID_CWD:
- case PROC_TGID_CWD:
- inode->i_op = &proc_pid_link_inode_operations;
- ei->op.proc_get_link = proc_cwd_link;
- break;
- case PROC_TID_ROOT:
- case PROC_TGID_ROOT:
- inode->i_op = &proc_pid_link_inode_operations;
- ei->op.proc_get_link = proc_root_link;
- break;
- case PROC_TID_ENVIRON:
- case PROC_TGID_ENVIRON:
- inode->i_fop = &proc_info_file_operations;
- ei->op.proc_read = proc_pid_environ;
- break;
- case PROC_TID_AUXV:
- case PROC_TGID_AUXV:
- inode->i_fop = &proc_info_file_operations;
- ei->op.proc_read = proc_pid_auxv;
- break;
- case PROC_TID_STATUS:
- case PROC_TGID_STATUS:
- inode->i_fop = &proc_info_file_operations;
- ei->op.proc_read = proc_pid_status;
- break;
- case PROC_TID_STAT:
- inode->i_fop = &proc_info_file_operations;
- ei->op.proc_read = proc_tid_stat;
- break;
- case PROC_TGID_STAT:
- inode->i_fop = &proc_info_file_operations;
- ei->op.proc_read = proc_tgid_stat;
- break;
- case PROC_TID_CMDLINE:
- case PROC_TGID_CMDLINE:
- inode->i_fop = &proc_info_file_operations;
- ei->op.proc_read = proc_pid_cmdline;
- break;
- case PROC_TID_STATM:
- case PROC_TGID_STATM:
- inode->i_fop = &proc_info_file_operations;
- ei->op.proc_read = proc_pid_statm;
- break;
- case PROC_TID_MAPS:
- case PROC_TGID_MAPS:
- inode->i_fop = &proc_maps_operations;
- break;
+out:
+ put_task_struct(task);
+out_no_task:
+ return error;
+}
+
+static int proc_base_fill_cache(struct file *filp, void *dirent, filldir_t filldir,
+ struct task_struct *task, struct pid_entry *p)
+{
+ return proc_fill_cache(filp, dirent, filldir, p->name, p->len,
+ proc_base_instantiate, task, p);
+}
+
+/*
+ * Thread groups
+ */
+static struct file_operations proc_task_operations;
+static struct inode_operations proc_task_inode_operations;
+
+static struct pid_entry tgid_base_stuff[] = {
+ DIR("task", S_IRUGO|S_IXUGO, task),
+ DIR("fd", S_IRUSR|S_IXUSR, fd),
+ INF("environ", S_IRUSR, pid_environ),
+ INF("auxv", S_IRUSR, pid_auxv),
+ INF("status", S_IRUGO, pid_status),
+ INF("cmdline", S_IRUGO, pid_cmdline),
+ INF("stat", S_IRUGO, tgid_stat),
+ INF("statm", S_IRUGO, pid_statm),
+ REG("maps", S_IRUGO, maps),
#ifdef CONFIG_NUMA
- case PROC_TID_NUMA_MAPS:
- case PROC_TGID_NUMA_MAPS:
- inode->i_fop = &proc_numa_maps_operations;
- break;
+ REG("numa_maps", S_IRUGO, numa_maps),
#endif
- case PROC_TID_MEM:
- case PROC_TGID_MEM:
- inode->i_fop = &proc_mem_operations;
- break;
+ REG("mem", S_IRUSR|S_IWUSR, mem),
#ifdef CONFIG_SECCOMP
- case PROC_TID_SECCOMP:
- case PROC_TGID_SECCOMP:
- inode->i_fop = &proc_seccomp_operations;
- break;
-#endif /* CONFIG_SECCOMP */
- case PROC_TID_MOUNTS:
- case PROC_TGID_MOUNTS:
- inode->i_fop = &proc_mounts_operations;
- break;
+ REG("seccomp", S_IRUSR|S_IWUSR, seccomp),
+#endif
+ LNK("cwd", cwd),
+ LNK("root", root),
+ LNK("exe", exe),
+ REG("mounts", S_IRUGO, mounts),
+ REG("mountstats", S_IRUSR, mountstats),
#ifdef CONFIG_MMU
- case PROC_TID_SMAPS:
- case PROC_TGID_SMAPS:
- inode->i_fop = &proc_smaps_operations;
- break;
+ REG("smaps", S_IRUGO, smaps),
#endif
- case PROC_TID_MOUNTSTATS:
- case PROC_TGID_MOUNTSTATS:
- inode->i_fop = &proc_mountstats_operations;
- break;
#ifdef CONFIG_SECURITY
- case PROC_TID_ATTR:
- inode->i_nlink = 2;
- inode->i_op = &proc_tid_attr_inode_operations;
- inode->i_fop = &proc_tid_attr_operations;
- break;
- case PROC_TGID_ATTR:
- inode->i_nlink = 2;
- inode->i_op = &proc_tgid_attr_inode_operations;
- inode->i_fop = &proc_tgid_attr_operations;
- break;
- case PROC_TID_ATTR_CURRENT:
- case PROC_TGID_ATTR_CURRENT:
- case PROC_TID_ATTR_PREV:
- case PROC_TGID_ATTR_PREV:
- case PROC_TID_ATTR_EXEC:
- case PROC_TGID_ATTR_EXEC:
- case PROC_TID_ATTR_FSCREATE:
- case PROC_TGID_ATTR_FSCREATE:
- case PROC_TID_ATTR_KEYCREATE:
- case PROC_TGID_ATTR_KEYCREATE:
- case PROC_TID_ATTR_SOCKCREATE:
- case PROC_TGID_ATTR_SOCKCREATE:
- inode->i_fop = &proc_pid_attr_operations;
- break;
+ DIR("attr", S_IRUGO|S_IXUGO, attr_dir),
#endif
#ifdef CONFIG_KALLSYMS
- case PROC_TID_WCHAN:
- case PROC_TGID_WCHAN:
- inode->i_fop = &proc_info_file_operations;
- ei->op.proc_read = proc_pid_wchan;
- break;
+ INF("wchan", S_IRUGO, pid_wchan),
#endif
#ifdef CONFIG_SCHEDSTATS
- case PROC_TID_SCHEDSTAT:
- case PROC_TGID_SCHEDSTAT:
- inode->i_fop = &proc_info_file_operations;
- ei->op.proc_read = proc_pid_schedstat;
- break;
+ INF("schedstat", S_IRUGO, pid_schedstat),
#endif
#ifdef CONFIG_CPUSETS
- case PROC_TID_CPUSET:
- case PROC_TGID_CPUSET:
- inode->i_fop = &proc_cpuset_operations;
- break;
+ REG("cpuset", S_IRUGO, cpuset),
#endif
- case PROC_TID_OOM_SCORE:
- case PROC_TGID_OOM_SCORE:
- inode->i_fop = &proc_info_file_operations;
- ei->op.proc_read = proc_oom_score;
- break;
- case PROC_TID_OOM_ADJUST:
- case PROC_TGID_OOM_ADJUST:
- inode->i_fop = &proc_oom_adjust_operations;
- break;
+ INF("oom_score", S_IRUGO, oom_score),
+ REG("oom_adj", S_IRUGO|S_IWUSR, oom_adjust),
#ifdef CONFIG_AUDITSYSCALL
- case PROC_TID_LOGINUID:
- case PROC_TGID_LOGINUID:
- inode->i_fop = &proc_loginuid_operations;
- break;
+ REG("loginuid", S_IWUSR|S_IRUGO, loginuid),
#endif
- default:
- printk("procfs: impossible type (%d)",p->type);
- iput(inode);
- error = ERR_PTR(-EINVAL);
- goto out;
- }
- dentry->d_op = &pid_dentry_operations;
- d_add(dentry, inode);
- /* Close the race of the process dying before we return the dentry */
- if (pid_revalidate(dentry, NULL))
- error = NULL;
-out:
- put_task_struct(task);
-out_no_task:
- return error;
-}
-
-static struct dentry *proc_tgid_base_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd){
- return proc_pident_lookup(dir, dentry, tgid_base_stuff);
-}
-
-static struct dentry *proc_tid_base_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd){
- return proc_pident_lookup(dir, dentry, tid_base_stuff);
-}
-
-static struct file_operations proc_tgid_base_operations = {
- .read = generic_read_dir,
- .readdir = proc_tgid_base_readdir,
};
-static struct file_operations proc_tid_base_operations = {
- .read = generic_read_dir,
- .readdir = proc_tid_base_readdir,
-};
-
-static struct inode_operations proc_tgid_base_inode_operations = {
- .lookup = proc_tgid_base_lookup,
- .getattr = pid_getattr,
- .setattr = proc_setattr,
-};
-
-static struct inode_operations proc_tid_base_inode_operations = {
- .lookup = proc_tid_base_lookup,
- .getattr = pid_getattr,
- .setattr = proc_setattr,
-};
-
-#ifdef CONFIG_SECURITY
-static int proc_tgid_attr_readdir(struct file * filp,
- void * dirent, filldir_t filldir)
-{
- return proc_pident_readdir(filp,dirent,filldir,
- tgid_attr_stuff,ARRAY_SIZE(tgid_attr_stuff));
-}
-
-static int proc_tid_attr_readdir(struct file * filp,
+static int proc_tgid_base_readdir(struct file * filp,
void * dirent, filldir_t filldir)
{
return proc_pident_readdir(filp,dirent,filldir,
- tid_attr_stuff,ARRAY_SIZE(tid_attr_stuff));
+ tgid_base_stuff,ARRAY_SIZE(tgid_base_stuff));
}
-static struct file_operations proc_tgid_attr_operations = {
- .read = generic_read_dir,
- .readdir = proc_tgid_attr_readdir,
-};
-
-static struct file_operations proc_tid_attr_operations = {
+static struct file_operations proc_tgid_base_operations = {
.read = generic_read_dir,
- .readdir = proc_tid_attr_readdir,
+ .readdir = proc_tgid_base_readdir,
};
-static struct dentry *proc_tgid_attr_lookup(struct inode *dir,
- struct dentry *dentry, struct nameidata *nd)
-{
- return proc_pident_lookup(dir, dentry, tgid_attr_stuff);
-}
-
-static struct dentry *proc_tid_attr_lookup(struct inode *dir,
- struct dentry *dentry, struct nameidata *nd)
-{
- return proc_pident_lookup(dir, dentry, tid_attr_stuff);
+static struct dentry *proc_tgid_base_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd){
+ return proc_pident_lookup(dir, dentry,
+ tgid_base_stuff, ARRAY_SIZE(tgid_base_stuff));
}
-static struct inode_operations proc_tgid_attr_inode_operations = {
- .lookup = proc_tgid_attr_lookup,
- .getattr = pid_getattr,
- .setattr = proc_setattr,
-};
-
-static struct inode_operations proc_tid_attr_inode_operations = {
- .lookup = proc_tid_attr_lookup,
+static struct inode_operations proc_tgid_base_inode_operations = {
+ .lookup = proc_tgid_base_lookup,
.getattr = pid_getattr,
.setattr = proc_setattr,
};
-#endif
-
-/*
- * /proc/self:
- */
-static int proc_self_readlink(struct dentry *dentry, char __user *buffer,
- int buflen)
-{
- char tmp[PROC_NUMBUF];
- sprintf(tmp, "%d", current->tgid);
- return vfs_readlink(dentry,buffer,buflen,tmp);
-}
-
-static void *proc_self_follow_link(struct dentry *dentry, struct nameidata *nd)
-{
- char tmp[PROC_NUMBUF];
- sprintf(tmp, "%d", current->tgid);
- return ERR_PTR(vfs_follow_link(nd,tmp));
-}
-
-static struct inode_operations proc_self_inode_operations = {
- .readlink = proc_self_readlink,
- .follow_link = proc_self_follow_link,
-};
/**
* proc_flush_task - Remove dcache entries for @task from the /proc dcache.
@@ -2022,54 +1880,23 @@ out:
return;
}
-/* SMP-safe */
-struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *nd)
+struct dentry *proc_pid_instantiate(struct inode *dir,
+ struct dentry * dentry, struct task_struct *task, void *ptr)
{
- struct dentry *result = ERR_PTR(-ENOENT);
- struct task_struct *task;
+ struct dentry *error = ERR_PTR(-ENOENT);
struct inode *inode;
- struct proc_inode *ei;
- unsigned tgid;
-
- if (dentry->d_name.len == 4 && !memcmp(dentry->d_name.name,"self",4)) {
- inode = new_inode(dir->i_sb);
- if (!inode)
- return ERR_PTR(-ENOMEM);
- ei = PROC_I(inode);
- inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
- inode->i_ino = fake_ino(0, PROC_TGID_INO);
- ei->pde = NULL;
- inode->i_mode = S_IFLNK|S_IRWXUGO;
- inode->i_uid = inode->i_gid = 0;
- inode->i_size = 64;
- inode->i_op = &proc_self_inode_operations;
- d_add(dentry, inode);
- return NULL;
- }
- tgid = name_to_int(dentry);
- if (tgid == ~0U)
- goto out;
- rcu_read_lock();
- task = find_task_by_pid(tgid);
- if (task)
- get_task_struct(task);
- rcu_read_unlock();
- if (!task)
- goto out;
-
- inode = proc_pid_make_inode(dir->i_sb, task, PROC_TGID_INO);
+ inode = proc_pid_make_inode(dir->i_sb, task);
if (!inode)
- goto out_put_task;
+ goto out;
inode->i_mode = S_IFDIR|S_IRUGO|S_IXUGO;
inode->i_op = &proc_tgid_base_inode_operations;
inode->i_fop = &proc_tgid_base_operations;
inode->i_flags|=S_IMMUTABLE;
-#ifdef CONFIG_SECURITY
- inode->i_nlink = 5;
-#else
inode->i_nlink = 4;
+#ifdef CONFIG_SECURITY
+ inode->i_nlink += 1;
#endif
dentry->d_op = &pid_dentry_operations;
@@ -2077,179 +1904,251 @@ struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, struct
d_add(dentry, inode);
/* Close the race of the process dying before we return the dentry */
if (pid_revalidate(dentry, NULL))
- result = NULL;
-
-out_put_task:
- put_task_struct(task);
+ error = NULL;
out:
- return result;
+ return error;
}
-/* SMP-safe */
-static struct dentry *proc_task_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *nd)
+struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *nd)
{
struct dentry *result = ERR_PTR(-ENOENT);
struct task_struct *task;
- struct task_struct *leader = get_proc_task(dir);
- struct inode *inode;
- unsigned tid;
+ unsigned tgid;
- if (!leader)
- goto out_no_task;
+ result = proc_base_lookup(dir, dentry);
+ if (!IS_ERR(result) || PTR_ERR(result) != -ENOENT)
+ goto out;
- tid = name_to_int(dentry);
- if (tid == ~0U)
+ tgid = name_to_int(dentry);
+ if (tgid == ~0U)
goto out;
rcu_read_lock();
- task = find_task_by_pid(tid);
+ task = find_task_by_pid(tgid);
if (task)
get_task_struct(task);
rcu_read_unlock();
if (!task)
goto out;
- if (leader->tgid != task->tgid)
- goto out_drop_task;
-
- inode = proc_pid_make_inode(dir->i_sb, task, PROC_TID_INO);
-
-
- if (!inode)
- goto out_drop_task;
- inode->i_mode = S_IFDIR|S_IRUGO|S_IXUGO;
- inode->i_op = &proc_tid_base_inode_operations;
- inode->i_fop = &proc_tid_base_operations;
- inode->i_flags|=S_IMMUTABLE;
-#ifdef CONFIG_SECURITY
- inode->i_nlink = 4;
-#else
- inode->i_nlink = 3;
-#endif
-
- dentry->d_op = &pid_dentry_operations;
-
- d_add(dentry, inode);
- /* Close the race of the process dying before we return the dentry */
- if (pid_revalidate(dentry, NULL))
- result = NULL;
-out_drop_task:
+ result = proc_pid_instantiate(dir, dentry, task, NULL);
put_task_struct(task);
out:
- put_task_struct(leader);
-out_no_task:
return result;
}
/*
- * Find the first tgid to return to user space.
- *
- * Usually this is just whatever follows &init_task, but if the users
- * buffer was too small to hold the full list or there was a seek into
- * the middle of the directory we have more work to do.
- *
- * In the case of a short read we start with find_task_by_pid.
+ * Find the first task with tgid >= tgid
*
- * In the case of a seek we start with &init_task and walk nr
- * threads past it.
*/
-static struct task_struct *first_tgid(int tgid, unsigned int nr)
+static struct task_struct *next_tgid(unsigned int tgid)
{
- struct task_struct *pos;
- rcu_read_lock();
- if (tgid && nr) {
- pos = find_task_by_pid(tgid);
- if (pos && thread_group_leader(pos))
- goto found;
- }
- /* If nr exceeds the number of processes get out quickly */
- pos = NULL;
- if (nr && nr >= nr_processes())
- goto done;
+ struct task_struct *task;
+ struct pid *pid;
- /* If we haven't found our starting place yet start with
- * the init_task and walk nr tasks forward.
- */
- for (pos = next_task(&init_task); nr > 0; --nr) {
- pos = next_task(pos);
- if (pos == &init_task) {
- pos = NULL;
- goto done;
- }
+ rcu_read_lock();
+retry:
+ task = NULL;
+ pid = find_ge_pid(tgid);
+ if (pid) {
+ tgid = pid->nr + 1;
+ task = pid_task(pid, PIDTYPE_PID);
+ /* What we to know is if the pid we have find is the
+ * pid of a thread_group_leader. Testing for task
+ * being a thread_group_leader is the obvious thing
+ * todo but there is a window when it fails, due to
+ * the pid transfer logic in de_thread.
+ *
+ * So we perform the straight forward test of seeing
+ * if the pid we have found is the pid of a thread
+ * group leader, and don't worry if the task we have
+ * found doesn't happen to be a thread group leader.
+ * As we don't care in the case of readdir.
+ */
+ if (!task || !has_group_leader_pid(task))
+ goto retry;
+ get_task_struct(task);
}
-found:
- get_task_struct(pos);
-done:
rcu_read_unlock();
- return pos;
+ return task;
}
-/*
- * Find the next task in the task list.
- * Return NULL if we loop or there is any error.
- *
- * The reference to the input task_struct is released.
- */
-static struct task_struct *next_tgid(struct task_struct *start)
+#define TGID_OFFSET (FIRST_PROCESS_ENTRY + ARRAY_SIZE(proc_base_stuff))
+
+static int proc_pid_fill_cache(struct file *filp, void *dirent, filldir_t filldir,
+ struct task_struct *task, int tgid)
{
- struct task_struct *pos;
- rcu_read_lock();
- pos = start;
- if (pid_alive(start))
- pos = next_task(start);
- if (pid_alive(pos) && (pos != &init_task)) {
- get_task_struct(pos);
- goto done;
- }
- pos = NULL;
-done:
- rcu_read_unlock();
- put_task_struct(start);
- return pos;
+ char name[PROC_NUMBUF];
+ int len = snprintf(name, sizeof(name), "%d", tgid);
+ return proc_fill_cache(filp, dirent, filldir, name, len,
+ proc_pid_instantiate, task, NULL);
}
/* for the /proc/ directory itself, after non-process stuff has been done */
int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir)
{
- char buf[PROC_NUMBUF];
unsigned int nr = filp->f_pos - FIRST_PROCESS_ENTRY;
+ struct task_struct *reaper = get_proc_task(filp->f_dentry->d_inode);
struct task_struct *task;
int tgid;
- if (!nr) {
- ino_t ino = fake_ino(0,PROC_TGID_INO);
- if (filldir(dirent, "self", 4, filp->f_pos, ino, DT_LNK) < 0)
- return 0;
- filp->f_pos++;
- nr++;
+ if (!reaper)
+ goto out_no_task;
+
+ for (; nr < ARRAY_SIZE(proc_base_stuff); filp->f_pos++, nr++) {
+ struct pid_entry *p = &proc_base_stuff[nr];
+ if (proc_base_fill_cache(filp, dirent, filldir, reaper, p) < 0)
+ goto out;
}
- nr -= 1;
- /* f_version caches the tgid value that the last readdir call couldn't
- * return. lseek aka telldir automagically resets f_version to 0.
- */
- tgid = filp->f_version;
- filp->f_version = 0;
- for (task = first_tgid(tgid, nr);
+ tgid = filp->f_pos - TGID_OFFSET;
+ for (task = next_tgid(tgid);
task;
- task = next_tgid(task), filp->f_pos++) {
- int len;
- ino_t ino;
+ put_task_struct(task), task = next_tgid(tgid + 1)) {
tgid = task->pid;
- len = snprintf(buf, sizeof(buf), "%d", tgid);
- ino = fake_ino(tgid, PROC_TGID_INO);
- if (filldir(dirent, buf, len, filp->f_pos, ino, DT_DIR) < 0) {
- /* returning this tgid failed, save it as the first
- * pid for the next readir call */
- filp->f_version = tgid;
+ filp->f_pos = tgid + TGID_OFFSET;
+ if (proc_pid_fill_cache(filp, dirent, filldir, task, tgid) < 0) {
put_task_struct(task);
- break;
+ goto out;
}
}
+ filp->f_pos = PID_MAX_LIMIT + TGID_OFFSET;
+out:
+ put_task_struct(reaper);
+out_no_task:
return 0;
}
/*
+ * Tasks
+ */
+static struct pid_entry tid_base_stuff[] = {
+ DIR("fd", S_IRUSR|S_IXUSR, fd),
+ INF("environ", S_IRUSR, pid_environ),
+ INF("auxv", S_IRUSR, pid_auxv),
+ INF("status", S_IRUGO, pid_status),
+ INF("cmdline", S_IRUGO, pid_cmdline),
+ INF("stat", S_IRUGO, tid_stat),
+ INF("statm", S_IRUGO, pid_statm),
+ REG("maps", S_IRUGO, maps),
+#ifdef CONFIG_NUMA
+ REG("numa_maps", S_IRUGO, numa_maps),
+#endif
+ REG("mem", S_IRUSR|S_IWUSR, mem),
+#ifdef CONFIG_SECCOMP
+ REG("seccomp", S_IRUSR|S_IWUSR, seccomp),
+#endif
+ LNK("cwd", cwd),
+ LNK("root", root),
+ LNK("exe", exe),
+ REG("mounts", S_IRUGO, mounts),
+#ifdef CONFIG_MMU
+ REG("smaps", S_IRUGO, smaps),
+#endif
+#ifdef CONFIG_SECURITY
+ DIR("attr", S_IRUGO|S_IXUGO, attr_dir),
+#endif
+#ifdef CONFIG_KALLSYMS
+ INF("wchan", S_IRUGO, pid_wchan),
+#endif
+#ifdef CONFIG_SCHEDSTATS
+ INF("schedstat", S_IRUGO, pid_schedstat),
+#endif
+#ifdef CONFIG_CPUSETS
+ REG("cpuset", S_IRUGO, cpuset),
+#endif
+ INF("oom_score", S_IRUGO, oom_score),
+ REG("oom_adj", S_IRUGO|S_IWUSR, oom_adjust),
+#ifdef CONFIG_AUDITSYSCALL
+ REG("loginuid", S_IWUSR|S_IRUGO, loginuid),
+#endif
+};
+
+static int proc_tid_base_readdir(struct file * filp,
+ void * dirent, filldir_t filldir)
+{
+ return proc_pident_readdir(filp,dirent,filldir,
+ tid_base_stuff,ARRAY_SIZE(tid_base_stuff));
+}
+
+static struct dentry *proc_tid_base_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd){
+ return proc_pident_lookup(dir, dentry,
+ tid_base_stuff, ARRAY_SIZE(tid_base_stuff));
+}
+
+static struct file_operations proc_tid_base_operations = {
+ .read = generic_read_dir,
+ .readdir = proc_tid_base_readdir,
+};
+
+static struct inode_operations proc_tid_base_inode_operations = {
+ .lookup = proc_tid_base_lookup,
+ .getattr = pid_getattr,
+ .setattr = proc_setattr,
+};
+
+static struct dentry *proc_task_instantiate(struct inode *dir,
+ struct dentry *dentry, struct task_struct *task, void *ptr)
+{
+ struct dentry *error = ERR_PTR(-ENOENT);
+ struct inode *inode;
+ inode = proc_pid_make_inode(dir->i_sb, task);
+
+ if (!inode)
+ goto out;
+ inode->i_mode = S_IFDIR|S_IRUGO|S_IXUGO;
+ inode->i_op = &proc_tid_base_inode_operations;
+ inode->i_fop = &proc_tid_base_operations;
+ inode->i_flags|=S_IMMUTABLE;
+ inode->i_nlink = 3;
+#ifdef CONFIG_SECURITY
+ inode->i_nlink += 1;
+#endif
+
+ dentry->d_op = &pid_dentry_operations;
+
+ d_add(dentry, inode);
+ /* Close the race of the process dying before we return the dentry */
+ if (pid_revalidate(dentry, NULL))
+ error = NULL;
+out:
+ return error;
+}
+
+static struct dentry *proc_task_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *nd)
+{
+ struct dentry *result = ERR_PTR(-ENOENT);
+ struct task_struct *task;
+ struct task_struct *leader = get_proc_task(dir);
+ unsigned tid;
+
+ if (!leader)
+ goto out_no_task;
+
+ tid = name_to_int(dentry);
+ if (tid == ~0U)
+ goto out;
+
+ rcu_read_lock();
+ task = find_task_by_pid(tid);
+ if (task)
+ get_task_struct(task);
+ rcu_read_unlock();
+ if (!task)
+ goto out;
+ if (leader->tgid != task->tgid)
+ goto out_drop_task;
+
+ result = proc_task_instantiate(dir, dentry, task, NULL);
+out_drop_task:
+ put_task_struct(task);
+out:
+ put_task_struct(leader);
+out_no_task:
+ return result;
+}
+
+/*
* Find the first tid of a thread group to return to user space.
*
* Usually this is just the thread group leader, but if the users
@@ -2318,10 +2217,18 @@ static struct task_struct *next_tid(struct task_struct *start)
return pos;
}
+static int proc_task_fill_cache(struct file *filp, void *dirent, filldir_t filldir,
+ struct task_struct *task, int tid)
+{
+ char name[PROC_NUMBUF];
+ int len = snprintf(name, sizeof(name), "%d", tid);
+ return proc_fill_cache(filp, dirent, filldir, name, len,
+ proc_task_instantiate, task, NULL);
+}
+
/* for the /proc/TGID/task/ directories */
static int proc_task_readdir(struct file * filp, void * dirent, filldir_t filldir)
{
- char buf[PROC_NUMBUF];
struct dentry *dentry = filp->f_dentry;
struct inode *inode = dentry->d_inode;
struct task_struct *leader = get_proc_task(inode);
@@ -2358,11 +2265,8 @@ static int proc_task_readdir(struct file * filp, void * dirent, filldir_t filldi
for (task = first_tid(leader, tid, pos - 2);
task;
task = next_tid(task), pos++) {
- int len;
tid = task->pid;
- len = snprintf(buf, sizeof(buf), "%d", tid);
- ino = fake_ino(tid, PROC_TID_INO);
- if (filldir(dirent, buf, len, pos, ino, DT_DIR < 0)) {
+ if (proc_task_fill_cache(filp, dirent, filldir, task, tid) < 0) {
/* returning this tgid failed, save it as the first
* pid for the next readir call */
filp->f_version = tid;
@@ -2392,3 +2296,14 @@ static int proc_task_getattr(struct vfsmount *mnt, struct dentry *dentry, struct
return 0;
}
+
+static struct inode_operations proc_task_inode_operations = {
+ .lookup = proc_task_lookup,
+ .getattr = proc_task_getattr,
+ .setattr = proc_setattr,
+};
+
+static struct file_operations proc_task_operations = {
+ .read = generic_read_dir,
+ .readdir = proc_task_readdir,
+};
diff --git a/fs/proc/proc_misc.c b/fs/proc/proc_misc.c
index 66bc425f2f3..8d88e58ed5c 100644
--- a/fs/proc/proc_misc.c
+++ b/fs/proc/proc_misc.c
@@ -45,6 +45,7 @@
#include <linux/sysrq.h>
#include <linux/vmalloc.h>
#include <linux/crash_dump.h>
+#include <linux/pspace.h>
#include <asm/uaccess.h>
#include <asm/pgtable.h>
#include <asm/io.h>
@@ -91,7 +92,7 @@ static int loadavg_read_proc(char *page, char **start, off_t off,
LOAD_INT(a), LOAD_FRAC(a),
LOAD_INT(b), LOAD_FRAC(b),
LOAD_INT(c), LOAD_FRAC(c),
- nr_running(), nr_threads, last_pid);
+ nr_running(), nr_threads, init_pspace.last_pid);
return proc_calc_metrics(page, start, off, count, eof, len);
}
diff --git a/fs/proc/root.c b/fs/proc/root.c
index 8901c65caca..ffe66c38488 100644
--- a/fs/proc/root.c
+++ b/fs/proc/root.c
@@ -16,6 +16,7 @@
#include <linux/module.h>
#include <linux/bitops.h>
#include <linux/smp_lock.h>
+#include <linux/mount.h>
#include "internal.h"
@@ -28,6 +29,17 @@ struct proc_dir_entry *proc_sys_root;
static int proc_get_sb(struct file_system_type *fs_type,
int flags, const char *dev_name, void *data, struct vfsmount *mnt)
{
+ if (proc_mnt) {
+ /* Seed the root directory with a pid so it doesn't need
+ * to be special in base.c. I would do this earlier but
+ * the only task alive when /proc is mounted the first time
+ * is the init_task and it doesn't have any pids.
+ */
+ struct proc_inode *ei;
+ ei = PROC_I(proc_mnt->mnt_sb->s_root->d_inode);
+ if (!ei->pid)
+ ei->pid = find_get_pid(1);
+ }
return get_sb_single(fs_type, flags, data, proc_fill_super, mnt);
}
diff --git a/include/asm-alpha/unistd.h b/include/asm-alpha/unistd.h
index bc6e6a9259d..2cabbd465c0 100644
--- a/include/asm-alpha/unistd.h
+++ b/include/asm-alpha/unistd.h
@@ -580,75 +580,6 @@ type name (type1 arg1,type2 arg2,type3 arg3,type4 arg4,type5 arg5, type6 arg6)\
#define __ARCH_WANT_SYS_OLDUMOUNT
#define __ARCH_WANT_SYS_SIGPENDING
-#ifdef __KERNEL_SYSCALLS__
-
-#include <linux/compiler.h>
-#include <linux/types.h>
-#include <linux/string.h>
-#include <linux/signal.h>
-#include <linux/syscalls.h>
-#include <asm/ptrace.h>
-
-static inline long open(const char * name, int mode, int flags)
-{
- return sys_open(name, mode, flags);
-}
-
-static inline long dup(int fd)
-{
- return sys_dup(fd);
-}
-
-static inline long close(int fd)
-{
- return sys_close(fd);
-}
-
-static inline off_t lseek(int fd, off_t off, int whence)
-{
- return sys_lseek(fd, off, whence);
-}
-
-static inline void _exit(int value)
-{
- sys_exit(value);
-}
-
-#define exit(x) _exit(x)
-
-static inline long write(int fd, const char * buf, size_t nr)
-{
- return sys_write(fd, buf, nr);
-}
-
-static inline long read(int fd, char * buf, size_t nr)
-{
- return sys_read(fd, buf, nr);
-}
-
-extern int execve(char *, char **, char **);
-
-static inline long setsid(void)
-{
- return sys_setsid();
-}
-
-static inline pid_t waitpid(int pid, int * wait_stat, int flags)
-{
- return sys_wait4(pid, wait_stat, flags, NULL);
-}
-
-asmlinkage int sys_execve(char *ufilename, char **argv, char **envp,
- unsigned long a3, unsigned long a4, unsigned long a5,
- struct pt_regs regs);
-asmlinkage long sys_rt_sigaction(int sig,
- const struct sigaction __user *act,
- struct sigaction __user *oact,
- size_t sigsetsize,
- void *restorer);
-
-#endif /* __KERNEL_SYSCALLS__ */
-
/* "Conditional" syscalls. What we want is
__attribute__((weak,alias("sys_ni_syscall")))
diff --git a/include/asm-arm/unistd.h b/include/asm-arm/unistd.h
index 2ab4078334b..14a87eec5a2 100644
--- a/include/asm-arm/unistd.h
+++ b/include/asm-arm/unistd.h
@@ -549,30 +549,6 @@ type name(type1 arg1, type2 arg2, type3 arg3, type4 arg4, type5 arg5, type6 arg6
#define __ARCH_WANT_SYS_SOCKETCALL
#endif
-#ifdef __KERNEL_SYSCALLS__
-
-#include <linux/compiler.h>
-#include <linux/types.h>
-#include <linux/syscalls.h>
-
-extern long execve(const char *file, char **argv, char **envp);
-
-struct pt_regs;
-asmlinkage int sys_execve(char *filenamei, char **argv, char **envp,
- struct pt_regs *regs);
-asmlinkage int sys_clone(unsigned long clone_flags, unsigned long newsp,
- struct pt_regs *regs);
-asmlinkage int sys_fork(struct pt_regs *regs);
-asmlinkage int sys_vfork(struct pt_regs *regs);
-asmlinkage int sys_pipe(unsigned long *fildes);
-struct sigaction;
-asmlinkage long sys_rt_sigaction(int sig,
- const struct sigaction __user *act,
- struct sigaction __user *oact,
- size_t sigsetsize);
-
-#endif /* __KERNEL_SYSCALLS__ */
-
/*
* "Conditional" syscalls
*
diff --git a/include/asm-arm26/unistd.h b/include/asm-arm26/unistd.h
index c6d2436c9d3..25a5eead85b 100644
--- a/include/asm-arm26/unistd.h
+++ b/include/asm-arm26/unistd.h
@@ -464,30 +464,6 @@ type name(type1 arg1, type2 arg2, type3 arg3, type4 arg4, type5 arg5, type6 arg6
#define __ARCH_WANT_SYS_SIGPROCMASK
#define __ARCH_WANT_SYS_RT_SIGACTION
-#ifdef __KERNEL_SYSCALLS__
-
-#include <linux/compiler.h>
-#include <linux/types.h>
-#include <linux/syscalls.h>
-
-extern long execve(const char *file, char **argv, char **envp);
-
-struct pt_regs;
-asmlinkage int sys_execve(char *filenamei, char **argv, char **envp,
- struct pt_regs *regs);
-asmlinkage int sys_clone(unsigned long clone_flags, unsigned long newsp,
- struct pt_regs *regs);
-asmlinkage int sys_fork(struct pt_regs *regs);
-asmlinkage int sys_vfork(struct pt_regs *regs);
-asmlinkage int sys_pipe(unsigned long *fildes);
-struct sigaction;
-asmlinkage long sys_rt_sigaction(int sig,
- const struct sigaction __user *act,
- struct sigaction __user *oact,
- size_t sigsetsize);
-
-#endif /* __KERNEL_SYSCALLS__ */
-
/*
* "Conditional" syscalls
*
diff --git a/include/asm-avr32/unistd.h b/include/asm-avr32/unistd.h
index 1f528f92690..a50e5004550 100644
--- a/include/asm-avr32/unistd.h
+++ b/include/asm-avr32/unistd.h
@@ -281,30 +281,10 @@
#define __NR_tee 263
#define __NR_vmsplice 264
+#ifdef __KERNEL__
#define NR_syscalls 265
-/*
- * AVR32 calling convention for system calls:
- * - System call number in r8
- * - Parameters in r12 and downwards to r9 as well as r6 and r5.
- * - Return value in r12
- */
-
-/*
- * user-visible error numbers are in the range -1 - -124: see
- * <asm-generic/errno.h>
- */
-
-#define __syscall_return(type, res) do { \
- if ((unsigned long)(res) >= (unsigned long)(-125)) { \
- errno = -(res); \
- res = -1; \
- } \
- return (type) (res); \
- } while (0)
-
-#ifdef __KERNEL__
#define __ARCH_WANT_IPC_PARSE_VERSION
#define __ARCH_WANT_STAT64
#define __ARCH_WANT_SYS_ALARM
@@ -319,62 +299,6 @@
#define __ARCH_WANT_SYS_GETPGRP
#define __ARCH_WANT_SYS_RT_SIGACTION
#define __ARCH_WANT_SYS_RT_SIGSUSPEND
-#endif
-
-#if defined(__KERNEL_SYSCALLS__) || defined(__CHECKER__)
-
-#include <linux/types.h>
-#include <linux/linkage.h>
-#include <asm/signal.h>
-
-struct pt_regs;
-
-/*
- * we need this inline - forking from kernel space will result
- * in NO COPY ON WRITE (!!!), until an execve is executed. This
- * is no problem, but for the stack. This is handled by not letting
- * main() use the stack at all after fork(). Thus, no function
- * calls - which means inline code for fork too, as otherwise we
- * would use the stack upon exit from 'fork()'.
- *
- * Actually only pause and fork are needed inline, so that there
- * won't be any messing with the stack from main(), but we define
- * some others too.
- */
-static inline int execve(const char *file, char **argv, char **envp)
-{
- register long scno asm("r8") = __NR_execve;
- register long sc1 asm("r12") = (long)file;
- register long sc2 asm("r11") = (long)argv;
- register long sc3 asm("r10") = (long)envp;
- int res;
-
- asm volatile("scall"
- : "=r"(sc1)
- : "r"(scno), "0"(sc1), "r"(sc2), "r"(sc3)
- : "lr", "memory");
- res = sc1;
- __syscall_return(int, res);
-}
-
-asmlinkage long sys_rt_sigsuspend(sigset_t __user *unewset, size_t sigsetsize);
-asmlinkage int sys_sigaltstack(const stack_t __user *uss, stack_t __user *uoss,
- struct pt_regs *regs);
-asmlinkage int sys_rt_sigreturn(struct pt_regs *regs);
-asmlinkage int sys_pipe(unsigned long __user *filedes);
-asmlinkage long sys_mmap2(unsigned long addr, unsigned long len,
- unsigned long prot, unsigned long flags,
- unsigned long fd, off_t offset);
-asmlinkage int sys_cacheflush(int operation, void __user *addr, size_t len);
-asmlinkage int sys_fork(struct pt_regs *regs);
-asmlinkage int sys_clone(unsigned long clone_flags, unsigned long newsp,
- unsigned long parent_tidptr,
- unsigned long child_tidptr, struct pt_regs *regs);
-asmlinkage int sys_vfork(struct pt_regs *regs);
-asmlinkage int sys_execve(char __user *ufilename, char __user *__user *uargv,
- char __user *__user *uenvp, struct pt_regs *regs);
-
-#endif
/*
* "Conditional" syscalls
@@ -384,4 +308,6 @@ asmlinkage int sys_execve(char __user *ufilename, char __user *__user *uargv,
*/
#define cond_syscall(x) asm(".weak\t" #x "\n\t.set\t" #x ",sys_ni_syscall");
+#endif /* __KERNEL__ */
+
#endif /* __ASM_AVR32_UNISTD_H */
diff --git a/include/asm-cris/unistd.h b/include/asm-cris/unistd.h
index 7372efae051..7c90fa970c3 100644
--- a/include/asm-cris/unistd.h
+++ b/include/asm-cris/unistd.h
@@ -322,67 +322,6 @@
#define __ARCH_WANT_SYS_SIGPROCMASK
#define __ARCH_WANT_SYS_RT_SIGACTION
-#ifdef __KERNEL_SYSCALLS__
-
-#include <linux/compiler.h>
-#include <linux/types.h>
-#include <linux/linkage.h>
-
-/*
- * we need this inline - forking from kernel space will result
- * in NO COPY ON WRITE (!!!), until an execve is executed. This
- * is no problem, but for the stack. This is handled by not letting
- * main() use the stack at all after fork(). Thus, no function
- * calls - which means inline code for fork too, as otherwise we
- * would use the stack upon exit from 'fork()'.
- *
- * Actually only pause and fork are needed inline, so that there
- * won't be any messing with the stack from main(), but we define
- * some others too.
- */
-#define __NR__exit __NR_exit
-static inline _syscall0(pid_t,setsid)
-static inline _syscall3(int,write,int,fd,const char *,buf,off_t,count)
-static inline _syscall3(int,read,int,fd,char *,buf,off_t,count)
-static inline _syscall3(off_t,lseek,int,fd,off_t,offset,int,count)
-static inline _syscall1(int,dup,int,fd)
-static inline _syscall3(int,execve,const char *,file,char **,argv,char **,envp)
-static inline _syscall3(int,open,const char *,file,int,flag,int,mode)
-static inline _syscall1(int,close,int,fd)
-
-struct pt_regs;
-asmlinkage long sys_mmap2(
- unsigned long addr, unsigned long len,
- unsigned long prot, unsigned long flags,
- unsigned long fd, unsigned long pgoff);
-asmlinkage int sys_execve(const char *fname, char **argv, char **envp,
- long r13, long mof, long srp, struct pt_regs *regs);
-asmlinkage int sys_clone(unsigned long newusp, unsigned long flags,
- int* parent_tid, int* child_tid, long mof, long srp,
- struct pt_regs *regs);
-asmlinkage int sys_fork(long r10, long r11, long r12, long r13,
- long mof, long srp, struct pt_regs *regs);
-asmlinkage int sys_vfork(long r10, long r11, long r12, long r13,
- long mof, long srp, struct pt_regs *regs);
-asmlinkage int sys_pipe(unsigned long __user *fildes);
-struct sigaction;
-asmlinkage long sys_rt_sigaction(int sig,
- const struct sigaction __user *act,
- struct sigaction __user *oact,
- size_t sigsetsize);
-
-/*
- * Since we define it "external", it collides with the built-in
- * definition, which has the "noreturn" attribute and will cause
- * complaints. We don't want to use -fno-builtin, so just use a
- * different name when in the kernel.
- */
-#define _exit kernel_syscall_exit
-static inline _syscall1(int,_exit,int,exitcode)
-static inline _syscall3(pid_t,waitpid,pid_t,pid,int *,wait_stat,int,options)
-#endif /* __KERNEL_SYSCALLS__ */
-
-
/*
* "Conditional" syscalls
*
diff --git a/include/asm-frv/unistd.h b/include/asm-frv/unistd.h
index d104d1b91d3..725e854928c 100644
--- a/include/asm-frv/unistd.h
+++ b/include/asm-frv/unistd.h
@@ -440,31 +440,6 @@ type name (type1 arg1, type2 arg2, type3 arg3, type4 arg4, type5 arg5, type6 arg
__syscall_return(type, __sc0); \
}
-
-#ifdef __KERNEL_SYSCALLS__
-
-#include <linux/compiler.h>
-#include <linux/types.h>
-#include <linux/linkage.h>
-#include <asm/ptrace.h>
-
-/*
- * we need this inline - forking from kernel space will result
- * in NO COPY ON WRITE (!!!), until an execve is executed. This
- * is no problem, but for the stack. This is handled by not letting
- * main() use the stack at all after fork(). Thus, no function
- * calls - which means inline code for fork too, as otherwise we
- * would use the stack upon exit from 'fork()'.
- *
- * Actually only pause and fork are needed inline, so that there
- * won't be any messing with the stack from main(), but we define
- * some others too.
- */
-#define __NR__exit __NR_exit
-static inline _syscall3(int,execve,const char *,file,char **,argv,char **,envp)
-
-#endif /* __KERNEL_SYSCALLS__ */
-
#define __ARCH_WANT_IPC_PARSE_VERSION
/* #define __ARCH_WANT_OLD_READDIR */
#define __ARCH_WANT_OLD_STAT
diff --git a/include/asm-h8300/unistd.h b/include/asm-h8300/unistd.h
index a2dd90462d8..747788d629a 100644
--- a/include/asm-h8300/unistd.h
+++ b/include/asm-h8300/unistd.h
@@ -485,57 +485,6 @@ type name(atype a, btype b, ctype c, dtype d, etype e, ftype f) \
#define __ARCH_WANT_SYS_SIGPROCMASK
#define __ARCH_WANT_SYS_RT_SIGACTION
-#ifdef __KERNEL_SYSCALLS__
-
-#include <linux/compiler.h>
-#include <linux/types.h>
-
-/*
- * we need this inline - forking from kernel space will result
- * in NO COPY ON WRITE (!!!), until an execve is executed. This
- * is no problem, but for the stack. This is handled by not letting
- * main() use the stack at all after fork(). Thus, no function
- * calls - which means inline code for fork too, as otherwise we
- * would use the stack upon exit from 'fork()'.
- *
- * Actually only pause and fork are needed inline, so that there
- * won't be any messing with the stack from main(), but we define
- * some others too.
- */
-#define __NR__exit __NR_exit
-static inline _syscall0(int,pause)
-static inline _syscall0(int,sync)
-static inline _syscall0(pid_t,setsid)
-static inline _syscall3(int,write,int,fd,const char *,buf,off_t,count)
-static inline _syscall3(int,read,int,fd,char *,buf,off_t,count)
-static inline _syscall3(off_t,lseek,int,fd,off_t,offset,int,count)
-static inline _syscall1(int,dup,int,fd)
-static inline _syscall3(int,execve,const char *,file,char **,argv,char **,envp)
-static inline _syscall3(int,open,const char *,file,int,flag,int,mode)
-static inline _syscall1(int,close,int,fd)
-static inline _syscall1(int,_exit,int,exitcode)
-static inline _syscall3(pid_t,waitpid,pid_t,pid,int *,wait_stat,int,options)
-static inline _syscall1(int,delete_module,const char *,name)
-
-static inline pid_t wait(int * wait_stat)
-{
- return waitpid(-1,wait_stat,0);
-}
-
-asmlinkage long sys_mmap2(unsigned long addr, unsigned long len,
- unsigned long prot, unsigned long flags,
- unsigned long fd, unsigned long pgoff);
-asmlinkage int sys_execve(char *name, char **argv, char **envp,
- int dummy, ...);
-asmlinkage int sys_pipe(unsigned long *fildes);
-struct sigaction;
-asmlinkage long sys_rt_sigaction(int sig,
- const struct sigaction __user *act,
- struct sigaction __user *oact,
- size_t sigsetsize);
-
-#endif /* __KERNEL_SYSCALLS__ */
-
/*
* "Conditional" syscalls
*/
diff --git a/include/asm-i386/bugs.h b/include/asm-i386/bugs.h
index 2a9e4ee5904..592ffeeda45 100644
--- a/include/asm-i386/bugs.h
+++ b/include/asm-i386/bugs.h
@@ -189,6 +189,6 @@ static void __init check_bugs(void)
check_fpu();
check_hlt();
check_popad();
- system_utsname.machine[1] = '0' + (boot_cpu_data.x86 > 6 ? 6 : boot_cpu_data.x86);
+ init_utsname()->machine[1] = '0' + (boot_cpu_data.x86 > 6 ? 6 : boot_cpu_data.x86);
alternative_instructions();
}
diff --git a/include/asm-i386/elf.h b/include/asm-i386/elf.h
index db4344d9f73..3a05436f31c 100644
--- a/include/asm-i386/elf.h
+++ b/include/asm-i386/elf.h
@@ -112,7 +112,7 @@ typedef struct user_fxsr_struct elf_fpxregset_t;
For the moment, we have only optimizations for the Intel generations,
but that could change... */
-#define ELF_PLATFORM (system_utsname.machine)
+#define ELF_PLATFORM (utsname()->machine)
#define SET_PERSONALITY(ex, ibcs2) do { } while (0)
diff --git a/include/asm-i386/ptrace.h b/include/asm-i386/ptrace.h
index a4a0e5207db..d505f501077 100644
--- a/include/asm-i386/ptrace.h
+++ b/include/asm-i386/ptrace.h
@@ -47,7 +47,10 @@ static inline int user_mode_vm(struct pt_regs *regs)
{
return ((regs->xcs & SEGMENT_RPL_MASK) | (regs->eflags & VM_MASK)) >= USER_RPL;
}
+
#define instruction_pointer(regs) ((regs)->eip)
+#define regs_return_value(regs) ((regs)->eax)
+
extern unsigned long profile_pc(struct pt_regs *regs);
#endif /* __KERNEL__ */
diff --git a/include/asm-i386/unistd.h b/include/asm-i386/unistd.h
index bd9987087ad..3ca7ab963d7 100644
--- a/include/asm-i386/unistd.h
+++ b/include/asm-i386/unistd.h
@@ -451,45 +451,6 @@ __syscall_return(type,__res); \
#define __ARCH_WANT_SYS_RT_SIGACTION
#define __ARCH_WANT_SYS_RT_SIGSUSPEND
-#ifdef __KERNEL_SYSCALLS__
-
-#include <linux/compiler.h>
-#include <linux/types.h>
-#include <linux/linkage.h>
-#include <asm/ptrace.h>
-
-/*
- * we need this inline - forking from kernel space will result
- * in NO COPY ON WRITE (!!!), until an execve is executed. This
- * is no problem, but for the stack. This is handled by not letting
- * main() use the stack at all after fork(). Thus, no function
- * calls - which means inline code for fork too, as otherwise we
- * would use the stack upon exit from 'fork()'.
- *
- * Actually only pause and fork are needed inline, so that there
- * won't be any messing with the stack from main(), but we define
- * some others too.
- */
-static inline _syscall3(int,execve,const char *,file,char **,argv,char **,envp)
-
-asmlinkage int sys_modify_ldt(int func, void __user *ptr, unsigned long bytecount);
-asmlinkage long sys_mmap2(unsigned long addr, unsigned long len,
- unsigned long prot, unsigned long flags,
- unsigned long fd, unsigned long pgoff);
-asmlinkage int sys_execve(struct pt_regs regs);
-asmlinkage int sys_clone(struct pt_regs regs);
-asmlinkage int sys_fork(struct pt_regs regs);
-asmlinkage int sys_vfork(struct pt_regs regs);
-asmlinkage int sys_pipe(unsigned long __user *fildes);
-asmlinkage long sys_iopl(unsigned long unused);
-struct sigaction;
-asmlinkage long sys_rt_sigaction(int sig,
- const struct sigaction __user *act,
- struct sigaction __user *oact,
- size_t sigsetsize);
-
-#endif /* __KERNEL_SYSCALLS__ */
-
/*
* "Conditional" syscalls
*
diff --git a/include/asm-ia64/ptrace.h b/include/asm-ia64/ptrace.h
index 1414316efd4..f4ef87a3623 100644
--- a/include/asm-ia64/ptrace.h
+++ b/include/asm-ia64/ptrace.h
@@ -241,6 +241,9 @@ struct switch_stack {
* the canonical representation by adding to instruction pointer.
*/
# define instruction_pointer(regs) ((regs)->cr_iip + ia64_psr(regs)->ri)
+
+#define regs_return_value(regs) ((regs)->r8)
+
/* Conserve space in histogram by encoding slot bits in address
* bits 2 and 3 rather than bits 0 and 1.
*/
diff --git a/include/asm-ia64/unistd.h b/include/asm-ia64/unistd.h
index bb0eb727dcd..53c5c0ee122 100644
--- a/include/asm-ia64/unistd.h
+++ b/include/asm-ia64/unistd.h
@@ -319,78 +319,6 @@
extern long __ia64_syscall (long a0, long a1, long a2, long a3, long a4, long nr);
-#ifdef __KERNEL_SYSCALLS__
-
-#include <linux/compiler.h>
-#include <linux/string.h>
-#include <linux/signal.h>
-#include <asm/ptrace.h>
-#include <linux/stringify.h>
-#include <linux/syscalls.h>
-
-static inline long
-open (const char * name, int mode, int flags)
-{
- return sys_open(name, mode, flags);
-}
-
-static inline long
-dup (int fd)
-{
- return sys_dup(fd);
-}
-
-static inline long
-close (int fd)
-{
- return sys_close(fd);
-}
-
-static inline off_t
-lseek (int fd, off_t off, int whence)
-{
- return sys_lseek(fd, off, whence);
-}
-
-static inline void
-_exit (int value)
-{
- sys_exit(value);
-}
-
-#define exit(x) _exit(x)
-
-static inline long
-write (int fd, const char * buf, size_t nr)
-{
- return sys_write(fd, buf, nr);
-}
-
-static inline long
-read (int fd, char * buf, size_t nr)
-{
- return sys_read(fd, buf, nr);
-}
-
-
-static inline long
-setsid (void)
-{
- return sys_setsid();
-}
-
-static inline pid_t
-waitpid (int pid, int * wait_stat, int flags)
-{
- return sys_wait4(pid, wait_stat, flags, NULL);
-}
-
-
-extern int execve (const char *filename, char *const av[], char *const ep[]);
-extern pid_t clone (unsigned long flags, void *sp);
-
-#endif /* __KERNEL_SYSCALLS__ */
-
asmlinkage unsigned long sys_mmap(
unsigned long addr, unsigned long len,
int prot, int flags,
diff --git a/include/asm-m32r/unistd.h b/include/asm-m32r/unistd.h
index 5c6a9ac6cf1..95aa34298d8 100644
--- a/include/asm-m32r/unistd.h
+++ b/include/asm-m32r/unistd.h
@@ -424,43 +424,6 @@ __syscall_return(type,__res); \
#define __ARCH_WANT_SYS_OLDUMOUNT
#define __ARCH_WANT_SYS_RT_SIGACTION
-#ifdef __KERNEL_SYSCALLS__
-
-#include <linux/compiler.h>
-#include <linux/types.h>
-#include <linux/linkage.h>
-#include <asm/ptrace.h>
-
-/*
- * we need this inline - forking from kernel space will result
- * in NO COPY ON WRITE (!!!), until an execve is executed. This
- * is no problem, but for the stack. This is handled by not letting
- * main() use the stack at all after fork(). Thus, no function
- * calls - which means inline code for fork too, as otherwise we
- * would use the stack upon exit from 'fork()'.
- *
- * Actually only pause and fork are needed inline, so that there
- * won't be any messing with the stack from main(), but we define
- * some others too.
- */
-static __inline__ _syscall3(int,execve,const char *,file,char **,argv,char **,envp)
-
-asmlinkage long sys_mmap2(unsigned long addr, unsigned long len,
- unsigned long prot, unsigned long flags,
- unsigned long fd, unsigned long pgoff);
-asmlinkage int sys_execve(struct pt_regs regs);
-asmlinkage int sys_clone(struct pt_regs regs);
-asmlinkage int sys_fork(struct pt_regs regs);
-asmlinkage int sys_vfork(struct pt_regs regs);
-asmlinkage int sys_pipe(unsigned long __user *fildes);
-struct sigaction;
-asmlinkage long sys_rt_sigaction(int sig,
- const struct sigaction __user *act,
- struct sigaction __user *oact,
- size_t sigsetsize);
-
-#endif /* __KERNEL_SYSCALLS__ */
-
/*
* "Conditional" syscalls
*
diff --git a/include/asm-m68k/unistd.h b/include/asm-m68k/unistd.h
index 751632b904d..3ab716f0fc1 100644
--- a/include/asm-m68k/unistd.h
+++ b/include/asm-m68k/unistd.h
@@ -409,12 +409,6 @@ __syscall_return(type,__res); \
#define __ARCH_WANT_SYS_SIGPROCMASK
#define __ARCH_WANT_SYS_RT_SIGACTION
-#ifdef __KERNEL_SYSCALLS__
-
-static inline _syscall3(int,execve,const char *,file,char **,argv,char **,envp)
-
-#endif /* __KERNEL_SYSCALLS__ */
-
/*
* "Conditional" syscalls
*
diff --git a/include/asm-m68knommu/unistd.h b/include/asm-m68knommu/unistd.h
index 21fdc37c5c2..daafb5d43ef 100644
--- a/include/asm-m68knommu/unistd.h
+++ b/include/asm-m68knommu/unistd.h
@@ -463,61 +463,6 @@ type name(atype a, btype b, ctype c, dtype d, etype e) \
#define __ARCH_WANT_SYS_SIGPROCMASK
#define __ARCH_WANT_SYS_RT_SIGACTION
-#ifdef __KERNEL_SYSCALLS__
-
-#include <linux/compiler.h>
-#include <linux/interrupt.h>
-#include <linux/types.h>
-
-/*
- * we need this inline - forking from kernel space will result
- * in NO COPY ON WRITE (!!!), until an execve is executed. This
- * is no problem, but for the stack. This is handled by not letting
- * main() use the stack at all after fork(). Thus, no function
- * calls - which means inline code for fork too, as otherwise we
- * would use the stack upon exit from 'fork()'.
- *
- * Actually only pause and fork are needed inline, so that there
- * won't be any messing with the stack from main(), but we define
- * some others too.
- */
-#define __NR__exit __NR_exit
-static inline _syscall0(int,pause)
-static inline _syscall0(int,sync)
-static inline _syscall0(pid_t,setsid)
-static inline _syscall3(int,write,int,fd,const char *,buf,off_t,count)
-static inline _syscall3(int,read,int,fd,char *,buf,off_t,count)
-static inline _syscall3(off_t,lseek,int,fd,off_t,offset,int,count)
-static inline _syscall1(int,dup,int,fd)
-static inline _syscall3(int,execve,const char *,file,char **,argv,char **,envp)
-static inline _syscall3(int,open,const char *,file,int,flag,int,mode)
-static inline _syscall1(int,close,int,fd)
-static inline _syscall1(int,_exit,int,exitcode)
-static inline _syscall3(pid_t,waitpid,pid_t,pid,int *,wait_stat,int,options)
-static inline _syscall1(int,delete_module,const char *,name)
-
-static inline pid_t wait(int * wait_stat)
-{
- return waitpid(-1,wait_stat,0);
-}
-asmlinkage long sys_mmap2(unsigned long addr, unsigned long len,
- unsigned long prot, unsigned long flags,
- unsigned long fd, unsigned long pgoff);
-asmlinkage int sys_execve(char *name, char **argv, char **envp);
-asmlinkage int sys_pipe(unsigned long *fildes);
-struct pt_regs;
-int sys_request_irq(unsigned int,
- irqreturn_t (*)(int, void *, struct pt_regs *),
- unsigned long, const char *, void *);
-void sys_free_irq(unsigned int, void *);
-struct sigaction;
-asmlinkage long sys_rt_sigaction(int sig,
- const struct sigaction __user *act,
- struct sigaction __user *oact,
- size_t sigsetsize);
-
-#endif /* __KERNEL_SYSCALLS__ */
-
/*
* "Conditional" syscalls
*
diff --git a/include/asm-mips/unistd.h b/include/asm-mips/unistd.h
index c39142920fe..685c91467e6 100644
--- a/include/asm-mips/unistd.h
+++ b/include/asm-mips/unistd.h
@@ -1212,45 +1212,6 @@ type name (atype a,btype b,ctype c,dtype d,etype e,ftype f) \
# define __ARCH_WANT_COMPAT_SYS_TIME
# endif
-#ifdef __KERNEL_SYSCALLS__
-
-#include <linux/compiler.h>
-#include <linux/types.h>
-#include <linux/linkage.h>
-#include <asm/ptrace.h>
-#include <asm/sim.h>
-
-/*
- * we need this inline - forking from kernel space will result
- * in NO COPY ON WRITE (!!!), until an execve is executed. This
- * is no problem, but for the stack. This is handled by not letting
- * main() use the stack at all after fork(). Thus, no function
- * calls - which means inline code for fork too, as otherwise we
- * would use the stack upon exit from 'fork()'.
- *
- * Actually only pause and fork are needed inline, so that there
- * won't be any messing with the stack from main(), but we define
- * some others too.
- */
-static inline _syscall3(int,execve,const char *,file,char **,argv,char **,envp)
-
-asmlinkage unsigned long sys_mmap(
- unsigned long addr, size_t len,
- int prot, int flags,
- int fd, off_t offset);
-asmlinkage long sys_mmap2(
- unsigned long addr, unsigned long len,
- unsigned long prot, unsigned long flags,
- unsigned long fd, unsigned long pgoff);
-asmlinkage int sys_execve(nabi_no_regargs struct pt_regs regs);
-asmlinkage int sys_pipe(nabi_no_regargs struct pt_regs regs);
-struct sigaction;
-asmlinkage long sys_rt_sigaction(int sig,
- const struct sigaction __user *act,
- struct sigaction __user *oact,
- size_t sigsetsize);
-
-#endif /* __KERNEL_SYSCALLS__ */
#endif /* !__ASSEMBLY__ */
/*
diff --git a/include/asm-parisc/unistd.h b/include/asm-parisc/unistd.h
index 27bcfad1c3e..53b0f5d290e 100644
--- a/include/asm-parisc/unistd.h
+++ b/include/asm-parisc/unistd.h
@@ -952,92 +952,6 @@ type name(type1 arg1, type2 arg2, type3 arg3, type4 arg4, type5 arg5) \
#define __ARCH_WANT_SYS_SIGPROCMASK
#define __ARCH_WANT_SYS_RT_SIGACTION
-/* mmap & mmap2 take 6 arguments */
-#define _syscall6(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4,type5,arg5,type6,arg6) \
-type name(type1 arg1, type2 arg2, type3 arg3, type4 arg4, type5 arg5, type6 arg6) \
-{ \
- return K_INLINE_SYSCALL(name, 6, arg1, arg2, arg3, arg4, arg5, arg6); \
-}
-
-#ifdef __KERNEL_SYSCALLS__
-
-#include <asm/current.h>
-#include <linux/compiler.h>
-#include <linux/types.h>
-#include <linux/syscalls.h>
-
-static inline pid_t setsid(void)
-{
- return sys_setsid();
-}
-
-static inline int write(int fd, const char *buf, off_t count)
-{
- return sys_write(fd, buf, count);
-}
-
-static inline int read(int fd, char *buf, off_t count)
-{
- return sys_read(fd, buf, count);
-}
-
-static inline off_t lseek(int fd, off_t offset, int count)
-{
- return sys_lseek(fd, offset, count);
-}
-
-static inline int dup(int fd)
-{
- return sys_dup(fd);
-}
-
-static inline int execve(char *filename, char * argv [],
- char * envp[])
-{
- extern int __execve(char *, char **, char **, struct task_struct *);
- return __execve(filename, argv, envp, current);
-}
-
-static inline int open(const char *file, int flag, int mode)
-{
- return sys_open(file, flag, mode);
-}
-
-static inline int close(int fd)
-{
- return sys_close(fd);
-}
-
-static inline void _exit(int exitcode)
-{
- sys_exit(exitcode);
-}
-
-static inline pid_t waitpid(pid_t pid, int *wait_stat, int options)
-{
- return sys_wait4(pid, wait_stat, options, NULL);
-}
-
-asmlinkage unsigned long sys_mmap(unsigned long addr, unsigned long len,
- unsigned long prot, unsigned long flags,
- unsigned long fd, unsigned long offset);
-asmlinkage unsigned long sys_mmap2(unsigned long addr, unsigned long len,
- unsigned long prot, unsigned long flags,
- unsigned long fd, unsigned long pgoff);
-struct pt_regs;
-asmlinkage int sys_execve(struct pt_regs *regs);
-int sys_clone(unsigned long clone_flags, unsigned long usp,
- struct pt_regs *regs);
-int sys_vfork(struct pt_regs *regs);
-int sys_pipe(int *fildes);
-struct sigaction;
-asmlinkage long sys_rt_sigaction(int sig,
- const struct sigaction __user *act,
- struct sigaction __user *oact,
- size_t sigsetsize);
-
-#endif /* __KERNEL_SYSCALLS__ */
-
#endif /* __ASSEMBLY__ */
#undef STR
diff --git a/include/asm-powerpc/kprobes.h b/include/asm-powerpc/kprobes.h
index 34e1f89a5fa..2dafa376a63 100644
--- a/include/asm-powerpc/kprobes.h
+++ b/include/asm-powerpc/kprobes.h
@@ -44,6 +44,28 @@ typedef unsigned int kprobe_opcode_t;
#define IS_TDI(instr) (((instr) & 0xfc000000) == 0x08000000)
#define IS_TWI(instr) (((instr) & 0xfc000000) == 0x0c000000)
+/*
+ * 64bit powerpc uses function descriptors.
+ * Handle cases where:
+ * - User passes a <.symbol> or <module:.symbol>
+ * - User passes a <symbol> or <module:symbol>
+ * - User passes a non-existant symbol, kallsyms_lookup_name
+ * returns 0. Don't deref the NULL pointer in that case
+ */
+#define kprobe_lookup_name(name, addr) \
+{ \
+ addr = (kprobe_opcode_t *)kallsyms_lookup_name(name); \
+ if (addr) { \
+ char *colon; \
+ if ((colon = strchr(name, ':')) != NULL) { \
+ colon++; \
+ if (*colon != '\0' && *colon != '.') \
+ addr = *(kprobe_opcode_t **)addr; \
+ } else if (name[0] != '.') \
+ addr = *(kprobe_opcode_t **)addr; \
+ } \
+}
+
#define JPROBE_ENTRY(pentry) (kprobe_opcode_t *)((func_descr_t *)pentry)
#define is_trap(instr) (IS_TW(instr) || IS_TD(instr) || \
diff --git a/include/asm-powerpc/ptrace.h b/include/asm-powerpc/ptrace.h
index 4435efe85d0..4ad77a13f86 100644
--- a/include/asm-powerpc/ptrace.h
+++ b/include/asm-powerpc/ptrace.h
@@ -73,6 +73,8 @@ struct pt_regs {
#ifndef __ASSEMBLY__
#define instruction_pointer(regs) ((regs)->nip)
+#define regs_return_value(regs) ((regs)->gpr[3])
+
#ifdef CONFIG_SMP
extern unsigned long profile_pc(struct pt_regs *regs);
#else
diff --git a/include/asm-powerpc/unistd.h b/include/asm-powerpc/unistd.h
index eb66eae6616..464a48cce7f 100644
--- a/include/asm-powerpc/unistd.h
+++ b/include/asm-powerpc/unistd.h
@@ -479,13 +479,6 @@ type name(type1 arg1, type2 arg2, type3 arg3, type4 arg4, type5 arg5, type6 arg6
#endif
/*
- * System call prototypes.
- */
-#ifdef __KERNEL_SYSCALLS__
-extern int execve(const char *file, char **argv, char **envp);
-#endif /* __KERNEL_SYSCALLS__ */
-
-/*
* "Conditional" syscalls
*
* What we want is __attribute__((weak,alias("sys_ni_syscall"))),
diff --git a/include/asm-s390/ptrace.h b/include/asm-s390/ptrace.h
index 8d2bf65b0b6..7b768c5c68a 100644
--- a/include/asm-s390/ptrace.h
+++ b/include/asm-s390/ptrace.h
@@ -472,6 +472,7 @@ struct user_regs_struct
#define user_mode(regs) (((regs)->psw.mask & PSW_MASK_PSTATE) != 0)
#define instruction_pointer(regs) ((regs)->psw.addr & PSW_ADDR_INSN)
+#define regs_return_value(regs)((regs)->gprs[2])
#define profile_pc(regs) instruction_pointer(regs)
extern void show_regs(struct pt_regs * regs);
#endif
diff --git a/include/asm-s390/unistd.h b/include/asm-s390/unistd.h
index 0361ac5dcde..0cccfd83c45 100644
--- a/include/asm-s390/unistd.h
+++ b/include/asm-s390/unistd.h
@@ -523,57 +523,6 @@ type name(type1 arg1, type2 arg2, type3 arg3, type4 arg4, \
# define __ARCH_WANT_COMPAT_SYS_RT_SIGSUSPEND
# endif
-#ifdef __KERNEL_SYSCALLS__
-
-#include <linux/compiler.h>
-#include <linux/types.h>
-#include <asm/ptrace.h>
-#include <asm/stat.h>
-#include <linux/syscalls.h>
-
-/*
- * we need this inline - forking from kernel space will result
- * in NO COPY ON WRITE (!!!), until an execve is executed. This
- * is no problem, but for the stack. This is handled by not letting
- * main() use the stack at all after fork(). Thus, no function
- * calls - which means inline code for fork too, as otherwise we
- * would use the stack upon exit from 'fork()'.
- *
- * Actually only pause and fork are needed inline, so that there
- * won't be any messing with the stack from main(), but we define
- * some others too.
- */
-#define __NR__exit __NR_exit
-static inline _syscall0(pid_t,setsid)
-static inline _syscall3(int,write,int,fd,const char *,buf,off_t,count)
-static inline _syscall3(int,read,int,fd,char *,buf,off_t,count)
-static inline _syscall3(off_t,lseek,int,fd,off_t,offset,int,count)
-static inline _syscall1(int,dup,int,fd)
-static inline _syscall3(int,execve,const char *,file,char **,argv,char **,envp)
-static inline _syscall3(int,open,const char *,file,int,flag,int,mode)
-static inline _syscall1(int,close,int,fd)
-static inline _syscall2(long,stat,char *,filename,struct stat *,statbuf)
-
-static inline pid_t waitpid(int pid, int *wait_stat, int flags)
-{
- return sys_wait4(pid, wait_stat, flags, NULL);
-}
-struct mmap_arg_struct;
-asmlinkage long sys_mmap2(struct mmap_arg_struct __user *arg);
-
-asmlinkage long sys_execve(struct pt_regs regs);
-asmlinkage long sys_clone(struct pt_regs regs);
-asmlinkage long sys_fork(struct pt_regs regs);
-asmlinkage long sys_vfork(struct pt_regs regs);
-asmlinkage long sys_pipe(unsigned long __user *fildes);
-struct sigaction;
-asmlinkage long sys_rt_sigaction(int sig,
- const struct sigaction __user *act,
- struct sigaction __user *oact,
- size_t sigsetsize);
-
-#endif /* __KERNEL_SYSCALLS__ */
-
/*
* "Conditional" syscalls
*
diff --git a/include/asm-sh/bugs.h b/include/asm-sh/bugs.h
index b4000c8bf31..beeea40f549 100644
--- a/include/asm-sh/bugs.h
+++ b/include/asm-sh/bugs.h
@@ -18,7 +18,7 @@ static void __init check_bugs(void)
{
extern char *get_cpu_subtype(void);
extern unsigned long loops_per_jiffy;
- char *p= &system_utsname.machine[2]; /* "sh" */
+ char *p= &init_utsname()->machine[2]; /* "sh" */
cpu_data->loops_per_jiffy = loops_per_jiffy;
diff --git a/include/asm-sh/unistd.h b/include/asm-sh/unistd.h
index 5d5e9f94def..f1a0cbc966b 100644
--- a/include/asm-sh/unistd.h
+++ b/include/asm-sh/unistd.h
@@ -472,76 +472,6 @@ __syscall_return(type,__sc0); \
#define __ARCH_WANT_SYS_RT_SIGACTION
#define __ARCH_WANT_SYS_RT_SIGSUSPEND
-#ifdef __KERNEL_SYSCALLS__
-
-#include <linux/compiler.h>
-#include <linux/types.h>
-#include <linux/linkage.h>
-#include <asm/ptrace.h>
-
-/*
- * we need this inline - forking from kernel space will result
- * in NO COPY ON WRITE (!!!), until an execve is executed. This
- * is no problem, but for the stack. This is handled by not letting
- * main() use the stack at all after fork(). Thus, no function
- * calls - which means inline code for fork too, as otherwise we
- * would use the stack upon exit from 'fork()'.
- *
- * Actually only pause and fork are needed inline, so that there
- * won't be any messing with the stack from main(), but we define
- * some others too.
- */
-#define __NR__exit __NR_exit
-static __inline__ _syscall0(int,pause)
-static __inline__ _syscall0(int,sync)
-static __inline__ _syscall0(pid_t,setsid)
-static __inline__ _syscall3(int,write,int,fd,const char *,buf,off_t,count)
-static __inline__ _syscall3(int,read,int,fd,char *,buf,off_t,count)
-static __inline__ _syscall3(off_t,lseek,int,fd,off_t,offset,int,count)
-static __inline__ _syscall1(int,dup,int,fd)
-static __inline__ _syscall3(int,execve,const char *,file,char **,argv,char **,envp)
-static __inline__ _syscall3(int,open,const char *,file,int,flag,int,mode)
-static __inline__ _syscall1(int,close,int,fd)
-static __inline__ _syscall3(pid_t,waitpid,pid_t,pid,int *,wait_stat,int,options)
-static __inline__ _syscall1(int,delete_module,const char *,name)
-
-static __inline__ pid_t wait(int * wait_stat)
-{
- return waitpid(-1,wait_stat,0);
-}
-
-asmlinkage long sys_mmap2(
- unsigned long addr, unsigned long len,
- unsigned long prot, unsigned long flags,
- unsigned long fd, unsigned long pgoff);
-asmlinkage int sys_execve(char *ufilename, char **uargv,
- char **uenvp, unsigned long r7,
- struct pt_regs regs);
-asmlinkage int sys_clone(unsigned long clone_flags, unsigned long newsp,
- unsigned long parent_tidptr,
- unsigned long child_tidptr,
- struct pt_regs regs);
-asmlinkage int sys_fork(unsigned long r4, unsigned long r5,
- unsigned long r6, unsigned long r7,
- struct pt_regs regs);
-asmlinkage int sys_vfork(unsigned long r4, unsigned long r5,
- unsigned long r6, unsigned long r7,
- struct pt_regs regs);
-asmlinkage int sys_pipe(unsigned long r4, unsigned long r5,
- unsigned long r6, unsigned long r7,
- struct pt_regs regs);
-asmlinkage ssize_t sys_pread_wrapper(unsigned int fd, char *buf,
- size_t count, long dummy, loff_t pos);
-asmlinkage ssize_t sys_pwrite_wrapper(unsigned int fd, const char *buf,
- size_t count, long dummy, loff_t pos);
-struct sigaction;
-asmlinkage long sys_rt_sigaction(int sig,
- const struct sigaction __user *act,
- struct sigaction __user *oact,
- size_t sigsetsize);
-
-#endif /* __KERNEL_SYSCALLS__ */
-
/*
* "Conditional" syscalls
*
diff --git a/include/asm-sh64/unistd.h b/include/asm-sh64/unistd.h
index c113566bef3..ee7828b27ad 100644
--- a/include/asm-sh64/unistd.h
+++ b/include/asm-sh64/unistd.h
@@ -513,47 +513,6 @@ __syscall_return(type,__sc0); \
#define __ARCH_WANT_SYS_SIGPROCMASK
#define __ARCH_WANT_SYS_RT_SIGACTION
-#ifdef __KERNEL_SYSCALLS__
-
-/* Copy from sh */
-#include <linux/compiler.h>
-#include <linux/types.h>
-#include <asm/ptrace.h>
-
-/*
- * we need this inline - forking from kernel space will result
- * in NO COPY ON WRITE (!!!), until an execve is executed. This
- * is no problem, but for the stack. This is handled by not letting
- * main() use the stack at all after fork(). Thus, no function
- * calls - which means inline code for fork too, as otherwise we
- * would use the stack upon exit from 'fork()'.
- *
- * Actually only pause and fork are needed inline, so that there
- * won't be any messing with the stack from main(), but we define
- * some others too.
- */
-#define __NR__exit __NR_exit
-static inline _syscall0(int,pause)
-static inline _syscall1(int,setup,int,magic)
-static inline _syscall0(int,sync)
-static inline _syscall0(pid_t,setsid)
-static inline _syscall3(int,write,int,fd,const char *,buf,off_t,count)
-static inline _syscall3(int,read,int,fd,char *,buf,off_t,count)
-static inline _syscall3(off_t,lseek,int,fd,off_t,offset,int,count)
-static inline _syscall1(int,dup,int,fd)
-static inline _syscall3(int,execve,const char *,file,char **,argv,char **,envp)
-static inline _syscall3(int,open,const char *,file,int,flag,int,mode)
-static inline _syscall1(int,close,int,fd)
-static inline _syscall1(int,_exit,int,exitcode)
-static inline _syscall3(pid_t,waitpid,pid_t,pid,int *,wait_stat,int,options)
-static inline _syscall1(int,delete_module,const char *,name)
-
-static inline pid_t wait(int * wait_stat)
-{
- return waitpid(-1,wait_stat,0);
-}
-#endif /* __KERNEL_SYSCALLS__ */
-
/*
* "Conditional" syscalls
*
diff --git a/include/asm-sparc/unistd.h b/include/asm-sparc/unistd.h
index 2553762465c..c7a495afc82 100644
--- a/include/asm-sparc/unistd.h
+++ b/include/asm-sparc/unistd.h
@@ -478,53 +478,6 @@ return -1; \
#define __ARCH_WANT_SYS_SIGPROCMASK
#define __ARCH_WANT_SYS_RT_SIGSUSPEND
-#ifdef __KERNEL_SYSCALLS__
-
-#include <linux/compiler.h>
-#include <linux/types.h>
-
-/*
- * we need this inline - forking from kernel space will result
- * in NO COPY ON WRITE (!!!), until an execve is executed. This
- * is no problem, but for the stack. This is handled by not letting
- * main() use the stack at all after fork(). Thus, no function
- * calls - which means inline code for fork too, as otherwise we
- * would use the stack upon exit from 'fork()'.
- *
- * Actually only pause and fork are needed inline, so that there
- * won't be any messing with the stack from main(), but we define
- * some others too.
- */
-#define __NR__exit __NR_exit
-static __inline__ _syscall0(pid_t,setsid)
-static __inline__ _syscall3(int,write,int,fd,__const__ char *,buf,off_t,count)
-static __inline__ _syscall3(int,read,int,fd,char *,buf,off_t,count)
-static __inline__ _syscall3(off_t,lseek,int,fd,off_t,offset,int,count)
-static __inline__ _syscall1(int,dup,int,fd)
-static __inline__ _syscall3(int,execve,__const__ char *,file,char **,argv,char **,envp)
-static __inline__ _syscall3(int,open,__const__ char *,file,int,flag,int,mode)
-static __inline__ _syscall1(int,close,int,fd)
-static __inline__ _syscall3(pid_t,waitpid,pid_t,pid,int *,wait_stat,int,options)
-
-#include <linux/linkage.h>
-
-asmlinkage unsigned long sys_mmap(
- unsigned long addr, unsigned long len,
- unsigned long prot, unsigned long flags,
- unsigned long fd, unsigned long off);
-asmlinkage unsigned long sys_mmap2(
- unsigned long addr, unsigned long len,
- unsigned long prot, unsigned long flags,
- unsigned long fd, unsigned long pgoff);
-struct sigaction;
-asmlinkage long sys_rt_sigaction(int sig,
- const struct sigaction __user *act,
- struct sigaction __user *oact,
- void __user *restorer,
- size_t sigsetsize);
-
-#endif /* __KERNEL_SYSCALLS__ */
-
/*
* "Conditional" syscalls
*
diff --git a/include/asm-sparc64/unistd.h b/include/asm-sparc64/unistd.h
index badc73fdcb9..124cf076717 100644
--- a/include/asm-sparc64/unistd.h
+++ b/include/asm-sparc64/unistd.h
@@ -445,48 +445,6 @@ if (__res>=0) \
errno = -__res; \
return -1; \
}
-#ifdef __KERNEL_SYSCALLS__
-
-#include <linux/compiler.h>
-#include <linux/types.h>
-
-/*
- * we need this inline - forking from kernel space will result
- * in NO COPY ON WRITE (!!!), until an execve is executed. This
- * is no problem, but for the stack. This is handled by not letting
- * main() use the stack at all after fork(). Thus, no function
- * calls - which means inline code for fork too, as otherwise we
- * would use the stack upon exit from 'fork()'.
- *
- * Actually only pause and fork are needed inline, so that there
- * won't be any messing with the stack from main(), but we define
- * some others too.
- */
-#define __NR__exit __NR_exit
-static __inline__ _syscall0(pid_t,setsid)
-static __inline__ _syscall3(int,write,int,fd,__const__ char *,buf,off_t,count)
-static __inline__ _syscall3(int,read,int,fd,char *,buf,off_t,count)
-static __inline__ _syscall3(off_t,lseek,int,fd,off_t,offset,int,count)
-static __inline__ _syscall1(int,dup,int,fd)
-static __inline__ _syscall3(int,execve,__const__ char *,file,char **,argv,char **,envp)
-static __inline__ _syscall3(int,open,__const__ char *,file,int,flag,int,mode)
-static __inline__ _syscall1(int,close,int,fd)
-static __inline__ _syscall3(pid_t,waitpid,pid_t,pid,int *,wait_stat,int,options)
-
-#include <linux/linkage.h>
-
-asmlinkage unsigned long sys_mmap(
- unsigned long addr, unsigned long len,
- unsigned long prot, unsigned long flags,
- unsigned long fd, unsigned long off);
-struct sigaction;
-asmlinkage long sys_rt_sigaction(int sig,
- const struct sigaction __user *act,
- struct sigaction __user *oact,
- void __user *restorer,
- size_t sigsetsize);
-
-#endif /* __KERNEL_SYSCALLS__ */
/* sysconf options, for SunOS compatibility */
#define _SC_ARG_MAX 1
diff --git a/include/asm-um/unistd.h b/include/asm-um/unistd.h
index afccfcaa9ea..732c83f04c3 100644
--- a/include/asm-um/unistd.h
+++ b/include/asm-um/unistd.h
@@ -37,34 +37,6 @@ extern int um_execve(const char *file, char *const argv[], char *const env[]);
#define __ARCH_WANT_SYS_RT_SIGSUSPEND
#endif
-#ifdef __KERNEL_SYSCALLS__
-
-#include <linux/compiler.h>
-#include <linux/types.h>
-
-static inline int execve(const char *filename, char *const argv[],
- char *const envp[])
-{
- mm_segment_t fs;
- int ret;
-
- fs = get_fs();
- set_fs(KERNEL_DS);
- ret = um_execve(filename, argv, envp);
- set_fs(fs);
-
- if (ret >= 0)
- return ret;
-
- errno = -(long)ret;
- return -1;
-}
-
-int sys_execve(char *file, char **argv, char **env);
-
-#endif /* __KERNEL_SYSCALLS__ */
-
-#undef __KERNEL_SYSCALLS__
#include "asm/arch/unistd.h"
#endif /* _UM_UNISTD_H_*/
diff --git a/include/asm-v850/unistd.h b/include/asm-v850/unistd.h
index 552b7c873a5..737401e7d3a 100644
--- a/include/asm-v850/unistd.h
+++ b/include/asm-v850/unistd.h
@@ -387,57 +387,6 @@ type name (atype a, btype b, ctype c, dtype d, etype e, ftype f) \
#define __ARCH_WANT_SYS_SIGPROCMASK
#define __ARCH_WANT_SYS_RT_SIGACTION
-#ifdef __KERNEL_SYSCALLS__
-
-#include <linux/compiler.h>
-#include <linux/types.h>
-
-/*
- * we need this inline - forking from kernel space will result
- * in NO COPY ON WRITE (!!!), until an execve is executed. This
- * is no problem, but for the stack. This is handled by not letting
- * main() use the stack at all after fork(). Thus, no function
- * calls - which means inline code for fork too, as otherwise we
- * would use the stack upon exit from 'fork()'.
- *
- * Actually only pause and fork are needed inline, so that there
- * won't be any messing with the stack from main(), but we define
- * some others too.
- */
-#define __NR__exit __NR_exit
-extern inline _syscall0(pid_t,setsid)
-extern inline _syscall3(int,write,int,fd,const char *,buf,off_t,count)
-extern inline _syscall3(int,read,int,fd,char *,buf,off_t,count)
-extern inline _syscall3(off_t,lseek,int,fd,off_t,offset,int,count)
-extern inline _syscall1(int,dup,int,fd)
-extern inline _syscall3(int,execve,const char *,file,char **,argv,char **,envp)
-extern inline _syscall3(int,open,const char *,file,int,flag,int,mode)
-extern inline _syscall1(int,close,int,fd)
-extern inline _syscall1(int,_exit,int,exitcode)
-extern inline _syscall3(pid_t,waitpid,pid_t,pid,int *,wait_stat,int,options)
-
-extern inline pid_t wait(int * wait_stat)
-{
- return waitpid (-1, wait_stat, 0);
-}
-
-unsigned long sys_mmap(unsigned long addr, size_t len,
- unsigned long prot, unsigned long flags,
- unsigned long fd, off_t offset);
-unsigned long sys_mmap2(unsigned long addr, size_t len,
- unsigned long prot, unsigned long flags,
- unsigned long fd, unsigned long pgoff);
-struct pt_regs;
-int sys_execve (char *name, char **argv, char **envp, struct pt_regs *regs);
-int sys_pipe (int *fildes);
-struct sigaction;
-asmlinkage long sys_rt_sigaction(int sig,
- const struct sigaction __user *act,
- struct sigaction __user *oact,
- size_t sigsetsize);
-
-#endif /* __KERNEL_SYSCALLS__ */
-
/*
* "Conditional" syscalls
*/
diff --git a/include/asm-x86_64/ptrace.h b/include/asm-x86_64/ptrace.h
index ab827dc381d..5ea84dbb1e9 100644
--- a/include/asm-x86_64/ptrace.h
+++ b/include/asm-x86_64/ptrace.h
@@ -39,6 +39,8 @@ struct pt_regs {
#define user_mode(regs) (!!((regs)->cs & 3))
#define user_mode_vm(regs) user_mode(regs)
#define instruction_pointer(regs) ((regs)->rip)
+#define regs_return_value(regs) ((regs)->rax)
+
extern unsigned long profile_pc(struct pt_regs *regs);
void signal_fault(struct pt_regs *regs, void __user *frame, char *where);
diff --git a/include/asm-x86_64/unistd.h b/include/asm-x86_64/unistd.h
index 6137146516d..777288eb7e7 100644
--- a/include/asm-x86_64/unistd.h
+++ b/include/asm-x86_64/unistd.h
@@ -620,10 +620,11 @@ __SYSCALL(__NR_vmsplice, sys_vmsplice)
#define __NR_move_pages 279
__SYSCALL(__NR_move_pages, sys_move_pages)
-#ifdef __KERNEL__
-
#define __NR_syscall_max __NR_move_pages
+
+#ifdef __KERNEL__
#include <linux/err.h>
+#endif
#ifndef __NO_STUBS
@@ -663,8 +664,6 @@ do { \
#define __ARCH_WANT_SYS_TIME
#define __ARCH_WANT_COMPAT_SYS_TIME
-#ifndef __KERNEL_SYSCALLS__
-
#define __syscall "syscall"
#define _syscall0(type,name) \
@@ -746,83 +745,7 @@ __asm__ volatile ("movq %5,%%r10 ; movq %6,%%r8 ; movq %7,%%r9 ; " __syscall \
__syscall_return(type,__res); \
}
-#else /* __KERNEL_SYSCALLS__ */
-
-#include <linux/syscalls.h>
-#include <asm/ptrace.h>
-
-/*
- * we need this inline - forking from kernel space will result
- * in NO COPY ON WRITE (!!!), until an execve is executed. This
- * is no problem, but for the stack. This is handled by not letting
- * main() use the stack at all after fork(). Thus, no function
- * calls - which means inline code for fork too, as otherwise we
- * would use the stack upon exit from 'fork()'.
- *
- * Actually only pause and fork are needed inline, so that there
- * won't be any messing with the stack from main(), but we define
- * some others too.
- */
-#define __NR__exit __NR_exit
-
-static inline pid_t setsid(void)
-{
- return sys_setsid();
-}
-
-static inline ssize_t write(unsigned int fd, char * buf, size_t count)
-{
- return sys_write(fd, buf, count);
-}
-
-static inline ssize_t read(unsigned int fd, char * buf, size_t count)
-{
- return sys_read(fd, buf, count);
-}
-
-static inline off_t lseek(unsigned int fd, off_t offset, unsigned int origin)
-{
- return sys_lseek(fd, offset, origin);
-}
-
-static inline long dup(unsigned int fd)
-{
- return sys_dup(fd);
-}
-
-/* implemented in asm in arch/x86_64/kernel/entry.S */
-extern int execve(const char *, char * const *, char * const *);
-
-static inline long open(const char * filename, int flags, int mode)
-{
- return sys_open(filename, flags, mode);
-}
-
-static inline long close(unsigned int fd)
-{
- return sys_close(fd);
-}
-
-static inline pid_t waitpid(int pid, int * wait_stat, int flags)
-{
- return sys_wait4(pid, wait_stat, flags, NULL);
-}
-
-extern long sys_mmap(unsigned long addr, unsigned long len,
- unsigned long prot, unsigned long flags,
- unsigned long fd, unsigned long off);
-
-extern int sys_modify_ldt(int func, void *ptr, unsigned long bytecount);
-
-asmlinkage long sys_execve(char *name, char **argv, char **envp,
- struct pt_regs regs);
-asmlinkage long sys_clone(unsigned long clone_flags, unsigned long newsp,
- void *parent_tid, void *child_tid,
- struct pt_regs regs);
-asmlinkage long sys_fork(struct pt_regs regs);
-asmlinkage long sys_vfork(struct pt_regs regs);
-asmlinkage long sys_pipe(int *fildes);
-
+#ifdef __KERNEL__
#ifndef __ASSEMBLY__
#include <linux/linkage.h>
@@ -839,8 +762,8 @@ asmlinkage long sys_rt_sigaction(int sig,
size_t sigsetsize);
#endif /* __ASSEMBLY__ */
-
-#endif /* __KERNEL_SYSCALLS__ */
+#endif /* __KERNEL__ */
+#endif /* __NO_STUBS */
/*
* "Conditional" syscalls
@@ -850,8 +773,4 @@ asmlinkage long sys_rt_sigaction(int sig,
*/
#define cond_syscall(x) asm(".weak\t" #x "\n\t.set\t" #x ",sys_ni_syscall")
-#endif /* __NO_STUBS */
-
-#endif /* __KERNEL__ */
-
#endif /* _ASM_X86_64_UNISTD_H_ */
diff --git a/include/asm-xtensa/unistd.h b/include/asm-xtensa/unistd.h
index 5e1b99dc4ab..411f810a55c 100644
--- a/include/asm-xtensa/unistd.h
+++ b/include/asm-xtensa/unistd.h
@@ -402,11 +402,6 @@ __asm__ __volatile__ ( \
__syscall_return(type,__res); \
}
-
-#ifdef __KERNEL_SYSCALLS__
-static __inline__ _syscall3(int,execve,const char*,file,char**,argv,char**,envp)
-#endif
-
/*
* "Conditional" syscalls
*
diff --git a/include/linux/compat.h b/include/linux/compat.h
index 9760753e662..6f110957cc9 100644
--- a/include/linux/compat.h
+++ b/include/linux/compat.h
@@ -13,6 +13,7 @@
#include <asm/compat.h>
#include <asm/siginfo.h>
+#include <asm/signal.h>
#define compat_jiffies_to_clock_t(x) \
(((unsigned long)(x) * COMPAT_USER_HZ) / HZ)
diff --git a/include/linux/console_struct.h b/include/linux/console_struct.h
index 25423f79bf9..ed6c0fee1ac 100644
--- a/include/linux/console_struct.h
+++ b/include/linux/console_struct.h
@@ -54,7 +54,7 @@ struct vc_data {
struct tty_struct *vc_tty; /* TTY we are attached to */
/* data for manual vt switching */
struct vt_mode vt_mode;
- int vt_pid;
+ struct pid *vt_pid;
int vt_newvt;
wait_queue_head_t paste_wait;
/* mode flags */
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 2e29a2edaee..91c0b2a32a9 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -684,7 +684,8 @@ extern struct block_device *I_BDEV(struct inode *inode);
struct fown_struct {
rwlock_t lock; /* protects pid, uid, euid fields */
- int pid; /* pid or -pgrp where SIGIO should be sent */
+ struct pid *pid; /* pid or -pgrp where SIGIO should be sent */
+ enum pid_type pid_type; /* Kind of process group SIGIO should be sent to */
uid_t uid, euid; /* uid/euid of process setting the owner */
int signum; /* posix.1b rt signal to be delivered on IO */
};
@@ -880,8 +881,10 @@ extern void kill_fasync(struct fasync_struct **, int, int);
/* only for net: no internal synchronization */
extern void __kill_fasync(struct fasync_struct *, int, int);
+extern int __f_setown(struct file *filp, struct pid *, enum pid_type, int force);
extern int f_setown(struct file *filp, unsigned long arg, int force);
extern void f_delown(struct file *filp);
+extern pid_t f_getown(struct file *filp);
extern int send_sigurg(struct fown_struct *fown);
/*
diff --git a/include/linux/genalloc.h b/include/linux/genalloc.h
index 690c42803d2..9869ef3674a 100644
--- a/include/linux/genalloc.h
+++ b/include/linux/genalloc.h
@@ -31,5 +31,6 @@ struct gen_pool_chunk {
extern struct gen_pool *gen_pool_create(int, int);
extern int gen_pool_add(struct gen_pool *, unsigned long, size_t, int);
+extern void gen_pool_destroy(struct gen_pool *);
extern unsigned long gen_pool_alloc(struct gen_pool *, size_t);
extern void gen_pool_free(struct gen_pool *, unsigned long, size_t);
diff --git a/include/linux/init_task.h b/include/linux/init_task.h
index 60aac2cea0c..33c5daacc74 100644
--- a/include/linux/init_task.h
+++ b/include/linux/init_task.h
@@ -4,7 +4,9 @@
#include <linux/file.h>
#include <linux/rcupdate.h>
#include <linux/irqflags.h>
+#include <linux/utsname.h>
#include <linux/lockdep.h>
+#include <linux/ipc.h>
#define INIT_FDTABLE \
{ \
@@ -68,6 +70,15 @@
.session = 1, \
}
+extern struct nsproxy init_nsproxy;
+#define INIT_NSPROXY(nsproxy) { \
+ .count = ATOMIC_INIT(1), \
+ .nslock = SPIN_LOCK_UNLOCKED, \
+ .uts_ns = &init_uts_ns, \
+ .namespace = NULL, \
+ INIT_IPC_NS(ipc_ns) \
+}
+
#define INIT_SIGHAND(sighand) { \
.count = ATOMIC_INIT(1), \
.action = { { { .sa_handler = NULL, } }, }, \
@@ -117,6 +128,7 @@ extern struct group_info init_groups;
.files = &init_files, \
.signal = &init_signals, \
.sighand = &init_sighand, \
+ .nsproxy = &init_nsproxy, \
.pending = { \
.list = LIST_HEAD_INIT(tsk.pending.list), \
.signal = {{0}}}, \
diff --git a/include/linux/ipc.h b/include/linux/ipc.h
index b291189737e..d9e2b3f36c3 100644
--- a/include/linux/ipc.h
+++ b/include/linux/ipc.h
@@ -2,6 +2,7 @@
#define _LINUX_IPC_H
#include <linux/types.h>
+#include <linux/kref.h>
#define IPC_PRIVATE ((__kernel_key_t) 0)
@@ -68,6 +69,59 @@ struct kern_ipc_perm
void *security;
};
+struct ipc_ids;
+struct ipc_namespace {
+ struct kref kref;
+ struct ipc_ids *ids[3];
+
+ int sem_ctls[4];
+ int used_sems;
+
+ int msg_ctlmax;
+ int msg_ctlmnb;
+ int msg_ctlmni;
+
+ size_t shm_ctlmax;
+ size_t shm_ctlall;
+ int shm_ctlmni;
+ int shm_tot;
+};
+
+extern struct ipc_namespace init_ipc_ns;
+
+#ifdef CONFIG_SYSVIPC
+#define INIT_IPC_NS(ns) .ns = &init_ipc_ns,
+#else
+#define INIT_IPC_NS(ns)
+#endif
+
+#ifdef CONFIG_IPC_NS
+extern void free_ipc_ns(struct kref *kref);
+extern int copy_ipcs(unsigned long flags, struct task_struct *tsk);
+extern int unshare_ipcs(unsigned long flags, struct ipc_namespace **ns);
+#else
+static inline int copy_ipcs(unsigned long flags, struct task_struct *tsk)
+{
+ return 0;
+}
+#endif
+
+static inline struct ipc_namespace *get_ipc_ns(struct ipc_namespace *ns)
+{
+#ifdef CONFIG_IPC_NS
+ if (ns)
+ kref_get(&ns->kref);
+#endif
+ return ns;
+}
+
+static inline void put_ipc_ns(struct ipc_namespace *ns)
+{
+#ifdef CONFIG_IPC_NS
+ kref_put(&ns->kref, free_ipc_ns);
+#endif
+}
+
#endif /* __KERNEL__ */
#endif /* _LINUX_IPC_H */
diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h
index 8bf6702da2a..ac4c0559f75 100644
--- a/include/linux/kprobes.h
+++ b/include/linux/kprobes.h
@@ -77,6 +77,12 @@ struct kprobe {
/* location of the probe point */
kprobe_opcode_t *addr;
+ /* Allow user to indicate symbol name of the probe point */
+ char *symbol_name;
+
+ /* Offset into the symbol */
+ unsigned int offset;
+
/* Called before addr is executed. */
kprobe_pre_handler_t pre_handler;
@@ -196,7 +202,7 @@ void unregister_kretprobe(struct kretprobe *rp);
struct kretprobe_instance *get_free_rp_inst(struct kretprobe *rp);
void add_rp_inst(struct kretprobe_instance *ri);
void kprobe_flush_task(struct task_struct *tk);
-void recycle_rp_inst(struct kretprobe_instance *ri);
+void recycle_rp_inst(struct kretprobe_instance *ri, struct hlist_head *head);
#else /* CONFIG_KPROBES */
#define __kprobes /**/
diff --git a/include/linux/lockd/bind.h b/include/linux/lockd/bind.h
index b054debef2e..81e3a185f95 100644
--- a/include/linux/lockd/bind.h
+++ b/include/linux/lockd/bind.h
@@ -30,7 +30,7 @@ extern struct nlmsvc_binding * nlmsvc_ops;
* Functions exported by the lockd module
*/
extern int nlmclnt_proc(struct inode *, int, struct file_lock *);
-extern int lockd_up(void);
+extern int lockd_up(int proto);
extern void lockd_down(void);
#endif /* LINUX_LOCKD_BIND_H */
diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h
index 0d92c468d55..47b7dbd647a 100644
--- a/include/linux/lockd/lockd.h
+++ b/include/linux/lockd/lockd.h
@@ -80,7 +80,7 @@ struct nlm_wait;
/*
* Memory chunk for NLM client RPC request.
*/
-#define NLMCLNT_OHSIZE (sizeof(system_utsname.nodename)+10)
+#define NLMCLNT_OHSIZE (sizeof(utsname()->nodename)+10)
struct nlm_rqst {
unsigned int a_flags; /* initial RPC task flags */
struct nlm_host * a_host; /* host handle */
diff --git a/include/linux/module.h b/include/linux/module.h
index 2c599175c58..4b2d8091a41 100644
--- a/include/linux/module.h
+++ b/include/linux/module.h
@@ -320,6 +320,8 @@ struct module
/* Am I GPL-compatible */
int license_gplok;
+ unsigned int taints; /* same bits as kernel:tainted */
+
#ifdef CONFIG_MODULE_UNLOAD
/* Reference counts */
struct module_ref ref[NR_CPUS];
diff --git a/include/linux/namespace.h b/include/linux/namespace.h
index 3abc8e3b487..d137009f0b2 100644
--- a/include/linux/namespace.h
+++ b/include/linux/namespace.h
@@ -4,6 +4,7 @@
#include <linux/mount.h>
#include <linux/sched.h>
+#include <linux/nsproxy.h>
struct namespace {
atomic_t count;
@@ -26,11 +27,8 @@ static inline void put_namespace(struct namespace *namespace)
static inline void exit_namespace(struct task_struct *p)
{
- struct namespace *namespace = p->namespace;
+ struct namespace *namespace = p->nsproxy->namespace;
if (namespace) {
- task_lock(p);
- p->namespace = NULL;
- task_unlock(p);
put_namespace(namespace);
}
}
diff --git a/include/linux/nfsd/nfsd.h b/include/linux/nfsd/nfsd.h
index 2dcad295fec..e1dbc86c270 100644
--- a/include/linux/nfsd/nfsd.h
+++ b/include/linux/nfsd/nfsd.h
@@ -140,6 +140,11 @@ struct posix_acl *nfsd_get_posix_acl(struct svc_fh *, int);
int nfsd_set_posix_acl(struct svc_fh *, int, struct posix_acl *);
#endif
+enum vers_op {NFSD_SET, NFSD_CLEAR, NFSD_TEST, NFSD_AVAIL };
+int nfsd_vers(int vers, enum vers_op change);
+void nfsd_reset_versions(void);
+int nfsd_create_serv(void);
+
/*
* NFSv4 State
diff --git a/include/linux/nfsd/nfsfh.h b/include/linux/nfsd/nfsfh.h
index 31a3cb617ce..069257ea99a 100644
--- a/include/linux/nfsd/nfsfh.h
+++ b/include/linux/nfsd/nfsfh.h
@@ -290,8 +290,9 @@ fill_post_wcc(struct svc_fh *fhp)
* vfs.c:nfsd_rename as it needs to grab 2 i_mutex's at once
* so, any changes here should be reflected there.
*/
+
static inline void
-fh_lock(struct svc_fh *fhp)
+fh_lock_nested(struct svc_fh *fhp, unsigned int subclass)
{
struct dentry *dentry = fhp->fh_dentry;
struct inode *inode;
@@ -310,11 +311,17 @@ fh_lock(struct svc_fh *fhp)
}
inode = dentry->d_inode;
- mutex_lock(&inode->i_mutex);
+ mutex_lock_nested(&inode->i_mutex, subclass);
fill_pre_wcc(fhp);
fhp->fh_locked = 1;
}
+static inline void
+fh_lock(struct svc_fh *fhp)
+{
+ fh_lock_nested(fhp, I_MUTEX_NORMAL);
+}
+
/*
* Unlock a file handle/inode
*/
diff --git a/include/linux/nfsd/syscall.h b/include/linux/nfsd/syscall.h
index dae0faea280..8bcddccb6c4 100644
--- a/include/linux/nfsd/syscall.h
+++ b/include/linux/nfsd/syscall.h
@@ -38,21 +38,6 @@
#define NFSCTL_GETFD 7 /* get an fh by path (used by mountd) */
#define NFSCTL_GETFS 8 /* get an fh by path with max FH len */
-/*
- * Macros used to set version
- */
-#define NFSCTL_VERSET(_cltbits, _v) ((_cltbits) |= (1 << (_v)))
-#define NFSCTL_VERUNSET(_cltbits, _v) ((_cltbits) &= ~(1 << (_v)))
-#define NFSCTL_VERISSET(_cltbits, _v) ((_cltbits) & (1 << (_v)))
-
-#if defined(CONFIG_NFSD_V4)
-#define NFSCTL_VERALL (0x1c /* 0b011100 */)
-#elif defined(CONFIG_NFSD_V3)
-#define NFSCTL_VERALL (0x0c /* 0b001100 */)
-#else
-#define NFSCTL_VERALL (0x04 /* 0b000100 */)
-#endif
-
/* SVC */
struct nfsctl_svc {
unsigned short svc_port;
@@ -134,8 +119,6 @@ extern int exp_delclient(struct nfsctl_client *ncp);
extern int exp_export(struct nfsctl_export *nxp);
extern int exp_unexport(struct nfsctl_export *nxp);
-extern unsigned int nfsd_versbits;
-
#endif /* __KERNEL__ */
#endif /* NFSD_SYSCALL_H */
diff --git a/include/linux/nodemask.h b/include/linux/nodemask.h
index 1a9ef3e627d..5dce5c21822 100644
--- a/include/linux/nodemask.h
+++ b/include/linux/nodemask.h
@@ -352,6 +352,7 @@ extern nodemask_t node_possible_map;
#define node_possible(node) node_isset((node), node_possible_map)
#define first_online_node first_node(node_online_map)
#define next_online_node(nid) next_node((nid), node_online_map)
+int highest_possible_node_id(void);
#else
#define num_online_nodes() 1
#define num_possible_nodes() 1
@@ -359,6 +360,7 @@ extern nodemask_t node_possible_map;
#define node_possible(node) ((node) == 0)
#define first_online_node 0
#define next_online_node(nid) (MAX_NUMNODES)
+#define highest_possible_node_id() 0
#endif
#define any_online_node(mask) \
diff --git a/include/linux/nsproxy.h b/include/linux/nsproxy.h
new file mode 100644
index 00000000000..f6baecdeecd
--- /dev/null
+++ b/include/linux/nsproxy.h
@@ -0,0 +1,52 @@
+#ifndef _LINUX_NSPROXY_H
+#define _LINUX_NSPROXY_H
+
+#include <linux/spinlock.h>
+#include <linux/sched.h>
+
+struct namespace;
+struct uts_namespace;
+struct ipc_namespace;
+
+/*
+ * A structure to contain pointers to all per-process
+ * namespaces - fs (mount), uts, network, sysvipc, etc.
+ *
+ * 'count' is the number of tasks holding a reference.
+ * The count for each namespace, then, will be the number
+ * of nsproxies pointing to it, not the number of tasks.
+ *
+ * The nsproxy is shared by tasks which share all namespaces.
+ * As soon as a single namespace is cloned or unshared, the
+ * nsproxy is copied.
+ */
+struct nsproxy {
+ atomic_t count;
+ spinlock_t nslock;
+ struct uts_namespace *uts_ns;
+ struct ipc_namespace *ipc_ns;
+ struct namespace *namespace;
+};
+extern struct nsproxy init_nsproxy;
+
+struct nsproxy *dup_namespaces(struct nsproxy *orig);
+int copy_namespaces(int flags, struct task_struct *tsk);
+void get_task_namespaces(struct task_struct *tsk);
+void free_nsproxy(struct nsproxy *ns);
+
+static inline void put_nsproxy(struct nsproxy *ns)
+{
+ if (atomic_dec_and_test(&ns->count)) {
+ free_nsproxy(ns);
+ }
+}
+
+static inline void exit_task_namespaces(struct task_struct *p)
+{
+ struct nsproxy *ns = p->nsproxy;
+ if (ns) {
+ put_nsproxy(ns);
+ p->nsproxy = NULL;
+ }
+}
+#endif
diff --git a/include/linux/pid.h b/include/linux/pid.h
index 93da7e2d9f3..17b9e04d358 100644
--- a/include/linux/pid.h
+++ b/include/linux/pid.h
@@ -68,6 +68,8 @@ extern struct task_struct *FASTCALL(pid_task(struct pid *pid, enum pid_type));
extern struct task_struct *FASTCALL(get_pid_task(struct pid *pid,
enum pid_type));
+extern struct pid *get_task_pid(struct task_struct *task, enum pid_type type);
+
/*
* attach_pid() and detach_pid() must be called with the tasklist_lock
* write-held.
@@ -89,33 +91,42 @@ extern struct pid *FASTCALL(find_pid(int nr));
* Lookup a PID in the hash table, and return with it's count elevated.
*/
extern struct pid *find_get_pid(int nr);
+extern struct pid *find_ge_pid(int nr);
extern struct pid *alloc_pid(void);
extern void FASTCALL(free_pid(struct pid *pid));
-#define pid_next(task, type) \
- ((task)->pids[(type)].node.next)
+static inline pid_t pid_nr(struct pid *pid)
+{
+ pid_t nr = 0;
+ if (pid)
+ nr = pid->nr;
+ return nr;
+}
+
-#define pid_next_task(task, type) \
- hlist_entry(pid_next(task, type), struct task_struct, \
- pids[(type)].node)
+#define do_each_task_pid(who, type, task) \
+ do { \
+ struct hlist_node *pos___; \
+ struct pid *pid___ = find_pid(who); \
+ if (pid___ != NULL) \
+ hlist_for_each_entry_rcu((task), pos___, \
+ &pid___->tasks[type], pids[type].node) {
+#define while_each_task_pid(who, type, task) \
+ } \
+ } while (0)
-/* We could use hlist_for_each_entry_rcu here but it takes more arguments
- * than the do_each_task_pid/while_each_task_pid. So we roll our own
- * to preserve the existing interface.
- */
-#define do_each_task_pid(who, type, task) \
- if ((task = find_task_by_pid_type(type, who))) { \
- prefetch(pid_next(task, type)); \
- do {
-
-#define while_each_task_pid(who, type, task) \
- } while (pid_next(task, type) && ({ \
- task = pid_next_task(task, type); \
- rcu_dereference(task); \
- prefetch(pid_next(task, type)); \
- 1; }) ); \
- }
+
+#define do_each_pid_task(pid, type, task) \
+ do { \
+ struct hlist_node *pos___; \
+ if (pid != NULL) \
+ hlist_for_each_entry_rcu((task), pos___, \
+ &pid->tasks[type], pids[type].node) {
+
+#define while_each_pid_task(pid, type, task) \
+ } \
+ } while (0)
#endif /* _LINUX_PID_H */
diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h
index 57f70bc8b24..87dec8fe6de 100644
--- a/include/linux/proc_fs.h
+++ b/include/linux/proc_fs.h
@@ -244,13 +244,15 @@ static inline void kclist_add(struct kcore_list *new, void *addr, size_t size)
extern void kclist_add(struct kcore_list *, void *, size_t);
#endif
+union proc_op {
+ int (*proc_get_link)(struct inode *, struct dentry **, struct vfsmount **);
+ int (*proc_read)(struct task_struct *task, char *page);
+};
+
struct proc_inode {
struct pid *pid;
int fd;
- union {
- int (*proc_get_link)(struct inode *, struct dentry **, struct vfsmount **);
- int (*proc_read)(struct task_struct *task, char *page);
- } op;
+ union proc_op op;
struct proc_dir_entry *pde;
struct inode vfs_inode;
};
diff --git a/include/linux/pspace.h b/include/linux/pspace.h
new file mode 100644
index 00000000000..91d48b8b2d9
--- /dev/null
+++ b/include/linux/pspace.h
@@ -0,0 +1,23 @@
+#ifndef _LINUX_PSPACE_H
+#define _LINUX_PSPACE_H
+
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/threads.h>
+#include <linux/pid.h>
+
+struct pidmap {
+ atomic_t nr_free;
+ void *page;
+};
+
+#define PIDMAP_ENTRIES ((PID_MAX_LIMIT + 8*PAGE_SIZE - 1)/PAGE_SIZE/8)
+
+struct pspace {
+ struct pidmap pidmap[PIDMAP_ENTRIES];
+ int last_pid;
+};
+
+extern struct pspace init_pspace;
+
+#endif /* _LINUX_PSPACE_H */
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 7ef899c47c2..38530232d92 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -24,6 +24,8 @@
#define CLONE_UNTRACED 0x00800000 /* set if the tracing process can't force CLONE_PTRACE on this clone */
#define CLONE_CHILD_SETTID 0x01000000 /* set the TID in the child */
#define CLONE_STOPPED 0x02000000 /* Start in stopped state */
+#define CLONE_NEWUTS 0x04000000 /* New utsname group? */
+#define CLONE_NEWIPC 0x08000000 /* New ipcs */
/*
* Scheduling policies
@@ -118,7 +120,6 @@ extern unsigned long avenrun[]; /* Load averages */
extern unsigned long total_forks;
extern int nr_threads;
-extern int last_pid;
DECLARE_PER_CPU(unsigned long, process_counts);
extern int nr_processes(void);
extern unsigned long nr_running(void);
@@ -239,7 +240,7 @@ extern signed long schedule_timeout_interruptible(signed long timeout);
extern signed long schedule_timeout_uninterruptible(signed long timeout);
asmlinkage void schedule(void);
-struct namespace;
+struct nsproxy;
/* Maximum number of active map areas.. This is a random (large) number */
#define DEFAULT_MAX_MAP_COUNT 65536
@@ -754,6 +755,7 @@ static inline void prefetch_stack(struct task_struct *t) { }
struct audit_context; /* See audit.c */
struct mempolicy;
struct pipe_inode_info;
+struct uts_namespace;
enum sleep_type {
SLEEP_NORMAL,
@@ -897,8 +899,8 @@ struct task_struct {
struct fs_struct *fs;
/* open file information */
struct files_struct *files;
-/* namespace */
- struct namespace *namespace;
+/* namespaces */
+ struct nsproxy *nsproxy;
/* signal handlers */
struct signal_struct *signal;
struct sighand_struct *sighand;
@@ -1020,6 +1022,26 @@ static inline pid_t process_group(struct task_struct *tsk)
return tsk->signal->pgrp;
}
+static inline struct pid *task_pid(struct task_struct *task)
+{
+ return task->pids[PIDTYPE_PID].pid;
+}
+
+static inline struct pid *task_tgid(struct task_struct *task)
+{
+ return task->group_leader->pids[PIDTYPE_PID].pid;
+}
+
+static inline struct pid *task_pgrp(struct task_struct *task)
+{
+ return task->group_leader->pids[PIDTYPE_PGID].pid;
+}
+
+static inline struct pid *task_session(struct task_struct *task)
+{
+ return task->group_leader->pids[PIDTYPE_SID].pid;
+}
+
/**
* pid_alive - check that a task structure is not stale
* @p: Task structure to be checked.
@@ -1043,6 +1065,8 @@ static inline int is_init(struct task_struct *tsk)
return tsk->pid == 1;
}
+extern struct pid *cad_pid;
+
extern void free_task(struct task_struct *tsk);
#define get_task_struct(tsk) do { atomic_inc(&(tsk)->usage); } while(0)
@@ -1247,10 +1271,15 @@ extern int send_sig_info(int, struct siginfo *, struct task_struct *);
extern int send_group_sig_info(int, struct siginfo *, struct task_struct *);
extern int force_sigsegv(int, struct task_struct *);
extern int force_sig_info(int, struct siginfo *, struct task_struct *);
+extern int __kill_pgrp_info(int sig, struct siginfo *info, struct pid *pgrp);
+extern int kill_pgrp_info(int sig, struct siginfo *info, struct pid *pgrp);
+extern int kill_pid_info(int sig, struct siginfo *info, struct pid *pid);
+extern int kill_pid_info_as_uid(int, struct siginfo *, struct pid *, uid_t, uid_t, u32);
+extern int kill_pgrp(struct pid *pid, int sig, int priv);
+extern int kill_pid(struct pid *pid, int sig, int priv);
extern int __kill_pg_info(int sig, struct siginfo *info, pid_t pgrp);
extern int kill_pg_info(int, struct siginfo *, pid_t);
extern int kill_proc_info(int, struct siginfo *, pid_t);
-extern int kill_proc_info_as_uid(int, struct siginfo *, pid_t, uid_t, uid_t, u32);
extern void do_notify_parent(struct task_struct *, int);
extern void force_sig(int, struct task_struct *);
extern void force_sig_specific(int, struct task_struct *);
@@ -1265,6 +1294,11 @@ extern int send_group_sigqueue(int, struct sigqueue *, struct task_struct *);
extern int do_sigaction(int, struct k_sigaction *, struct k_sigaction *);
extern int do_sigaltstack(const stack_t __user *, stack_t __user *, unsigned long);
+static inline int kill_cad_pid(int sig, int priv)
+{
+ return kill_pid(cad_pid, sig, priv);
+}
+
/* These can be the second arg to send_sig_info/send_group_sig_info. */
#define SEND_SIG_NOINFO ((struct siginfo *) 0)
#define SEND_SIG_PRIV ((struct siginfo *) 1)
@@ -1358,6 +1392,17 @@ extern void wait_task_inactive(struct task_struct * p);
/* de_thread depends on thread_group_leader not being a pid based check */
#define thread_group_leader(p) (p == p->group_leader)
+/* Do to the insanities of de_thread it is possible for a process
+ * to have the pid of the thread group leader without actually being
+ * the thread group leader. For iteration through the pids in proc
+ * all we care about is that we have a task with the appropriate
+ * pid, we don't actually care if we have the right task.
+ */
+static inline int has_group_leader_pid(struct task_struct *p)
+{
+ return p->pid == p->tgid;
+}
+
static inline struct task_struct *next_thread(const struct task_struct *p)
{
return list_entry(rcu_dereference(p->thread_group.next),
diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
index 73140ee5c63..4ebcdf91f3b 100644
--- a/include/linux/sunrpc/svc.h
+++ b/include/linux/sunrpc/svc.h
@@ -18,6 +18,30 @@
#include <linux/mm.h>
/*
+ * This is the RPC server thread function prototype
+ */
+typedef void (*svc_thread_fn)(struct svc_rqst *);
+
+/*
+ *
+ * RPC service thread pool.
+ *
+ * Pool of threads and temporary sockets. Generally there is only
+ * a single one of these per RPC service, but on NUMA machines those
+ * services that can benefit from it (i.e. nfs but not lockd) will
+ * have one pool per NUMA node. This optimisation reduces cross-
+ * node traffic on multi-node NUMA NFS servers.
+ */
+struct svc_pool {
+ unsigned int sp_id; /* pool id; also node id on NUMA */
+ spinlock_t sp_lock; /* protects all fields */
+ struct list_head sp_threads; /* idle server threads */
+ struct list_head sp_sockets; /* pending sockets */
+ unsigned int sp_nrthreads; /* # of threads in pool */
+ struct list_head sp_all_threads; /* all server threads */
+} ____cacheline_aligned_in_smp;
+
+/*
* RPC service.
*
* An RPC service is a ``daemon,'' possibly multithreaded, which
@@ -28,8 +52,6 @@
* We currently do not support more than one RPC program per daemon.
*/
struct svc_serv {
- struct list_head sv_threads; /* idle server threads */
- struct list_head sv_sockets; /* pending sockets */
struct svc_program * sv_program; /* RPC program */
struct svc_stat * sv_stats; /* RPC statistics */
spinlock_t sv_lock;
@@ -40,11 +62,36 @@ struct svc_serv {
struct list_head sv_permsocks; /* all permanent sockets */
struct list_head sv_tempsocks; /* all temporary sockets */
int sv_tmpcnt; /* count of temporary sockets */
+ struct timer_list sv_temptimer; /* timer for aging temporary sockets */
char * sv_name; /* service name */
+
+ unsigned int sv_nrpools; /* number of thread pools */
+ struct svc_pool * sv_pools; /* array of thread pools */
+
+ void (*sv_shutdown)(struct svc_serv *serv);
+ /* Callback to use when last thread
+ * exits.
+ */
+
+ struct module * sv_module; /* optional module to count when
+ * adding threads */
+ svc_thread_fn sv_function; /* main function for threads */
+ int sv_kill_signal; /* signal to kill threads */
};
/*
+ * We use sv_nrthreads as a reference count. svc_destroy() drops
+ * this refcount, so we need to bump it up around operations that
+ * change the number of threads. Horrible, but there it is.
+ * Should be called with the BKL held.
+ */
+static inline void svc_get(struct svc_serv *serv)
+{
+ serv->sv_nrthreads++;
+}
+
+/*
* Maximum payload size supported by a kernel RPC server.
* This is use to determine the max number of pages nfsd is
* willing to return in a single READ operation.
@@ -127,11 +174,13 @@ static inline void svc_putu32(struct kvec *iov, __be32 val)
*/
struct svc_rqst {
struct list_head rq_list; /* idle list */
+ struct list_head rq_all; /* all threads list */
struct svc_sock * rq_sock; /* socket */
struct sockaddr_in rq_addr; /* peer address */
int rq_addrlen;
struct svc_serv * rq_server; /* RPC service definition */
+ struct svc_pool * rq_pool; /* thread pool */
struct svc_procedure * rq_procinfo; /* procedure info */
struct auth_ops * rq_authop; /* authentication flavour */
struct svc_cred rq_cred; /* auth info */
@@ -180,6 +229,7 @@ struct svc_rqst {
* to prevent encrypting page
* cache pages */
wait_queue_head_t rq_wait; /* synchronization */
+ struct task_struct *rq_task; /* service thread */
};
/*
@@ -321,20 +371,21 @@ struct svc_procedure {
};
/*
- * This is the RPC server thread function prototype
- */
-typedef void (*svc_thread_fn)(struct svc_rqst *);
-
-/*
* Function prototypes.
*/
-struct svc_serv * svc_create(struct svc_program *, unsigned int);
+struct svc_serv * svc_create(struct svc_program *, unsigned int,
+ void (*shutdown)(struct svc_serv*));
int svc_create_thread(svc_thread_fn, struct svc_serv *);
void svc_exit_thread(struct svc_rqst *);
+struct svc_serv * svc_create_pooled(struct svc_program *, unsigned int,
+ void (*shutdown)(struct svc_serv*),
+ svc_thread_fn, int sig, struct module *);
+int svc_set_num_threads(struct svc_serv *, struct svc_pool *, int);
void svc_destroy(struct svc_serv *);
-int svc_process(struct svc_serv *, struct svc_rqst *);
+int svc_process(struct svc_rqst *);
int svc_register(struct svc_serv *, int, unsigned short);
void svc_wake_up(struct svc_serv *);
void svc_reserve(struct svc_rqst *rqstp, int space);
+struct svc_pool * svc_pool_for_cpu(struct svc_serv *serv, int cpu);
#endif /* SUNRPC_SVC_H */
diff --git a/include/linux/sunrpc/svcsock.h b/include/linux/sunrpc/svcsock.h
index b4acb3d37c3..4c296152cbf 100644
--- a/include/linux/sunrpc/svcsock.h
+++ b/include/linux/sunrpc/svcsock.h
@@ -20,8 +20,9 @@ struct svc_sock {
struct socket * sk_sock; /* berkeley socket layer */
struct sock * sk_sk; /* INET layer */
+ struct svc_pool * sk_pool; /* current pool iff queued */
struct svc_serv * sk_server; /* service for this socket */
- unsigned int sk_inuse; /* use count */
+ atomic_t sk_inuse; /* use count */
unsigned long sk_flags;
#define SK_BUSY 0 /* enqueued/receiving */
#define SK_CONN 1 /* conn pending */
@@ -31,9 +32,12 @@ struct svc_sock {
#define SK_DEAD 6 /* socket closed */
#define SK_CHNGBUF 7 /* need to change snd/rcv buffer sizes */
#define SK_DEFERRED 8 /* request on sk_deferred */
+#define SK_OLD 9 /* used for temp socket aging mark+sweep */
+#define SK_DETACHED 10 /* detached from tempsocks list */
- int sk_reserved; /* space on outq that is reserved */
+ atomic_t sk_reserved; /* space on outq that is reserved */
+ spinlock_t sk_defer_lock; /* protects sk_deferred */
struct list_head sk_deferred; /* deferred requests that need to
* be revisted */
struct mutex sk_mutex; /* to serialize sending data */
@@ -57,9 +61,14 @@ struct svc_sock {
*/
int svc_makesock(struct svc_serv *, int, unsigned short);
void svc_delete_socket(struct svc_sock *);
-int svc_recv(struct svc_serv *, struct svc_rqst *, long);
+int svc_recv(struct svc_rqst *, long);
int svc_send(struct svc_rqst *);
void svc_drop(struct svc_rqst *);
void svc_sock_update_bufs(struct svc_serv *serv);
+int svc_sock_names(char *buf, struct svc_serv *serv, char *toclose);
+int svc_addsock(struct svc_serv *serv,
+ int fd,
+ char *name_return,
+ int *proto);
#endif /* SUNRPC_SVCSOCK_H */
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 2d1c3d5c83a..3efcfc7e9c6 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -599,4 +599,6 @@ asmlinkage long sys_set_robust_list(struct robust_list_head __user *head,
size_t len);
asmlinkage long sys_getcpu(unsigned __user *cpu, unsigned __user *node, struct getcpu_cache __user *cache);
+int kernel_execve(const char *filename, char *const argv[], char *const envp[]);
+
#endif
diff --git a/include/linux/tty_driver.h b/include/linux/tty_driver.h
index 58c961c9e17..5c8473bb688 100644
--- a/include/linux/tty_driver.h
+++ b/include/linux/tty_driver.h
@@ -219,7 +219,8 @@ extern struct list_head tty_drivers;
struct tty_driver *alloc_tty_driver(int lines);
void put_tty_driver(struct tty_driver *driver);
-void tty_set_operations(struct tty_driver *driver, struct tty_operations *op);
+void tty_set_operations(struct tty_driver *driver,
+ const struct tty_operations *op);
/* tty driver magic number */
#define TTY_DRIVER_MAGIC 0x5402
diff --git a/include/linux/unistd.h b/include/linux/unistd.h
index c18c60f3254..aa8d5b5e2e3 100644
--- a/include/linux/unistd.h
+++ b/include/linux/unistd.h
@@ -1,12 +1,8 @@
#ifndef _LINUX_UNISTD_H_
#define _LINUX_UNISTD_H_
-#ifdef __KERNEL__
-extern int errno;
-#endif
-
/*
- * Include machine specific syscallX macros
+ * Include machine specific syscall numbers
*/
#include <asm/unistd.h>
diff --git a/include/linux/utsname.h b/include/linux/utsname.h
index 13e1da0c538..02e4b697206 100644
--- a/include/linux/utsname.h
+++ b/include/linux/utsname.h
@@ -1,6 +1,11 @@
#ifndef _LINUX_UTSNAME_H
#define _LINUX_UTSNAME_H
+#include <linux/sched.h>
+#include <linux/kref.h>
+#include <linux/nsproxy.h>
+#include <asm/atomic.h>
+
#define __OLD_UTS_LEN 8
struct oldold_utsname {
@@ -30,7 +35,55 @@ struct new_utsname {
char domainname[65];
};
-extern struct new_utsname system_utsname;
+struct uts_namespace {
+ struct kref kref;
+ struct new_utsname name;
+};
+extern struct uts_namespace init_uts_ns;
+
+static inline void get_uts_ns(struct uts_namespace *ns)
+{
+ kref_get(&ns->kref);
+}
+
+#ifdef CONFIG_UTS_NS
+extern int unshare_utsname(unsigned long unshare_flags,
+ struct uts_namespace **new_uts);
+extern int copy_utsname(int flags, struct task_struct *tsk);
+extern void free_uts_ns(struct kref *kref);
+
+static inline void put_uts_ns(struct uts_namespace *ns)
+{
+ kref_put(&ns->kref, free_uts_ns);
+}
+#else
+static inline int unshare_utsname(unsigned long unshare_flags,
+ struct uts_namespace **new_uts)
+{
+ if (unshare_flags & CLONE_NEWUTS)
+ return -EINVAL;
+
+ return 0;
+}
+
+static inline int copy_utsname(int flags, struct task_struct *tsk)
+{
+ return 0;
+}
+static inline void put_uts_ns(struct uts_namespace *ns)
+{
+}
+#endif
+
+static inline struct new_utsname *utsname(void)
+{
+ return &current->nsproxy->uts_ns->name;
+}
+
+static inline struct new_utsname *init_utsname(void)
+{
+ return &init_uts_ns.name;
+}
extern struct rw_semaphore uts_sem;
#endif
diff --git a/include/linux/vt_kern.h b/include/linux/vt_kern.h
index 1009d3fe1fc..37a1a41f5b6 100644
--- a/include/linux/vt_kern.h
+++ b/include/linux/vt_kern.h
@@ -84,4 +84,11 @@ void reset_vc(struct vc_data *vc);
extern char con_buf[CON_BUF_SIZE];
extern struct semaphore con_buf_sem;
+struct vt_spawn_console {
+ spinlock_t lock;
+ struct pid *pid;
+ int sig;
+};
+extern struct vt_spawn_console vt_spawn_con;
+
#endif /* _VT_KERN_H */
diff --git a/init/Kconfig b/init/Kconfig
index f7a04d0daf0..10382931eea 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -115,6 +115,15 @@ config SYSVIPC
section 6.4 of the Linux Programmer's Guide, available from
<http://www.tldp.org/guides.html>.
+config IPC_NS
+ bool "IPC Namespaces"
+ depends on SYSVIPC
+ default n
+ help
+ Support ipc namespaces. This allows containers, i.e. virtual
+ environments, to use ipc namespaces to provide different ipc
+ objects for different servers. If unsure, say N.
+
config POSIX_MQUEUE
bool "POSIX Message Queues"
depends on NET && EXPERIMENTAL
@@ -182,6 +191,14 @@ config TASK_DELAY_ACCT
Say N if unsure.
+config UTS_NS
+ bool "UTS Namespaces"
+ default n
+ help
+ Support uts namespaces. This allows containers, i.e.
+ vservers, to use uts namespaces to provide different
+ uts info for different servers. If unsure, say N.
+
config AUDIT
bool "Auditing support"
depends on NET
diff --git a/init/do_mounts_initrd.c b/init/do_mounts_initrd.c
index a06f037fa00..919a80cb322 100644
--- a/init/do_mounts_initrd.c
+++ b/init/do_mounts_initrd.c
@@ -1,4 +1,3 @@
-#define __KERNEL_SYSCALLS__
#include <linux/unistd.h>
#include <linux/kernel.h>
#include <linux/fs.h>
@@ -35,7 +34,7 @@ static int __init do_linuxrc(void * shell)
(void) sys_open("/dev/console",O_RDWR,0);
(void) sys_dup(0);
(void) sys_dup(0);
- return execve(shell, argv, envp_init);
+ return kernel_execve(shell, argv, envp_init);
}
static void __init handle_initrd(void)
diff --git a/init/main.c b/init/main.c
index 0766e69712b..ee123243fb5 100644
--- a/init/main.c
+++ b/init/main.c
@@ -9,8 +9,6 @@
* Simplified starting of init: Michael A. Griffith <grif@acm.org>
*/
-#define __KERNEL_SYSCALLS__
-
#include <linux/types.h>
#include <linux/module.h>
#include <linux/proc_fs.h>
@@ -703,7 +701,7 @@ static void do_pre_smp_initcalls(void)
static void run_init_process(char *init_filename)
{
argv_init[0] = init_filename;
- execve(init_filename, argv_init, envp_init);
+ kernel_execve(init_filename, argv_init, envp_init);
}
static int init(void * unused)
@@ -723,6 +721,8 @@ static int init(void * unused)
*/
child_reaper = current;
+ cad_pid = task_pid(current);
+
smp_prepare_cpus(max_cpus);
do_pre_smp_initcalls();
diff --git a/init/version.c b/init/version.c
index e290802c6bd..8f28344d9c7 100644
--- a/init/version.c
+++ b/init/version.c
@@ -12,22 +12,27 @@
#include <linux/utsname.h>
#include <linux/utsrelease.h>
#include <linux/version.h>
+#include <linux/sched.h>
#define version(a) Version_ ## a
#define version_string(a) version(a)
int version_string(LINUX_VERSION_CODE);
-struct new_utsname system_utsname = {
- .sysname = UTS_SYSNAME,
- .nodename = UTS_NODENAME,
- .release = UTS_RELEASE,
- .version = UTS_VERSION,
- .machine = UTS_MACHINE,
- .domainname = UTS_DOMAINNAME,
+struct uts_namespace init_uts_ns = {
+ .kref = {
+ .refcount = ATOMIC_INIT(2),
+ },
+ .name = {
+ .sysname = UTS_SYSNAME,
+ .nodename = UTS_NODENAME,
+ .release = UTS_RELEASE,
+ .version = UTS_VERSION,
+ .machine = UTS_MACHINE,
+ .domainname = UTS_DOMAINNAME,
+ },
};
-
-EXPORT_SYMBOL(system_utsname);
+EXPORT_SYMBOL_GPL(init_uts_ns);
const char linux_banner[] =
"Linux version " UTS_RELEASE " (" LINUX_COMPILE_BY "@"
diff --git a/ipc/mqueue.c b/ipc/mqueue.c
index d75d0ba8336..c45ae86cec3 100644
--- a/ipc/mqueue.c
+++ b/ipc/mqueue.c
@@ -73,7 +73,7 @@ struct mqueue_inode_info {
struct mq_attr attr;
struct sigevent notify;
- pid_t notify_owner;
+ struct pid* notify_owner;
struct user_struct *user; /* user who created, for accounting */
struct sock *notify_sock;
struct sk_buff *notify_cookie;
@@ -134,7 +134,7 @@ static struct inode *mqueue_get_inode(struct super_block *sb, int mode,
INIT_LIST_HEAD(&info->e_wait_q[0].list);
INIT_LIST_HEAD(&info->e_wait_q[1].list);
info->messages = NULL;
- info->notify_owner = 0;
+ info->notify_owner = NULL;
info->qsize = 0;
info->user = NULL; /* set when all is ok */
memset(&info->attr, 0, sizeof(info->attr));
@@ -338,7 +338,7 @@ static ssize_t mqueue_read_file(struct file *filp, char __user *u_data,
(info->notify_owner &&
info->notify.sigev_notify == SIGEV_SIGNAL) ?
info->notify.sigev_signo : 0,
- info->notify_owner);
+ pid_nr(info->notify_owner));
spin_unlock(&info->lock);
buffer[sizeof(buffer)-1] = '\0';
slen = strlen(buffer)+1;
@@ -363,7 +363,7 @@ static int mqueue_flush_file(struct file *filp, fl_owner_t id)
struct mqueue_inode_info *info = MQUEUE_I(filp->f_dentry->d_inode);
spin_lock(&info->lock);
- if (current->tgid == info->notify_owner)
+ if (task_tgid(current) == info->notify_owner)
remove_notification(info);
spin_unlock(&info->lock);
@@ -518,8 +518,8 @@ static void __do_notify(struct mqueue_inode_info *info)
sig_i.si_pid = current->tgid;
sig_i.si_uid = current->uid;
- kill_proc_info(info->notify.sigev_signo,
- &sig_i, info->notify_owner);
+ kill_pid_info(info->notify.sigev_signo,
+ &sig_i, info->notify_owner);
break;
case SIGEV_THREAD:
set_cookie(info->notify_cookie, NOTIFY_WOKENUP);
@@ -528,7 +528,8 @@ static void __do_notify(struct mqueue_inode_info *info)
break;
}
/* after notification unregisters process */
- info->notify_owner = 0;
+ put_pid(info->notify_owner);
+ info->notify_owner = NULL;
}
wake_up(&info->wait_q);
}
@@ -566,12 +567,13 @@ static long prepare_timeout(const struct timespec __user *u_arg)
static void remove_notification(struct mqueue_inode_info *info)
{
- if (info->notify_owner != 0 &&
+ if (info->notify_owner != NULL &&
info->notify.sigev_notify == SIGEV_THREAD) {
set_cookie(info->notify_cookie, NOTIFY_REMOVED);
netlink_sendskb(info->notify_sock, info->notify_cookie, 0);
}
- info->notify_owner = 0;
+ put_pid(info->notify_owner);
+ info->notify_owner = NULL;
}
static int mq_attr_ok(struct mq_attr *attr)
@@ -1062,11 +1064,11 @@ retry:
ret = 0;
spin_lock(&info->lock);
if (u_notification == NULL) {
- if (info->notify_owner == current->tgid) {
+ if (info->notify_owner == task_tgid(current)) {
remove_notification(info);
inode->i_atime = inode->i_ctime = CURRENT_TIME;
}
- } else if (info->notify_owner != 0) {
+ } else if (info->notify_owner != NULL) {
ret = -EBUSY;
} else {
switch (notification.sigev_notify) {
@@ -1086,7 +1088,8 @@ retry:
info->notify.sigev_notify = SIGEV_SIGNAL;
break;
}
- info->notify_owner = current->tgid;
+
+ info->notify_owner = get_pid(task_tgid(current));
inode->i_atime = inode->i_ctime = CURRENT_TIME;
}
spin_unlock(&info->lock);
diff --git a/ipc/msg.c b/ipc/msg.c
index 2b4fccf8ea5..5b213d95254 100644
--- a/ipc/msg.c
+++ b/ipc/msg.c
@@ -16,6 +16,10 @@
*
* support for audit of ipc object properties and permission changes
* Dustin Kirkland <dustin.kirkland@us.ibm.com>
+ *
+ * namespaces support
+ * OpenVZ, SWsoft Inc.
+ * Pavel Emelianov <xemul@openvz.org>
*/
#include <linux/capability.h>
@@ -31,16 +35,12 @@
#include <linux/audit.h>
#include <linux/seq_file.h>
#include <linux/mutex.h>
+#include <linux/nsproxy.h>
#include <asm/current.h>
#include <asm/uaccess.h>
#include "util.h"
-/* sysctl: */
-int msg_ctlmax = MSGMAX;
-int msg_ctlmnb = MSGMNB;
-int msg_ctlmni = MSGMNI;
-
/*
* one msg_receiver structure for each sleeping receiver:
*/
@@ -69,30 +69,75 @@ struct msg_sender {
static atomic_t msg_bytes = ATOMIC_INIT(0);
static atomic_t msg_hdrs = ATOMIC_INIT(0);
-static struct ipc_ids msg_ids;
+static struct ipc_ids init_msg_ids;
-#define msg_lock(id) ((struct msg_queue *)ipc_lock(&msg_ids, id))
-#define msg_unlock(msq) ipc_unlock(&(msq)->q_perm)
-#define msg_rmid(id) ((struct msg_queue *)ipc_rmid(&msg_ids, id))
-#define msg_checkid(msq, msgid) ipc_checkid(&msg_ids, &msq->q_perm, msgid)
-#define msg_buildid(id, seq) ipc_buildid(&msg_ids, id, seq)
+#define msg_ids(ns) (*((ns)->ids[IPC_MSG_IDS]))
-static void freeque(struct msg_queue *msq, int id);
-static int newque(key_t key, int msgflg);
+#define msg_lock(ns, id) ((struct msg_queue*)ipc_lock(&msg_ids(ns), id))
+#define msg_unlock(msq) ipc_unlock(&(msq)->q_perm)
+#define msg_rmid(ns, id) ((struct msg_queue*)ipc_rmid(&msg_ids(ns), id))
+#define msg_checkid(ns, msq, msgid) \
+ ipc_checkid(&msg_ids(ns), &msq->q_perm, msgid)
+#define msg_buildid(ns, id, seq) \
+ ipc_buildid(&msg_ids(ns), id, seq)
+
+static void freeque (struct ipc_namespace *ns, struct msg_queue *msq, int id);
+static int newque (struct ipc_namespace *ns, key_t key, int msgflg);
#ifdef CONFIG_PROC_FS
static int sysvipc_msg_proc_show(struct seq_file *s, void *it);
#endif
+static void __ipc_init __msg_init_ns(struct ipc_namespace *ns, struct ipc_ids *ids)
+{
+ ns->ids[IPC_MSG_IDS] = ids;
+ ns->msg_ctlmax = MSGMAX;
+ ns->msg_ctlmnb = MSGMNB;
+ ns->msg_ctlmni = MSGMNI;
+ ipc_init_ids(ids, ns->msg_ctlmni);
+}
+
+#ifdef CONFIG_IPC_NS
+int msg_init_ns(struct ipc_namespace *ns)
+{
+ struct ipc_ids *ids;
+
+ ids = kmalloc(sizeof(struct ipc_ids), GFP_KERNEL);
+ if (ids == NULL)
+ return -ENOMEM;
+
+ __msg_init_ns(ns, ids);
+ return 0;
+}
+
+void msg_exit_ns(struct ipc_namespace *ns)
+{
+ int i;
+ struct msg_queue *msq;
+
+ mutex_lock(&msg_ids(ns).mutex);
+ for (i = 0; i <= msg_ids(ns).max_id; i++) {
+ msq = msg_lock(ns, i);
+ if (msq == NULL)
+ continue;
+
+ freeque(ns, msq, i);
+ }
+ mutex_unlock(&msg_ids(ns).mutex);
+
+ kfree(ns->ids[IPC_MSG_IDS]);
+ ns->ids[IPC_MSG_IDS] = NULL;
+}
+#endif
+
void __init msg_init(void)
{
- ipc_init_ids(&msg_ids, msg_ctlmni);
+ __msg_init_ns(&init_ipc_ns, &init_msg_ids);
ipc_init_proc_interface("sysvipc/msg",
" key msqid perms cbytes qnum lspid lrpid uid gid cuid cgid stime rtime ctime\n",
- &msg_ids,
- sysvipc_msg_proc_show);
+ IPC_MSG_IDS, sysvipc_msg_proc_show);
}
-static int newque(key_t key, int msgflg)
+static int newque (struct ipc_namespace *ns, key_t key, int msgflg)
{
struct msg_queue *msq;
int id, retval;
@@ -111,18 +156,18 @@ static int newque(key_t key, int msgflg)
return retval;
}
- id = ipc_addid(&msg_ids, &msq->q_perm, msg_ctlmni);
+ id = ipc_addid(&msg_ids(ns), &msq->q_perm, ns->msg_ctlmni);
if (id == -1) {
security_msg_queue_free(msq);
ipc_rcu_putref(msq);
return -ENOSPC;
}
- msq->q_id = msg_buildid(id, msq->q_perm.seq);
+ msq->q_id = msg_buildid(ns, id, msq->q_perm.seq);
msq->q_stime = msq->q_rtime = 0;
msq->q_ctime = get_seconds();
msq->q_cbytes = msq->q_qnum = 0;
- msq->q_qbytes = msg_ctlmnb;
+ msq->q_qbytes = ns->msg_ctlmnb;
msq->q_lspid = msq->q_lrpid = 0;
INIT_LIST_HEAD(&msq->q_messages);
INIT_LIST_HEAD(&msq->q_receivers);
@@ -186,13 +231,13 @@ static void expunge_all(struct msg_queue *msq, int res)
* msg_ids.mutex and the spinlock for this message queue is hold
* before freeque() is called. msg_ids.mutex remains locked on exit.
*/
-static void freeque(struct msg_queue *msq, int id)
+static void freeque(struct ipc_namespace *ns, struct msg_queue *msq, int id)
{
struct list_head *tmp;
expunge_all(msq, -EIDRM);
ss_wakeup(&msq->q_senders, 1);
- msq = msg_rmid(id);
+ msq = msg_rmid(ns, id);
msg_unlock(msq);
tmp = msq->q_messages.next;
@@ -212,24 +257,27 @@ asmlinkage long sys_msgget(key_t key, int msgflg)
{
struct msg_queue *msq;
int id, ret = -EPERM;
+ struct ipc_namespace *ns;
+
+ ns = current->nsproxy->ipc_ns;
- mutex_lock(&msg_ids.mutex);
+ mutex_lock(&msg_ids(ns).mutex);
if (key == IPC_PRIVATE)
- ret = newque(key, msgflg);
- else if ((id = ipc_findkey(&msg_ids, key)) == -1) { /* key not used */
+ ret = newque(ns, key, msgflg);
+ else if ((id = ipc_findkey(&msg_ids(ns), key)) == -1) { /* key not used */
if (!(msgflg & IPC_CREAT))
ret = -ENOENT;
else
- ret = newque(key, msgflg);
+ ret = newque(ns, key, msgflg);
} else if (msgflg & IPC_CREAT && msgflg & IPC_EXCL) {
ret = -EEXIST;
} else {
- msq = msg_lock(id);
+ msq = msg_lock(ns, id);
BUG_ON(msq == NULL);
if (ipcperms(&msq->q_perm, msgflg))
ret = -EACCES;
else {
- int qid = msg_buildid(id, msq->q_perm.seq);
+ int qid = msg_buildid(ns, id, msq->q_perm.seq);
ret = security_msg_queue_associate(msq, msgflg);
if (!ret)
@@ -237,7 +285,7 @@ asmlinkage long sys_msgget(key_t key, int msgflg)
}
msg_unlock(msq);
}
- mutex_unlock(&msg_ids.mutex);
+ mutex_unlock(&msg_ids(ns).mutex);
return ret;
}
@@ -341,11 +389,13 @@ asmlinkage long sys_msgctl(int msqid, int cmd, struct msqid_ds __user *buf)
struct msq_setbuf setbuf;
struct msg_queue *msq;
int err, version;
+ struct ipc_namespace *ns;
if (msqid < 0 || cmd < 0)
return -EINVAL;
version = ipc_parse_version(&cmd);
+ ns = current->nsproxy->ipc_ns;
switch (cmd) {
case IPC_INFO:
@@ -366,14 +416,14 @@ asmlinkage long sys_msgctl(int msqid, int cmd, struct msqid_ds __user *buf)
return err;
memset(&msginfo, 0, sizeof(msginfo));
- msginfo.msgmni = msg_ctlmni;
- msginfo.msgmax = msg_ctlmax;
- msginfo.msgmnb = msg_ctlmnb;
+ msginfo.msgmni = ns->msg_ctlmni;
+ msginfo.msgmax = ns->msg_ctlmax;
+ msginfo.msgmnb = ns->msg_ctlmnb;
msginfo.msgssz = MSGSSZ;
msginfo.msgseg = MSGSEG;
- mutex_lock(&msg_ids.mutex);
+ mutex_lock(&msg_ids(ns).mutex);
if (cmd == MSG_INFO) {
- msginfo.msgpool = msg_ids.in_use;
+ msginfo.msgpool = msg_ids(ns).in_use;
msginfo.msgmap = atomic_read(&msg_hdrs);
msginfo.msgtql = atomic_read(&msg_bytes);
} else {
@@ -381,8 +431,8 @@ asmlinkage long sys_msgctl(int msqid, int cmd, struct msqid_ds __user *buf)
msginfo.msgpool = MSGPOOL;
msginfo.msgtql = MSGTQL;
}
- max_id = msg_ids.max_id;
- mutex_unlock(&msg_ids.mutex);
+ max_id = msg_ids(ns).max_id;
+ mutex_unlock(&msg_ids(ns).mutex);
if (copy_to_user(buf, &msginfo, sizeof(struct msginfo)))
return -EFAULT;
return (max_id < 0) ? 0 : max_id;
@@ -395,20 +445,20 @@ asmlinkage long sys_msgctl(int msqid, int cmd, struct msqid_ds __user *buf)
if (!buf)
return -EFAULT;
- if (cmd == MSG_STAT && msqid >= msg_ids.entries->size)
+ if (cmd == MSG_STAT && msqid >= msg_ids(ns).entries->size)
return -EINVAL;
memset(&tbuf, 0, sizeof(tbuf));
- msq = msg_lock(msqid);
+ msq = msg_lock(ns, msqid);
if (msq == NULL)
return -EINVAL;
if (cmd == MSG_STAT) {
- success_return = msg_buildid(msqid, msq->q_perm.seq);
+ success_return = msg_buildid(ns, msqid, msq->q_perm.seq);
} else {
err = -EIDRM;
- if (msg_checkid(msq, msqid))
+ if (msg_checkid(ns, msq, msqid))
goto out_unlock;
success_return = 0;
}
@@ -446,14 +496,14 @@ asmlinkage long sys_msgctl(int msqid, int cmd, struct msqid_ds __user *buf)
return -EINVAL;
}
- mutex_lock(&msg_ids.mutex);
- msq = msg_lock(msqid);
+ mutex_lock(&msg_ids(ns).mutex);
+ msq = msg_lock(ns, msqid);
err = -EINVAL;
if (msq == NULL)
goto out_up;
err = -EIDRM;
- if (msg_checkid(msq, msqid))
+ if (msg_checkid(ns, msq, msqid))
goto out_unlock_up;
ipcp = &msq->q_perm;
@@ -481,7 +531,7 @@ asmlinkage long sys_msgctl(int msqid, int cmd, struct msqid_ds __user *buf)
case IPC_SET:
{
err = -EPERM;
- if (setbuf.qbytes > msg_ctlmnb && !capable(CAP_SYS_RESOURCE))
+ if (setbuf.qbytes > ns->msg_ctlmnb && !capable(CAP_SYS_RESOURCE))
goto out_unlock_up;
msq->q_qbytes = setbuf.qbytes;
@@ -503,12 +553,12 @@ asmlinkage long sys_msgctl(int msqid, int cmd, struct msqid_ds __user *buf)
break;
}
case IPC_RMID:
- freeque(msq, msqid);
+ freeque(ns, msq, msqid);
break;
}
err = 0;
out_up:
- mutex_unlock(&msg_ids.mutex);
+ mutex_unlock(&msg_ids(ns).mutex);
return err;
out_unlock_up:
msg_unlock(msq);
@@ -582,8 +632,11 @@ sys_msgsnd(int msqid, struct msgbuf __user *msgp, size_t msgsz, int msgflg)
struct msg_msg *msg;
long mtype;
int err;
+ struct ipc_namespace *ns;
+
+ ns = current->nsproxy->ipc_ns;
- if (msgsz > msg_ctlmax || (long) msgsz < 0 || msqid < 0)
+ if (msgsz > ns->msg_ctlmax || (long) msgsz < 0 || msqid < 0)
return -EINVAL;
if (get_user(mtype, &msgp->mtype))
return -EFAULT;
@@ -597,13 +650,13 @@ sys_msgsnd(int msqid, struct msgbuf __user *msgp, size_t msgsz, int msgflg)
msg->m_type = mtype;
msg->m_ts = msgsz;
- msq = msg_lock(msqid);
+ msq = msg_lock(ns, msqid);
err = -EINVAL;
if (msq == NULL)
goto out_free;
err= -EIDRM;
- if (msg_checkid(msq, msqid))
+ if (msg_checkid(ns, msq, msqid))
goto out_unlock_free;
for (;;) {
@@ -694,17 +747,19 @@ asmlinkage long sys_msgrcv(int msqid, struct msgbuf __user *msgp, size_t msgsz,
struct msg_queue *msq;
struct msg_msg *msg;
int mode;
+ struct ipc_namespace *ns;
if (msqid < 0 || (long) msgsz < 0)
return -EINVAL;
mode = convert_mode(&msgtyp, msgflg);
+ ns = current->nsproxy->ipc_ns;
- msq = msg_lock(msqid);
+ msq = msg_lock(ns, msqid);
if (msq == NULL)
return -EINVAL;
msg = ERR_PTR(-EIDRM);
- if (msg_checkid(msq, msqid))
+ if (msg_checkid(ns, msq, msqid))
goto out_unlock;
for (;;) {
diff --git a/ipc/sem.c b/ipc/sem.c
index 6013c751156..0dafcc455f9 100644
--- a/ipc/sem.c
+++ b/ipc/sem.c
@@ -64,6 +64,10 @@
*
* support for audit of ipc object properties and permission changes
* Dustin Kirkland <dustin.kirkland@us.ibm.com>
+ *
+ * namespaces support
+ * OpenVZ, SWsoft Inc.
+ * Pavel Emelianov <xemul@openvz.org>
*/
#include <linux/slab.h>
@@ -78,22 +82,25 @@
#include <linux/capability.h>
#include <linux/seq_file.h>
#include <linux/mutex.h>
+#include <linux/nsproxy.h>
#include <asm/uaccess.h>
#include "util.h"
+#define sem_ids(ns) (*((ns)->ids[IPC_SEM_IDS]))
+
+#define sem_lock(ns, id) ((struct sem_array*)ipc_lock(&sem_ids(ns), id))
+#define sem_unlock(sma) ipc_unlock(&(sma)->sem_perm)
+#define sem_rmid(ns, id) ((struct sem_array*)ipc_rmid(&sem_ids(ns), id))
+#define sem_checkid(ns, sma, semid) \
+ ipc_checkid(&sem_ids(ns),&sma->sem_perm,semid)
+#define sem_buildid(ns, id, seq) \
+ ipc_buildid(&sem_ids(ns), id, seq)
-#define sem_lock(id) ((struct sem_array*)ipc_lock(&sem_ids,id))
-#define sem_unlock(sma) ipc_unlock(&(sma)->sem_perm)
-#define sem_rmid(id) ((struct sem_array*)ipc_rmid(&sem_ids,id))
-#define sem_checkid(sma, semid) \
- ipc_checkid(&sem_ids,&sma->sem_perm,semid)
-#define sem_buildid(id, seq) \
- ipc_buildid(&sem_ids, id, seq)
-static struct ipc_ids sem_ids;
+static struct ipc_ids init_sem_ids;
-static int newary (key_t, int, int);
-static void freeary (struct sem_array *sma, int id);
+static int newary(struct ipc_namespace *, key_t, int, int);
+static void freeary(struct ipc_namespace *ns, struct sem_array *sma, int id);
#ifdef CONFIG_PROC_FS
static int sysvipc_sem_proc_show(struct seq_file *s, void *it);
#endif
@@ -110,22 +117,61 @@ static int sysvipc_sem_proc_show(struct seq_file *s, void *it);
*
*/
-int sem_ctls[4] = {SEMMSL, SEMMNS, SEMOPM, SEMMNI};
-#define sc_semmsl (sem_ctls[0])
-#define sc_semmns (sem_ctls[1])
-#define sc_semopm (sem_ctls[2])
-#define sc_semmni (sem_ctls[3])
+#define sc_semmsl sem_ctls[0]
+#define sc_semmns sem_ctls[1]
+#define sc_semopm sem_ctls[2]
+#define sc_semmni sem_ctls[3]
-static int used_sems;
+static void __ipc_init __sem_init_ns(struct ipc_namespace *ns, struct ipc_ids *ids)
+{
+ ns->ids[IPC_SEM_IDS] = ids;
+ ns->sc_semmsl = SEMMSL;
+ ns->sc_semmns = SEMMNS;
+ ns->sc_semopm = SEMOPM;
+ ns->sc_semmni = SEMMNI;
+ ns->used_sems = 0;
+ ipc_init_ids(ids, ns->sc_semmni);
+}
+
+#ifdef CONFIG_IPC_NS
+int sem_init_ns(struct ipc_namespace *ns)
+{
+ struct ipc_ids *ids;
+
+ ids = kmalloc(sizeof(struct ipc_ids), GFP_KERNEL);
+ if (ids == NULL)
+ return -ENOMEM;
+
+ __sem_init_ns(ns, ids);
+ return 0;
+}
+
+void sem_exit_ns(struct ipc_namespace *ns)
+{
+ int i;
+ struct sem_array *sma;
+
+ mutex_lock(&sem_ids(ns).mutex);
+ for (i = 0; i <= sem_ids(ns).max_id; i++) {
+ sma = sem_lock(ns, i);
+ if (sma == NULL)
+ continue;
+
+ freeary(ns, sma, i);
+ }
+ mutex_unlock(&sem_ids(ns).mutex);
+
+ kfree(ns->ids[IPC_SEM_IDS]);
+ ns->ids[IPC_SEM_IDS] = NULL;
+}
+#endif
void __init sem_init (void)
{
- used_sems = 0;
- ipc_init_ids(&sem_ids,sc_semmni);
+ __sem_init_ns(&init_ipc_ns, &init_sem_ids);
ipc_init_proc_interface("sysvipc/sem",
" key semid perms nsems uid gid cuid cgid otime ctime\n",
- &sem_ids,
- sysvipc_sem_proc_show);
+ IPC_SEM_IDS, sysvipc_sem_proc_show);
}
/*
@@ -162,7 +208,7 @@ void __init sem_init (void)
*/
#define IN_WAKEUP 1
-static int newary (key_t key, int nsems, int semflg)
+static int newary (struct ipc_namespace *ns, key_t key, int nsems, int semflg)
{
int id;
int retval;
@@ -171,7 +217,7 @@ static int newary (key_t key, int nsems, int semflg)
if (!nsems)
return -EINVAL;
- if (used_sems + nsems > sc_semmns)
+ if (ns->used_sems + nsems > ns->sc_semmns)
return -ENOSPC;
size = sizeof (*sma) + nsems * sizeof (struct sem);
@@ -191,15 +237,15 @@ static int newary (key_t key, int nsems, int semflg)
return retval;
}
- id = ipc_addid(&sem_ids, &sma->sem_perm, sc_semmni);
+ id = ipc_addid(&sem_ids(ns), &sma->sem_perm, ns->sc_semmni);
if(id == -1) {
security_sem_free(sma);
ipc_rcu_putref(sma);
return -ENOSPC;
}
- used_sems += nsems;
+ ns->used_sems += nsems;
- sma->sem_id = sem_buildid(id, sma->sem_perm.seq);
+ sma->sem_id = sem_buildid(ns, id, sma->sem_perm.seq);
sma->sem_base = (struct sem *) &sma[1];
/* sma->sem_pending = NULL; */
sma->sem_pending_last = &sma->sem_pending;
@@ -215,29 +261,32 @@ asmlinkage long sys_semget (key_t key, int nsems, int semflg)
{
int id, err = -EINVAL;
struct sem_array *sma;
+ struct ipc_namespace *ns;
- if (nsems < 0 || nsems > sc_semmsl)
+ ns = current->nsproxy->ipc_ns;
+
+ if (nsems < 0 || nsems > ns->sc_semmsl)
return -EINVAL;
- mutex_lock(&sem_ids.mutex);
+ mutex_lock(&sem_ids(ns).mutex);
if (key == IPC_PRIVATE) {
- err = newary(key, nsems, semflg);
- } else if ((id = ipc_findkey(&sem_ids, key)) == -1) { /* key not used */
+ err = newary(ns, key, nsems, semflg);
+ } else if ((id = ipc_findkey(&sem_ids(ns), key)) == -1) { /* key not used */
if (!(semflg & IPC_CREAT))
err = -ENOENT;
else
- err = newary(key, nsems, semflg);
+ err = newary(ns, key, nsems, semflg);
} else if (semflg & IPC_CREAT && semflg & IPC_EXCL) {
err = -EEXIST;
} else {
- sma = sem_lock(id);
+ sma = sem_lock(ns, id);
BUG_ON(sma==NULL);
if (nsems > sma->sem_nsems)
err = -EINVAL;
else if (ipcperms(&sma->sem_perm, semflg))
err = -EACCES;
else {
- int semid = sem_buildid(id, sma->sem_perm.seq);
+ int semid = sem_buildid(ns, id, sma->sem_perm.seq);
err = security_sem_associate(sma, semflg);
if (!err)
err = semid;
@@ -245,7 +294,7 @@ asmlinkage long sys_semget (key_t key, int nsems, int semflg)
sem_unlock(sma);
}
- mutex_unlock(&sem_ids.mutex);
+ mutex_unlock(&sem_ids(ns).mutex);
return err;
}
@@ -444,7 +493,7 @@ static int count_semzcnt (struct sem_array * sma, ushort semnum)
* the spinlock for this semaphore set hold. sem_ids.mutex remains locked
* on exit.
*/
-static void freeary (struct sem_array *sma, int id)
+static void freeary (struct ipc_namespace *ns, struct sem_array *sma, int id)
{
struct sem_undo *un;
struct sem_queue *q;
@@ -472,10 +521,10 @@ static void freeary (struct sem_array *sma, int id)
}
/* Remove the semaphore set from the ID array*/
- sma = sem_rmid(id);
+ sma = sem_rmid(ns, id);
sem_unlock(sma);
- used_sems -= sma->sem_nsems;
+ ns->used_sems -= sma->sem_nsems;
size = sizeof (*sma) + sma->sem_nsems * sizeof (struct sem);
security_sem_free(sma);
ipc_rcu_putref(sma);
@@ -503,7 +552,8 @@ static unsigned long copy_semid_to_user(void __user *buf, struct semid64_ds *in,
}
}
-static int semctl_nolock(int semid, int semnum, int cmd, int version, union semun arg)
+static int semctl_nolock(struct ipc_namespace *ns, int semid, int semnum,
+ int cmd, int version, union semun arg)
{
int err = -EINVAL;
struct sem_array *sma;
@@ -520,24 +570,24 @@ static int semctl_nolock(int semid, int semnum, int cmd, int version, union semu
return err;
memset(&seminfo,0,sizeof(seminfo));
- seminfo.semmni = sc_semmni;
- seminfo.semmns = sc_semmns;
- seminfo.semmsl = sc_semmsl;
- seminfo.semopm = sc_semopm;
+ seminfo.semmni = ns->sc_semmni;
+ seminfo.semmns = ns->sc_semmns;
+ seminfo.semmsl = ns->sc_semmsl;
+ seminfo.semopm = ns->sc_semopm;
seminfo.semvmx = SEMVMX;
seminfo.semmnu = SEMMNU;
seminfo.semmap = SEMMAP;
seminfo.semume = SEMUME;
- mutex_lock(&sem_ids.mutex);
+ mutex_lock(&sem_ids(ns).mutex);
if (cmd == SEM_INFO) {
- seminfo.semusz = sem_ids.in_use;
- seminfo.semaem = used_sems;
+ seminfo.semusz = sem_ids(ns).in_use;
+ seminfo.semaem = ns->used_sems;
} else {
seminfo.semusz = SEMUSZ;
seminfo.semaem = SEMAEM;
}
- max_id = sem_ids.max_id;
- mutex_unlock(&sem_ids.mutex);
+ max_id = sem_ids(ns).max_id;
+ mutex_unlock(&sem_ids(ns).mutex);
if (copy_to_user (arg.__buf, &seminfo, sizeof(struct seminfo)))
return -EFAULT;
return (max_id < 0) ? 0: max_id;
@@ -547,12 +597,12 @@ static int semctl_nolock(int semid, int semnum, int cmd, int version, union semu
struct semid64_ds tbuf;
int id;
- if(semid >= sem_ids.entries->size)
+ if(semid >= sem_ids(ns).entries->size)
return -EINVAL;
memset(&tbuf,0,sizeof(tbuf));
- sma = sem_lock(semid);
+ sma = sem_lock(ns, semid);
if(sma == NULL)
return -EINVAL;
@@ -564,7 +614,7 @@ static int semctl_nolock(int semid, int semnum, int cmd, int version, union semu
if (err)
goto out_unlock;
- id = sem_buildid(semid, sma->sem_perm.seq);
+ id = sem_buildid(ns, semid, sma->sem_perm.seq);
kernel_to_ipc64_perm(&sma->sem_perm, &tbuf.sem_perm);
tbuf.sem_otime = sma->sem_otime;
@@ -584,7 +634,8 @@ out_unlock:
return err;
}
-static int semctl_main(int semid, int semnum, int cmd, int version, union semun arg)
+static int semctl_main(struct ipc_namespace *ns, int semid, int semnum,
+ int cmd, int version, union semun arg)
{
struct sem_array *sma;
struct sem* curr;
@@ -593,14 +644,14 @@ static int semctl_main(int semid, int semnum, int cmd, int version, union semun
ushort* sem_io = fast_sem_io;
int nsems;
- sma = sem_lock(semid);
+ sma = sem_lock(ns, semid);
if(sma==NULL)
return -EINVAL;
nsems = sma->sem_nsems;
err=-EIDRM;
- if (sem_checkid(sma,semid))
+ if (sem_checkid(ns,sma,semid))
goto out_unlock;
err = -EACCES;
@@ -802,7 +853,8 @@ static inline unsigned long copy_semid_from_user(struct sem_setbuf *out, void __
}
}
-static int semctl_down(int semid, int semnum, int cmd, int version, union semun arg)
+static int semctl_down(struct ipc_namespace *ns, int semid, int semnum,
+ int cmd, int version, union semun arg)
{
struct sem_array *sma;
int err;
@@ -813,11 +865,11 @@ static int semctl_down(int semid, int semnum, int cmd, int version, union semun
if(copy_semid_from_user (&setbuf, arg.buf, version))
return -EFAULT;
}
- sma = sem_lock(semid);
+ sma = sem_lock(ns, semid);
if(sma==NULL)
return -EINVAL;
- if (sem_checkid(sma,semid)) {
+ if (sem_checkid(ns,sma,semid)) {
err=-EIDRM;
goto out_unlock;
}
@@ -844,7 +896,7 @@ static int semctl_down(int semid, int semnum, int cmd, int version, union semun
switch(cmd){
case IPC_RMID:
- freeary(sma, semid);
+ freeary(ns, sma, semid);
err = 0;
break;
case IPC_SET:
@@ -872,17 +924,19 @@ asmlinkage long sys_semctl (int semid, int semnum, int cmd, union semun arg)
{
int err = -EINVAL;
int version;
+ struct ipc_namespace *ns;
if (semid < 0)
return -EINVAL;
version = ipc_parse_version(&cmd);
+ ns = current->nsproxy->ipc_ns;
switch(cmd) {
case IPC_INFO:
case SEM_INFO:
case SEM_STAT:
- err = semctl_nolock(semid,semnum,cmd,version,arg);
+ err = semctl_nolock(ns,semid,semnum,cmd,version,arg);
return err;
case GETALL:
case GETVAL:
@@ -892,13 +946,13 @@ asmlinkage long sys_semctl (int semid, int semnum, int cmd, union semun arg)
case IPC_STAT:
case SETVAL:
case SETALL:
- err = semctl_main(semid,semnum,cmd,version,arg);
+ err = semctl_main(ns,semid,semnum,cmd,version,arg);
return err;
case IPC_RMID:
case IPC_SET:
- mutex_lock(&sem_ids.mutex);
- err = semctl_down(semid,semnum,cmd,version,arg);
- mutex_unlock(&sem_ids.mutex);
+ mutex_lock(&sem_ids(ns).mutex);
+ err = semctl_down(ns,semid,semnum,cmd,version,arg);
+ mutex_unlock(&sem_ids(ns).mutex);
return err;
default:
return -EINVAL;
@@ -949,15 +1003,12 @@ static inline void unlock_semundo(void)
static inline int get_undo_list(struct sem_undo_list **undo_listp)
{
struct sem_undo_list *undo_list;
- int size;
undo_list = current->sysvsem.undo_list;
if (!undo_list) {
- size = sizeof(struct sem_undo_list);
- undo_list = (struct sem_undo_list *) kmalloc(size, GFP_KERNEL);
+ undo_list = kzalloc(sizeof(*undo_list), GFP_KERNEL);
if (undo_list == NULL)
return -ENOMEM;
- memset(undo_list, 0, size);
spin_lock_init(&undo_list->lock);
atomic_set(&undo_list->refcnt, 1);
current->sysvsem.undo_list = undo_list;
@@ -986,7 +1037,7 @@ static struct sem_undo *lookup_undo(struct sem_undo_list *ulp, int semid)
return un;
}
-static struct sem_undo *find_undo(int semid)
+static struct sem_undo *find_undo(struct ipc_namespace *ns, int semid)
{
struct sem_array *sma;
struct sem_undo_list *ulp;
@@ -1005,12 +1056,12 @@ static struct sem_undo *find_undo(int semid)
goto out;
/* no undo structure around - allocate one. */
- sma = sem_lock(semid);
+ sma = sem_lock(ns, semid);
un = ERR_PTR(-EINVAL);
if(sma==NULL)
goto out;
un = ERR_PTR(-EIDRM);
- if (sem_checkid(sma,semid)) {
+ if (sem_checkid(ns,sma,semid)) {
sem_unlock(sma);
goto out;
}
@@ -1070,10 +1121,13 @@ asmlinkage long sys_semtimedop(int semid, struct sembuf __user *tsops,
int undos = 0, alter = 0, max;
struct sem_queue queue;
unsigned long jiffies_left = 0;
+ struct ipc_namespace *ns;
+
+ ns = current->nsproxy->ipc_ns;
if (nsops < 1 || semid < 0)
return -EINVAL;
- if (nsops > sc_semopm)
+ if (nsops > ns->sc_semopm)
return -E2BIG;
if(nsops > SEMOPM_FAST) {
sops = kmalloc(sizeof(*sops)*nsops,GFP_KERNEL);
@@ -1109,7 +1163,7 @@ asmlinkage long sys_semtimedop(int semid, struct sembuf __user *tsops,
retry_undos:
if (undos) {
- un = find_undo(semid);
+ un = find_undo(ns, semid);
if (IS_ERR(un)) {
error = PTR_ERR(un);
goto out_free;
@@ -1117,12 +1171,12 @@ retry_undos:
} else
un = NULL;
- sma = sem_lock(semid);
+ sma = sem_lock(ns, semid);
error=-EINVAL;
if(sma==NULL)
goto out_free;
error = -EIDRM;
- if (sem_checkid(sma,semid))
+ if (sem_checkid(ns,sma,semid))
goto out_unlock_free;
/*
* semid identifies are not unique - find_undo may have
@@ -1190,7 +1244,7 @@ retry_undos:
goto out_free;
}
- sma = sem_lock(semid);
+ sma = sem_lock(ns, semid);
if(sma==NULL) {
BUG_ON(queue.prev != NULL);
error = -EIDRM;
@@ -1267,6 +1321,7 @@ void exit_sem(struct task_struct *tsk)
{
struct sem_undo_list *undo_list;
struct sem_undo *u, **up;
+ struct ipc_namespace *ns;
undo_list = tsk->sysvsem.undo_list;
if (!undo_list)
@@ -1275,6 +1330,7 @@ void exit_sem(struct task_struct *tsk)
if (!atomic_dec_and_test(&undo_list->refcnt))
return;
+ ns = tsk->nsproxy->ipc_ns;
/* There's no need to hold the semundo list lock, as current
* is the last task exiting for this undo list.
*/
@@ -1288,14 +1344,14 @@ void exit_sem(struct task_struct *tsk)
if(semid == -1)
continue;
- sma = sem_lock(semid);
+ sma = sem_lock(ns, semid);
if (sma == NULL)
continue;
if (u->semid == -1)
goto next_entry;
- BUG_ON(sem_checkid(sma,u->semid));
+ BUG_ON(sem_checkid(ns,sma,u->semid));
/* remove u from the sma->undo list */
for (unp = &sma->undo; (un = *unp); unp = &un->id_next) {
diff --git a/ipc/shm.c b/ipc/shm.c
index 940b0c9b13a..bfbd317ec11 100644
--- a/ipc/shm.c
+++ b/ipc/shm.c
@@ -15,6 +15,10 @@
*
* support for audit of ipc object properties and permission changes
* Dustin Kirkland <dustin.kirkland@us.ibm.com>
+ *
+ * namespaces support
+ * OpenVZ, SWsoft Inc.
+ * Pavel Emelianov <xemul@openvz.org>
*/
#include <linux/slab.h>
@@ -32,6 +36,7 @@
#include <linux/ptrace.h>
#include <linux/seq_file.h>
#include <linux/mutex.h>
+#include <linux/nsproxy.h>
#include <asm/uaccess.h>
@@ -40,59 +45,115 @@
static struct file_operations shm_file_operations;
static struct vm_operations_struct shm_vm_ops;
-static struct ipc_ids shm_ids;
+static struct ipc_ids init_shm_ids;
+
+#define shm_ids(ns) (*((ns)->ids[IPC_SHM_IDS]))
-#define shm_lock(id) ((struct shmid_kernel*)ipc_lock(&shm_ids,id))
-#define shm_unlock(shp) ipc_unlock(&(shp)->shm_perm)
-#define shm_get(id) ((struct shmid_kernel*)ipc_get(&shm_ids,id))
-#define shm_buildid(id, seq) \
- ipc_buildid(&shm_ids, id, seq)
+#define shm_lock(ns, id) \
+ ((struct shmid_kernel*)ipc_lock(&shm_ids(ns),id))
+#define shm_unlock(shp) \
+ ipc_unlock(&(shp)->shm_perm)
+#define shm_get(ns, id) \
+ ((struct shmid_kernel*)ipc_get(&shm_ids(ns),id))
+#define shm_buildid(ns, id, seq) \
+ ipc_buildid(&shm_ids(ns), id, seq)
-static int newseg (key_t key, int shmflg, size_t size);
+static int newseg (struct ipc_namespace *ns, key_t key,
+ int shmflg, size_t size);
static void shm_open (struct vm_area_struct *shmd);
static void shm_close (struct vm_area_struct *shmd);
+static void shm_destroy (struct ipc_namespace *ns, struct shmid_kernel *shp);
#ifdef CONFIG_PROC_FS
static int sysvipc_shm_proc_show(struct seq_file *s, void *it);
#endif
-size_t shm_ctlmax = SHMMAX;
-size_t shm_ctlall = SHMALL;
-int shm_ctlmni = SHMMNI;
+static void __ipc_init __shm_init_ns(struct ipc_namespace *ns, struct ipc_ids *ids)
+{
+ ns->ids[IPC_SHM_IDS] = ids;
+ ns->shm_ctlmax = SHMMAX;
+ ns->shm_ctlall = SHMALL;
+ ns->shm_ctlmni = SHMMNI;
+ ns->shm_tot = 0;
+ ipc_init_ids(ids, 1);
+}
+
+static void do_shm_rmid(struct ipc_namespace *ns, struct shmid_kernel *shp)
+{
+ if (shp->shm_nattch){
+ shp->shm_perm.mode |= SHM_DEST;
+ /* Do not find it any more */
+ shp->shm_perm.key = IPC_PRIVATE;
+ shm_unlock(shp);
+ } else
+ shm_destroy(ns, shp);
+}
+
+#ifdef CONFIG_IPC_NS
+int shm_init_ns(struct ipc_namespace *ns)
+{
+ struct ipc_ids *ids;
+
+ ids = kmalloc(sizeof(struct ipc_ids), GFP_KERNEL);
+ if (ids == NULL)
+ return -ENOMEM;
-static int shm_tot; /* total number of shared memory pages */
+ __shm_init_ns(ns, ids);
+ return 0;
+}
+
+void shm_exit_ns(struct ipc_namespace *ns)
+{
+ int i;
+ struct shmid_kernel *shp;
+
+ mutex_lock(&shm_ids(ns).mutex);
+ for (i = 0; i <= shm_ids(ns).max_id; i++) {
+ shp = shm_lock(ns, i);
+ if (shp == NULL)
+ continue;
+
+ do_shm_rmid(ns, shp);
+ }
+ mutex_unlock(&shm_ids(ns).mutex);
+
+ kfree(ns->ids[IPC_SHM_IDS]);
+ ns->ids[IPC_SHM_IDS] = NULL;
+}
+#endif
void __init shm_init (void)
{
- ipc_init_ids(&shm_ids, 1);
+ __shm_init_ns(&init_ipc_ns, &init_shm_ids);
ipc_init_proc_interface("sysvipc/shm",
" key shmid perms size cpid lpid nattch uid gid cuid cgid atime dtime ctime\n",
- &shm_ids,
- sysvipc_shm_proc_show);
+ IPC_SHM_IDS, sysvipc_shm_proc_show);
}
-static inline int shm_checkid(struct shmid_kernel *s, int id)
+static inline int shm_checkid(struct ipc_namespace *ns,
+ struct shmid_kernel *s, int id)
{
- if (ipc_checkid(&shm_ids,&s->shm_perm,id))
+ if (ipc_checkid(&shm_ids(ns), &s->shm_perm, id))
return -EIDRM;
return 0;
}
-static inline struct shmid_kernel *shm_rmid(int id)
+static inline struct shmid_kernel *shm_rmid(struct ipc_namespace *ns, int id)
{
- return (struct shmid_kernel *)ipc_rmid(&shm_ids,id);
+ return (struct shmid_kernel *)ipc_rmid(&shm_ids(ns), id);
}
-static inline int shm_addid(struct shmid_kernel *shp)
+static inline int shm_addid(struct ipc_namespace *ns, struct shmid_kernel *shp)
{
- return ipc_addid(&shm_ids, &shp->shm_perm, shm_ctlmni);
+ return ipc_addid(&shm_ids(ns), &shp->shm_perm, ns->shm_ctlmni);
}
-static inline void shm_inc (int id) {
+static inline void shm_inc(struct ipc_namespace *ns, int id)
+{
struct shmid_kernel *shp;
- shp = shm_lock(id);
+ shp = shm_lock(ns, id);
BUG_ON(!shp);
shp->shm_atim = get_seconds();
shp->shm_lprid = current->tgid;
@@ -100,10 +161,13 @@ static inline void shm_inc (int id) {
shm_unlock(shp);
}
+#define shm_file_ns(file) (*((struct ipc_namespace **)&(file)->private_data))
+
/* This is called by fork, once for every shm attach. */
-static void shm_open (struct vm_area_struct *shmd)
+static void shm_open(struct vm_area_struct *shmd)
{
- shm_inc (shmd->vm_file->f_dentry->d_inode->i_ino);
+ shm_inc(shm_file_ns(shmd->vm_file),
+ shmd->vm_file->f_dentry->d_inode->i_ino);
}
/*
@@ -114,10 +178,10 @@ static void shm_open (struct vm_area_struct *shmd)
* It has to be called with shp and shm_ids.mutex locked,
* but returns with shp unlocked and freed.
*/
-static void shm_destroy (struct shmid_kernel *shp)
+static void shm_destroy(struct ipc_namespace *ns, struct shmid_kernel *shp)
{
- shm_tot -= (shp->shm_segsz + PAGE_SIZE - 1) >> PAGE_SHIFT;
- shm_rmid (shp->id);
+ ns->shm_tot -= (shp->shm_segsz + PAGE_SIZE - 1) >> PAGE_SHIFT;
+ shm_rmid(ns, shp->id);
shm_unlock(shp);
if (!is_file_hugepages(shp->shm_file))
shmem_lock(shp->shm_file, 0, shp->mlock_user);
@@ -140,20 +204,23 @@ static void shm_close (struct vm_area_struct *shmd)
struct file * file = shmd->vm_file;
int id = file->f_dentry->d_inode->i_ino;
struct shmid_kernel *shp;
+ struct ipc_namespace *ns;
- mutex_lock(&shm_ids.mutex);
+ ns = shm_file_ns(file);
+
+ mutex_lock(&shm_ids(ns).mutex);
/* remove from the list of attaches of the shm segment */
- shp = shm_lock(id);
+ shp = shm_lock(ns, id);
BUG_ON(!shp);
shp->shm_lprid = current->tgid;
shp->shm_dtim = get_seconds();
shp->shm_nattch--;
if(shp->shm_nattch == 0 &&
shp->shm_perm.mode & SHM_DEST)
- shm_destroy (shp);
+ shm_destroy(ns, shp);
else
shm_unlock(shp);
- mutex_unlock(&shm_ids.mutex);
+ mutex_unlock(&shm_ids(ns).mutex);
}
static int shm_mmap(struct file * file, struct vm_area_struct * vma)
@@ -165,14 +232,25 @@ static int shm_mmap(struct file * file, struct vm_area_struct * vma)
vma->vm_ops = &shm_vm_ops;
if (!(vma->vm_flags & VM_WRITE))
vma->vm_flags &= ~VM_MAYWRITE;
- shm_inc(file->f_dentry->d_inode->i_ino);
+ shm_inc(shm_file_ns(file), file->f_dentry->d_inode->i_ino);
}
return ret;
}
+static int shm_release(struct inode *ino, struct file *file)
+{
+ struct ipc_namespace *ns;
+
+ ns = shm_file_ns(file);
+ put_ipc_ns(ns);
+ shm_file_ns(file) = NULL;
+ return 0;
+}
+
static struct file_operations shm_file_operations = {
- .mmap = shm_mmap,
+ .mmap = shm_mmap,
+ .release = shm_release,
#ifndef CONFIG_MMU
.get_unmapped_area = shmem_get_unmapped_area,
#endif
@@ -188,7 +266,7 @@ static struct vm_operations_struct shm_vm_ops = {
#endif
};
-static int newseg (key_t key, int shmflg, size_t size)
+static int newseg (struct ipc_namespace *ns, key_t key, int shmflg, size_t size)
{
int error;
struct shmid_kernel *shp;
@@ -197,10 +275,10 @@ static int newseg (key_t key, int shmflg, size_t size)
char name[13];
int id;
- if (size < SHMMIN || size > shm_ctlmax)
+ if (size < SHMMIN || size > ns->shm_ctlmax)
return -EINVAL;
- if (shm_tot + numpages >= shm_ctlall)
+ if (ns->shm_tot + numpages >= ns->shm_ctlall)
return -ENOSPC;
shp = ipc_rcu_alloc(sizeof(*shp));
@@ -239,7 +317,7 @@ static int newseg (key_t key, int shmflg, size_t size)
goto no_file;
error = -ENOSPC;
- id = shm_addid(shp);
+ id = shm_addid(ns, shp);
if(id == -1)
goto no_id;
@@ -249,15 +327,17 @@ static int newseg (key_t key, int shmflg, size_t size)
shp->shm_ctim = get_seconds();
shp->shm_segsz = size;
shp->shm_nattch = 0;
- shp->id = shm_buildid(id,shp->shm_perm.seq);
+ shp->id = shm_buildid(ns, id, shp->shm_perm.seq);
shp->shm_file = file;
file->f_dentry->d_inode->i_ino = shp->id;
+ shm_file_ns(file) = get_ipc_ns(ns);
+
/* Hugetlb ops would have already been assigned. */
if (!(shmflg & SHM_HUGETLB))
file->f_op = &shm_file_operations;
- shm_tot += numpages;
+ ns->shm_tot += numpages;
shm_unlock(shp);
return shp->id;
@@ -273,33 +353,36 @@ asmlinkage long sys_shmget (key_t key, size_t size, int shmflg)
{
struct shmid_kernel *shp;
int err, id = 0;
+ struct ipc_namespace *ns;
+
+ ns = current->nsproxy->ipc_ns;
- mutex_lock(&shm_ids.mutex);
+ mutex_lock(&shm_ids(ns).mutex);
if (key == IPC_PRIVATE) {
- err = newseg(key, shmflg, size);
- } else if ((id = ipc_findkey(&shm_ids, key)) == -1) {
+ err = newseg(ns, key, shmflg, size);
+ } else if ((id = ipc_findkey(&shm_ids(ns), key)) == -1) {
if (!(shmflg & IPC_CREAT))
err = -ENOENT;
else
- err = newseg(key, shmflg, size);
+ err = newseg(ns, key, shmflg, size);
} else if ((shmflg & IPC_CREAT) && (shmflg & IPC_EXCL)) {
err = -EEXIST;
} else {
- shp = shm_lock(id);
+ shp = shm_lock(ns, id);
BUG_ON(shp==NULL);
if (shp->shm_segsz < size)
err = -EINVAL;
else if (ipcperms(&shp->shm_perm, shmflg))
err = -EACCES;
else {
- int shmid = shm_buildid(id, shp->shm_perm.seq);
+ int shmid = shm_buildid(ns, id, shp->shm_perm.seq);
err = security_shm_associate(shp, shmflg);
if (!err)
err = shmid;
}
shm_unlock(shp);
}
- mutex_unlock(&shm_ids.mutex);
+ mutex_unlock(&shm_ids(ns).mutex);
return err;
}
@@ -395,18 +478,19 @@ static inline unsigned long copy_shminfo_to_user(void __user *buf, struct shminf
}
}
-static void shm_get_stat(unsigned long *rss, unsigned long *swp)
+static void shm_get_stat(struct ipc_namespace *ns, unsigned long *rss,
+ unsigned long *swp)
{
int i;
*rss = 0;
*swp = 0;
- for (i = 0; i <= shm_ids.max_id; i++) {
+ for (i = 0; i <= shm_ids(ns).max_id; i++) {
struct shmid_kernel *shp;
struct inode *inode;
- shp = shm_get(i);
+ shp = shm_get(ns, i);
if(!shp)
continue;
@@ -430,6 +514,7 @@ asmlinkage long sys_shmctl (int shmid, int cmd, struct shmid_ds __user *buf)
struct shm_setbuf setbuf;
struct shmid_kernel *shp;
int err, version;
+ struct ipc_namespace *ns;
if (cmd < 0 || shmid < 0) {
err = -EINVAL;
@@ -437,6 +522,7 @@ asmlinkage long sys_shmctl (int shmid, int cmd, struct shmid_ds __user *buf)
}
version = ipc_parse_version(&cmd);
+ ns = current->nsproxy->ipc_ns;
switch (cmd) { /* replace with proc interface ? */
case IPC_INFO:
@@ -448,15 +534,15 @@ asmlinkage long sys_shmctl (int shmid, int cmd, struct shmid_ds __user *buf)
return err;
memset(&shminfo,0,sizeof(shminfo));
- shminfo.shmmni = shminfo.shmseg = shm_ctlmni;
- shminfo.shmmax = shm_ctlmax;
- shminfo.shmall = shm_ctlall;
+ shminfo.shmmni = shminfo.shmseg = ns->shm_ctlmni;
+ shminfo.shmmax = ns->shm_ctlmax;
+ shminfo.shmall = ns->shm_ctlall;
shminfo.shmmin = SHMMIN;
if(copy_shminfo_to_user (buf, &shminfo, version))
return -EFAULT;
/* reading a integer is always atomic */
- err= shm_ids.max_id;
+ err= shm_ids(ns).max_id;
if(err<0)
err = 0;
goto out;
@@ -470,14 +556,14 @@ asmlinkage long sys_shmctl (int shmid, int cmd, struct shmid_ds __user *buf)
return err;
memset(&shm_info,0,sizeof(shm_info));
- mutex_lock(&shm_ids.mutex);
- shm_info.used_ids = shm_ids.in_use;
- shm_get_stat (&shm_info.shm_rss, &shm_info.shm_swp);
- shm_info.shm_tot = shm_tot;
+ mutex_lock(&shm_ids(ns).mutex);
+ shm_info.used_ids = shm_ids(ns).in_use;
+ shm_get_stat (ns, &shm_info.shm_rss, &shm_info.shm_swp);
+ shm_info.shm_tot = ns->shm_tot;
shm_info.swap_attempts = 0;
shm_info.swap_successes = 0;
- err = shm_ids.max_id;
- mutex_unlock(&shm_ids.mutex);
+ err = shm_ids(ns).max_id;
+ mutex_unlock(&shm_ids(ns).mutex);
if(copy_to_user (buf, &shm_info, sizeof(shm_info))) {
err = -EFAULT;
goto out;
@@ -492,17 +578,17 @@ asmlinkage long sys_shmctl (int shmid, int cmd, struct shmid_ds __user *buf)
struct shmid64_ds tbuf;
int result;
memset(&tbuf, 0, sizeof(tbuf));
- shp = shm_lock(shmid);
+ shp = shm_lock(ns, shmid);
if(shp==NULL) {
err = -EINVAL;
goto out;
} else if(cmd==SHM_STAT) {
err = -EINVAL;
- if (shmid > shm_ids.max_id)
+ if (shmid > shm_ids(ns).max_id)
goto out_unlock;
- result = shm_buildid(shmid, shp->shm_perm.seq);
+ result = shm_buildid(ns, shmid, shp->shm_perm.seq);
} else {
- err = shm_checkid(shp,shmid);
+ err = shm_checkid(ns, shp,shmid);
if(err)
goto out_unlock;
result = 0;
@@ -534,12 +620,12 @@ asmlinkage long sys_shmctl (int shmid, int cmd, struct shmid_ds __user *buf)
case SHM_LOCK:
case SHM_UNLOCK:
{
- shp = shm_lock(shmid);
+ shp = shm_lock(ns, shmid);
if(shp==NULL) {
err = -EINVAL;
goto out;
}
- err = shm_checkid(shp,shmid);
+ err = shm_checkid(ns, shp,shmid);
if(err)
goto out_unlock;
@@ -590,12 +676,12 @@ asmlinkage long sys_shmctl (int shmid, int cmd, struct shmid_ds __user *buf)
* Instead we set a destroyed flag, and then blow
* the name away when the usage hits zero.
*/
- mutex_lock(&shm_ids.mutex);
- shp = shm_lock(shmid);
+ mutex_lock(&shm_ids(ns).mutex);
+ shp = shm_lock(ns, shmid);
err = -EINVAL;
if (shp == NULL)
goto out_up;
- err = shm_checkid(shp, shmid);
+ err = shm_checkid(ns, shp, shmid);
if(err)
goto out_unlock_up;
@@ -614,14 +700,8 @@ asmlinkage long sys_shmctl (int shmid, int cmd, struct shmid_ds __user *buf)
if (err)
goto out_unlock_up;
- if (shp->shm_nattch){
- shp->shm_perm.mode |= SHM_DEST;
- /* Do not find it any more */
- shp->shm_perm.key = IPC_PRIVATE;
- shm_unlock(shp);
- } else
- shm_destroy (shp);
- mutex_unlock(&shm_ids.mutex);
+ do_shm_rmid(ns, shp);
+ mutex_unlock(&shm_ids(ns).mutex);
goto out;
}
@@ -631,12 +711,12 @@ asmlinkage long sys_shmctl (int shmid, int cmd, struct shmid_ds __user *buf)
err = -EFAULT;
goto out;
}
- mutex_lock(&shm_ids.mutex);
- shp = shm_lock(shmid);
+ mutex_lock(&shm_ids(ns).mutex);
+ shp = shm_lock(ns, shmid);
err=-EINVAL;
if(shp==NULL)
goto out_up;
- err = shm_checkid(shp,shmid);
+ err = shm_checkid(ns, shp,shmid);
if(err)
goto out_unlock_up;
err = audit_ipc_obj(&(shp->shm_perm));
@@ -673,7 +753,7 @@ asmlinkage long sys_shmctl (int shmid, int cmd, struct shmid_ds __user *buf)
out_unlock_up:
shm_unlock(shp);
out_up:
- mutex_unlock(&shm_ids.mutex);
+ mutex_unlock(&shm_ids(ns).mutex);
goto out;
out_unlock:
shm_unlock(shp);
@@ -699,6 +779,7 @@ long do_shmat(int shmid, char __user *shmaddr, int shmflg, ulong *raddr)
unsigned long prot;
int acc_mode;
void *user_addr;
+ struct ipc_namespace *ns;
if (shmid < 0) {
err = -EINVAL;
@@ -737,12 +818,13 @@ long do_shmat(int shmid, char __user *shmaddr, int shmflg, ulong *raddr)
* We cannot rely on the fs check since SYSV IPC does have an
* additional creator id...
*/
- shp = shm_lock(shmid);
+ ns = current->nsproxy->ipc_ns;
+ shp = shm_lock(ns, shmid);
if(shp == NULL) {
err = -EINVAL;
goto out;
}
- err = shm_checkid(shp,shmid);
+ err = shm_checkid(ns, shp,shmid);
if (err) {
shm_unlock(shp);
goto out;
@@ -783,16 +865,16 @@ long do_shmat(int shmid, char __user *shmaddr, int shmflg, ulong *raddr)
invalid:
up_write(&current->mm->mmap_sem);
- mutex_lock(&shm_ids.mutex);
- shp = shm_lock(shmid);
+ mutex_lock(&shm_ids(ns).mutex);
+ shp = shm_lock(ns, shmid);
BUG_ON(!shp);
shp->shm_nattch--;
if(shp->shm_nattch == 0 &&
shp->shm_perm.mode & SHM_DEST)
- shm_destroy (shp);
+ shm_destroy(ns, shp);
else
shm_unlock(shp);
- mutex_unlock(&shm_ids.mutex);
+ mutex_unlock(&shm_ids(ns).mutex);
*raddr = (unsigned long) user_addr;
err = 0;
diff --git a/ipc/util.c b/ipc/util.c
index 67b6d178db6..42479e4eec5 100644
--- a/ipc/util.c
+++ b/ipc/util.c
@@ -12,6 +12,9 @@
* Mingming Cao <cmm@us.ibm.com>
* Mar 2006 - support for audit of ipc object properties
* Dustin Kirkland <dustin.kirkland@us.ibm.com>
+ * Jun 2006 - namespaces ssupport
+ * OpenVZ, SWsoft Inc.
+ * Pavel Emelianov <xemul@openvz.org>
*/
#include <linux/mm.h>
@@ -29,6 +32,7 @@
#include <linux/seq_file.h>
#include <linux/proc_fs.h>
#include <linux/audit.h>
+#include <linux/nsproxy.h>
#include <asm/unistd.h>
@@ -37,10 +41,111 @@
struct ipc_proc_iface {
const char *path;
const char *header;
- struct ipc_ids *ids;
+ int ids;
int (*show)(struct seq_file *, void *);
};
+struct ipc_namespace init_ipc_ns = {
+ .kref = {
+ .refcount = ATOMIC_INIT(2),
+ },
+};
+
+#ifdef CONFIG_IPC_NS
+static struct ipc_namespace *clone_ipc_ns(struct ipc_namespace *old_ns)
+{
+ int err;
+ struct ipc_namespace *ns;
+
+ err = -ENOMEM;
+ ns = kmalloc(sizeof(struct ipc_namespace), GFP_KERNEL);
+ if (ns == NULL)
+ goto err_mem;
+
+ err = sem_init_ns(ns);
+ if (err)
+ goto err_sem;
+ err = msg_init_ns(ns);
+ if (err)
+ goto err_msg;
+ err = shm_init_ns(ns);
+ if (err)
+ goto err_shm;
+
+ kref_init(&ns->kref);
+ return ns;
+
+err_shm:
+ msg_exit_ns(ns);
+err_msg:
+ sem_exit_ns(ns);
+err_sem:
+ kfree(ns);
+err_mem:
+ return ERR_PTR(err);
+}
+
+int unshare_ipcs(unsigned long unshare_flags, struct ipc_namespace **new_ipc)
+{
+ struct ipc_namespace *new;
+
+ if (unshare_flags & CLONE_NEWIPC) {
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+
+ new = clone_ipc_ns(current->nsproxy->ipc_ns);
+ if (IS_ERR(new))
+ return PTR_ERR(new);
+
+ *new_ipc = new;
+ }
+
+ return 0;
+}
+
+int copy_ipcs(unsigned long flags, struct task_struct *tsk)
+{
+ struct ipc_namespace *old_ns = tsk->nsproxy->ipc_ns;
+ struct ipc_namespace *new_ns;
+ int err = 0;
+
+ if (!old_ns)
+ return 0;
+
+ get_ipc_ns(old_ns);
+
+ if (!(flags & CLONE_NEWIPC))
+ return 0;
+
+ if (!capable(CAP_SYS_ADMIN)) {
+ err = -EPERM;
+ goto out;
+ }
+
+ new_ns = clone_ipc_ns(old_ns);
+ if (!new_ns) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ tsk->nsproxy->ipc_ns = new_ns;
+out:
+ put_ipc_ns(old_ns);
+ return err;
+}
+
+void free_ipc_ns(struct kref *kref)
+{
+ struct ipc_namespace *ns;
+
+ ns = container_of(kref, struct ipc_namespace, kref);
+ sem_exit_ns(ns);
+ msg_exit_ns(ns);
+ shm_exit_ns(ns);
+ kfree(ns);
+}
+#endif
+
/**
* ipc_init - initialise IPC subsystem
*
@@ -67,7 +172,7 @@ __initcall(ipc_init);
* array itself.
*/
-void __init ipc_init_ids(struct ipc_ids* ids, int size)
+void __ipc_init ipc_init_ids(struct ipc_ids* ids, int size)
{
int i;
@@ -110,8 +215,7 @@ static struct file_operations sysvipc_proc_fops;
* @show: show routine.
*/
void __init ipc_init_proc_interface(const char *path, const char *header,
- struct ipc_ids *ids,
- int (*show)(struct seq_file *, void *))
+ int ids, int (*show)(struct seq_file *, void *))
{
struct proc_dir_entry *pde;
struct ipc_proc_iface *iface;
@@ -635,6 +739,9 @@ static void *sysvipc_proc_next(struct seq_file *s, void *it, loff_t *pos)
struct ipc_proc_iface *iface = s->private;
struct kern_ipc_perm *ipc = it;
loff_t p;
+ struct ipc_ids *ids;
+
+ ids = current->nsproxy->ipc_ns->ids[iface->ids];
/* If we had an ipc id locked before, unlock it */
if (ipc && ipc != SEQ_START_TOKEN)
@@ -644,8 +751,8 @@ static void *sysvipc_proc_next(struct seq_file *s, void *it, loff_t *pos)
* p = *pos - 1 (because id 0 starts at position 1)
* + 1 (because we increment the position by one)
*/
- for (p = *pos; p <= iface->ids->max_id; p++) {
- if ((ipc = ipc_lock(iface->ids, p)) != NULL) {
+ for (p = *pos; p <= ids->max_id; p++) {
+ if ((ipc = ipc_lock(ids, p)) != NULL) {
*pos = p + 1;
return ipc;
}
@@ -664,12 +771,15 @@ static void *sysvipc_proc_start(struct seq_file *s, loff_t *pos)
struct ipc_proc_iface *iface = s->private;
struct kern_ipc_perm *ipc;
loff_t p;
+ struct ipc_ids *ids;
+
+ ids = current->nsproxy->ipc_ns->ids[iface->ids];
/*
* Take the lock - this will be released by the corresponding
* call to stop().
*/
- mutex_lock(&iface->ids->mutex);
+ mutex_lock(&ids->mutex);
/* pos < 0 is invalid */
if (*pos < 0)
@@ -680,8 +790,8 @@ static void *sysvipc_proc_start(struct seq_file *s, loff_t *pos)
return SEQ_START_TOKEN;
/* Find the (pos-1)th ipc */
- for (p = *pos - 1; p <= iface->ids->max_id; p++) {
- if ((ipc = ipc_lock(iface->ids, p)) != NULL) {
+ for (p = *pos - 1; p <= ids->max_id; p++) {
+ if ((ipc = ipc_lock(ids, p)) != NULL) {
*pos = p + 1;
return ipc;
}
@@ -693,13 +803,15 @@ static void sysvipc_proc_stop(struct seq_file *s, void *it)
{
struct kern_ipc_perm *ipc = it;
struct ipc_proc_iface *iface = s->private;
+ struct ipc_ids *ids;
/* If we had a locked segment, release it */
if (ipc && ipc != SEQ_START_TOKEN)
ipc_unlock(ipc);
+ ids = current->nsproxy->ipc_ns->ids[iface->ids];
/* Release the lock we took in start() */
- mutex_unlock(&iface->ids->mutex);
+ mutex_unlock(&ids->mutex);
}
static int sysvipc_proc_show(struct seq_file *s, void *it)
diff --git a/ipc/util.h b/ipc/util.h
index 0181553d31d..c8fd6b9d77b 100644
--- a/ipc/util.h
+++ b/ipc/util.h
@@ -3,6 +3,8 @@
* Copyright (C) 1999 Christoph Rohland
*
* ipc helper functions (c) 1999 Manfred Spraul <manfred@colorfullife.com>
+ * namespaces support. 2006 OpenVZ, SWsoft Inc.
+ * Pavel Emelianov <xemul@openvz.org>
*/
#ifndef _IPC_UTIL_H
@@ -15,6 +17,14 @@ void sem_init (void);
void msg_init (void);
void shm_init (void);
+int sem_init_ns(struct ipc_namespace *ns);
+int msg_init_ns(struct ipc_namespace *ns);
+int shm_init_ns(struct ipc_namespace *ns);
+
+void sem_exit_ns(struct ipc_namespace *ns);
+void msg_exit_ns(struct ipc_namespace *ns);
+void shm_exit_ns(struct ipc_namespace *ns);
+
struct ipc_id_ary {
int size;
struct kern_ipc_perm *p[0];
@@ -31,15 +41,23 @@ struct ipc_ids {
};
struct seq_file;
-void __init ipc_init_ids(struct ipc_ids* ids, int size);
+#ifdef CONFIG_IPC_NS
+#define __ipc_init
+#else
+#define __ipc_init __init
+#endif
+void __ipc_init ipc_init_ids(struct ipc_ids *ids, int size);
#ifdef CONFIG_PROC_FS
void __init ipc_init_proc_interface(const char *path, const char *header,
- struct ipc_ids *ids,
- int (*show)(struct seq_file *, void *));
+ int ids, int (*show)(struct seq_file *, void *));
#else
#define ipc_init_proc_interface(path, header, ids, show) do {} while (0)
#endif
+#define IPC_SEM_IDS 0
+#define IPC_MSG_IDS 1
+#define IPC_SHM_IDS 2
+
/* must be called with ids->mutex acquired.*/
int ipc_findkey(struct ipc_ids* ids, key_t key);
int ipc_addid(struct ipc_ids* ids, struct kern_ipc_perm* new, int size);
diff --git a/kernel/Makefile b/kernel/Makefile
index aacaafb28b9..d948ca12acf 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -8,7 +8,7 @@ obj-y = sched.o fork.o exec_domain.o panic.o printk.o profile.o \
signal.o sys.o kmod.o workqueue.o pid.o \
rcupdate.o extable.o params.o posix-timers.o \
kthread.o wait.o kfifo.o sys_ni.o posix-cpu-timers.o mutex.o \
- hrtimer.o rwsem.o latency.o
+ hrtimer.o rwsem.o latency.o nsproxy.o
obj-$(CONFIG_STACKTRACE) += stacktrace.o
obj-y += time/
@@ -48,6 +48,7 @@ obj-$(CONFIG_GENERIC_HARDIRQS) += irq/
obj-$(CONFIG_SECCOMP) += seccomp.o
obj-$(CONFIG_RCU_TORTURE_TEST) += rcutorture.o
obj-$(CONFIG_RELAY) += relay.o
+obj-$(CONFIG_UTS_NS) += utsname.o
obj-$(CONFIG_TASK_DELAY_ACCT) += delayacct.o
obj-$(CONFIG_TASKSTATS) += taskstats.o tsacct.o
diff --git a/kernel/compat.c b/kernel/compat.c
index b4fbd838cd7..75573e5d27b 100644
--- a/kernel/compat.c
+++ b/kernel/compat.c
@@ -26,8 +26,6 @@
#include <asm/uaccess.h>
-extern void sigset_from_compat(sigset_t *set, compat_sigset_t *compat);
-
int get_compat_timespec(struct timespec *ts, const struct compat_timespec __user *cts)
{
return (!access_ok(VERIFY_READ, cts, sizeof(*cts)) ||
diff --git a/kernel/exit.c b/kernel/exit.c
index 3b47f26985f..f250a5e3e28 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -21,6 +21,7 @@
#include <linux/tsacct_kern.h>
#include <linux/file.h>
#include <linux/binfmts.h>
+#include <linux/nsproxy.h>
#include <linux/ptrace.h>
#include <linux/profile.h>
#include <linux/mount.h>
@@ -397,9 +398,11 @@ void daemonize(const char *name, ...)
fs = init_task.fs;
current->fs = fs;
atomic_inc(&fs->count);
- exit_namespace(current);
- current->namespace = init_task.namespace;
- get_namespace(current->namespace);
+
+ exit_task_namespaces(current);
+ current->nsproxy = init_task.nsproxy;
+ get_task_namespaces(current);
+
exit_files(current);
current->files = init_task.files;
atomic_inc(&current->files->count);
@@ -917,7 +920,6 @@ fastcall NORET_TYPE void do_exit(long code)
exit_sem(tsk);
__exit_files(tsk);
__exit_fs(tsk);
- exit_namespace(tsk);
exit_thread();
cpuset_exit(tsk);
exit_keys(tsk);
@@ -932,6 +934,7 @@ fastcall NORET_TYPE void do_exit(long code)
tsk->exit_code = code;
proc_exit_connector(tsk);
exit_notify(tsk);
+ exit_task_namespaces(tsk);
#ifdef CONFIG_NUMA
mpol_free(tsk->mempolicy);
tsk->mempolicy = NULL;
diff --git a/kernel/fork.c b/kernel/fork.c
index 89f666491d1..7dc6140baac 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -27,6 +27,7 @@
#include <linux/binfmts.h>
#include <linux/mman.h>
#include <linux/fs.h>
+#include <linux/nsproxy.h>
#include <linux/capability.h>
#include <linux/cpu.h>
#include <linux/cpuset.h>
@@ -1116,11 +1117,11 @@ static struct task_struct *copy_process(unsigned long clone_flags,
goto bad_fork_cleanup_signal;
if ((retval = copy_keys(clone_flags, p)))
goto bad_fork_cleanup_mm;
- if ((retval = copy_namespace(clone_flags, p)))
+ if ((retval = copy_namespaces(clone_flags, p)))
goto bad_fork_cleanup_keys;
retval = copy_thread(0, clone_flags, stack_start, stack_size, p, regs);
if (retval)
- goto bad_fork_cleanup_namespace;
+ goto bad_fork_cleanup_namespaces;
p->set_child_tid = (clone_flags & CLONE_CHILD_SETTID) ? child_tidptr : NULL;
/*
@@ -1212,7 +1213,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
spin_unlock(&current->sighand->siglock);
write_unlock_irq(&tasklist_lock);
retval = -ERESTARTNOINTR;
- goto bad_fork_cleanup_namespace;
+ goto bad_fork_cleanup_namespaces;
}
if (clone_flags & CLONE_THREAD) {
@@ -1260,8 +1261,8 @@ static struct task_struct *copy_process(unsigned long clone_flags,
proc_fork_connector(p);
return p;
-bad_fork_cleanup_namespace:
- exit_namespace(p);
+bad_fork_cleanup_namespaces:
+ exit_task_namespaces(p);
bad_fork_cleanup_keys:
exit_keys(p);
bad_fork_cleanup_mm:
@@ -1514,10 +1515,9 @@ static int unshare_fs(unsigned long unshare_flags, struct fs_struct **new_fsp)
*/
static int unshare_namespace(unsigned long unshare_flags, struct namespace **new_nsp, struct fs_struct *new_fs)
{
- struct namespace *ns = current->namespace;
+ struct namespace *ns = current->nsproxy->namespace;
- if ((unshare_flags & CLONE_NEWNS) &&
- (ns && atomic_read(&ns->count) > 1)) {
+ if ((unshare_flags & CLONE_NEWNS) && ns) {
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
@@ -1589,6 +1589,16 @@ static int unshare_semundo(unsigned long unshare_flags, struct sem_undo_list **n
return 0;
}
+#ifndef CONFIG_IPC_NS
+static inline int unshare_ipcs(unsigned long flags, struct ipc_namespace **ns)
+{
+ if (flags & CLONE_NEWIPC)
+ return -EINVAL;
+
+ return 0;
+}
+#endif
+
/*
* unshare allows a process to 'unshare' part of the process
* context which was originally shared using clone. copy_*
@@ -1606,13 +1616,17 @@ asmlinkage long sys_unshare(unsigned long unshare_flags)
struct mm_struct *mm, *new_mm = NULL, *active_mm = NULL;
struct files_struct *fd, *new_fd = NULL;
struct sem_undo_list *new_ulist = NULL;
+ struct nsproxy *new_nsproxy = NULL, *old_nsproxy = NULL;
+ struct uts_namespace *uts, *new_uts = NULL;
+ struct ipc_namespace *ipc, *new_ipc = NULL;
check_unshare_flags(&unshare_flags);
/* Return -EINVAL for all unsupported flags */
err = -EINVAL;
if (unshare_flags & ~(CLONE_THREAD|CLONE_FS|CLONE_NEWNS|CLONE_SIGHAND|
- CLONE_VM|CLONE_FILES|CLONE_SYSVSEM))
+ CLONE_VM|CLONE_FILES|CLONE_SYSVSEM|
+ CLONE_NEWUTS|CLONE_NEWIPC))
goto bad_unshare_out;
if ((err = unshare_thread(unshare_flags)))
@@ -1629,11 +1643,30 @@ asmlinkage long sys_unshare(unsigned long unshare_flags)
goto bad_unshare_cleanup_vm;
if ((err = unshare_semundo(unshare_flags, &new_ulist)))
goto bad_unshare_cleanup_fd;
+ if ((err = unshare_utsname(unshare_flags, &new_uts)))
+ goto bad_unshare_cleanup_semundo;
+ if ((err = unshare_ipcs(unshare_flags, &new_ipc)))
+ goto bad_unshare_cleanup_uts;
+
+ if (new_ns || new_uts || new_ipc) {
+ old_nsproxy = current->nsproxy;
+ new_nsproxy = dup_namespaces(old_nsproxy);
+ if (!new_nsproxy) {
+ err = -ENOMEM;
+ goto bad_unshare_cleanup_ipc;
+ }
+ }
- if (new_fs || new_ns || new_sigh || new_mm || new_fd || new_ulist) {
+ if (new_fs || new_ns || new_sigh || new_mm || new_fd || new_ulist ||
+ new_uts || new_ipc) {
task_lock(current);
+ if (new_nsproxy) {
+ current->nsproxy = new_nsproxy;
+ new_nsproxy = old_nsproxy;
+ }
+
if (new_fs) {
fs = current->fs;
current->fs = new_fs;
@@ -1641,8 +1674,8 @@ asmlinkage long sys_unshare(unsigned long unshare_flags)
}
if (new_ns) {
- ns = current->namespace;
- current->namespace = new_ns;
+ ns = current->nsproxy->namespace;
+ current->nsproxy->namespace = new_ns;
new_ns = ns;
}
@@ -1667,9 +1700,33 @@ asmlinkage long sys_unshare(unsigned long unshare_flags)
new_fd = fd;
}
+ if (new_uts) {
+ uts = current->nsproxy->uts_ns;
+ current->nsproxy->uts_ns = new_uts;
+ new_uts = uts;
+ }
+
+ if (new_ipc) {
+ ipc = current->nsproxy->ipc_ns;
+ current->nsproxy->ipc_ns = new_ipc;
+ new_ipc = ipc;
+ }
+
task_unlock(current);
}
+ if (new_nsproxy)
+ put_nsproxy(new_nsproxy);
+
+bad_unshare_cleanup_ipc:
+ if (new_ipc)
+ put_ipc_ns(new_ipc);
+
+bad_unshare_cleanup_uts:
+ if (new_uts)
+ put_uts_ns(new_uts);
+
+bad_unshare_cleanup_semundo:
bad_unshare_cleanup_fd:
if (new_fd)
put_files_struct(new_fd);
diff --git a/kernel/futex.c b/kernel/futex.c
index 4b6770e9806..4aaf91951a4 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -1527,7 +1527,7 @@ static int futex_fd(u32 __user *uaddr, int signal)
filp->f_mapping = filp->f_dentry->d_inode->i_mapping;
if (signal) {
- err = f_setown(filp, current->pid, 1);
+ err = __f_setown(filp, task_pid(current), PIDTYPE_PID, 1);
if (err < 0) {
goto error;
}
diff --git a/kernel/kallsyms.c b/kernel/kallsyms.c
index ab16a5a4cfe..342bca62c49 100644
--- a/kernel/kallsyms.c
+++ b/kernel/kallsyms.c
@@ -154,7 +154,6 @@ unsigned long kallsyms_lookup_name(const char *name)
}
return module_kallsyms_lookup_name(name);
}
-EXPORT_SYMBOL_GPL(kallsyms_lookup_name);
/*
* Lookup an address
diff --git a/kernel/kmod.c b/kernel/kmod.c
index f8121b95183..bb4e29d924e 100644
--- a/kernel/kmod.c
+++ b/kernel/kmod.c
@@ -18,8 +18,6 @@
call_usermodehelper wait flag, and remove exec_usermodehelper.
Rusty Russell <rusty@rustcorp.com.au> Jan 2003
*/
-#define __KERNEL_SYSCALLS__
-
#include <linux/module.h>
#include <linux/sched.h>
#include <linux/syscalls.h>
@@ -169,7 +167,8 @@ static int ____call_usermodehelper(void *data)
retval = -EPERM;
if (current->fs->root)
- retval = execve(sub_info->path, sub_info->argv, sub_info->envp);
+ retval = kernel_execve(sub_info->path,
+ sub_info->argv, sub_info->envp);
/* Exec failed? */
sub_info->retval = retval;
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index 3f57dfdc8f9..610c837ad9e 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -37,6 +37,7 @@
#include <linux/slab.h>
#include <linux/module.h>
#include <linux/moduleloader.h>
+#include <linux/kallsyms.h>
#include <asm-generic/sections.h>
#include <asm/cacheflush.h>
#include <asm/errno.h>
@@ -45,6 +46,16 @@
#define KPROBE_HASH_BITS 6
#define KPROBE_TABLE_SIZE (1 << KPROBE_HASH_BITS)
+
+/*
+ * Some oddball architectures like 64bit powerpc have function descriptors
+ * so this must be overridable.
+ */
+#ifndef kprobe_lookup_name
+#define kprobe_lookup_name(name, addr) \
+ addr = ((kprobe_opcode_t *)(kallsyms_lookup_name(name)))
+#endif
+
static struct hlist_head kprobe_table[KPROBE_TABLE_SIZE];
static struct hlist_head kretprobe_inst_table[KPROBE_TABLE_SIZE];
static atomic_t kprobe_count;
@@ -308,7 +319,8 @@ void __kprobes add_rp_inst(struct kretprobe_instance *ri)
}
/* Called with kretprobe_lock held */
-void __kprobes recycle_rp_inst(struct kretprobe_instance *ri)
+void __kprobes recycle_rp_inst(struct kretprobe_instance *ri,
+ struct hlist_head *head)
{
/* remove rp inst off the rprobe_inst_table */
hlist_del(&ri->hlist);
@@ -320,7 +332,7 @@ void __kprobes recycle_rp_inst(struct kretprobe_instance *ri)
hlist_add_head(&ri->uflist, &ri->rp->free_instances);
} else
/* Unregistering */
- kfree(ri);
+ hlist_add_head(&ri->hlist, head);
}
struct hlist_head __kprobes *kretprobe_inst_table_head(struct task_struct *tsk)
@@ -336,18 +348,24 @@ struct hlist_head __kprobes *kretprobe_inst_table_head(struct task_struct *tsk)
*/
void __kprobes kprobe_flush_task(struct task_struct *tk)
{
- struct kretprobe_instance *ri;
- struct hlist_head *head;
+ struct kretprobe_instance *ri;
+ struct hlist_head *head, empty_rp;
struct hlist_node *node, *tmp;
unsigned long flags = 0;
+ INIT_HLIST_HEAD(&empty_rp);
spin_lock_irqsave(&kretprobe_lock, flags);
- head = kretprobe_inst_table_head(tk);
- hlist_for_each_entry_safe(ri, node, tmp, head, hlist) {
- if (ri->task == tk)
- recycle_rp_inst(ri);
- }
+ head = kretprobe_inst_table_head(tk);
+ hlist_for_each_entry_safe(ri, node, tmp, head, hlist) {
+ if (ri->task == tk)
+ recycle_rp_inst(ri, &empty_rp);
+ }
spin_unlock_irqrestore(&kretprobe_lock, flags);
+
+ hlist_for_each_entry_safe(ri, node, tmp, &empty_rp, hlist) {
+ hlist_del(&ri->hlist);
+ kfree(ri);
+ }
}
static inline void free_rp_inst(struct kretprobe *rp)
@@ -447,6 +465,21 @@ static int __kprobes __register_kprobe(struct kprobe *p,
struct kprobe *old_p;
struct module *probed_mod;
+ /*
+ * If we have a symbol_name argument look it up,
+ * and add it to the address. That way the addr
+ * field can either be global or relative to a symbol.
+ */
+ if (p->symbol_name) {
+ if (p->addr)
+ return -EINVAL;
+ kprobe_lookup_name(p->symbol_name, p->addr);
+ }
+
+ if (!p->addr)
+ return -EINVAL;
+ p->addr = (kprobe_opcode_t *)(((char *)p->addr)+ p->offset);
+
if ((!kernel_text_address((unsigned long) p->addr)) ||
in_kprobes_functions((unsigned long) p->addr))
return -EINVAL;
@@ -488,7 +521,7 @@ static int __kprobes __register_kprobe(struct kprobe *p,
(ARCH_INACTIVE_KPROBE_COUNT + 1))
register_page_fault_notifier(&kprobe_page_fault_nb);
- arch_arm_kprobe(p);
+ arch_arm_kprobe(p);
out:
mutex_unlock(&kprobe_mutex);
diff --git a/kernel/lockdep.c b/kernel/lockdep.c
index e596525669e..4c055346100 100644
--- a/kernel/lockdep.c
+++ b/kernel/lockdep.c
@@ -518,9 +518,9 @@ print_circular_bug_entry(struct lock_list *target, unsigned int depth)
static void print_kernel_version(void)
{
- printk("%s %.*s\n", system_utsname.release,
- (int)strcspn(system_utsname.version, " "),
- system_utsname.version);
+ printk("%s %.*s\n", init_utsname()->release,
+ (int)strcspn(init_utsname()->version, " "),
+ init_utsname()->version);
}
/*
diff --git a/kernel/module.c b/kernel/module.c
index 05625d5dc75..7c77a0a9275 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -851,6 +851,7 @@ static int check_version(Elf_Shdr *sechdrs,
printk("%s: no version for \"%s\" found: kernel tainted.\n",
mod->name, symname);
add_taint(TAINT_FORCED_MODULE);
+ mod->taints |= TAINT_FORCED_MODULE;
}
return 1;
}
@@ -1339,6 +1340,7 @@ static void set_license(struct module *mod, const char *license)
printk(KERN_WARNING "%s: module license '%s' taints kernel.\n",
mod->name, license);
add_taint(TAINT_PROPRIETARY_MODULE);
+ mod->taints |= TAINT_PROPRIETARY_MODULE;
}
}
@@ -1618,6 +1620,7 @@ static struct module *load_module(void __user *umod,
/* This is allowed: modprobe --force will invalidate it. */
if (!modmagic) {
add_taint(TAINT_FORCED_MODULE);
+ mod->taints |= TAINT_FORCED_MODULE;
printk(KERN_WARNING "%s: no version magic, tainting kernel.\n",
mod->name);
} else if (!same_magic(modmagic, vermagic)) {
@@ -1711,10 +1714,14 @@ static struct module *load_module(void __user *umod,
/* Set up license info based on the info section */
set_license(mod, get_modinfo(sechdrs, infoindex, "license"));
- if (strcmp(mod->name, "ndiswrapper") == 0)
+ if (strcmp(mod->name, "ndiswrapper") == 0) {
add_taint(TAINT_PROPRIETARY_MODULE);
- if (strcmp(mod->name, "driverloader") == 0)
+ mod->taints |= TAINT_PROPRIETARY_MODULE;
+ }
+ if (strcmp(mod->name, "driverloader") == 0) {
add_taint(TAINT_PROPRIETARY_MODULE);
+ mod->taints |= TAINT_PROPRIETARY_MODULE;
+ }
/* Set up MODINFO_ATTR fields */
setup_modinfo(mod, sechdrs, infoindex);
@@ -1760,6 +1767,7 @@ static struct module *load_module(void __user *umod,
printk(KERN_WARNING "%s: No versions for exported symbols."
" Tainting kernel.\n", mod->name);
add_taint(TAINT_FORCED_MODULE);
+ mod->taints |= TAINT_FORCED_MODULE;
}
#endif
@@ -2226,14 +2234,37 @@ struct module *module_text_address(unsigned long addr)
return mod;
}
+static char *taint_flags(unsigned int taints, char *buf)
+{
+ *buf = '\0';
+ if (taints) {
+ int bx;
+
+ buf[0] = '(';
+ bx = 1;
+ if (taints & TAINT_PROPRIETARY_MODULE)
+ buf[bx++] = 'P';
+ if (taints & TAINT_FORCED_MODULE)
+ buf[bx++] = 'F';
+ /*
+ * TAINT_FORCED_RMMOD: could be added.
+ * TAINT_UNSAFE_SMP, TAINT_MACHINE_CHECK, TAINT_BAD_PAGE don't
+ * apply to modules.
+ */
+ buf[bx] = ')';
+ }
+ return buf;
+}
+
/* Don't grab lock, we're oopsing. */
void print_modules(void)
{
struct module *mod;
+ char buf[8];
printk("Modules linked in:");
list_for_each_entry(mod, &modules, list)
- printk(" %s", mod->name);
+ printk(" %s%s", mod->name, taint_flags(mod->taints, buf));
printk("\n");
}
diff --git a/kernel/nsproxy.c b/kernel/nsproxy.c
new file mode 100644
index 00000000000..6ebdb82a0ce
--- /dev/null
+++ b/kernel/nsproxy.c
@@ -0,0 +1,139 @@
+/*
+ * Copyright (C) 2006 IBM Corporation
+ *
+ * Author: Serge Hallyn <serue@us.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation, version 2 of the
+ * License.
+ *
+ * Jun 2006 - namespaces support
+ * OpenVZ, SWsoft Inc.
+ * Pavel Emelianov <xemul@openvz.org>
+ */
+
+#include <linux/module.h>
+#include <linux/version.h>
+#include <linux/nsproxy.h>
+#include <linux/init_task.h>
+#include <linux/namespace.h>
+#include <linux/utsname.h>
+
+struct nsproxy init_nsproxy = INIT_NSPROXY(init_nsproxy);
+
+static inline void get_nsproxy(struct nsproxy *ns)
+{
+ atomic_inc(&ns->count);
+}
+
+void get_task_namespaces(struct task_struct *tsk)
+{
+ struct nsproxy *ns = tsk->nsproxy;
+ if (ns) {
+ get_nsproxy(ns);
+ }
+}
+
+/*
+ * creates a copy of "orig" with refcount 1.
+ * This does not grab references to the contained namespaces,
+ * so that needs to be done by dup_namespaces.
+ */
+static inline struct nsproxy *clone_namespaces(struct nsproxy *orig)
+{
+ struct nsproxy *ns;
+
+ ns = kmalloc(sizeof(struct nsproxy), GFP_KERNEL);
+ if (ns) {
+ memcpy(ns, orig, sizeof(struct nsproxy));
+ atomic_set(&ns->count, 1);
+ }
+ return ns;
+}
+
+/*
+ * copies the nsproxy, setting refcount to 1, and grabbing a
+ * reference to all contained namespaces. Called from
+ * sys_unshare()
+ */
+struct nsproxy *dup_namespaces(struct nsproxy *orig)
+{
+ struct nsproxy *ns = clone_namespaces(orig);
+
+ if (ns) {
+ if (ns->namespace)
+ get_namespace(ns->namespace);
+ if (ns->uts_ns)
+ get_uts_ns(ns->uts_ns);
+ if (ns->ipc_ns)
+ get_ipc_ns(ns->ipc_ns);
+ }
+
+ return ns;
+}
+
+/*
+ * called from clone. This now handles copy for nsproxy and all
+ * namespaces therein.
+ */
+int copy_namespaces(int flags, struct task_struct *tsk)
+{
+ struct nsproxy *old_ns = tsk->nsproxy;
+ struct nsproxy *new_ns;
+ int err = 0;
+
+ if (!old_ns)
+ return 0;
+
+ get_nsproxy(old_ns);
+
+ if (!(flags & (CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC)))
+ return 0;
+
+ new_ns = clone_namespaces(old_ns);
+ if (!new_ns) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ tsk->nsproxy = new_ns;
+
+ err = copy_namespace(flags, tsk);
+ if (err)
+ goto out_ns;
+
+ err = copy_utsname(flags, tsk);
+ if (err)
+ goto out_uts;
+
+ err = copy_ipcs(flags, tsk);
+ if (err)
+ goto out_ipc;
+
+out:
+ put_nsproxy(old_ns);
+ return err;
+
+out_ipc:
+ if (new_ns->uts_ns)
+ put_uts_ns(new_ns->uts_ns);
+out_uts:
+ if (new_ns->namespace)
+ put_namespace(new_ns->namespace);
+out_ns:
+ tsk->nsproxy = old_ns;
+ kfree(new_ns);
+ goto out;
+}
+
+void free_nsproxy(struct nsproxy *ns)
+{
+ if (ns->namespace)
+ put_namespace(ns->namespace);
+ if (ns->uts_ns)
+ put_uts_ns(ns->uts_ns);
+ if (ns->ipc_ns)
+ put_ipc_ns(ns->ipc_ns);
+ kfree(ns);
+}
diff --git a/kernel/pid.c b/kernel/pid.c
index 8387e8c6819..b914392085f 100644
--- a/kernel/pid.c
+++ b/kernel/pid.c
@@ -26,6 +26,7 @@
#include <linux/init.h>
#include <linux/bootmem.h>
#include <linux/hash.h>
+#include <linux/pspace.h>
#define pid_hashfn(nr) hash_long((unsigned long)nr, pidhash_shift)
static struct hlist_head *pid_hash;
@@ -33,17 +34,20 @@ static int pidhash_shift;
static kmem_cache_t *pid_cachep;
int pid_max = PID_MAX_DEFAULT;
-int last_pid;
#define RESERVED_PIDS 300
int pid_max_min = RESERVED_PIDS + 1;
int pid_max_max = PID_MAX_LIMIT;
-#define PIDMAP_ENTRIES ((PID_MAX_LIMIT + 8*PAGE_SIZE - 1)/PAGE_SIZE/8)
#define BITS_PER_PAGE (PAGE_SIZE*8)
#define BITS_PER_PAGE_MASK (BITS_PER_PAGE-1)
-#define mk_pid(map, off) (((map) - pidmap_array)*BITS_PER_PAGE + (off))
+
+static inline int mk_pid(struct pspace *pspace, struct pidmap *map, int off)
+{
+ return (map - pspace->pidmap)*BITS_PER_PAGE + off;
+}
+
#define find_next_offset(map, off) \
find_next_zero_bit((map)->page, BITS_PER_PAGE, off)
@@ -53,13 +57,12 @@ int pid_max_max = PID_MAX_LIMIT;
* value does not cause lots of bitmaps to be allocated, but
* the scheme scales to up to 4 million PIDs, runtime.
*/
-typedef struct pidmap {
- atomic_t nr_free;
- void *page;
-} pidmap_t;
-
-static pidmap_t pidmap_array[PIDMAP_ENTRIES] =
- { [ 0 ... PIDMAP_ENTRIES-1 ] = { ATOMIC_INIT(BITS_PER_PAGE), NULL } };
+struct pspace init_pspace = {
+ .pidmap = {
+ [ 0 ... PIDMAP_ENTRIES-1] = { ATOMIC_INIT(BITS_PER_PAGE), NULL }
+ },
+ .last_pid = 0
+};
/*
* Note: disable interrupts while the pidmap_lock is held as an
@@ -74,40 +77,41 @@ static pidmap_t pidmap_array[PIDMAP_ENTRIES] =
* irq handlers that take it we can leave the interrupts enabled.
* For now it is easier to be safe than to prove it can't happen.
*/
+
static __cacheline_aligned_in_smp DEFINE_SPINLOCK(pidmap_lock);
-static fastcall void free_pidmap(int pid)
+static fastcall void free_pidmap(struct pspace *pspace, int pid)
{
- pidmap_t *map = pidmap_array + pid / BITS_PER_PAGE;
+ struct pidmap *map = pspace->pidmap + pid / BITS_PER_PAGE;
int offset = pid & BITS_PER_PAGE_MASK;
clear_bit(offset, map->page);
atomic_inc(&map->nr_free);
}
-static int alloc_pidmap(void)
+static int alloc_pidmap(struct pspace *pspace)
{
- int i, offset, max_scan, pid, last = last_pid;
- pidmap_t *map;
+ int i, offset, max_scan, pid, last = pspace->last_pid;
+ struct pidmap *map;
pid = last + 1;
if (pid >= pid_max)
pid = RESERVED_PIDS;
offset = pid & BITS_PER_PAGE_MASK;
- map = &pidmap_array[pid/BITS_PER_PAGE];
+ map = &pspace->pidmap[pid/BITS_PER_PAGE];
max_scan = (pid_max + BITS_PER_PAGE - 1)/BITS_PER_PAGE - !offset;
for (i = 0; i <= max_scan; ++i) {
if (unlikely(!map->page)) {
- unsigned long page = get_zeroed_page(GFP_KERNEL);
+ void *page = kzalloc(PAGE_SIZE, GFP_KERNEL);
/*
* Free the page if someone raced with us
* installing it:
*/
spin_lock_irq(&pidmap_lock);
if (map->page)
- free_page(page);
+ kfree(page);
else
- map->page = (void *)page;
+ map->page = page;
spin_unlock_irq(&pidmap_lock);
if (unlikely(!map->page))
break;
@@ -116,11 +120,11 @@ static int alloc_pidmap(void)
do {
if (!test_and_set_bit(offset, map->page)) {
atomic_dec(&map->nr_free);
- last_pid = pid;
+ pspace->last_pid = pid;
return pid;
}
offset = find_next_offset(map, offset);
- pid = mk_pid(map, offset);
+ pid = mk_pid(pspace, map, offset);
/*
* find_next_offset() found a bit, the pid from it
* is in-bounds, and if we fell back to the last
@@ -131,16 +135,34 @@ static int alloc_pidmap(void)
(i != max_scan || pid < last ||
!((last+1) & BITS_PER_PAGE_MASK)));
}
- if (map < &pidmap_array[(pid_max-1)/BITS_PER_PAGE]) {
+ if (map < &pspace->pidmap[(pid_max-1)/BITS_PER_PAGE]) {
++map;
offset = 0;
} else {
- map = &pidmap_array[0];
+ map = &pspace->pidmap[0];
offset = RESERVED_PIDS;
if (unlikely(last == offset))
break;
}
- pid = mk_pid(map, offset);
+ pid = mk_pid(pspace, map, offset);
+ }
+ return -1;
+}
+
+static int next_pidmap(struct pspace *pspace, int last)
+{
+ int offset;
+ struct pidmap *map, *end;
+
+ offset = (last + 1) & BITS_PER_PAGE_MASK;
+ map = &pspace->pidmap[(last + 1)/BITS_PER_PAGE];
+ end = &pspace->pidmap[PIDMAP_ENTRIES];
+ for (; map < end; map++, offset = 0) {
+ if (unlikely(!map->page))
+ continue;
+ offset = find_next_bit((map)->page, BITS_PER_PAGE, offset);
+ if (offset < BITS_PER_PAGE)
+ return mk_pid(pspace, map, offset);
}
return -1;
}
@@ -153,6 +175,7 @@ fastcall void put_pid(struct pid *pid)
atomic_dec_and_test(&pid->count))
kmem_cache_free(pid_cachep, pid);
}
+EXPORT_SYMBOL_GPL(put_pid);
static void delayed_put_pid(struct rcu_head *rhp)
{
@@ -169,7 +192,7 @@ fastcall void free_pid(struct pid *pid)
hlist_del_rcu(&pid->pid_chain);
spin_unlock_irqrestore(&pidmap_lock, flags);
- free_pidmap(pid->nr);
+ free_pidmap(&init_pspace, pid->nr);
call_rcu(&pid->rcu, delayed_put_pid);
}
@@ -183,7 +206,7 @@ struct pid *alloc_pid(void)
if (!pid)
goto out;
- nr = alloc_pidmap();
+ nr = alloc_pidmap(&init_pspace);
if (nr < 0)
goto out_free;
@@ -217,6 +240,7 @@ struct pid * fastcall find_pid(int nr)
}
return NULL;
}
+EXPORT_SYMBOL_GPL(find_pid);
int fastcall attach_pid(struct task_struct *task, enum pid_type type, int nr)
{
@@ -280,6 +304,15 @@ struct task_struct *find_task_by_pid_type(int type, int nr)
EXPORT_SYMBOL(find_task_by_pid_type);
+struct pid *get_task_pid(struct task_struct *task, enum pid_type type)
+{
+ struct pid *pid;
+ rcu_read_lock();
+ pid = get_pid(task->pids[type].pid);
+ rcu_read_unlock();
+ return pid;
+}
+
struct task_struct *fastcall get_pid_task(struct pid *pid, enum pid_type type)
{
struct task_struct *result;
@@ -303,6 +336,26 @@ struct pid *find_get_pid(pid_t nr)
}
/*
+ * Used by proc to find the first pid that is greater then or equal to nr.
+ *
+ * If there is a pid at nr this function is exactly the same as find_pid.
+ */
+struct pid *find_ge_pid(int nr)
+{
+ struct pid *pid;
+
+ do {
+ pid = find_pid(nr);
+ if (pid)
+ break;
+ nr = next_pidmap(&init_pspace, nr);
+ } while (nr > 0);
+
+ return pid;
+}
+EXPORT_SYMBOL_GPL(find_get_pid);
+
+/*
* The pid hash table is scaled according to the amount of memory in the
* machine. From a minimum of 16 slots up to 4096 slots at one gigabyte or
* more.
@@ -329,10 +382,10 @@ void __init pidhash_init(void)
void __init pidmap_init(void)
{
- pidmap_array->page = (void *)get_zeroed_page(GFP_KERNEL);
+ init_pspace.pidmap[0].page = kzalloc(PAGE_SIZE, GFP_KERNEL);
/* Reserve PID 0. We never call free_pidmap(0) */
- set_bit(0, pidmap_array->page);
- atomic_dec(&pidmap_array->nr_free);
+ set_bit(0, init_pspace.pidmap[0].page);
+ atomic_dec(&init_pspace.pidmap[0].nr_free);
pid_cachep = kmem_cache_create("pid", sizeof(struct pid),
__alignof__(struct pid),
diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c
index 1b84313cbab..99f9b7d177d 100644
--- a/kernel/power/snapshot.c
+++ b/kernel/power/snapshot.c
@@ -906,7 +906,7 @@ static void init_header(struct swsusp_info *info)
memset(info, 0, sizeof(struct swsusp_info));
info->version_code = LINUX_VERSION_CODE;
info->num_physpages = num_physpages;
- memcpy(&info->uts, &system_utsname, sizeof(system_utsname));
+ memcpy(&info->uts, init_utsname(), sizeof(struct new_utsname));
info->cpus = num_online_cpus();
info->image_pages = nr_copy_pages;
info->pages = nr_copy_pages + nr_meta_pages + 1;
@@ -1050,13 +1050,13 @@ static inline int check_header(struct swsusp_info *info)
reason = "kernel version";
if (info->num_physpages != num_physpages)
reason = "memory size";
- if (strcmp(info->uts.sysname,system_utsname.sysname))
+ if (strcmp(info->uts.sysname,init_utsname()->sysname))
reason = "system type";
- if (strcmp(info->uts.release,system_utsname.release))
+ if (strcmp(info->uts.release,init_utsname()->release))
reason = "kernel release";
- if (strcmp(info->uts.version,system_utsname.version))
+ if (strcmp(info->uts.version,init_utsname()->version))
reason = "version";
- if (strcmp(info->uts.machine,system_utsname.machine))
+ if (strcmp(info->uts.machine,init_utsname()->machine))
reason = "machine";
if (reason) {
printk(KERN_ERR "swsusp: Resume mismatch: %s\n", reason);
diff --git a/kernel/sched.c b/kernel/sched.c
index 2bbd948f016..e4e54e86f4a 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -4384,7 +4384,10 @@ EXPORT_SYMBOL(cpu_present_map);
#ifndef CONFIG_SMP
cpumask_t cpu_online_map __read_mostly = CPU_MASK_ALL;
+EXPORT_SYMBOL(cpu_online_map);
+
cpumask_t cpu_possible_map __read_mostly = CPU_MASK_ALL;
+EXPORT_SYMBOL(cpu_possible_map);
#endif
long sched_getaffinity(pid_t pid, cpumask_t *mask)
diff --git a/kernel/signal.c b/kernel/signal.c
index fb5da6d19f1..7ed8d5304be 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -1055,28 +1055,44 @@ int group_send_sig_info(int sig, struct siginfo *info, struct task_struct *p)
}
/*
- * kill_pg_info() sends a signal to a process group: this is what the tty
+ * kill_pgrp_info() sends a signal to a process group: this is what the tty
* control characters do (^C, ^Z etc)
*/
-int __kill_pg_info(int sig, struct siginfo *info, pid_t pgrp)
+int __kill_pgrp_info(int sig, struct siginfo *info, struct pid *pgrp)
{
struct task_struct *p = NULL;
int retval, success;
- if (pgrp <= 0)
- return -EINVAL;
-
success = 0;
retval = -ESRCH;
- do_each_task_pid(pgrp, PIDTYPE_PGID, p) {
+ do_each_pid_task(pgrp, PIDTYPE_PGID, p) {
int err = group_send_sig_info(sig, info, p);
success |= !err;
retval = err;
- } while_each_task_pid(pgrp, PIDTYPE_PGID, p);
+ } while_each_pid_task(pgrp, PIDTYPE_PGID, p);
return success ? 0 : retval;
}
+int kill_pgrp_info(int sig, struct siginfo *info, struct pid *pgrp)
+{
+ int retval;
+
+ read_lock(&tasklist_lock);
+ retval = __kill_pgrp_info(sig, info, pgrp);
+ read_unlock(&tasklist_lock);
+
+ return retval;
+}
+
+int __kill_pg_info(int sig, struct siginfo *info, pid_t pgrp)
+{
+ if (pgrp <= 0)
+ return -EINVAL;
+
+ return __kill_pgrp_info(sig, info, find_pid(pgrp));
+}
+
int
kill_pg_info(int sig, struct siginfo *info, pid_t pgrp)
{
@@ -1089,8 +1105,7 @@ kill_pg_info(int sig, struct siginfo *info, pid_t pgrp)
return retval;
}
-int
-kill_proc_info(int sig, struct siginfo *info, pid_t pid)
+int kill_pid_info(int sig, struct siginfo *info, struct pid *pid)
{
int error;
int acquired_tasklist_lock = 0;
@@ -1101,7 +1116,7 @@ kill_proc_info(int sig, struct siginfo *info, pid_t pid)
read_lock(&tasklist_lock);
acquired_tasklist_lock = 1;
}
- p = find_task_by_pid(pid);
+ p = pid_task(pid, PIDTYPE_PID);
error = -ESRCH;
if (p)
error = group_send_sig_info(sig, info, p);
@@ -1111,8 +1126,18 @@ kill_proc_info(int sig, struct siginfo *info, pid_t pid)
return error;
}
-/* like kill_proc_info(), but doesn't use uid/euid of "current" */
-int kill_proc_info_as_uid(int sig, struct siginfo *info, pid_t pid,
+int
+kill_proc_info(int sig, struct siginfo *info, pid_t pid)
+{
+ int error;
+ rcu_read_lock();
+ error = kill_pid_info(sig, info, find_pid(pid));
+ rcu_read_unlock();
+ return error;
+}
+
+/* like kill_pid_info(), but doesn't use uid/euid of "current" */
+int kill_pid_info_as_uid(int sig, struct siginfo *info, struct pid *pid,
uid_t uid, uid_t euid, u32 secid)
{
int ret = -EINVAL;
@@ -1122,7 +1147,7 @@ int kill_proc_info_as_uid(int sig, struct siginfo *info, pid_t pid,
return ret;
read_lock(&tasklist_lock);
- p = find_task_by_pid(pid);
+ p = pid_task(pid, PIDTYPE_PID);
if (!p) {
ret = -ESRCH;
goto out_unlock;
@@ -1146,7 +1171,7 @@ out_unlock:
read_unlock(&tasklist_lock);
return ret;
}
-EXPORT_SYMBOL_GPL(kill_proc_info_as_uid);
+EXPORT_SYMBOL_GPL(kill_pid_info_as_uid);
/*
* kill_something_info() interprets pid in interesting ways just like kill(2).
@@ -1264,6 +1289,18 @@ force_sigsegv(int sig, struct task_struct *p)
return 0;
}
+int kill_pgrp(struct pid *pid, int sig, int priv)
+{
+ return kill_pgrp_info(sig, __si_special(priv), pid);
+}
+EXPORT_SYMBOL(kill_pgrp);
+
+int kill_pid(struct pid *pid, int sig, int priv)
+{
+ return kill_pid_info(sig, __si_special(priv), pid);
+}
+EXPORT_SYMBOL(kill_pid);
+
int
kill_pg(pid_t pgrp, int sig, int priv)
{
diff --git a/kernel/sys.c b/kernel/sys.c
index 2460581c928..2314867ae34 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -92,7 +92,8 @@ EXPORT_SYMBOL(fs_overflowgid);
*/
int C_A_D = 1;
-int cad_pid = 1;
+struct pid *cad_pid;
+EXPORT_SYMBOL(cad_pid);
/*
* Notifier list for kernel code which wants to be called
@@ -221,7 +222,7 @@ EXPORT_SYMBOL_GPL(atomic_notifier_chain_unregister);
* of the last notifier function called.
*/
-int atomic_notifier_call_chain(struct atomic_notifier_head *nh,
+int __kprobes atomic_notifier_call_chain(struct atomic_notifier_head *nh,
unsigned long val, void *v)
{
int ret;
@@ -773,10 +774,9 @@ void ctrl_alt_del(void)
if (C_A_D)
schedule_work(&cad_work);
else
- kill_proc(cad_pid, SIGINT, 1);
+ kill_cad_pid(SIGINT, 1);
}
-
/*
* Unprivileged users may change the real gid to the effective gid
* or vice versa. (BSD-style)
@@ -1655,7 +1655,7 @@ asmlinkage long sys_newuname(struct new_utsname __user * name)
int errno = 0;
down_read(&uts_sem);
- if (copy_to_user(name,&system_utsname,sizeof *name))
+ if (copy_to_user(name, utsname(), sizeof *name))
errno = -EFAULT;
up_read(&uts_sem);
return errno;
@@ -1673,8 +1673,8 @@ asmlinkage long sys_sethostname(char __user *name, int len)
down_write(&uts_sem);
errno = -EFAULT;
if (!copy_from_user(tmp, name, len)) {
- memcpy(system_utsname.nodename, tmp, len);
- system_utsname.nodename[len] = 0;
+ memcpy(utsname()->nodename, tmp, len);
+ utsname()->nodename[len] = 0;
errno = 0;
}
up_write(&uts_sem);
@@ -1690,11 +1690,11 @@ asmlinkage long sys_gethostname(char __user *name, int len)
if (len < 0)
return -EINVAL;
down_read(&uts_sem);
- i = 1 + strlen(system_utsname.nodename);
+ i = 1 + strlen(utsname()->nodename);
if (i > len)
i = len;
errno = 0;
- if (copy_to_user(name, system_utsname.nodename, i))
+ if (copy_to_user(name, utsname()->nodename, i))
errno = -EFAULT;
up_read(&uts_sem);
return errno;
@@ -1719,8 +1719,8 @@ asmlinkage long sys_setdomainname(char __user *name, int len)
down_write(&uts_sem);
errno = -EFAULT;
if (!copy_from_user(tmp, name, len)) {
- memcpy(system_utsname.domainname, tmp, len);
- system_utsname.domainname[len] = 0;
+ memcpy(utsname()->domainname, tmp, len);
+ utsname()->domainname[len] = 0;
errno = 0;
}
up_write(&uts_sem);
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index ba42694f045..8020fb273c4 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -68,7 +68,6 @@ extern int sysrq_enabled;
extern int core_uses_pid;
extern int suid_dumpable;
extern char core_pattern[];
-extern int cad_pid;
extern int pid_max;
extern int min_free_kbytes;
extern int printk_ratelimit_jiffies;
@@ -92,13 +91,8 @@ extern char modprobe_path[];
extern int sg_big_buff;
#endif
#ifdef CONFIG_SYSVIPC
-extern size_t shm_ctlmax;
-extern size_t shm_ctlall;
-extern int shm_ctlmni;
-extern int msg_ctlmax;
-extern int msg_ctlmnb;
-extern int msg_ctlmni;
-extern int sem_ctls[];
+static int proc_do_ipc_string(ctl_table *table, int write, struct file *filp,
+ void __user *buffer, size_t *lenp, loff_t *ppos);
#endif
#ifdef __sparc__
@@ -139,7 +133,10 @@ static int parse_table(int __user *, int, void __user *, size_t __user *,
void __user *, size_t, ctl_table *, void **);
#endif
-static int proc_doutsstring(ctl_table *table, int write, struct file *filp,
+static int proc_do_uts_string(ctl_table *table, int write, struct file *filp,
+ void __user *buffer, size_t *lenp, loff_t *ppos);
+
+static int proc_do_cad_pid(ctl_table *table, int write, struct file *filp,
void __user *buffer, size_t *lenp, loff_t *ppos);
static ctl_table root_table[];
@@ -229,51 +226,100 @@ static ctl_table root_table[] = {
};
static ctl_table kern_table[] = {
+#ifndef CONFIG_UTS_NS
+ {
+ .ctl_name = KERN_OSTYPE,
+ .procname = "ostype",
+ .data = init_uts_ns.name.sysname,
+ .maxlen = sizeof(init_uts_ns.name.sysname),
+ .mode = 0444,
+ .proc_handler = &proc_do_uts_string,
+ .strategy = &sysctl_string,
+ },
+ {
+ .ctl_name = KERN_OSRELEASE,
+ .procname = "osrelease",
+ .data = init_uts_ns.name.release,
+ .maxlen = sizeof(init_uts_ns.name.release),
+ .mode = 0444,
+ .proc_handler = &proc_do_uts_string,
+ .strategy = &sysctl_string,
+ },
+ {
+ .ctl_name = KERN_VERSION,
+ .procname = "version",
+ .data = init_uts_ns.name.version,
+ .maxlen = sizeof(init_uts_ns.name.version),
+ .mode = 0444,
+ .proc_handler = &proc_do_uts_string,
+ .strategy = &sysctl_string,
+ },
+ {
+ .ctl_name = KERN_NODENAME,
+ .procname = "hostname",
+ .data = init_uts_ns.name.nodename,
+ .maxlen = sizeof(init_uts_ns.name.nodename),
+ .mode = 0644,
+ .proc_handler = &proc_do_uts_string,
+ .strategy = &sysctl_string,
+ },
+ {
+ .ctl_name = KERN_DOMAINNAME,
+ .procname = "domainname",
+ .data = init_uts_ns.name.domainname,
+ .maxlen = sizeof(init_uts_ns.name.domainname),
+ .mode = 0644,
+ .proc_handler = &proc_do_uts_string,
+ .strategy = &sysctl_string,
+ },
+#else /* !CONFIG_UTS_NS */
{
.ctl_name = KERN_OSTYPE,
.procname = "ostype",
- .data = system_utsname.sysname,
- .maxlen = sizeof(system_utsname.sysname),
+ .data = NULL,
+ /* could maybe use __NEW_UTS_LEN here? */
+ .maxlen = FIELD_SIZEOF(struct new_utsname, sysname),
.mode = 0444,
- .proc_handler = &proc_doutsstring,
+ .proc_handler = &proc_do_uts_string,
.strategy = &sysctl_string,
},
{
.ctl_name = KERN_OSRELEASE,
.procname = "osrelease",
- .data = system_utsname.release,
- .maxlen = sizeof(system_utsname.release),
+ .data = NULL,
+ .maxlen = FIELD_SIZEOF(struct new_utsname, release),
.mode = 0444,
- .proc_handler = &proc_doutsstring,
+ .proc_handler = &proc_do_uts_string,
.strategy = &sysctl_string,
},
{
.ctl_name = KERN_VERSION,
.procname = "version",
- .data = system_utsname.version,
- .maxlen = sizeof(system_utsname.version),
+ .data = NULL,
+ .maxlen = FIELD_SIZEOF(struct new_utsname, version),
.mode = 0444,
- .proc_handler = &proc_doutsstring,
+ .proc_handler = &proc_do_uts_string,
.strategy = &sysctl_string,
},
{
.ctl_name = KERN_NODENAME,
.procname = "hostname",
- .data = system_utsname.nodename,
- .maxlen = sizeof(system_utsname.nodename),
+ .data = NULL,
+ .maxlen = FIELD_SIZEOF(struct new_utsname, nodename),
.mode = 0644,
- .proc_handler = &proc_doutsstring,
+ .proc_handler = &proc_do_uts_string,
.strategy = &sysctl_string,
},
{
.ctl_name = KERN_DOMAINNAME,
.procname = "domainname",
- .data = system_utsname.domainname,
- .maxlen = sizeof(system_utsname.domainname),
+ .data = NULL,
+ .maxlen = FIELD_SIZEOF(struct new_utsname, domainname),
.mode = 0644,
- .proc_handler = &proc_doutsstring,
+ .proc_handler = &proc_do_uts_string,
.strategy = &sysctl_string,
},
+#endif /* !CONFIG_UTS_NS */
{
.ctl_name = KERN_PANIC,
.procname = "panic",
@@ -432,58 +478,58 @@ static ctl_table kern_table[] = {
{
.ctl_name = KERN_SHMMAX,
.procname = "shmmax",
- .data = &shm_ctlmax,
+ .data = NULL,
.maxlen = sizeof (size_t),
.mode = 0644,
- .proc_handler = &proc_doulongvec_minmax,
+ .proc_handler = &proc_do_ipc_string,
},
{
.ctl_name = KERN_SHMALL,
.procname = "shmall",
- .data = &shm_ctlall,
+ .data = NULL,
.maxlen = sizeof (size_t),
.mode = 0644,
- .proc_handler = &proc_doulongvec_minmax,
+ .proc_handler = &proc_do_ipc_string,
},
{
.ctl_name = KERN_SHMMNI,
.procname = "shmmni",
- .data = &shm_ctlmni,
+ .data = NULL,
.maxlen = sizeof (int),
.mode = 0644,
- .proc_handler = &proc_dointvec,
+ .proc_handler = &proc_do_ipc_string,
},
{
.ctl_name = KERN_MSGMAX,
.procname = "msgmax",
- .data = &msg_ctlmax,
+ .data = NULL,
.maxlen = sizeof (int),
.mode = 0644,
- .proc_handler = &proc_dointvec,
+ .proc_handler = &proc_do_ipc_string,
},
{
.ctl_name = KERN_MSGMNI,
.procname = "msgmni",
- .data = &msg_ctlmni,
+ .data = NULL,
.maxlen = sizeof (int),
.mode = 0644,
- .proc_handler = &proc_dointvec,
+ .proc_handler = &proc_do_ipc_string,
},
{
.ctl_name = KERN_MSGMNB,
.procname = "msgmnb",
- .data = &msg_ctlmnb,
+ .data = NULL,
.maxlen = sizeof (int),
.mode = 0644,
- .proc_handler = &proc_dointvec,
+ .proc_handler = &proc_do_ipc_string,
},
{
.ctl_name = KERN_SEM,
.procname = "sem",
- .data = &sem_ctls,
+ .data = NULL,
.maxlen = 4*sizeof (int),
.mode = 0644,
- .proc_handler = &proc_dointvec,
+ .proc_handler = &proc_do_ipc_string,
},
#endif
#ifdef CONFIG_MAGIC_SYSRQ
@@ -499,10 +545,10 @@ static ctl_table kern_table[] = {
{
.ctl_name = KERN_CADPID,
.procname = "cad_pid",
- .data = &cad_pid,
+ .data = NULL,
.maxlen = sizeof (int),
.mode = 0600,
- .proc_handler = &proc_dointvec,
+ .proc_handler = &proc_do_cad_pid,
},
{
.ctl_name = KERN_MAX_THREADS,
@@ -1624,32 +1670,15 @@ static ssize_t proc_writesys(struct file * file, const char __user * buf,
return do_rw_proc(1, file, (char __user *) buf, count, ppos);
}
-/**
- * proc_dostring - read a string sysctl
- * @table: the sysctl table
- * @write: %TRUE if this is a write to the sysctl file
- * @filp: the file structure
- * @buffer: the user buffer
- * @lenp: the size of the user buffer
- * @ppos: file position
- *
- * Reads/writes a string from/to the user buffer. If the kernel
- * buffer provided is not large enough to hold the string, the
- * string is truncated. The copied string is %NULL-terminated.
- * If the string is being read by the user process, it is copied
- * and a newline '\n' is added. It is truncated if the buffer is
- * not large enough.
- *
- * Returns 0 on success.
- */
-int proc_dostring(ctl_table *table, int write, struct file *filp,
- void __user *buffer, size_t *lenp, loff_t *ppos)
+static int _proc_do_string(void* data, int maxlen, int write,
+ struct file *filp, void __user *buffer,
+ size_t *lenp, loff_t *ppos)
{
size_t len;
char __user *p;
char c;
- if (!table->data || !table->maxlen || !*lenp ||
+ if (!data || !maxlen || !*lenp ||
(*ppos && !write)) {
*lenp = 0;
return 0;
@@ -1665,20 +1694,20 @@ int proc_dostring(ctl_table *table, int write, struct file *filp,
break;
len++;
}
- if (len >= table->maxlen)
- len = table->maxlen-1;
- if(copy_from_user(table->data, buffer, len))
+ if (len >= maxlen)
+ len = maxlen-1;
+ if(copy_from_user(data, buffer, len))
return -EFAULT;
- ((char *) table->data)[len] = 0;
+ ((char *) data)[len] = 0;
*ppos += *lenp;
} else {
- len = strlen(table->data);
- if (len > table->maxlen)
- len = table->maxlen;
+ len = strlen(data);
+ if (len > maxlen)
+ len = maxlen;
if (len > *lenp)
len = *lenp;
if (len)
- if(copy_to_user(buffer, table->data, len))
+ if(copy_to_user(buffer, data, len))
return -EFAULT;
if (len < *lenp) {
if(put_user('\n', ((char __user *) buffer) + len))
@@ -1691,12 +1720,38 @@ int proc_dostring(ctl_table *table, int write, struct file *filp,
return 0;
}
+/**
+ * proc_dostring - read a string sysctl
+ * @table: the sysctl table
+ * @write: %TRUE if this is a write to the sysctl file
+ * @filp: the file structure
+ * @buffer: the user buffer
+ * @lenp: the size of the user buffer
+ * @ppos: file position
+ *
+ * Reads/writes a string from/to the user buffer. If the kernel
+ * buffer provided is not large enough to hold the string, the
+ * string is truncated. The copied string is %NULL-terminated.
+ * If the string is being read by the user process, it is copied
+ * and a newline '\n' is added. It is truncated if the buffer is
+ * not large enough.
+ *
+ * Returns 0 on success.
+ */
+int proc_dostring(ctl_table *table, int write, struct file *filp,
+ void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+ return _proc_do_string(table->data, table->maxlen, write, filp,
+ buffer, lenp, ppos);
+}
+
/*
* Special case of dostring for the UTS structure. This has locks
* to observe. Should this be in kernel/sys.c ????
*/
-static int proc_doutsstring(ctl_table *table, int write, struct file *filp,
+#ifndef CONFIG_UTS_NS
+static int proc_do_uts_string(ctl_table *table, int write, struct file *filp,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
int r;
@@ -1712,6 +1767,48 @@ static int proc_doutsstring(ctl_table *table, int write, struct file *filp,
}
return r;
}
+#else /* !CONFIG_UTS_NS */
+static int proc_do_uts_string(ctl_table *table, int write, struct file *filp,
+ void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+ int r;
+ struct uts_namespace* uts_ns = current->nsproxy->uts_ns;
+ char* which;
+
+ switch (table->ctl_name) {
+ case KERN_OSTYPE:
+ which = uts_ns->name.sysname;
+ break;
+ case KERN_NODENAME:
+ which = uts_ns->name.nodename;
+ break;
+ case KERN_OSRELEASE:
+ which = uts_ns->name.release;
+ break;
+ case KERN_VERSION:
+ which = uts_ns->name.version;
+ break;
+ case KERN_DOMAINNAME:
+ which = uts_ns->name.domainname;
+ break;
+ default:
+ r = -EINVAL;
+ goto out;
+ }
+
+ if (!write) {
+ down_read(&uts_sem);
+ r=_proc_do_string(which,table->maxlen,0,filp,buffer,lenp, ppos);
+ up_read(&uts_sem);
+ } else {
+ down_write(&uts_sem);
+ r=_proc_do_string(which,table->maxlen,1,filp,buffer,lenp, ppos);
+ up_write(&uts_sem);
+ }
+ out:
+ return r;
+}
+#endif /* !CONFIG_UTS_NS */
static int do_proc_dointvec_conv(int *negp, unsigned long *lvalp,
int *valp,
@@ -1732,8 +1829,9 @@ static int do_proc_dointvec_conv(int *negp, unsigned long *lvalp,
return 0;
}
-static int do_proc_dointvec(ctl_table *table, int write, struct file *filp,
- void __user *buffer, size_t *lenp, loff_t *ppos,
+static int __do_proc_dointvec(void *tbl_data, ctl_table *table,
+ int write, struct file *filp, void __user *buffer,
+ size_t *lenp, loff_t *ppos,
int (*conv)(int *negp, unsigned long *lvalp, int *valp,
int write, void *data),
void *data)
@@ -1746,13 +1844,13 @@ static int do_proc_dointvec(ctl_table *table, int write, struct file *filp,
char buf[TMPBUFLEN], *p;
char __user *s = buffer;
- if (!table->data || !table->maxlen || !*lenp ||
+ if (!tbl_data || !table->maxlen || !*lenp ||
(*ppos && !write)) {
*lenp = 0;
return 0;
}
- i = (int *) table->data;
+ i = (int *) tbl_data;
vleft = table->maxlen / sizeof(*i);
left = *lenp;
@@ -1841,6 +1939,16 @@ static int do_proc_dointvec(ctl_table *table, int write, struct file *filp,
#undef TMPBUFLEN
}
+static int do_proc_dointvec(ctl_table *table, int write, struct file *filp,
+ void __user *buffer, size_t *lenp, loff_t *ppos,
+ int (*conv)(int *negp, unsigned long *lvalp, int *valp,
+ int write, void *data),
+ void *data)
+{
+ return __do_proc_dointvec(table->data, table, write, filp,
+ buffer, lenp, ppos, conv, data);
+}
+
/**
* proc_dointvec - read a vector of integers
* @table: the sysctl table
@@ -1974,7 +2082,7 @@ int proc_dointvec_minmax(ctl_table *table, int write, struct file *filp,
do_proc_dointvec_minmax_conv, &param);
}
-static int do_proc_doulongvec_minmax(ctl_table *table, int write,
+static int __do_proc_doulongvec_minmax(void *data, ctl_table *table, int write,
struct file *filp,
void __user *buffer,
size_t *lenp, loff_t *ppos,
@@ -1988,13 +2096,13 @@ static int do_proc_doulongvec_minmax(ctl_table *table, int write,
char buf[TMPBUFLEN], *p;
char __user *s = buffer;
- if (!table->data || !table->maxlen || !*lenp ||
+ if (!data || !table->maxlen || !*lenp ||
(*ppos && !write)) {
*lenp = 0;
return 0;
}
- i = (unsigned long *) table->data;
+ i = (unsigned long *) data;
min = (unsigned long *) table->extra1;
max = (unsigned long *) table->extra2;
vleft = table->maxlen / sizeof(unsigned long);
@@ -2079,6 +2187,17 @@ static int do_proc_doulongvec_minmax(ctl_table *table, int write,
#undef TMPBUFLEN
}
+static int do_proc_doulongvec_minmax(ctl_table *table, int write,
+ struct file *filp,
+ void __user *buffer,
+ size_t *lenp, loff_t *ppos,
+ unsigned long convmul,
+ unsigned long convdiv)
+{
+ return __do_proc_doulongvec_minmax(table->data, table, write,
+ filp, buffer, lenp, ppos, convmul, convdiv);
+}
+
/**
* proc_doulongvec_minmax - read a vector of long integers with min/max values
* @table: the sysctl table
@@ -2267,6 +2386,71 @@ int proc_dointvec_ms_jiffies(ctl_table *table, int write, struct file *filp,
do_proc_dointvec_ms_jiffies_conv, NULL);
}
+#ifdef CONFIG_SYSVIPC
+static int proc_do_ipc_string(ctl_table *table, int write, struct file *filp,
+ void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+ void *data;
+ struct ipc_namespace *ns;
+
+ ns = current->nsproxy->ipc_ns;
+
+ switch (table->ctl_name) {
+ case KERN_SHMMAX:
+ data = &ns->shm_ctlmax;
+ goto proc_minmax;
+ case KERN_SHMALL:
+ data = &ns->shm_ctlall;
+ goto proc_minmax;
+ case KERN_SHMMNI:
+ data = &ns->shm_ctlmni;
+ break;
+ case KERN_MSGMAX:
+ data = &ns->msg_ctlmax;
+ break;
+ case KERN_MSGMNI:
+ data = &ns->msg_ctlmni;
+ break;
+ case KERN_MSGMNB:
+ data = &ns->msg_ctlmnb;
+ break;
+ case KERN_SEM:
+ data = &ns->sem_ctls;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return __do_proc_dointvec(data, table, write, filp, buffer,
+ lenp, ppos, NULL, NULL);
+proc_minmax:
+ return __do_proc_doulongvec_minmax(data, table, write, filp, buffer,
+ lenp, ppos, 1l, 1l);
+}
+#endif
+
+static int proc_do_cad_pid(ctl_table *table, int write, struct file *filp,
+ void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+ struct pid *new_pid;
+ pid_t tmp;
+ int r;
+
+ tmp = pid_nr(cad_pid);
+
+ r = __do_proc_dointvec(&tmp, table, write, filp, buffer,
+ lenp, ppos, NULL, NULL);
+ if (r || !write)
+ return r;
+
+ new_pid = find_get_pid(tmp);
+ if (!new_pid)
+ return -ESRCH;
+
+ put_pid(xchg(&cad_pid, new_pid));
+ return 0;
+}
+
#else /* CONFIG_PROC_FS */
int proc_dostring(ctl_table *table, int write, struct file *filp,
@@ -2275,12 +2459,20 @@ int proc_dostring(ctl_table *table, int write, struct file *filp,
return -ENOSYS;
}
-static int proc_doutsstring(ctl_table *table, int write, struct file *filp,
- void __user *buffer, size_t *lenp, loff_t *ppos)
+static int proc_do_uts_string(ctl_table *table, int write, struct file *filp,
+ void __user *buffer, size_t *lenp, loff_t *ppos)
{
return -ENOSYS;
}
+#ifdef CONFIG_SYSVIPC
+static int proc_do_ipc_string(ctl_table *table, int write, struct file *filp,
+ void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+ return -ENOSYS;
+}
+#endif
+
int proc_dointvec(ctl_table *table, int write, struct file *filp,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
diff --git a/kernel/utsname.c b/kernel/utsname.c
new file mode 100644
index 00000000000..c859164a699
--- /dev/null
+++ b/kernel/utsname.c
@@ -0,0 +1,95 @@
+/*
+ * Copyright (C) 2004 IBM Corporation
+ *
+ * Author: Serge Hallyn <serue@us.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation, version 2 of the
+ * License.
+ */
+
+#include <linux/module.h>
+#include <linux/uts.h>
+#include <linux/utsname.h>
+#include <linux/version.h>
+
+/*
+ * Clone a new ns copying an original utsname, setting refcount to 1
+ * @old_ns: namespace to clone
+ * Return NULL on error (failure to kmalloc), new ns otherwise
+ */
+static struct uts_namespace *clone_uts_ns(struct uts_namespace *old_ns)
+{
+ struct uts_namespace *ns;
+
+ ns = kmalloc(sizeof(struct uts_namespace), GFP_KERNEL);
+ if (ns) {
+ memcpy(&ns->name, &old_ns->name, sizeof(ns->name));
+ kref_init(&ns->kref);
+ }
+ return ns;
+}
+
+/*
+ * unshare the current process' utsname namespace.
+ * called only in sys_unshare()
+ */
+int unshare_utsname(unsigned long unshare_flags, struct uts_namespace **new_uts)
+{
+ if (unshare_flags & CLONE_NEWUTS) {
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+
+ *new_uts = clone_uts_ns(current->nsproxy->uts_ns);
+ if (!*new_uts)
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+
+/*
+ * Copy task tsk's utsname namespace, or clone it if flags
+ * specifies CLONE_NEWUTS. In latter case, changes to the
+ * utsname of this process won't be seen by parent, and vice
+ * versa.
+ */
+int copy_utsname(int flags, struct task_struct *tsk)
+{
+ struct uts_namespace *old_ns = tsk->nsproxy->uts_ns;
+ struct uts_namespace *new_ns;
+ int err = 0;
+
+ if (!old_ns)
+ return 0;
+
+ get_uts_ns(old_ns);
+
+ if (!(flags & CLONE_NEWUTS))
+ return 0;
+
+ if (!capable(CAP_SYS_ADMIN)) {
+ err = -EPERM;
+ goto out;
+ }
+
+ new_ns = clone_uts_ns(old_ns);
+ if (!new_ns) {
+ err = -ENOMEM;
+ goto out;
+ }
+ tsk->nsproxy->uts_ns = new_ns;
+
+out:
+ put_uts_ns(old_ns);
+ return err;
+}
+
+void free_uts_ns(struct kref *kref)
+{
+ struct uts_namespace *ns;
+
+ ns = container_of(kref, struct uts_namespace, kref);
+ kfree(ns);
+}
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index f9ae75cc014..756a908c441 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -384,3 +384,17 @@ config RCU_TORTURE_TEST
at boot time (you probably don't).
Say M if you want the RCU torture tests to build as a module.
Say N if you are unsure.
+
+config LKDTM
+ tristate "Linux Kernel Dump Test Tool Module"
+ depends on KPROBES
+ default n
+ help
+ This module enables testing of the different dumping mechanisms by
+ inducing system failures at predefined crash points.
+ If you don't need it: say N
+ Choose M here to compile this code as a module. The module will be
+ called lkdtm.
+
+ Documentation on how to use the module can be found in
+ drivers/misc/lkdtm.c
diff --git a/lib/Makefile b/lib/Makefile
index ddf3e676e1f..b0361756e22 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -2,7 +2,7 @@
# Makefile for some libs needed in the kernel.
#
-lib-y := errno.o ctype.o string.o vsprintf.o cmdline.o \
+lib-y := ctype.o string.o vsprintf.o cmdline.o \
bust_spinlocks.o rbtree.o radix-tree.o dump_stack.o \
idr.o div64.o int_sqrt.o bitmap.o extable.o prio_tree.o \
sha1.o
diff --git a/lib/cpumask.c b/lib/cpumask.c
index 3a67dc5ada7..7a2a73f88d5 100644
--- a/lib/cpumask.c
+++ b/lib/cpumask.c
@@ -43,3 +43,19 @@ int __any_online_cpu(const cpumask_t *mask)
return cpu;
}
EXPORT_SYMBOL(__any_online_cpu);
+
+#if MAX_NUMNODES > 1
+/*
+ * Find the highest possible node id.
+ */
+int highest_possible_node_id(void)
+{
+ unsigned int node;
+ unsigned int highest = 0;
+
+ for_each_node_mask(node, node_possible_map)
+ highest = node;
+ return highest;
+}
+EXPORT_SYMBOL(highest_possible_node_id);
+#endif
diff --git a/lib/errno.c b/lib/errno.c
deleted file mode 100644
index 41cb9d76c05..00000000000
--- a/lib/errno.c
+++ /dev/null
@@ -1,7 +0,0 @@
-/*
- * linux/lib/errno.c
- *
- * Copyright (C) 1991, 1992 Linus Torvalds
- */
-
-int errno;
diff --git a/lib/genalloc.c b/lib/genalloc.c
index 71338b48e88..75ae68ce03e 100644
--- a/lib/genalloc.c
+++ b/lib/genalloc.c
@@ -14,11 +14,13 @@
#include <linux/genalloc.h>
-/*
- * Create a new special memory pool.
- *
+/**
+ * gen_pool_create - create a new special memory pool
* @min_alloc_order: log base 2 of number of bytes each bitmap bit represents
* @nid: node id of the node the pool structure should be allocated on, or -1
+ *
+ * Create a new special memory pool that can be used to manage special purpose
+ * memory not managed by the regular kmalloc/kfree interface.
*/
struct gen_pool *gen_pool_create(int min_alloc_order, int nid)
{
@@ -34,15 +36,15 @@ struct gen_pool *gen_pool_create(int min_alloc_order, int nid)
}
EXPORT_SYMBOL(gen_pool_create);
-
-/*
- * Add a new chunk of memory to the specified pool.
- *
+/**
+ * gen_pool_add - add a new chunk of special memory to the pool
* @pool: pool to add new memory chunk to
* @addr: starting address of memory chunk to add to pool
* @size: size in bytes of the memory chunk to add to pool
* @nid: node id of the node the chunk structure and bitmap should be
* allocated on, or -1
+ *
+ * Add a new chunk of special memory to the specified pool.
*/
int gen_pool_add(struct gen_pool *pool, unsigned long addr, size_t size,
int nid)
@@ -69,13 +71,44 @@ int gen_pool_add(struct gen_pool *pool, unsigned long addr, size_t size,
}
EXPORT_SYMBOL(gen_pool_add);
-
-/*
- * Allocate the requested number of bytes from the specified pool.
- * Uses a first-fit algorithm.
+/**
+ * gen_pool_destroy - destroy a special memory pool
+ * @pool: pool to destroy
*
+ * Destroy the specified special memory pool. Verifies that there are no
+ * outstanding allocations.
+ */
+void gen_pool_destroy(struct gen_pool *pool)
+{
+ struct list_head *_chunk, *_next_chunk;
+ struct gen_pool_chunk *chunk;
+ int order = pool->min_alloc_order;
+ int bit, end_bit;
+
+
+ write_lock(&pool->lock);
+ list_for_each_safe(_chunk, _next_chunk, &pool->chunks) {
+ chunk = list_entry(_chunk, struct gen_pool_chunk, next_chunk);
+ list_del(&chunk->next_chunk);
+
+ end_bit = (chunk->end_addr - chunk->start_addr) >> order;
+ bit = find_next_bit(chunk->bits, end_bit, 0);
+ BUG_ON(bit < end_bit);
+
+ kfree(chunk);
+ }
+ kfree(pool);
+ return;
+}
+EXPORT_SYMBOL(gen_pool_destroy);
+
+/**
+ * gen_pool_alloc - allocate special memory from the pool
* @pool: pool to allocate from
* @size: number of bytes to allocate from the pool
+ *
+ * Allocate the requested number of bytes from the specified pool.
+ * Uses a first-fit algorithm.
*/
unsigned long gen_pool_alloc(struct gen_pool *pool, size_t size)
{
@@ -127,13 +160,13 @@ unsigned long gen_pool_alloc(struct gen_pool *pool, size_t size)
}
EXPORT_SYMBOL(gen_pool_alloc);
-
-/*
- * Free the specified memory back to the specified pool.
- *
+/**
+ * gen_pool_free - free allocated special memory back to the pool
* @pool: pool to free to
* @addr: starting address of memory to free back to pool
* @size: size in bytes of memory to free
+ *
+ * Free previously allocated special memory back to the specified pool.
*/
void gen_pool_free(struct gen_pool *pool, unsigned long addr, size_t size)
{
diff --git a/net/bluetooth/rfcomm/tty.c b/net/bluetooth/rfcomm/tty.c
index 26f322737db..1958ad1b854 100644
--- a/net/bluetooth/rfcomm/tty.c
+++ b/net/bluetooth/rfcomm/tty.c
@@ -1011,7 +1011,7 @@ static int rfcomm_tty_tiocmset(struct tty_struct *tty, struct file *filp, unsign
/* ---- TTY structure ---- */
-static struct tty_operations rfcomm_ops = {
+static const struct tty_operations rfcomm_ops = {
.open = rfcomm_tty_open,
.close = rfcomm_tty_close,
.write = rfcomm_tty_write,
diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c
index 1fbb38415b1..f8ce8475915 100644
--- a/net/ipv4/ipconfig.c
+++ b/net/ipv4/ipconfig.c
@@ -366,7 +366,7 @@ static int __init ic_defaults(void)
*/
if (!ic_host_name_set)
- sprintf(system_utsname.nodename, "%u.%u.%u.%u", NIPQUAD(ic_myaddr));
+ sprintf(init_utsname()->nodename, "%u.%u.%u.%u", NIPQUAD(ic_myaddr));
if (root_server_addr == INADDR_NONE)
root_server_addr = ic_servaddr;
@@ -805,7 +805,7 @@ static void __init ic_do_bootp_ext(u8 *ext)
}
break;
case 12: /* Host name */
- ic_bootp_string(system_utsname.nodename, ext+1, *ext, __NEW_UTS_LEN);
+ ic_bootp_string(utsname()->nodename, ext+1, *ext, __NEW_UTS_LEN);
ic_host_name_set = 1;
break;
case 15: /* Domain name (DNS) */
@@ -816,7 +816,7 @@ static void __init ic_do_bootp_ext(u8 *ext)
ic_bootp_string(root_server_path, ext+1, *ext, sizeof(root_server_path));
break;
case 40: /* NIS Domain name (_not_ DNS) */
- ic_bootp_string(system_utsname.domainname, ext+1, *ext, __NEW_UTS_LEN);
+ ic_bootp_string(utsname()->domainname, ext+1, *ext, __NEW_UTS_LEN);
break;
}
}
@@ -1368,7 +1368,7 @@ static int __init ip_auto_config(void)
printk(", mask=%u.%u.%u.%u", NIPQUAD(ic_netmask));
printk(", gw=%u.%u.%u.%u", NIPQUAD(ic_gateway));
printk(",\n host=%s, domain=%s, nis-domain=%s",
- system_utsname.nodename, ic_domain, system_utsname.domainname);
+ utsname()->nodename, ic_domain, utsname()->domainname);
printk(",\n bootserver=%u.%u.%u.%u", NIPQUAD(ic_servaddr));
printk(", rootserver=%u.%u.%u.%u", NIPQUAD(root_server_addr));
printk(", rootpath=%s", root_server_path);
@@ -1478,11 +1478,11 @@ static int __init ip_auto_config_setup(char *addrs)
case 4:
if ((dp = strchr(ip, '.'))) {
*dp++ = '\0';
- strlcpy(system_utsname.domainname, dp,
- sizeof(system_utsname.domainname));
+ strlcpy(utsname()->domainname, dp,
+ sizeof(utsname()->domainname));
}
- strlcpy(system_utsname.nodename, ip,
- sizeof(system_utsname.nodename));
+ strlcpy(utsname()->nodename, ip,
+ sizeof(utsname()->nodename));
ic_host_name_set = 1;
break;
case 5:
diff --git a/net/ipv4/tcp_probe.c b/net/ipv4/tcp_probe.c
index dab37d2f65f..4be336f1788 100644
--- a/net/ipv4/tcp_probe.c
+++ b/net/ipv4/tcp_probe.c
@@ -99,8 +99,10 @@ static int jtcp_sendmsg(struct kiocb *iocb, struct sock *sk,
}
static struct jprobe tcp_send_probe = {
- .kp = { .addr = (kprobe_opcode_t *) &tcp_sendmsg, },
- .entry = (kprobe_opcode_t *) &jtcp_sendmsg,
+ .kp = {
+ .symbol_name = "tcp_sendmsg",
+ },
+ .entry = JPROBE_ENTRY(jtcp_sendmsg),
};
diff --git a/net/irda/ircomm/ircomm_tty.c b/net/irda/ircomm/ircomm_tty.c
index 3bcdb467efc..d50a02030ad 100644
--- a/net/irda/ircomm/ircomm_tty.c
+++ b/net/irda/ircomm/ircomm_tty.c
@@ -79,7 +79,7 @@ static struct tty_driver *driver;
hashbin_t *ircomm_tty = NULL;
-static struct tty_operations ops = {
+static const struct tty_operations ops = {
.open = ircomm_tty_open,
.close = ircomm_tty_close,
.write = ircomm_tty_write,
diff --git a/net/socket.c b/net/socket.c
index 01918f7a301..6c9b9b326d7 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -825,7 +825,7 @@ static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg)
break;
case FIOGETOWN:
case SIOCGPGRP:
- err = put_user(sock->file->f_owner.pid,
+ err = put_user(f_getown(sock->file),
(int __user *)argp);
break;
case SIOCGIFBR:
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index 124ff0ceb55..78696f2dc7d 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -161,10 +161,10 @@ static struct rpc_clnt * rpc_new_client(struct rpc_xprt *xprt, char *servname, s
}
/* save the nodename */
- clnt->cl_nodelen = strlen(system_utsname.nodename);
+ clnt->cl_nodelen = strlen(utsname()->nodename);
if (clnt->cl_nodelen > UNX_MAXNODENAME)
clnt->cl_nodelen = UNX_MAXNODENAME;
- memcpy(clnt->cl_nodename, system_utsname.nodename, clnt->cl_nodelen);
+ memcpy(clnt->cl_nodename, utsname()->nodename, clnt->cl_nodelen);
return clnt;
out_no_auth:
diff --git a/net/sunrpc/sunrpc_syms.c b/net/sunrpc/sunrpc_syms.c
index 26c0531d7e2..192dff5dabc 100644
--- a/net/sunrpc/sunrpc_syms.c
+++ b/net/sunrpc/sunrpc_syms.c
@@ -70,6 +70,8 @@ EXPORT_SYMBOL(put_rpccred);
/* RPC server stuff */
EXPORT_SYMBOL(svc_create);
EXPORT_SYMBOL(svc_create_thread);
+EXPORT_SYMBOL(svc_create_pooled);
+EXPORT_SYMBOL(svc_set_num_threads);
EXPORT_SYMBOL(svc_exit_thread);
EXPORT_SYMBOL(svc_destroy);
EXPORT_SYMBOL(svc_drop);
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index 44b8d9d4c18..a99e67b164c 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -4,6 +4,10 @@
* High-level RPC service routines
*
* Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de>
+ *
+ * Multiple threads pools and NUMAisation
+ * Copyright (c) 2006 Silicon Graphics, Inc.
+ * by Greg Banks <gnb@melbourne.sgi.com>
*/
#include <linux/linkage.h>
@@ -12,6 +16,8 @@
#include <linux/net.h>
#include <linux/in.h>
#include <linux/mm.h>
+#include <linux/interrupt.h>
+#include <linux/module.h>
#include <linux/sunrpc/types.h>
#include <linux/sunrpc/xdr.h>
@@ -23,14 +29,252 @@
#define RPC_PARANOIA 1
/*
+ * Mode for mapping cpus to pools.
+ */
+enum {
+ SVC_POOL_NONE = -1, /* uninitialised, choose one of the others */
+ SVC_POOL_GLOBAL, /* no mapping, just a single global pool
+ * (legacy & UP mode) */
+ SVC_POOL_PERCPU, /* one pool per cpu */
+ SVC_POOL_PERNODE /* one pool per numa node */
+};
+
+/*
+ * Structure for mapping cpus to pools and vice versa.
+ * Setup once during sunrpc initialisation.
+ */
+static struct svc_pool_map {
+ int mode; /* Note: int not enum to avoid
+ * warnings about "enumeration value
+ * not handled in switch" */
+ unsigned int npools;
+ unsigned int *pool_to; /* maps pool id to cpu or node */
+ unsigned int *to_pool; /* maps cpu or node to pool id */
+} svc_pool_map = {
+ .mode = SVC_POOL_NONE
+};
+
+
+/*
+ * Detect best pool mapping mode heuristically,
+ * according to the machine's topology.
+ */
+static int
+svc_pool_map_choose_mode(void)
+{
+ unsigned int node;
+
+ if (num_online_nodes() > 1) {
+ /*
+ * Actually have multiple NUMA nodes,
+ * so split pools on NUMA node boundaries
+ */
+ return SVC_POOL_PERNODE;
+ }
+
+ node = any_online_node(node_online_map);
+ if (nr_cpus_node(node) > 2) {
+ /*
+ * Non-trivial SMP, or CONFIG_NUMA on
+ * non-NUMA hardware, e.g. with a generic
+ * x86_64 kernel on Xeons. In this case we
+ * want to divide the pools on cpu boundaries.
+ */
+ return SVC_POOL_PERCPU;
+ }
+
+ /* default: one global pool */
+ return SVC_POOL_GLOBAL;
+}
+
+/*
+ * Allocate the to_pool[] and pool_to[] arrays.
+ * Returns 0 on success or an errno.
+ */
+static int
+svc_pool_map_alloc_arrays(struct svc_pool_map *m, unsigned int maxpools)
+{
+ m->to_pool = kcalloc(maxpools, sizeof(unsigned int), GFP_KERNEL);
+ if (!m->to_pool)
+ goto fail;
+ m->pool_to = kcalloc(maxpools, sizeof(unsigned int), GFP_KERNEL);
+ if (!m->pool_to)
+ goto fail_free;
+
+ return 0;
+
+fail_free:
+ kfree(m->to_pool);
+fail:
+ return -ENOMEM;
+}
+
+/*
+ * Initialise the pool map for SVC_POOL_PERCPU mode.
+ * Returns number of pools or <0 on error.
+ */
+static int
+svc_pool_map_init_percpu(struct svc_pool_map *m)
+{
+ unsigned int maxpools = highest_possible_processor_id()+1;
+ unsigned int pidx = 0;
+ unsigned int cpu;
+ int err;
+
+ err = svc_pool_map_alloc_arrays(m, maxpools);
+ if (err)
+ return err;
+
+ for_each_online_cpu(cpu) {
+ BUG_ON(pidx > maxpools);
+ m->to_pool[cpu] = pidx;
+ m->pool_to[pidx] = cpu;
+ pidx++;
+ }
+ /* cpus brought online later all get mapped to pool0, sorry */
+
+ return pidx;
+};
+
+
+/*
+ * Initialise the pool map for SVC_POOL_PERNODE mode.
+ * Returns number of pools or <0 on error.
+ */
+static int
+svc_pool_map_init_pernode(struct svc_pool_map *m)
+{
+ unsigned int maxpools = highest_possible_node_id()+1;
+ unsigned int pidx = 0;
+ unsigned int node;
+ int err;
+
+ err = svc_pool_map_alloc_arrays(m, maxpools);
+ if (err)
+ return err;
+
+ for_each_node_with_cpus(node) {
+ /* some architectures (e.g. SN2) have cpuless nodes */
+ BUG_ON(pidx > maxpools);
+ m->to_pool[node] = pidx;
+ m->pool_to[pidx] = node;
+ pidx++;
+ }
+ /* nodes brought online later all get mapped to pool0, sorry */
+
+ return pidx;
+}
+
+
+/*
+ * Build the global map of cpus to pools and vice versa.
+ */
+static unsigned int
+svc_pool_map_init(void)
+{
+ struct svc_pool_map *m = &svc_pool_map;
+ int npools = -1;
+
+ if (m->mode != SVC_POOL_NONE)
+ return m->npools;
+
+ m->mode = svc_pool_map_choose_mode();
+
+ switch (m->mode) {
+ case SVC_POOL_PERCPU:
+ npools = svc_pool_map_init_percpu(m);
+ break;
+ case SVC_POOL_PERNODE:
+ npools = svc_pool_map_init_pernode(m);
+ break;
+ }
+
+ if (npools < 0) {
+ /* default, or memory allocation failure */
+ npools = 1;
+ m->mode = SVC_POOL_GLOBAL;
+ }
+ m->npools = npools;
+
+ return m->npools;
+}
+
+/*
+ * Set the current thread's cpus_allowed mask so that it
+ * will only run on cpus in the given pool.
+ *
+ * Returns 1 and fills in oldmask iff a cpumask was applied.
+ */
+static inline int
+svc_pool_map_set_cpumask(unsigned int pidx, cpumask_t *oldmask)
+{
+ struct svc_pool_map *m = &svc_pool_map;
+ unsigned int node; /* or cpu */
+
+ /*
+ * The caller checks for sv_nrpools > 1, which
+ * implies that we've been initialized and the
+ * map mode is not NONE.
+ */
+ BUG_ON(m->mode == SVC_POOL_NONE);
+
+ switch (m->mode)
+ {
+ default:
+ return 0;
+ case SVC_POOL_PERCPU:
+ node = m->pool_to[pidx];
+ *oldmask = current->cpus_allowed;
+ set_cpus_allowed(current, cpumask_of_cpu(node));
+ return 1;
+ case SVC_POOL_PERNODE:
+ node = m->pool_to[pidx];
+ *oldmask = current->cpus_allowed;
+ set_cpus_allowed(current, node_to_cpumask(node));
+ return 1;
+ }
+}
+
+/*
+ * Use the mapping mode to choose a pool for a given CPU.
+ * Used when enqueueing an incoming RPC. Always returns
+ * a non-NULL pool pointer.
+ */
+struct svc_pool *
+svc_pool_for_cpu(struct svc_serv *serv, int cpu)
+{
+ struct svc_pool_map *m = &svc_pool_map;
+ unsigned int pidx = 0;
+
+ /*
+ * SVC_POOL_NONE happens in a pure client when
+ * lockd is brought up, so silently treat it the
+ * same as SVC_POOL_GLOBAL.
+ */
+
+ switch (m->mode) {
+ case SVC_POOL_PERCPU:
+ pidx = m->to_pool[cpu];
+ break;
+ case SVC_POOL_PERNODE:
+ pidx = m->to_pool[cpu_to_node(cpu)];
+ break;
+ }
+ return &serv->sv_pools[pidx % serv->sv_nrpools];
+}
+
+
+/*
* Create an RPC service
*/
-struct svc_serv *
-svc_create(struct svc_program *prog, unsigned int bufsize)
+static struct svc_serv *
+__svc_create(struct svc_program *prog, unsigned int bufsize, int npools,
+ void (*shutdown)(struct svc_serv *serv))
{
struct svc_serv *serv;
int vers;
unsigned int xdrsize;
+ unsigned int i;
if (!(serv = kzalloc(sizeof(*serv), GFP_KERNEL)))
return NULL;
@@ -39,6 +283,7 @@ svc_create(struct svc_program *prog, unsigned int bufsize)
serv->sv_nrthreads = 1;
serv->sv_stats = prog->pg_stats;
serv->sv_bufsz = bufsize? bufsize : 4096;
+ serv->sv_shutdown = shutdown;
xdrsize = 0;
while (prog) {
prog->pg_lovers = prog->pg_nvers-1;
@@ -53,20 +298,68 @@ svc_create(struct svc_program *prog, unsigned int bufsize)
prog = prog->pg_next;
}
serv->sv_xdrsize = xdrsize;
- INIT_LIST_HEAD(&serv->sv_threads);
- INIT_LIST_HEAD(&serv->sv_sockets);
INIT_LIST_HEAD(&serv->sv_tempsocks);
INIT_LIST_HEAD(&serv->sv_permsocks);
+ init_timer(&serv->sv_temptimer);
spin_lock_init(&serv->sv_lock);
+ serv->sv_nrpools = npools;
+ serv->sv_pools =
+ kcalloc(sizeof(struct svc_pool), serv->sv_nrpools,
+ GFP_KERNEL);
+ if (!serv->sv_pools) {
+ kfree(serv);
+ return NULL;
+ }
+
+ for (i = 0; i < serv->sv_nrpools; i++) {
+ struct svc_pool *pool = &serv->sv_pools[i];
+
+ dprintk("initialising pool %u for %s\n",
+ i, serv->sv_name);
+
+ pool->sp_id = i;
+ INIT_LIST_HEAD(&pool->sp_threads);
+ INIT_LIST_HEAD(&pool->sp_sockets);
+ INIT_LIST_HEAD(&pool->sp_all_threads);
+ spin_lock_init(&pool->sp_lock);
+ }
+
+
/* Remove any stale portmap registrations */
svc_register(serv, 0, 0);
return serv;
}
+struct svc_serv *
+svc_create(struct svc_program *prog, unsigned int bufsize,
+ void (*shutdown)(struct svc_serv *serv))
+{
+ return __svc_create(prog, bufsize, /*npools*/1, shutdown);
+}
+
+struct svc_serv *
+svc_create_pooled(struct svc_program *prog, unsigned int bufsize,
+ void (*shutdown)(struct svc_serv *serv),
+ svc_thread_fn func, int sig, struct module *mod)
+{
+ struct svc_serv *serv;
+ unsigned int npools = svc_pool_map_init();
+
+ serv = __svc_create(prog, bufsize, npools, shutdown);
+
+ if (serv != NULL) {
+ serv->sv_function = func;
+ serv->sv_kill_signal = sig;
+ serv->sv_module = mod;
+ }
+
+ return serv;
+}
+
/*
- * Destroy an RPC service
+ * Destroy an RPC service. Should be called with the BKL held
*/
void
svc_destroy(struct svc_serv *serv)
@@ -85,12 +378,17 @@ svc_destroy(struct svc_serv *serv)
} else
printk("svc_destroy: no threads for serv=%p!\n", serv);
+ del_timer_sync(&serv->sv_temptimer);
+
while (!list_empty(&serv->sv_tempsocks)) {
svsk = list_entry(serv->sv_tempsocks.next,
struct svc_sock,
sk_list);
svc_delete_socket(svsk);
}
+ if (serv->sv_shutdown)
+ serv->sv_shutdown(serv);
+
while (!list_empty(&serv->sv_permsocks)) {
svsk = list_entry(serv->sv_permsocks.next,
struct svc_sock,
@@ -102,6 +400,7 @@ svc_destroy(struct svc_serv *serv)
/* Unregister service with the portmapper */
svc_register(serv, 0, 0);
+ kfree(serv->sv_pools);
kfree(serv);
}
@@ -150,13 +449,18 @@ svc_release_buffer(struct svc_rqst *rqstp)
}
/*
- * Create a server thread
+ * Create a thread in the given pool. Caller must hold BKL.
+ * On a NUMA or SMP machine, with a multi-pool serv, the thread
+ * will be restricted to run on the cpus belonging to the pool.
*/
-int
-svc_create_thread(svc_thread_fn func, struct svc_serv *serv)
+static int
+__svc_create_thread(svc_thread_fn func, struct svc_serv *serv,
+ struct svc_pool *pool)
{
struct svc_rqst *rqstp;
int error = -ENOMEM;
+ int have_oldmask = 0;
+ cpumask_t oldmask;
rqstp = kzalloc(sizeof(*rqstp), GFP_KERNEL);
if (!rqstp)
@@ -170,8 +474,21 @@ svc_create_thread(svc_thread_fn func, struct svc_serv *serv)
goto out_thread;
serv->sv_nrthreads++;
+ spin_lock_bh(&pool->sp_lock);
+ pool->sp_nrthreads++;
+ list_add(&rqstp->rq_all, &pool->sp_all_threads);
+ spin_unlock_bh(&pool->sp_lock);
rqstp->rq_server = serv;
+ rqstp->rq_pool = pool;
+
+ if (serv->sv_nrpools > 1)
+ have_oldmask = svc_pool_map_set_cpumask(pool->sp_id, &oldmask);
+
error = kernel_thread((int (*)(void *)) func, rqstp, 0);
+
+ if (have_oldmask)
+ set_cpus_allowed(current, oldmask);
+
if (error < 0)
goto out_thread;
svc_sock_update_bufs(serv);
@@ -185,17 +502,136 @@ out_thread:
}
/*
- * Destroy an RPC server thread
+ * Create a thread in the default pool. Caller must hold BKL.
+ */
+int
+svc_create_thread(svc_thread_fn func, struct svc_serv *serv)
+{
+ return __svc_create_thread(func, serv, &serv->sv_pools[0]);
+}
+
+/*
+ * Choose a pool in which to create a new thread, for svc_set_num_threads
+ */
+static inline struct svc_pool *
+choose_pool(struct svc_serv *serv, struct svc_pool *pool, unsigned int *state)
+{
+ if (pool != NULL)
+ return pool;
+
+ return &serv->sv_pools[(*state)++ % serv->sv_nrpools];
+}
+
+/*
+ * Choose a thread to kill, for svc_set_num_threads
+ */
+static inline struct task_struct *
+choose_victim(struct svc_serv *serv, struct svc_pool *pool, unsigned int *state)
+{
+ unsigned int i;
+ struct task_struct *task = NULL;
+
+ if (pool != NULL) {
+ spin_lock_bh(&pool->sp_lock);
+ } else {
+ /* choose a pool in round-robin fashion */
+ for (i = 0; i < serv->sv_nrpools; i++) {
+ pool = &serv->sv_pools[--(*state) % serv->sv_nrpools];
+ spin_lock_bh(&pool->sp_lock);
+ if (!list_empty(&pool->sp_all_threads))
+ goto found_pool;
+ spin_unlock_bh(&pool->sp_lock);
+ }
+ return NULL;
+ }
+
+found_pool:
+ if (!list_empty(&pool->sp_all_threads)) {
+ struct svc_rqst *rqstp;
+
+ /*
+ * Remove from the pool->sp_all_threads list
+ * so we don't try to kill it again.
+ */
+ rqstp = list_entry(pool->sp_all_threads.next, struct svc_rqst, rq_all);
+ list_del_init(&rqstp->rq_all);
+ task = rqstp->rq_task;
+ }
+ spin_unlock_bh(&pool->sp_lock);
+
+ return task;
+}
+
+/*
+ * Create or destroy enough new threads to make the number
+ * of threads the given number. If `pool' is non-NULL, applies
+ * only to threads in that pool, otherwise round-robins between
+ * all pools. Must be called with a svc_get() reference and
+ * the BKL held.
+ *
+ * Destroying threads relies on the service threads filling in
+ * rqstp->rq_task, which only the nfs ones do. Assumes the serv
+ * has been created using svc_create_pooled().
+ *
+ * Based on code that used to be in nfsd_svc() but tweaked
+ * to be pool-aware.
+ */
+int
+svc_set_num_threads(struct svc_serv *serv, struct svc_pool *pool, int nrservs)
+{
+ struct task_struct *victim;
+ int error = 0;
+ unsigned int state = serv->sv_nrthreads-1;
+
+ if (pool == NULL) {
+ /* The -1 assumes caller has done a svc_get() */
+ nrservs -= (serv->sv_nrthreads-1);
+ } else {
+ spin_lock_bh(&pool->sp_lock);
+ nrservs -= pool->sp_nrthreads;
+ spin_unlock_bh(&pool->sp_lock);
+ }
+
+ /* create new threads */
+ while (nrservs > 0) {
+ nrservs--;
+ __module_get(serv->sv_module);
+ error = __svc_create_thread(serv->sv_function, serv,
+ choose_pool(serv, pool, &state));
+ if (error < 0) {
+ module_put(serv->sv_module);
+ break;
+ }
+ }
+ /* destroy old threads */
+ while (nrservs < 0 &&
+ (victim = choose_victim(serv, pool, &state)) != NULL) {
+ send_sig(serv->sv_kill_signal, victim, 1);
+ nrservs++;
+ }
+
+ return error;
+}
+
+/*
+ * Called from a server thread as it's exiting. Caller must hold BKL.
*/
void
svc_exit_thread(struct svc_rqst *rqstp)
{
struct svc_serv *serv = rqstp->rq_server;
+ struct svc_pool *pool = rqstp->rq_pool;
svc_release_buffer(rqstp);
kfree(rqstp->rq_resp);
kfree(rqstp->rq_argp);
kfree(rqstp->rq_auth_data);
+
+ spin_lock_bh(&pool->sp_lock);
+ pool->sp_nrthreads--;
+ list_del(&rqstp->rq_all);
+ spin_unlock_bh(&pool->sp_lock);
+
kfree(rqstp);
/* Release the server */
@@ -248,13 +684,14 @@ svc_register(struct svc_serv *serv, int proto, unsigned short port)
* Process the RPC request.
*/
int
-svc_process(struct svc_serv *serv, struct svc_rqst *rqstp)
+svc_process(struct svc_rqst *rqstp)
{
struct svc_program *progp;
struct svc_version *versp = NULL; /* compiler food */
struct svc_procedure *procp = NULL;
struct kvec * argv = &rqstp->rq_arg.head[0];
struct kvec * resv = &rqstp->rq_res.head[0];
+ struct svc_serv *serv = rqstp->rq_server;
kxdrproc_t xdr;
__be32 *statp;
u32 dir, prog, vers, proc;
diff --git a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.c
index 1020d54b01d..40d41a2831d 100644
--- a/net/sunrpc/svcauth_unix.c
+++ b/net/sunrpc/svcauth_unix.c
@@ -348,12 +348,9 @@ int auth_unix_forget_old(struct auth_domain *dom)
struct auth_domain *auth_unix_lookup(struct in_addr addr)
{
- struct ip_map key, *ipm;
+ struct ip_map *ipm;
struct auth_domain *rv;
- strcpy(key.m_class, "nfsd");
- key.m_addr = addr;
-
ipm = ip_map_lookup("nfsd", addr);
if (!ipm)
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index 5b0fe1b66a2..cba85d19522 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -31,6 +31,7 @@
#include <linux/slab.h>
#include <linux/netdevice.h>
#include <linux/skbuff.h>
+#include <linux/file.h>
#include <net/sock.h>
#include <net/checksum.h>
#include <net/ip.h>
@@ -45,13 +46,16 @@
/* SMP locking strategy:
*
- * svc_serv->sv_lock protects most stuff for that service.
+ * svc_pool->sp_lock protects most of the fields of that pool.
+ * svc_serv->sv_lock protects sv_tempsocks, sv_permsocks, sv_tmpcnt.
+ * when both need to be taken (rare), svc_serv->sv_lock is first.
+ * BKL protects svc_serv->sv_nrthread.
+ * svc_sock->sk_defer_lock protects the svc_sock->sk_deferred list
+ * svc_sock->sk_flags.SK_BUSY prevents a svc_sock being enqueued multiply.
*
* Some flags can be set to certain values at any time
* providing that certain rules are followed:
*
- * SK_BUSY can be set to 0 at any time.
- * svc_sock_enqueue must be called afterwards
* SK_CONN, SK_DATA, can be set or cleared at any time.
* after a set, svc_sock_enqueue must be called.
* after a clear, the socket must be read/accepted
@@ -73,23 +77,30 @@ static struct svc_deferred_req *svc_deferred_dequeue(struct svc_sock *svsk);
static int svc_deferred_recv(struct svc_rqst *rqstp);
static struct cache_deferred_req *svc_defer(struct cache_req *req);
+/* apparently the "standard" is that clients close
+ * idle connections after 5 minutes, servers after
+ * 6 minutes
+ * http://www.connectathon.org/talks96/nfstcp.pdf
+ */
+static int svc_conn_age_period = 6*60;
+
/*
- * Queue up an idle server thread. Must have serv->sv_lock held.
+ * Queue up an idle server thread. Must have pool->sp_lock held.
* Note: this is really a stack rather than a queue, so that we only
- * use as many different threads as we need, and the rest don't polute
+ * use as many different threads as we need, and the rest don't pollute
* the cache.
*/
static inline void
-svc_serv_enqueue(struct svc_serv *serv, struct svc_rqst *rqstp)
+svc_thread_enqueue(struct svc_pool *pool, struct svc_rqst *rqstp)
{
- list_add(&rqstp->rq_list, &serv->sv_threads);
+ list_add(&rqstp->rq_list, &pool->sp_threads);
}
/*
- * Dequeue an nfsd thread. Must have serv->sv_lock held.
+ * Dequeue an nfsd thread. Must have pool->sp_lock held.
*/
static inline void
-svc_serv_dequeue(struct svc_serv *serv, struct svc_rqst *rqstp)
+svc_thread_dequeue(struct svc_pool *pool, struct svc_rqst *rqstp)
{
list_del(&rqstp->rq_list);
}
@@ -140,7 +151,9 @@ static void
svc_sock_enqueue(struct svc_sock *svsk)
{
struct svc_serv *serv = svsk->sk_server;
+ struct svc_pool *pool;
struct svc_rqst *rqstp;
+ int cpu;
if (!(svsk->sk_flags &
( (1<<SK_CONN)|(1<<SK_DATA)|(1<<SK_CLOSE)|(1<<SK_DEFERRED)) ))
@@ -148,10 +161,14 @@ svc_sock_enqueue(struct svc_sock *svsk)
if (test_bit(SK_DEAD, &svsk->sk_flags))
return;
- spin_lock_bh(&serv->sv_lock);
+ cpu = get_cpu();
+ pool = svc_pool_for_cpu(svsk->sk_server, cpu);
+ put_cpu();
- if (!list_empty(&serv->sv_threads) &&
- !list_empty(&serv->sv_sockets))
+ spin_lock_bh(&pool->sp_lock);
+
+ if (!list_empty(&pool->sp_threads) &&
+ !list_empty(&pool->sp_sockets))
printk(KERN_ERR
"svc_sock_enqueue: threads and sockets both waiting??\n");
@@ -161,73 +178,79 @@ svc_sock_enqueue(struct svc_sock *svsk)
goto out_unlock;
}
- if (test_bit(SK_BUSY, &svsk->sk_flags)) {
- /* Don't enqueue socket while daemon is receiving */
+ /* Mark socket as busy. It will remain in this state until the
+ * server has processed all pending data and put the socket back
+ * on the idle list. We update SK_BUSY atomically because
+ * it also guards against trying to enqueue the svc_sock twice.
+ */
+ if (test_and_set_bit(SK_BUSY, &svsk->sk_flags)) {
+ /* Don't enqueue socket while already enqueued */
dprintk("svc: socket %p busy, not enqueued\n", svsk->sk_sk);
goto out_unlock;
}
+ BUG_ON(svsk->sk_pool != NULL);
+ svsk->sk_pool = pool;
set_bit(SOCK_NOSPACE, &svsk->sk_sock->flags);
- if (((svsk->sk_reserved + serv->sv_bufsz)*2
+ if (((atomic_read(&svsk->sk_reserved) + serv->sv_bufsz)*2
> svc_sock_wspace(svsk))
&& !test_bit(SK_CLOSE, &svsk->sk_flags)
&& !test_bit(SK_CONN, &svsk->sk_flags)) {
/* Don't enqueue while not enough space for reply */
dprintk("svc: socket %p no space, %d*2 > %ld, not enqueued\n",
- svsk->sk_sk, svsk->sk_reserved+serv->sv_bufsz,
+ svsk->sk_sk, atomic_read(&svsk->sk_reserved)+serv->sv_bufsz,
svc_sock_wspace(svsk));
+ svsk->sk_pool = NULL;
+ clear_bit(SK_BUSY, &svsk->sk_flags);
goto out_unlock;
}
clear_bit(SOCK_NOSPACE, &svsk->sk_sock->flags);
- /* Mark socket as busy. It will remain in this state until the
- * server has processed all pending data and put the socket back
- * on the idle list.
- */
- set_bit(SK_BUSY, &svsk->sk_flags);
- if (!list_empty(&serv->sv_threads)) {
- rqstp = list_entry(serv->sv_threads.next,
+ if (!list_empty(&pool->sp_threads)) {
+ rqstp = list_entry(pool->sp_threads.next,
struct svc_rqst,
rq_list);
dprintk("svc: socket %p served by daemon %p\n",
svsk->sk_sk, rqstp);
- svc_serv_dequeue(serv, rqstp);
+ svc_thread_dequeue(pool, rqstp);
if (rqstp->rq_sock)
printk(KERN_ERR
"svc_sock_enqueue: server %p, rq_sock=%p!\n",
rqstp, rqstp->rq_sock);
rqstp->rq_sock = svsk;
- svsk->sk_inuse++;
+ atomic_inc(&svsk->sk_inuse);
rqstp->rq_reserved = serv->sv_bufsz;
- svsk->sk_reserved += rqstp->rq_reserved;
+ atomic_add(rqstp->rq_reserved, &svsk->sk_reserved);
+ BUG_ON(svsk->sk_pool != pool);
wake_up(&rqstp->rq_wait);
} else {
dprintk("svc: socket %p put into queue\n", svsk->sk_sk);
- list_add_tail(&svsk->sk_ready, &serv->sv_sockets);
+ list_add_tail(&svsk->sk_ready, &pool->sp_sockets);
+ BUG_ON(svsk->sk_pool != pool);
}
out_unlock:
- spin_unlock_bh(&serv->sv_lock);
+ spin_unlock_bh(&pool->sp_lock);
}
/*
- * Dequeue the first socket. Must be called with the serv->sv_lock held.
+ * Dequeue the first socket. Must be called with the pool->sp_lock held.
*/
static inline struct svc_sock *
-svc_sock_dequeue(struct svc_serv *serv)
+svc_sock_dequeue(struct svc_pool *pool)
{
struct svc_sock *svsk;
- if (list_empty(&serv->sv_sockets))
+ if (list_empty(&pool->sp_sockets))
return NULL;
- svsk = list_entry(serv->sv_sockets.next,
+ svsk = list_entry(pool->sp_sockets.next,
struct svc_sock, sk_ready);
list_del_init(&svsk->sk_ready);
dprintk("svc: socket %p dequeued, inuse=%d\n",
- svsk->sk_sk, svsk->sk_inuse);
+ svsk->sk_sk, atomic_read(&svsk->sk_inuse));
return svsk;
}
@@ -241,6 +264,7 @@ svc_sock_dequeue(struct svc_serv *serv)
static inline void
svc_sock_received(struct svc_sock *svsk)
{
+ svsk->sk_pool = NULL;
clear_bit(SK_BUSY, &svsk->sk_flags);
svc_sock_enqueue(svsk);
}
@@ -262,10 +286,8 @@ void svc_reserve(struct svc_rqst *rqstp, int space)
if (space < rqstp->rq_reserved) {
struct svc_sock *svsk = rqstp->rq_sock;
- spin_lock_bh(&svsk->sk_server->sv_lock);
- svsk->sk_reserved -= (rqstp->rq_reserved - space);
+ atomic_sub((rqstp->rq_reserved - space), &svsk->sk_reserved);
rqstp->rq_reserved = space;
- spin_unlock_bh(&svsk->sk_server->sv_lock);
svc_sock_enqueue(svsk);
}
@@ -277,17 +299,11 @@ void svc_reserve(struct svc_rqst *rqstp, int space)
static inline void
svc_sock_put(struct svc_sock *svsk)
{
- struct svc_serv *serv = svsk->sk_server;
-
- spin_lock_bh(&serv->sv_lock);
- if (!--(svsk->sk_inuse) && test_bit(SK_DEAD, &svsk->sk_flags)) {
- spin_unlock_bh(&serv->sv_lock);
+ if (atomic_dec_and_test(&svsk->sk_inuse) && test_bit(SK_DEAD, &svsk->sk_flags)) {
dprintk("svc: releasing dead socket\n");
sock_release(svsk->sk_sock);
kfree(svsk);
}
- else
- spin_unlock_bh(&serv->sv_lock);
}
static void
@@ -321,25 +337,33 @@ svc_sock_release(struct svc_rqst *rqstp)
/*
* External function to wake up a server waiting for data
+ * This really only makes sense for services like lockd
+ * which have exactly one thread anyway.
*/
void
svc_wake_up(struct svc_serv *serv)
{
struct svc_rqst *rqstp;
-
- spin_lock_bh(&serv->sv_lock);
- if (!list_empty(&serv->sv_threads)) {
- rqstp = list_entry(serv->sv_threads.next,
- struct svc_rqst,
- rq_list);
- dprintk("svc: daemon %p woken up.\n", rqstp);
- /*
- svc_serv_dequeue(serv, rqstp);
- rqstp->rq_sock = NULL;
- */
- wake_up(&rqstp->rq_wait);
+ unsigned int i;
+ struct svc_pool *pool;
+
+ for (i = 0; i < serv->sv_nrpools; i++) {
+ pool = &serv->sv_pools[i];
+
+ spin_lock_bh(&pool->sp_lock);
+ if (!list_empty(&pool->sp_threads)) {
+ rqstp = list_entry(pool->sp_threads.next,
+ struct svc_rqst,
+ rq_list);
+ dprintk("svc: daemon %p woken up.\n", rqstp);
+ /*
+ svc_thread_dequeue(pool, rqstp);
+ rqstp->rq_sock = NULL;
+ */
+ wake_up(&rqstp->rq_wait);
+ }
+ spin_unlock_bh(&pool->sp_lock);
}
- spin_unlock_bh(&serv->sv_lock);
}
/*
@@ -429,6 +453,51 @@ out:
}
/*
+ * Report socket names for nfsdfs
+ */
+static int one_sock_name(char *buf, struct svc_sock *svsk)
+{
+ int len;
+
+ switch(svsk->sk_sk->sk_family) {
+ case AF_INET:
+ len = sprintf(buf, "ipv4 %s %u.%u.%u.%u %d\n",
+ svsk->sk_sk->sk_protocol==IPPROTO_UDP?
+ "udp" : "tcp",
+ NIPQUAD(inet_sk(svsk->sk_sk)->rcv_saddr),
+ inet_sk(svsk->sk_sk)->num);
+ break;
+ default:
+ len = sprintf(buf, "*unknown-%d*\n",
+ svsk->sk_sk->sk_family);
+ }
+ return len;
+}
+
+int
+svc_sock_names(char *buf, struct svc_serv *serv, char *toclose)
+{
+ struct svc_sock *svsk, *closesk = NULL;
+ int len = 0;
+
+ if (!serv)
+ return 0;
+ spin_lock(&serv->sv_lock);
+ list_for_each_entry(svsk, &serv->sv_permsocks, sk_list) {
+ int onelen = one_sock_name(buf+len, svsk);
+ if (toclose && strcmp(toclose, buf+len) == 0)
+ closesk = svsk;
+ else
+ len += onelen;
+ }
+ spin_unlock(&serv->sv_lock);
+ if (closesk)
+ svc_delete_socket(closesk);
+ return len;
+}
+EXPORT_SYMBOL(svc_sock_names);
+
+/*
* Check input queue length
*/
static int
@@ -557,7 +626,10 @@ svc_udp_recvfrom(struct svc_rqst *rqstp)
/* udp sockets need large rcvbuf as all pending
* requests are still in that buffer. sndbuf must
* also be large enough that there is enough space
- * for one reply per thread.
+ * for one reply per thread. We count all threads
+ * rather than threads in a particular pool, which
+ * provides an upper bound on the number of threads
+ * which will access the socket.
*/
svc_sock_setbufsize(svsk->sk_sock,
(serv->sv_nrthreads+3) * serv->sv_bufsz,
@@ -844,7 +916,7 @@ svc_tcp_accept(struct svc_sock *svsk)
struct svc_sock,
sk_list);
set_bit(SK_CLOSE, &svsk->sk_flags);
- svsk->sk_inuse ++;
+ atomic_inc(&svsk->sk_inuse);
}
spin_unlock_bh(&serv->sv_lock);
@@ -902,6 +974,11 @@ svc_tcp_recvfrom(struct svc_rqst *rqstp)
/* sndbuf needs to have room for one request
* per thread, otherwise we can stall even when the
* network isn't a bottleneck.
+ *
+ * We count all threads rather than threads in a
+ * particular pool, which provides an upper bound
+ * on the number of threads which will access the socket.
+ *
* rcvbuf just needs to be able to hold a few requests.
* Normally they will be removed from the queue
* as soon a a complete request arrives.
@@ -1117,12 +1194,16 @@ svc_sock_update_bufs(struct svc_serv *serv)
}
/*
- * Receive the next request on any socket.
+ * Receive the next request on any socket. This code is carefully
+ * organised not to touch any cachelines in the shared svc_serv
+ * structure, only cachelines in the local svc_pool.
*/
int
-svc_recv(struct svc_serv *serv, struct svc_rqst *rqstp, long timeout)
+svc_recv(struct svc_rqst *rqstp, long timeout)
{
struct svc_sock *svsk =NULL;
+ struct svc_serv *serv = rqstp->rq_server;
+ struct svc_pool *pool = rqstp->rq_pool;
int len;
int pages;
struct xdr_buf *arg;
@@ -1172,32 +1253,15 @@ svc_recv(struct svc_serv *serv, struct svc_rqst *rqstp, long timeout)
if (signalled())
return -EINTR;
- spin_lock_bh(&serv->sv_lock);
- if (!list_empty(&serv->sv_tempsocks)) {
- svsk = list_entry(serv->sv_tempsocks.next,
- struct svc_sock, sk_list);
- /* apparently the "standard" is that clients close
- * idle connections after 5 minutes, servers after
- * 6 minutes
- * http://www.connectathon.org/talks96/nfstcp.pdf
- */
- if (get_seconds() - svsk->sk_lastrecv < 6*60
- || test_bit(SK_BUSY, &svsk->sk_flags))
- svsk = NULL;
- }
- if (svsk) {
- set_bit(SK_BUSY, &svsk->sk_flags);
- set_bit(SK_CLOSE, &svsk->sk_flags);
- rqstp->rq_sock = svsk;
- svsk->sk_inuse++;
- } else if ((svsk = svc_sock_dequeue(serv)) != NULL) {
+ spin_lock_bh(&pool->sp_lock);
+ if ((svsk = svc_sock_dequeue(pool)) != NULL) {
rqstp->rq_sock = svsk;
- svsk->sk_inuse++;
+ atomic_inc(&svsk->sk_inuse);
rqstp->rq_reserved = serv->sv_bufsz;
- svsk->sk_reserved += rqstp->rq_reserved;
+ atomic_add(rqstp->rq_reserved, &svsk->sk_reserved);
} else {
/* No data pending. Go to sleep */
- svc_serv_enqueue(serv, rqstp);
+ svc_thread_enqueue(pool, rqstp);
/*
* We have to be able to interrupt this wait
@@ -1205,26 +1269,26 @@ svc_recv(struct svc_serv *serv, struct svc_rqst *rqstp, long timeout)
*/
set_current_state(TASK_INTERRUPTIBLE);
add_wait_queue(&rqstp->rq_wait, &wait);
- spin_unlock_bh(&serv->sv_lock);
+ spin_unlock_bh(&pool->sp_lock);
schedule_timeout(timeout);
try_to_freeze();
- spin_lock_bh(&serv->sv_lock);
+ spin_lock_bh(&pool->sp_lock);
remove_wait_queue(&rqstp->rq_wait, &wait);
if (!(svsk = rqstp->rq_sock)) {
- svc_serv_dequeue(serv, rqstp);
- spin_unlock_bh(&serv->sv_lock);
+ svc_thread_dequeue(pool, rqstp);
+ spin_unlock_bh(&pool->sp_lock);
dprintk("svc: server %p, no data yet\n", rqstp);
return signalled()? -EINTR : -EAGAIN;
}
}
- spin_unlock_bh(&serv->sv_lock);
+ spin_unlock_bh(&pool->sp_lock);
- dprintk("svc: server %p, socket %p, inuse=%d\n",
- rqstp, svsk, svsk->sk_inuse);
+ dprintk("svc: server %p, pool %u, socket %p, inuse=%d\n",
+ rqstp, pool->sp_id, svsk, atomic_read(&svsk->sk_inuse));
len = svsk->sk_recvfrom(rqstp);
dprintk("svc: got len=%d\n", len);
@@ -1235,13 +1299,7 @@ svc_recv(struct svc_serv *serv, struct svc_rqst *rqstp, long timeout)
return -EAGAIN;
}
svsk->sk_lastrecv = get_seconds();
- if (test_bit(SK_TEMP, &svsk->sk_flags)) {
- /* push active sockets to end of list */
- spin_lock_bh(&serv->sv_lock);
- if (!list_empty(&svsk->sk_list))
- list_move_tail(&svsk->sk_list, &serv->sv_tempsocks);
- spin_unlock_bh(&serv->sv_lock);
- }
+ clear_bit(SK_OLD, &svsk->sk_flags);
rqstp->rq_secure = ntohs(rqstp->rq_addr.sin_port) < 1024;
rqstp->rq_chandle.defer = svc_defer;
@@ -1301,6 +1359,58 @@ svc_send(struct svc_rqst *rqstp)
}
/*
+ * Timer function to close old temporary sockets, using
+ * a mark-and-sweep algorithm.
+ */
+static void
+svc_age_temp_sockets(unsigned long closure)
+{
+ struct svc_serv *serv = (struct svc_serv *)closure;
+ struct svc_sock *svsk;
+ struct list_head *le, *next;
+ LIST_HEAD(to_be_aged);
+
+ dprintk("svc_age_temp_sockets\n");
+
+ if (!spin_trylock_bh(&serv->sv_lock)) {
+ /* busy, try again 1 sec later */
+ dprintk("svc_age_temp_sockets: busy\n");
+ mod_timer(&serv->sv_temptimer, jiffies + HZ);
+ return;
+ }
+
+ list_for_each_safe(le, next, &serv->sv_tempsocks) {
+ svsk = list_entry(le, struct svc_sock, sk_list);
+
+ if (!test_and_set_bit(SK_OLD, &svsk->sk_flags))
+ continue;
+ if (atomic_read(&svsk->sk_inuse) || test_bit(SK_BUSY, &svsk->sk_flags))
+ continue;
+ atomic_inc(&svsk->sk_inuse);
+ list_move(le, &to_be_aged);
+ set_bit(SK_CLOSE, &svsk->sk_flags);
+ set_bit(SK_DETACHED, &svsk->sk_flags);
+ }
+ spin_unlock_bh(&serv->sv_lock);
+
+ while (!list_empty(&to_be_aged)) {
+ le = to_be_aged.next;
+ /* fiddling the sk_list node is safe 'cos we're SK_DETACHED */
+ list_del_init(le);
+ svsk = list_entry(le, struct svc_sock, sk_list);
+
+ dprintk("queuing svsk %p for closing, %lu seconds old\n",
+ svsk, get_seconds() - svsk->sk_lastrecv);
+
+ /* a thread will dequeue and close it soon */
+ svc_sock_enqueue(svsk);
+ svc_sock_put(svsk);
+ }
+
+ mod_timer(&serv->sv_temptimer, jiffies + svc_conn_age_period * HZ);
+}
+
+/*
* Initialize socket for RPC use and create svc_sock struct
* XXX: May want to setsockopt SO_SNDBUF and SO_RCVBUF.
*/
@@ -1337,7 +1447,9 @@ svc_setup_socket(struct svc_serv *serv, struct socket *sock,
svsk->sk_odata = inet->sk_data_ready;
svsk->sk_owspace = inet->sk_write_space;
svsk->sk_server = serv;
+ atomic_set(&svsk->sk_inuse, 0);
svsk->sk_lastrecv = get_seconds();
+ spin_lock_init(&svsk->sk_defer_lock);
INIT_LIST_HEAD(&svsk->sk_deferred);
INIT_LIST_HEAD(&svsk->sk_ready);
mutex_init(&svsk->sk_mutex);
@@ -1353,6 +1465,13 @@ svc_setup_socket(struct svc_serv *serv, struct socket *sock,
set_bit(SK_TEMP, &svsk->sk_flags);
list_add(&svsk->sk_list, &serv->sv_tempsocks);
serv->sv_tmpcnt++;
+ if (serv->sv_temptimer.function == NULL) {
+ /* setup timer to age temp sockets */
+ setup_timer(&serv->sv_temptimer, svc_age_temp_sockets,
+ (unsigned long)serv);
+ mod_timer(&serv->sv_temptimer,
+ jiffies + svc_conn_age_period * HZ);
+ }
} else {
clear_bit(SK_TEMP, &svsk->sk_flags);
list_add(&svsk->sk_list, &serv->sv_permsocks);
@@ -1367,6 +1486,38 @@ svc_setup_socket(struct svc_serv *serv, struct socket *sock,
return svsk;
}
+int svc_addsock(struct svc_serv *serv,
+ int fd,
+ char *name_return,
+ int *proto)
+{
+ int err = 0;
+ struct socket *so = sockfd_lookup(fd, &err);
+ struct svc_sock *svsk = NULL;
+
+ if (!so)
+ return err;
+ if (so->sk->sk_family != AF_INET)
+ err = -EAFNOSUPPORT;
+ else if (so->sk->sk_protocol != IPPROTO_TCP &&
+ so->sk->sk_protocol != IPPROTO_UDP)
+ err = -EPROTONOSUPPORT;
+ else if (so->state > SS_UNCONNECTED)
+ err = -EISCONN;
+ else {
+ svsk = svc_setup_socket(serv, so, &err, 1);
+ if (svsk)
+ err = 0;
+ }
+ if (err) {
+ sockfd_put(so);
+ return err;
+ }
+ if (proto) *proto = so->sk->sk_protocol;
+ return one_sock_name(name_return, svsk);
+}
+EXPORT_SYMBOL_GPL(svc_addsock);
+
/*
* Create socket for RPC service.
*/
@@ -1434,15 +1585,25 @@ svc_delete_socket(struct svc_sock *svsk)
spin_lock_bh(&serv->sv_lock);
- list_del_init(&svsk->sk_list);
- list_del_init(&svsk->sk_ready);
+ if (!test_and_set_bit(SK_DETACHED, &svsk->sk_flags))
+ list_del_init(&svsk->sk_list);
+ /*
+ * We used to delete the svc_sock from whichever list
+ * it's sk_ready node was on, but we don't actually
+ * need to. This is because the only time we're called
+ * while still attached to a queue, the queue itself
+ * is about to be destroyed (in svc_destroy).
+ */
if (!test_and_set_bit(SK_DEAD, &svsk->sk_flags))
if (test_bit(SK_TEMP, &svsk->sk_flags))
serv->sv_tmpcnt--;
- if (!svsk->sk_inuse) {
+ if (!atomic_read(&svsk->sk_inuse)) {
spin_unlock_bh(&serv->sv_lock);
- sock_release(svsk->sk_sock);
+ if (svsk->sk_sock->file)
+ sockfd_put(svsk->sk_sock);
+ else
+ sock_release(svsk->sk_sock);
kfree(svsk);
} else {
spin_unlock_bh(&serv->sv_lock);
@@ -1473,7 +1634,6 @@ svc_makesock(struct svc_serv *serv, int protocol, unsigned short port)
static void svc_revisit(struct cache_deferred_req *dreq, int too_many)
{
struct svc_deferred_req *dr = container_of(dreq, struct svc_deferred_req, handle);
- struct svc_serv *serv = dreq->owner;
struct svc_sock *svsk;
if (too_many) {
@@ -1484,9 +1644,9 @@ static void svc_revisit(struct cache_deferred_req *dreq, int too_many)
dprintk("revisit queued\n");
svsk = dr->svsk;
dr->svsk = NULL;
- spin_lock_bh(&serv->sv_lock);
+ spin_lock_bh(&svsk->sk_defer_lock);
list_add(&dr->handle.recent, &svsk->sk_deferred);
- spin_unlock_bh(&serv->sv_lock);
+ spin_unlock_bh(&svsk->sk_defer_lock);
set_bit(SK_DEFERRED, &svsk->sk_flags);
svc_sock_enqueue(svsk);
svc_sock_put(svsk);
@@ -1518,10 +1678,8 @@ svc_defer(struct cache_req *req)
dr->argslen = rqstp->rq_arg.len >> 2;
memcpy(dr->args, rqstp->rq_arg.head[0].iov_base-skip, dr->argslen<<2);
}
- spin_lock_bh(&rqstp->rq_server->sv_lock);
- rqstp->rq_sock->sk_inuse++;
+ atomic_inc(&rqstp->rq_sock->sk_inuse);
dr->svsk = rqstp->rq_sock;
- spin_unlock_bh(&rqstp->rq_server->sv_lock);
dr->handle.revisit = svc_revisit;
return &dr->handle;
@@ -1548,11 +1706,10 @@ static int svc_deferred_recv(struct svc_rqst *rqstp)
static struct svc_deferred_req *svc_deferred_dequeue(struct svc_sock *svsk)
{
struct svc_deferred_req *dr = NULL;
- struct svc_serv *serv = svsk->sk_server;
if (!test_bit(SK_DEFERRED, &svsk->sk_flags))
return NULL;
- spin_lock_bh(&serv->sv_lock);
+ spin_lock_bh(&svsk->sk_defer_lock);
clear_bit(SK_DEFERRED, &svsk->sk_flags);
if (!list_empty(&svsk->sk_deferred)) {
dr = list_entry(svsk->sk_deferred.next,
@@ -1561,6 +1718,6 @@ static struct svc_deferred_req *svc_deferred_dequeue(struct svc_sock *svsk)
list_del_init(&dr->handle.recent);
set_bit(SK_DEFERRED, &svsk->sk_flags);
}
- spin_unlock_bh(&serv->sv_lock);
+ spin_unlock_bh(&svsk->sk_defer_lock);
return dr;
}
diff --git a/sound/core/info_oss.c b/sound/core/info_oss.c
index 3ebc34919c7..a444bfe2cf7 100644
--- a/sound/core/info_oss.c
+++ b/sound/core/info_oss.c
@@ -96,11 +96,11 @@ static void snd_sndstat_proc_read(struct snd_info_entry *entry,
{
snd_iprintf(buffer, "Sound Driver:3.8.1a-980706 (ALSA v" CONFIG_SND_VERSION " emulation code)\n");
snd_iprintf(buffer, "Kernel: %s %s %s %s %s\n",
- system_utsname.sysname,
- system_utsname.nodename,
- system_utsname.release,
- system_utsname.version,
- system_utsname.machine);
+ init_utsname()->sysname,
+ init_utsname()->nodename,
+ init_utsname()->release,
+ init_utsname()->version,
+ init_utsname()->machine);
snd_iprintf(buffer, "Config options: 0\n");
snd_iprintf(buffer, "\nInstalled drivers: \n");
snd_iprintf(buffer, "Type 10: ALSA emulation\n");