diff options
author | Ingo Molnar <mingo@elte.hu> | 2009-01-11 02:42:53 +0100 |
---|---|---|
committer | Ingo Molnar <mingo@elte.hu> | 2009-01-11 02:42:53 +0100 |
commit | 506c10f26c481b7f8ef27c1c79290f68989b2e9e (patch) | |
tree | 03de82e812f00957aa6276dac2fe51c3358e88d7 /init | |
parent | e1df957670aef74ffd9a4ad93e6d2c90bf6b4845 (diff) | |
parent | c59765042f53a79a7a65585042ff463b69cb248c (diff) |
Merge commit 'v2.6.29-rc1' into perfcounters/core
Conflicts:
include/linux/kernel_stat.h
Diffstat (limited to 'init')
-rw-r--r-- | init/Kconfig | 291 | ||||
-rw-r--r-- | init/do_mounts.c | 6 | ||||
-rw-r--r-- | init/do_mounts_md.c | 2 | ||||
-rw-r--r-- | init/do_mounts_rd.c | 14 | ||||
-rw-r--r-- | init/initramfs.c | 1 | ||||
-rw-r--r-- | init/main.c | 41 |
6 files changed, 239 insertions, 116 deletions
diff --git a/init/Kconfig b/init/Kconfig index c38ae71a5e1..a588cdc274b 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -271,59 +271,6 @@ config LOG_BUF_SHIFT 13 => 8 KB 12 => 4 KB -config CGROUPS - bool "Control Group support" - help - This option will let you use process cgroup subsystems - such as Cpusets - - Say N if unsure. - -config CGROUP_DEBUG - bool "Example debug cgroup subsystem" - depends on CGROUPS - default n - help - This option enables a simple cgroup subsystem that - exports useful debugging information about the cgroups - framework - - Say N if unsure - -config CGROUP_NS - bool "Namespace cgroup subsystem" - depends on CGROUPS - help - Provides a simple namespace cgroup subsystem to - provide hierarchical naming of sets of namespaces, - for instance virtual servers and checkpoint/restart - jobs. - -config CGROUP_FREEZER - bool "control group freezer subsystem" - depends on CGROUPS - help - Provides a way to freeze and unfreeze all tasks in a - cgroup. - -config CGROUP_DEVICE - bool "Device controller for cgroups" - depends on CGROUPS && EXPERIMENTAL - help - Provides a cgroup implementing whitelists for devices which - a process in the cgroup can mknod or open. - -config CPUSETS - bool "Cpuset support" - depends on SMP && CGROUPS - help - This option will let you create and manage CPUSETs which - allow dynamically partitioning a system into sets of CPUs and - Memory Nodes and assigning tasks to run only within those sets. - This is primarily useful on large SMP or NUMA systems. - - Say N if unsure. - # # Architectures with an unreliable sched_clock() should select this: # @@ -337,6 +284,8 @@ config GROUP_SCHED help This feature lets CPU scheduler recognize task groups and control CPU bandwidth allocation to such task groups. + In order to create a group from arbitrary set of processes, use + CONFIG_CGROUPS. (See Control Group support.) config FAIR_GROUP_SCHED bool "Group scheduling for SCHED_OTHER" @@ -379,6 +328,66 @@ config CGROUP_SCHED endchoice +menu "Control Group support" +config CGROUPS + bool "Control Group support" + help + This option add support for grouping sets of processes together, for + use with process control subsystems such as Cpusets, CFS, memory + controls or device isolation. + See + - Documentation/cpusets.txt (Cpusets) + - Documentation/scheduler/sched-design-CFS.txt (CFS) + - Documentation/cgroups/ (features for grouping, isolation) + - Documentation/controllers/ (features for resource control) + + Say N if unsure. + +config CGROUP_DEBUG + bool "Example debug cgroup subsystem" + depends on CGROUPS + default n + help + This option enables a simple cgroup subsystem that + exports useful debugging information about the cgroups + framework + + Say N if unsure + +config CGROUP_NS + bool "Namespace cgroup subsystem" + depends on CGROUPS + help + Provides a simple namespace cgroup subsystem to + provide hierarchical naming of sets of namespaces, + for instance virtual servers and checkpoint/restart + jobs. + +config CGROUP_FREEZER + bool "control group freezer subsystem" + depends on CGROUPS + help + Provides a way to freeze and unfreeze all tasks in a + cgroup. + +config CGROUP_DEVICE + bool "Device controller for cgroups" + depends on CGROUPS && EXPERIMENTAL + help + Provides a cgroup implementing whitelists for devices which + a process in the cgroup can mknod or open. + +config CPUSETS + bool "Cpuset support" + depends on SMP && CGROUPS + help + This option will let you create and manage CPUSETs which + allow dynamically partitioning a system into sets of CPUs and + Memory Nodes and assigning tasks to run only within those sets. + This is primarily useful on large SMP or NUMA systems. + + Say N if unsure. + config CGROUP_CPUACCT bool "Simple CPU accounting cgroup subsystem" depends on CGROUPS @@ -393,9 +402,6 @@ config RESOURCE_COUNTERS infrastructure that works with cgroups depends on CGROUPS -config MM_OWNER - bool - config CGROUP_MEM_RES_CTLR bool "Memory Resource Controller for Control Groups" depends on CGROUPS && RESOURCE_COUNTERS @@ -414,36 +420,68 @@ config CGROUP_MEM_RES_CTLR sure you need the memory resource controller. Even when you enable this, you can set "cgroup_disable=memory" at your boot option to disable memory resource controller and you can avoid overheads. - (and lose benefits of memory resource contoller) + (and lose benefits of memory resource controller) This config option also selects MM_OWNER config option, which could in turn add some fork/exit overhead. +config MM_OWNER + bool + +config CGROUP_MEM_RES_CTLR_SWAP + bool "Memory Resource Controller Swap Extension(EXPERIMENTAL)" + depends on CGROUP_MEM_RES_CTLR && SWAP && EXPERIMENTAL + help + Add swap management feature to memory resource controller. When you + enable this, you can limit mem+swap usage per cgroup. In other words, + when you disable this, memory resource controller has no cares to + usage of swap...a process can exhaust all of the swap. This extension + is useful when you want to avoid exhaustion swap but this itself + adds more overheads and consumes memory for remembering information. + Especially if you use 32bit system or small memory system, please + be careful about enabling this. When memory resource controller + is disabled by boot option, this will be automatically disabled and + there will be no overhead from this. Even when you set this config=y, + if boot option "noswapaccount" is set, swap will not be accounted. + + +endmenu + config SYSFS_DEPRECATED bool config SYSFS_DEPRECATED_V2 - bool "Create deprecated sysfs files" + bool "Create deprecated sysfs layout for older userspace tools" depends on SYSFS default y select SYSFS_DEPRECATED help - This option creates deprecated symlinks such as the - "device"-link, the <subsystem>:<name>-link, and the - "bus"-link. It may also add deprecated key in the - uevent environment. - None of these features or values should be used today, as - they export driver core implementation details to userspace - or export properties which can't be kept stable across kernel - releases. - - If enabled, this option will also move any device structures - that belong to a class, back into the /sys/class hierarchy, in - order to support older versions of udev and some userspace - programs. - - If you are using a distro with the most recent userspace - packages, it should be safe to say N here. + This option switches the layout of sysfs to the deprecated + version. + + The current sysfs layout features a unified device tree at + /sys/devices/, which is able to express a hierarchy between + class devices. If the deprecated option is set to Y, the + unified device tree is split into a bus device tree at + /sys/devices/ and several individual class device trees at + /sys/class/. The class and bus devices will be connected by + "<subsystem>:<name>" and the "device" links. The "block" + class devices, will not show up in /sys/class/block/. Some + subsystems will suppress the creation of some devices which + depend on the unified device tree. + + This option is not a pure compatibility option that can + be safely enabled on newer distributions. It will change the + layout of sysfs to the non-extensible deprecated version, + and disable some features, which can not be exported without + confusing older userspace tools. Since 2007/2008 all major + distributions do not enable this option, and ship no tools which + depend on the deprecated layout or this option. + + If you are using a new kernel on an older distribution, or use + older userspace tools, you might need to say Y here. Do not say Y, + if the original kernel, that came with your distribution, has + this option set to N. config PROC_PID_CPUSET bool "Include legacy /proc/<pid>/cpuset file" @@ -868,10 +906,6 @@ config RT_MUTEXES boolean select PLIST -config TINY_SHMEM - default !SHMEM - bool - config BASE_SMALL int default 0 if BASE_FULL @@ -946,14 +980,17 @@ config MODULE_SRCVERSION_ALL the version). With this option, such a "srcversion" field will be created for all modules. If unsure, say N. -config KMOD - def_bool y - help - This is being removed soon. These days, CONFIG_MODULES - implies CONFIG_KMOD, so use that instead. - endif # MODULES +config INIT_ALL_POSSIBLE + bool + help + Back when each arch used to define their own cpu_online_map and + cpu_possible_map, some of them chose to initialize cpu_possible_map + with all 1s, and others with all 0s. When they were centralised, + it was better to provide this option than to break all the archs + and have several arch maintainers persuing me down dark alleys. + config STOP_MACHINE bool default y @@ -966,10 +1003,90 @@ source "block/Kconfig" config PREEMPT_NOTIFIERS bool +choice + prompt "RCU Implementation" + default CLASSIC_RCU + config CLASSIC_RCU - def_bool !PREEMPT_RCU + bool "Classic RCU" help This option selects the classic RCU implementation that is designed for best read-side performance on non-realtime - systems. Classic RCU is the default. Note that the - PREEMPT_RCU symbol is used to select/deselect this option. + systems. + + Select this option if you are unsure. + +config TREE_RCU + bool "Tree-based hierarchical RCU" + help + This option selects the RCU implementation that is + designed for very large SMP system with hundreds or + thousands of CPUs. + +config PREEMPT_RCU + bool "Preemptible RCU" + depends on PREEMPT + help + This option reduces the latency of the kernel by making certain + RCU sections preemptible. Normally RCU code is non-preemptible, if + this option is selected then read-only RCU sections become + preemptible. This helps latency, but may expose bugs due to + now-naive assumptions about each RCU read-side critical section + remaining on a given CPU through its execution. + +endchoice + +config RCU_TRACE + bool "Enable tracing for RCU" + depends on TREE_RCU || PREEMPT_RCU + help + This option provides tracing in RCU which presents stats + in debugfs for debugging RCU implementation. + + Say Y here if you want to enable RCU tracing + Say N if you are unsure. + +config RCU_FANOUT + int "Tree-based hierarchical RCU fanout value" + range 2 64 if 64BIT + range 2 32 if !64BIT + depends on TREE_RCU + default 64 if 64BIT + default 32 if !64BIT + help + This option controls the fanout of hierarchical implementations + of RCU, allowing RCU to work efficiently on machines with + large numbers of CPUs. This value must be at least the cube + root of NR_CPUS, which allows NR_CPUS up to 32,768 for 32-bit + systems and up to 262,144 for 64-bit systems. + + Select a specific number if testing RCU itself. + Take the default if unsure. + +config RCU_FANOUT_EXACT + bool "Disable tree-based hierarchical RCU auto-balancing" + depends on TREE_RCU + default n + help + This option forces use of the exact RCU_FANOUT value specified, + regardless of imbalances in the hierarchy. This is useful for + testing RCU itself, and might one day be useful on systems with + strong NUMA behavior. + + Without RCU_FANOUT_EXACT, the code will balance the hierarchy. + + Say N if unsure. + +config TREE_RCU_TRACE + def_bool RCU_TRACE && TREE_RCU + select DEBUG_FS + help + This option provides tracing for the TREE_RCU implementation, + permitting Makefile to trivially select kernel/rcutree_trace.c. + +config PREEMPT_RCU_TRACE + def_bool RCU_TRACE && PREEMPT_RCU + select DEBUG_FS + help + This option provides tracing for the PREEMPT_RCU implementation, + permitting Makefile to trivially select kernel/rcupreempt_trace.c. diff --git a/init/do_mounts.c b/init/do_mounts.c index d055b1914c3..708105e163d 100644 --- a/init/do_mounts.c +++ b/init/do_mounts.c @@ -13,6 +13,7 @@ #include <linux/init.h> #include <linux/fs.h> #include <linux/initrd.h> +#include <linux/async.h> #include <linux/nfs_fs.h> #include <linux/nfs_fs_sb.h> @@ -220,10 +221,10 @@ static int __init do_mount_root(char *name, char *fs, int flags, void *data) sys_chdir("/root"); ROOT_DEV = current->fs->pwd.mnt->mnt_sb->s_dev; - printk("VFS: Mounted root (%s filesystem)%s.\n", + printk("VFS: Mounted root (%s filesystem)%s on device %u:%u.\n", current->fs->pwd.mnt->mnt_sb->s_type->name, current->fs->pwd.mnt->mnt_sb->s_flags & MS_RDONLY ? - " readonly" : ""); + " readonly" : "", MAJOR(ROOT_DEV), MINOR(ROOT_DEV)); return 0; } @@ -372,6 +373,7 @@ void __init prepare_namespace(void) /* wait for the known devices to complete their probing */ while (driver_probe_done() != 0) msleep(100); + async_synchronize_full(); md_run_setup(); diff --git a/init/do_mounts_md.c b/init/do_mounts_md.c index d6da5cdd3c3..ff95e319288 100644 --- a/init/do_mounts_md.c +++ b/init/do_mounts_md.c @@ -271,7 +271,7 @@ static int __init raid_setup(char *str) __setup("raid=", raid_setup); __setup("md=", md_setup); -static void autodetect_raid(void) +static void __init autodetect_raid(void) { int fd; diff --git a/init/do_mounts_rd.c b/init/do_mounts_rd.c index a7c748fa977..0f0f0cf3ba9 100644 --- a/init/do_mounts_rd.c +++ b/init/do_mounts_rd.c @@ -9,6 +9,7 @@ #include <linux/string.h> #include "do_mounts.h" +#include "../fs/squashfs/squashfs_fs.h" int __initdata rd_prompt = 1;/* 1 = prompt for RAM disk, 0 = don't prompt */ @@ -41,6 +42,7 @@ static int __init crd_load(int in_fd, int out_fd); * ext2 * romfs * cramfs + * squashfs * gzip */ static int __init @@ -51,6 +53,7 @@ identify_ramdisk_image(int fd, int start_block) struct ext2_super_block *ext2sb; struct romfs_super_block *romfsb; struct cramfs_super *cramfsb; + struct squashfs_super_block *squashfsb; int nblocks = -1; unsigned char *buf; @@ -62,6 +65,7 @@ identify_ramdisk_image(int fd, int start_block) ext2sb = (struct ext2_super_block *) buf; romfsb = (struct romfs_super_block *) buf; cramfsb = (struct cramfs_super *) buf; + squashfsb = (struct squashfs_super_block *) buf; memset(buf, 0xe5, size); /* @@ -99,6 +103,16 @@ identify_ramdisk_image(int fd, int start_block) goto done; } + /* squashfs is at block zero too */ + if (le32_to_cpu(squashfsb->s_magic) == SQUASHFS_MAGIC) { + printk(KERN_NOTICE + "RAMDISK: squashfs filesystem found at block %d\n", + start_block); + nblocks = (le64_to_cpu(squashfsb->bytes_used) + BLOCK_SIZE - 1) + >> BLOCK_SIZE_BITS; + goto done; + } + /* * Read block 1 to test for minix and ext2 superblock */ diff --git a/init/initramfs.c b/init/initramfs.c index 4f5ba75aaa7..d9c941c0c3c 100644 --- a/init/initramfs.c +++ b/init/initramfs.c @@ -317,6 +317,7 @@ static int __init do_name(void) if (wfd >= 0) { sys_fchown(wfd, uid, gid); sys_fchmod(wfd, mode); + sys_ftruncate(wfd, body_len); vcollected = kstrdup(collected, GFP_KERNEL); state = CopyFile; } diff --git a/init/main.c b/init/main.c index 17e9757bfde..844209453c0 100644 --- a/init/main.c +++ b/init/main.c @@ -50,7 +50,6 @@ #include <linux/rmap.h> #include <linux/mempolicy.h> #include <linux/key.h> -#include <linux/unwind.h> #include <linux/buffer_head.h> #include <linux/page_cgroup.h> #include <linux/debug_locks.h> @@ -63,6 +62,7 @@ #include <linux/signal.h> #include <linux/idr.h> #include <linux/ftrace.h> +#include <linux/async.h> #include <trace/boot.h> #include <asm/io.h> @@ -75,15 +75,6 @@ #include <asm/smp.h> #endif -/* - * This is one of the first .c files built. Error out early if we have compiler - * trouble. - */ - -#if __GNUC__ == 4 && __GNUC_MINOR__ == 1 && __GNUC_PATCHLEVEL__ == 0 -#warning gcc-4.1.0 is known to miscompile the kernel. A different compiler version is recommended. -#endif - static int kernel_init(void *); extern void init_IRQ(void); @@ -117,7 +108,7 @@ EXPORT_SYMBOL(system_state); extern void time_init(void); /* Default late time init is NULL. archs can override this later. */ -void (*late_time_init)(void); +void (*__initdata late_time_init)(void); extern void softirq_init(void); /* Untouched command line saved by arch-specific code. */ @@ -380,12 +371,7 @@ EXPORT_SYMBOL(nr_cpu_ids); /* An arch may set nr_cpu_ids earlier if needed, so this would be redundant */ static void __init setup_nr_cpu_ids(void) { - int cpu, highest_cpu = 0; - - for_each_possible_cpu(cpu) - highest_cpu = cpu; - - nr_cpu_ids = highest_cpu + 1; + nr_cpu_ids = find_last_bit(cpumask_bits(cpu_possible_mask),NR_CPUS) + 1; } #ifndef CONFIG_HAVE_SETUP_PER_CPU_AREA @@ -461,7 +447,7 @@ static void __init setup_command_line(char *command_line) * gcc-3.4 accidentally inlines this function, so use noinline. */ -static void noinline __init_refok rest_init(void) +static noinline void __init_refok rest_init(void) __releases(kernel_lock) { int pid; @@ -527,9 +513,9 @@ static void __init boot_cpu_init(void) { int cpu = smp_processor_id(); /* Mark the boot cpu "present", "online" etc for SMP and UP case */ - cpu_set(cpu, cpu_online_map); - cpu_set(cpu, cpu_present_map); - cpu_set(cpu, cpu_possible_map); + set_cpu_online(cpu, true); + set_cpu_present(cpu, true); + set_cpu_possible(cpu, true); } void __init __weak smp_setup_processor_id(void) @@ -551,7 +537,6 @@ asmlinkage void __init start_kernel(void) * Need to run as early as possible, to initialize the * lockdep hash: */ - unwind_init(); lockdep_init(); debug_objects_early_init(); cgroup_init_early(); @@ -573,7 +558,6 @@ asmlinkage void __init start_kernel(void) setup_arch(&command_line); mm_init_owner(&init_mm, &init_task); setup_command_line(command_line); - unwind_setup(); setup_per_cpu_areas(); setup_nr_cpu_ids(); smp_prepare_boot_cpu(); /* arch-specific boot-cpu hooks */ @@ -604,6 +588,8 @@ asmlinkage void __init start_kernel(void) sort_main_extable(); trap_init(); rcu_init(); + /* init some links before init_ISA_irqs() */ + early_irq_init(); init_IRQ(); pidhash_init(); init_timers(); @@ -614,7 +600,8 @@ asmlinkage void __init start_kernel(void) sched_clock_init(); profile_init(); if (!irqs_disabled()) - printk("start_kernel(): bug: interrupts were enabled early\n"); + printk(KERN_CRIT "start_kernel(): bug: interrupts were " + "enabled early\n"); early_boot_irqs_on(); local_irq_enable(); @@ -699,7 +686,7 @@ asmlinkage void __init start_kernel(void) rest_init(); } -static int initcall_debug; +int initcall_debug; core_param(initcall_debug, initcall_debug, bool, 0644); int do_one_initcall(initcall_t fn) @@ -798,8 +785,10 @@ static void run_init_process(char *init_filename) /* This is a non __init function. Force it to be noinline otherwise gcc * makes it inline to init() and it becomes part of init.text section */ -static int noinline init_post(void) +static noinline int init_post(void) { + /* need to finish all async __init code before freeing the memory */ + async_synchronize_full(); free_initmem(); unlock_kernel(); mark_rodata_ro(); |