diff options
36 files changed, 1328 insertions, 254 deletions
diff --git a/Documentation/cpu-freq/cpufreq-stats.txt b/Documentation/cpu-freq/cpufreq-stats.txt new file mode 100644 index 00000000000..e2d1e760b4b --- /dev/null +++ b/Documentation/cpu-freq/cpufreq-stats.txt @@ -0,0 +1,128 @@ + + CPU frequency and voltage scaling statictics in the Linux(TM) kernel + + + L i n u x c p u f r e q - s t a t s d r i v e r + + - information for users - + + + Venkatesh Pallipadi <venkatesh.pallipadi@intel.com> + +Contents +1. Introduction +2. Statistics Provided (with example) +3. Configuring cpufreq-stats + + +1. Introduction + +cpufreq-stats is a driver that provices CPU frequency statistics for each CPU. +This statistics is provided in /sysfs as a bunch of read_only interfaces. This +interface (when configured) will appear in a seperate directory under cpufreq +in /sysfs (<sysfs root>/devices/system/cpu/cpuX/cpufreq/stats/) for each CPU. +Various statistics will form read_only files under this directory. + +This driver is designed to be independent of any particular cpufreq_driver +that may be running on your CPU. So, it will work with any cpufreq_driver. + + +2. Statistics Provided (with example) + +cpufreq stats provides following statistics (explained in detail below). +- time_in_state +- total_trans +- trans_table + +All the statistics will be from the time the stats driver has been inserted +to the time when a read of a particular statistic is done. Obviously, stats +driver will not have any information about the the frequcny transitions before +the stats driver insertion. + +-------------------------------------------------------------------------------- +<mysystem>:/sys/devices/system/cpu/cpu0/cpufreq/stats # ls -l +total 0 +drwxr-xr-x 2 root root 0 May 14 16:06 . +drwxr-xr-x 3 root root 0 May 14 15:58 .. +-r--r--r-- 1 root root 4096 May 14 16:06 time_in_state +-r--r--r-- 1 root root 4096 May 14 16:06 total_trans +-r--r--r-- 1 root root 4096 May 14 16:06 trans_table +-------------------------------------------------------------------------------- + +- time_in_state +This gives the amount of time spent in each of the frequencies supported by +this CPU. The cat output will have "<frequency> <time>" pair in each line, which +will mean this CPU spent <time> usertime units of time at <frequency>. Output +will have one line for each of the supported freuencies. usertime units here +is 10mS (similar to other time exported in /proc). + +-------------------------------------------------------------------------------- +<mysystem>:/sys/devices/system/cpu/cpu0/cpufreq/stats # cat time_in_state +3600000 2089 +3400000 136 +3200000 34 +3000000 67 +2800000 172488 +-------------------------------------------------------------------------------- + + +- total_trans +This gives the total number of frequency transitions on this CPU. The cat +output will have a single count which is the total number of frequency +transitions. + +-------------------------------------------------------------------------------- +<mysystem>:/sys/devices/system/cpu/cpu0/cpufreq/stats # cat total_trans +20 +-------------------------------------------------------------------------------- + +- trans_table +This will give a fine grained information about all the CPU frequency +transitions. The cat output here is a two dimensional matrix, where an entry +<i,j> (row i, column j) represents the count of number of transitions from +Freq_i to Freq_j. Freq_i is in descending order with increasing rows and +Freq_j is in descending order with increasing columns. The output here also +contains the actual freq values for each row and column for better readability. + +-------------------------------------------------------------------------------- +<mysystem>:/sys/devices/system/cpu/cpu0/cpufreq/stats # cat trans_table + From : To + : 3600000 3400000 3200000 3000000 2800000 + 3600000: 0 5 0 0 0 + 3400000: 4 0 2 0 0 + 3200000: 0 1 0 2 0 + 3000000: 0 0 1 0 3 + 2800000: 0 0 0 2 0 +-------------------------------------------------------------------------------- + + +3. Configuring cpufreq-stats + +To configure cpufreq-stats in your kernel +Config Main Menu + Power management options (ACPI, APM) ---> + CPU Frequency scaling ---> + [*] CPU Frequency scaling + <*> CPU frequency translation statistics + [*] CPU frequency translation statistics details + + +"CPU Frequency scaling" (CONFIG_CPU_FREQ) should be enabled to configure +cpufreq-stats. + +"CPU frequency translation statistics" (CONFIG_CPU_FREQ_STAT) provides the +basic statistics which includes time_in_state and total_trans. + +"CPU frequency translation statistics details" (CONFIG_CPU_FREQ_STAT_DETAILS) +provides fine grained cpufreq stats by trans_table. The reason for having a +seperate config option for trans_table is: +- trans_table goes against the traditional /sysfs rule of one value per + interface. It provides a whole bunch of value in a 2 dimensional matrix + form. + +Once these two options are enabled and your CPU supports cpufrequency, you +will be able to see the CPU frequency statistics in /sysfs. + + + + diff --git a/arch/h8300/kernel/process.c b/arch/h8300/kernel/process.c index 134aec1c6d1..b5f83e9f04d 100644 --- a/arch/h8300/kernel/process.c +++ b/arch/h8300/kernel/process.c @@ -54,7 +54,7 @@ asmlinkage void ret_from_fork(void); void default_idle(void) { while(1) { - if (need_resched()) { + if (!need_resched()) { local_irq_enable(); __asm__("sleep"); local_irq_disable(); diff --git a/arch/i386/kernel/cpu/cpufreq/Kconfig b/arch/i386/kernel/cpu/cpufreq/Kconfig index f25ffd74235..0f1eb507233 100644 --- a/arch/i386/kernel/cpu/cpufreq/Kconfig +++ b/arch/i386/kernel/cpu/cpufreq/Kconfig @@ -23,7 +23,7 @@ config X86_ACPI_CPUFREQ If in doubt, say N. config ELAN_CPUFREQ - tristate "AMD Elan" + tristate "AMD Elan SC400 and SC410" select CPU_FREQ_TABLE depends on X86_ELAN ---help--- @@ -38,6 +38,18 @@ config ELAN_CPUFREQ If in doubt, say N. +config SC520_CPUFREQ + tristate "AMD Elan SC520" + select CPU_FREQ_TABLE + depends on X86_ELAN + ---help--- + This adds the CPUFreq driver for AMD Elan SC520 processor. + + For details, take a look at <file:Documentation/cpu-freq/>. + + If in doubt, say N. + + config X86_POWERNOW_K6 tristate "AMD Mobile K6-2/K6-3 PowerNow!" select CPU_FREQ_TABLE diff --git a/arch/i386/kernel/cpu/cpufreq/Makefile b/arch/i386/kernel/cpu/cpufreq/Makefile index a922e97aeed..2e894f1c891 100644 --- a/arch/i386/kernel/cpu/cpufreq/Makefile +++ b/arch/i386/kernel/cpu/cpufreq/Makefile @@ -3,6 +3,7 @@ obj-$(CONFIG_X86_POWERNOW_K7) += powernow-k7.o obj-$(CONFIG_X86_POWERNOW_K8) += powernow-k8.o obj-$(CONFIG_X86_LONGHAUL) += longhaul.o obj-$(CONFIG_ELAN_CPUFREQ) += elanfreq.o +obj-$(CONFIG_SC520_CPUFREQ) += sc520_freq.o obj-$(CONFIG_X86_LONGRUN) += longrun.o obj-$(CONFIG_X86_GX_SUSPMOD) += gx-suspmod.o obj-$(CONFIG_X86_SPEEDSTEP_ICH) += speedstep-ich.o diff --git a/arch/i386/kernel/cpu/cpufreq/longhaul.c b/arch/i386/kernel/cpu/cpufreq/longhaul.c index ab0f9f5aac1..04e3563da4f 100644 --- a/arch/i386/kernel/cpu/cpufreq/longhaul.c +++ b/arch/i386/kernel/cpu/cpufreq/longhaul.c @@ -29,6 +29,7 @@ #include <linux/cpufreq.h> #include <linux/slab.h> #include <linux/string.h> +#include <linux/pci.h> #include <asm/msr.h> #include <asm/timex.h> @@ -119,7 +120,13 @@ static int longhaul_get_cpu_mult(void) static void do_powersaver(union msr_longhaul *longhaul, unsigned int clock_ratio_index) { + struct pci_dev *dev; + unsigned long flags; + unsigned int tmp_mask; int version; + int i; + u16 pci_cmd; + u16 cmd_state[64]; switch (cpu_model) { case CPU_EZRA_T: @@ -137,17 +144,58 @@ static void do_powersaver(union msr_longhaul *longhaul, longhaul->bits.SoftBusRatio4 = (clock_ratio_index & 0x10) >> 4; longhaul->bits.EnableSoftBusRatio = 1; longhaul->bits.RevisionKey = 0; - local_irq_disable(); - wrmsrl(MSR_VIA_LONGHAUL, longhaul->val); + + preempt_disable(); + local_irq_save(flags); + + /* + * get current pci bus master state for all devices + * and clear bus master bit + */ + dev = NULL; + i = 0; + do { + dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev); + if (dev != NULL) { + pci_read_config_word(dev, PCI_COMMAND, &pci_cmd); + cmd_state[i++] = pci_cmd; + pci_cmd &= ~PCI_COMMAND_MASTER; + pci_write_config_word(dev, PCI_COMMAND, pci_cmd); + } + } while (dev != NULL); + + tmp_mask=inb(0x21); /* works on C3. save mask. */ + outb(0xFE,0x21); /* TMR0 only */ + outb(0xFF,0x80); /* delay */ + local_irq_enable(); + + __hlt(); + wrmsrl(MSR_VIA_LONGHAUL, longhaul->val); __hlt(); + local_irq_disable(); + + outb(tmp_mask,0x21); /* restore mask */ + + /* restore pci bus master state for all devices */ + dev = NULL; + i = 0; + do { + dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev); + if (dev != NULL) { + pci_cmd = cmd_state[i++]; + pci_write_config_byte(dev, PCI_COMMAND, pci_cmd); + } + } while (dev != NULL); + local_irq_restore(flags); + preempt_enable(); + + /* disable bus ratio bit */ rdmsrl(MSR_VIA_LONGHAUL, longhaul->val); longhaul->bits.EnableSoftBusRatio = 0; longhaul->bits.RevisionKey = version; - local_irq_disable(); wrmsrl(MSR_VIA_LONGHAUL, longhaul->val); - local_irq_enable(); } /** @@ -578,7 +626,7 @@ static int __init longhaul_cpu_init(struct cpufreq_policy *policy) longhaul_setup_voltagescaling(); policy->governor = CPUFREQ_DEFAULT_GOVERNOR; - policy->cpuinfo.transition_latency = CPUFREQ_ETERNAL; + policy->cpuinfo.transition_latency = 200000; /* nsec */ policy->cur = calc_speed(longhaul_get_cpu_mult()); ret = cpufreq_frequency_table_cpuinfo(policy, longhaul_table); diff --git a/arch/i386/kernel/cpu/cpufreq/powernow-k7.c b/arch/i386/kernel/cpu/cpufreq/powernow-k7.c index 913f652623d..5c530064eb7 100644 --- a/arch/i386/kernel/cpu/cpufreq/powernow-k7.c +++ b/arch/i386/kernel/cpu/cpufreq/powernow-k7.c @@ -23,6 +23,7 @@ #include <linux/dmi.h> #include <asm/msr.h> +#include <asm/timer.h> #include <asm/timex.h> #include <asm/io.h> #include <asm/system.h> @@ -586,13 +587,17 @@ static int __init powernow_cpu_init (struct cpufreq_policy *policy) rdmsrl (MSR_K7_FID_VID_STATUS, fidvidstatus.val); - /* A K7 with powernow technology is set to max frequency by BIOS */ - fsb = (10 * cpu_khz) / fid_codes[fidvidstatus.bits.MFID]; + /* recalibrate cpu_khz */ + result = recalibrate_cpu_khz(); + if (result) + return result; + + fsb = (10 * cpu_khz) / fid_codes[fidvidstatus.bits.CFID]; if (!fsb) { printk(KERN_WARNING PFX "can not determine bus frequency\n"); return -EINVAL; } - dprintk("FSB: %3d.%03d MHz\n", fsb/1000, fsb%1000); + dprintk("FSB: %3dMHz\n", fsb/1000); if (dmi_check_system(powernow_dmi_table) || acpi_force) { printk (KERN_INFO PFX "PSB/PST known to be broken. Trying ACPI instead\n"); diff --git a/arch/i386/kernel/cpu/cpufreq/powernow-k8.c b/arch/i386/kernel/cpu/cpufreq/powernow-k8.c index a65ff7e32e5..10cc096c0ad 100644 --- a/arch/i386/kernel/cpu/cpufreq/powernow-k8.c +++ b/arch/i386/kernel/cpu/cpufreq/powernow-k8.c @@ -4,7 +4,7 @@ * GNU general public license version 2. See "COPYING" or * http://www.gnu.org/licenses/gpl.html * - * Support : paul.devriendt@amd.com + * Support : mark.langsdorf@amd.com * * Based on the powernow-k7.c module written by Dave Jones. * (C) 2003 Dave Jones <davej@codemonkey.org.uk> on behalf of SuSE Labs @@ -15,12 +15,13 @@ * * Valuable input gratefully received from Dave Jones, Pavel Machek, * Dominik Brodowski, and others. + * Originally developed by Paul Devriendt. * Processor information obtained from Chapter 9 (Power and Thermal Management) * of the "BIOS and Kernel Developer's Guide for the AMD Athlon 64 and AMD * Opteron Processors" available for download from www.amd.com * * Tables for specific CPUs can be infrerred from - * http://www.amd.com/us-en/assets/content_type/white_papers_and_tech_docs/30430.pdf + * http://www.amd.com/us-en/assets/content_type/white_papers_and_tech_docs/30430.pdf */ #include <linux/kernel.h> @@ -30,6 +31,7 @@ #include <linux/cpufreq.h> #include <linux/slab.h> #include <linux/string.h> +#include <linux/cpumask.h> #include <asm/msr.h> #include <asm/io.h> @@ -42,7 +44,7 @@ #define PFX "powernow-k8: " #define BFX PFX "BIOS error: " -#define VERSION "version 1.00.09e" +#define VERSION "version 1.40.2" #include "powernow-k8.h" /* serialize freq changes */ @@ -50,6 +52,10 @@ static DECLARE_MUTEX(fidvid_sem); static struct powernow_k8_data *powernow_data[NR_CPUS]; +#ifndef CONFIG_SMP +static cpumask_t cpu_core_map[1]; +#endif + /* Return a frequency in MHz, given an input fid */ static u32 find_freq_from_fid(u32 fid) { @@ -274,11 +280,18 @@ static int core_voltage_pre_transition(struct powernow_k8_data *data, u32 reqvid { u32 rvosteps = data->rvo; u32 savefid = data->currfid; + u32 maxvid, lo; dprintk("ph1 (cpu%d): start, currfid 0x%x, currvid 0x%x, reqvid 0x%x, rvo 0x%x\n", smp_processor_id(), data->currfid, data->currvid, reqvid, data->rvo); + rdmsr(MSR_FIDVID_STATUS, lo, maxvid); + maxvid = 0x1f & (maxvid >> 16); + dprintk("ph1 maxvid=0x%x\n", maxvid); + if (reqvid < maxvid) /* lower numbers are higher voltages */ + reqvid = maxvid; + while (data->currvid > reqvid) { dprintk("ph1: curr 0x%x, req vid 0x%x\n", data->currvid, reqvid); @@ -286,8 +299,8 @@ static int core_voltage_pre_transition(struct powernow_k8_data *data, u32 reqvid return 1; } - while ((rvosteps > 0) && ((data->rvo + data->currvid) > reqvid)) { - if (data->currvid == 0) { + while ((rvosteps > 0) && ((data->rvo + data->currvid) > reqvid)) { + if (data->currvid == maxvid) { rvosteps = 0; } else { dprintk("ph1: changing vid for rvo, req 0x%x\n", @@ -671,7 +684,7 @@ static int find_psb_table(struct powernow_k8_data *data) * BIOS and Kernel Developer's Guide, which is available on * www.amd.com */ - printk(KERN_ERR PFX "BIOS error - no PSB\n"); + printk(KERN_INFO PFX "BIOS error - no PSB or ACPI _PSS objects\n"); return -ENODEV; } @@ -695,7 +708,7 @@ static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data) struct cpufreq_frequency_table *powernow_table; if (acpi_processor_register_performance(&data->acpi_data, data->cpu)) { - dprintk("register performance failed\n"); + dprintk("register performance failed: bad ACPI data\n"); return -EIO; } @@ -746,22 +759,23 @@ static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data) continue; } - if (fid < HI_FID_TABLE_BOTTOM) { - if (cntlofreq) { - /* if both entries are the same, ignore this - * one... - */ - if ((powernow_table[i].frequency != powernow_table[cntlofreq].frequency) || - (powernow_table[i].index != powernow_table[cntlofreq].index)) { - printk(KERN_ERR PFX "Too many lo freq table entries\n"); - goto err_out_mem; - } - - dprintk("double low frequency table entry, ignoring it.\n"); - powernow_table[i].frequency = CPUFREQ_ENTRY_INVALID; - continue; - } else - cntlofreq = i; + /* verify only 1 entry from the lo frequency table */ + if (fid < HI_FID_TABLE_BOTTOM) { + if (cntlofreq) { + /* if both entries are the same, ignore this + * one... + */ + if ((powernow_table[i].frequency != powernow_table[cntlofreq].frequency) || + (powernow_table[i].index != powernow_table[cntlofreq].index)) { + printk(KERN_ERR PFX "Too many lo freq table entries\n"); + goto err_out_mem; + } + + dprintk("double low frequency table entry, ignoring it.\n"); + powernow_table[i].frequency = CPUFREQ_ENTRY_INVALID; + continue; + } else + cntlofreq = i; } if (powernow_table[i].frequency != (data->acpi_data.states[i].core_frequency * 1000)) { @@ -816,7 +830,7 @@ static int transition_frequency(struct powernow_k8_data *data, unsigned int inde { u32 fid; u32 vid; - int res; + int res, i; struct cpufreq_freqs freqs; dprintk("cpu %d transition to index %u\n", smp_processor_id(), index); @@ -841,7 +855,8 @@ static int transition_frequency(struct powernow_k8_data *data, unsigned int inde } if ((fid < HI_FID_TABLE_BOTTOM) && (data->currfid < HI_FID_TABLE_BOTTOM)) { - printk("ignoring illegal change in lo freq table-%x to 0x%x\n", + printk(KERN_ERR PFX + "ignoring illegal change in lo freq table-%x to 0x%x\n", data->currfid, fid); return 1; } @@ -850,18 +865,20 @@ static int transition_frequency(struct powernow_k8_data *data, unsigned int inde smp_processor_id(), fid, vid); freqs.cpu = data->cpu; - freqs.old = find_khz_freq_from_fid(data->currfid); freqs.new = find_khz_freq_from_fid(fid); - cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); + for_each_cpu_mask(i, cpu_core_map[data->cpu]) { + freqs.cpu = i; + cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); + } - down(&fidvid_sem); res = transition_fid_vid(data, fid, vid); - up(&fidvid_sem); freqs.new = find_khz_freq_from_fid(data->currfid); - cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); - + for_each_cpu_mask(i, cpu_core_map[data->cpu]) { + freqs.cpu = i; + cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); + } return res; } @@ -874,6 +891,7 @@ static int powernowk8_target(struct cpufreq_policy *pol, unsigned targfreq, unsi u32 checkvid = data->currvid; unsigned int newstate; int ret = -EIO; + int i; /* only run on specific CPU from here on */ oldmask = current->cpus_allowed; @@ -902,22 +920,41 @@ static int powernowk8_target(struct cpufreq_policy *pol, unsigned targfreq, unsi data->currfid, data->currvid); if ((checkvid != data->currvid) || (checkfid != data->currfid)) { - printk(KERN_ERR PFX - "error - out of sync, fid 0x%x 0x%x, vid 0x%x 0x%x\n", - checkfid, data->currfid, checkvid, data->currvid); + printk(KERN_INFO PFX + "error - out of sync, fix 0x%x 0x%x, vid 0x%x 0x%x\n", + checkfid, data->currfid, checkvid, data->currvid); } if (cpufreq_frequency_table_target(pol, data->powernow_table, targfreq, relation, &newstate)) goto err_out; + down(&fidvid_sem); + + for_each_cpu_mask(i, cpu_core_map[pol->cpu]) { + /* make sure the sibling is initialized */ + if (!powernow_data[i]) { + ret = 0; + up(&fidvid_sem); + goto err_out; + } + } + powernow_k8_acpi_pst_values(data, newstate); if (transition_frequency(data, newstate)) { printk(KERN_ERR PFX "transition frequency failed\n"); ret = 1; + up(&fidvid_sem); goto err_out; } + /* Update all the fid/vids of our siblings */ + for_each_cpu_mask(i, cpu_core_map[pol->cpu]) { + powernow_data[i]->currvid = data->currvid; + powernow_data[i]->currfid = data->currfid; + } + up(&fidvid_sem); + pol->cur = find_khz_freq_from_fid(data->currfid); ret = 0; @@ -962,7 +999,7 @@ static int __init powernowk8_cpu_init(struct cpufreq_policy *pol) */ if ((num_online_cpus() != 1) || (num_possible_cpus() != 1)) { - printk(KERN_INFO PFX "MP systems not supported by PSB BIOS structure\n"); + printk(KERN_ERR PFX "MP systems not supported by PSB BIOS structure\n"); kfree(data); return -ENODEV; } @@ -1003,6 +1040,7 @@ static int __init powernowk8_cpu_init(struct cpufreq_policy *pol) schedule(); pol->governor = CPUFREQ_DEFAULT_GOVERNOR; + pol->cpus = cpu_core_map[pol->cpu]; /* Take a crude guess here. * That guess was in microseconds, so multiply with 1000 */ @@ -1069,7 +1107,7 @@ static unsigned int powernowk8_get (unsigned int cpu) return 0; } preempt_disable(); - + if (query_current_values_with_pending_wait(data)) goto out; @@ -1127,9 +1165,10 @@ static void __exit powernowk8_exit(void) cpufreq_unregister_driver(&cpufreq_amd64_driver); } -MODULE_AUTHOR("Paul Devriendt <paul.devriendt@amd.com>"); +MODULE_AUTHOR("Paul Devriendt <paul.devriendt@amd.com> and Mark Langsdorf <mark.langsdorf@amd.com."); MODULE_DESCRIPTION("AMD Athlon 64 and Opteron processor frequency driver."); MODULE_LICENSE("GPL"); late_initcall(powernowk8_init); module_exit(powernowk8_exit); + diff --git a/arch/i386/kernel/cpu/cpufreq/powernow-k8.h b/arch/i386/kernel/cpu/cpufreq/powernow-k8.h index 63ebc8470f5..9ed5bf221cb 100644 --- a/arch/i386/kernel/cpu/cpufreq/powernow-k8.h +++ b/arch/i386/kernel/cpu/cpufreq/powernow-k8.h @@ -174,3 +174,18 @@ static int core_voltage_post_transition(struct powernow_k8_data *data, u32 reqvi static int core_frequency_transition(struct powernow_k8_data *data, u32 reqfid); static void powernow_k8_acpi_pst_values(struct powernow_k8_data *data, unsigned int index); + +#ifndef for_each_cpu_mask +#define for_each_cpu_mask(i,mask) for (i=0;i<1;i++) +#endif + +#ifdef CONFIG_SMP +static inline void define_siblings(int cpu, cpumask_t cpu_sharedcore_mask[]) +{ +} +#else +static inline void define_siblings(int cpu, cpumask_t cpu_sharedcore_mask[]) +{ + cpu_set(0, cpu_sharedcore_mask[0]); +} +#endif diff --git a/arch/i386/kernel/cpu/cpufreq/sc520_freq.c b/arch/i386/kernel/cpu/cpufreq/sc520_freq.c new file mode 100644 index 00000000000..ef457d50f4a --- /dev/null +++ b/arch/i386/kernel/cpu/cpufreq/sc520_freq.c @@ -0,0 +1,186 @@ +/* + * sc520_freq.c: cpufreq driver for the AMD Elan sc520 + * + * Copyright (C) 2005 Sean Young <sean@mess.org> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * Based on elanfreq.c + * + * 2005-03-30: - initial revision + */ + +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/init.h> + +#include <linux/delay.h> +#include <linux/cpufreq.h> + +#include <asm/msr.h> +#include <asm/timex.h> +#include <asm/io.h> + +#define MMCR_BASE 0xfffef000 /* The default base address */ +#define OFFS_CPUCTL 0x2 /* CPU Control Register */ + +static __u8 __iomem *cpuctl; + +#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "sc520_freq", msg) + +static struct cpufreq_frequency_table sc520_freq_table[] = { + {0x01, 100000}, + {0x02, 133000}, + {0, CPUFREQ_TABLE_END}, +}; + +static unsigned int sc520_freq_get_cpu_frequency(unsigned int cpu) +{ + u8 clockspeed_reg = *cpuctl; + + switch (clockspeed_reg & 0x03) { + default: + printk(KERN_ERR "sc520_freq: error: cpuctl register has unexpected value %02x\n", clockspeed_reg); + case 0x01: + return 100000; + case 0x02: + return 133000; + } +} + +static void sc520_freq_set_cpu_state (unsigned int state) +{ + + struct cpufreq_freqs freqs; + u8 clockspeed_reg; + + freqs.old = sc520_freq_get_cpu_frequency(0); + freqs.new = sc520_freq_table[state].frequency; + freqs.cpu = 0; /* AMD Elan is UP */ + + cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); + + dprintk("attempting to set frequency to %i kHz\n", + sc520_freq_table[state].frequency); + + local_irq_disable(); + + clockspeed_reg = *cpuctl & ~0x03; + *cpuctl = clockspeed_reg | sc520_freq_table[state].index; + + local_irq_enable(); + + cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); +}; + +static int sc520_freq_verify (struct cpufreq_policy *policy) +{ + return cpufreq_frequency_table_verify(policy, &sc520_freq_table[0]); +} + +static int sc520_freq_target (struct cpufreq_policy *policy, + unsigned int target_freq, + unsigned int relation) +{ + unsigned int newstate = 0; + + if (cpufreq_frequency_table_target(policy, sc520_freq_table, target_freq, relation, &newstate)) + return -EINVAL; + + sc520_freq_set_cpu_state(newstate); + + return 0; +} + + +/* + * Module init and exit code + */ + +static int sc520_freq_cpu_init(struct cpufreq_policy *policy) +{ + struct cpuinfo_x86 *c = cpu_data; + int result; + + /* capability check */ + if (c->x86_vendor != X86_VENDOR_AMD || + c->x86 != 4 || c->x86_model != 9) + return -ENODEV; + + /* cpuinfo and default policy values */ + policy->governor = CPUFREQ_DEFAULT_GOVERNOR; + policy->cpuinfo.transition_latency = 1000000; /* 1ms */ + policy->cur = sc520_freq_get_cpu_frequency(0); + + result = cpufreq_frequency_table_cpuinfo(policy, sc520_freq_table); + if (result) + return (result); + + cpufreq_frequency_table_get_attr(sc520_freq_table, policy->cpu); + + return 0; +} + + +static int sc520_freq_cpu_exit(struct cpufreq_policy *policy) +{ + cpufreq_frequency_table_put_attr(policy->cpu); + return 0; +} + + +static struct freq_attr* sc520_freq_attr[] = { + &cpufreq_freq_attr_scaling_available_freqs, + NULL, +}; + + +static struct cpufreq_driver sc520_freq_driver = { + .get = sc520_freq_get_cpu_frequency, + .verify = sc520_freq_verify, + .target = sc520_freq_target, + .init = sc520_freq_cpu_init, + .exit = sc520_freq_cpu_exit, + .name = "sc520_freq", + .owner = THIS_MODULE, + .attr = sc520_freq_attr, +}; + + +static int __init sc520_freq_init(void) +{ + struct cpuinfo_x86 *c = cpu_data; + + /* Test if we have the right hardware */ + if(c->x86_vendor != X86_VENDOR_AMD || + c->x86 != 4 || c->x86_model != 9) { + dprintk("no Elan SC520 processor found!\n"); + return -ENODEV; + } + cpuctl = ioremap((unsigned long)(MMCR_BASE + OFFS_CPUCTL), 1); + if(!cpuctl) { + printk(KERN_ERR "sc520_freq: error: failed to remap memory\n"); + return -ENOMEM; + } + + return cpufreq_register_driver(&sc520_freq_driver); +} + + +static void __exit sc520_freq_exit(void) +{ + cpufreq_unregister_driver(&sc520_freq_driver); + iounmap(cpuctl); +} + + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Sean Young <sean@mess.org>"); +MODULE_DESCRIPTION("cpufreq driver for AMD's Elan sc520 CPU"); + +module_init(sc520_freq_init); +module_exit(sc520_freq_exit); + diff --git a/arch/i386/kernel/cpu/cpufreq/speedstep-centrino.c b/arch/i386/kernel/cpu/cpufreq/speedstep-centrino.c index 07d5612dc00..7dcbf70fc16 100644 --- a/arch/i386/kernel/cpu/cpufreq/speedstep-centrino.c +++ b/arch/i386/kernel/cpu/cpufreq/speedstep-centrino.c @@ -54,6 +54,8 @@ enum { CPU_DOTHAN_A1, CPU_DOTHAN_A2, CPU_DOTHAN_B0, + CPU_MP4HT_D0, + CPU_MP4HT_E0, }; static const struct cpu_id cpu_ids[] = { @@ -61,6 +63,8 @@ static const struct cpu_id cpu_ids[] = { [CPU_DOTHAN_A1] = { 6, 13, 1 }, [CPU_DOTHAN_A2] = { 6, 13, 2 }, [CPU_DOTHAN_B0] = { 6, 13, 6 }, + [CPU_MP4HT_D0] = {15, 3, 4 }, + [CPU_MP4HT_E0] = {15, 4, 1 }, }; #define N_IDS (sizeof(cpu_ids)/sizeof(cpu_ids[0])) @@ -226,6 +230,8 @@ static struct cpu_model models[] = { &cpu_ids[CPU_DOTHAN_A1], NULL, 0, NULL }, { &cpu_ids[CPU_DOTHAN_A2], NULL, 0, NULL }, { &cpu_ids[CPU_DOTHAN_B0], NULL, 0, NULL }, + { &cpu_ids[CPU_MP4HT_D0], NULL, 0, NULL }, + { &cpu_ids[CPU_MP4HT_E0], NULL, 0, NULL }, { NULL, } }; diff --git a/arch/i386/kernel/cpu/cpufreq/speedstep-lib.c b/arch/i386/kernel/cpu/cpufreq/speedstep-lib.c index 8ba430a9c3a..d368b3f5fce 100644 --- a/arch/i386/kernel/cpu/cpufreq/speedstep-lib.c +++ b/arch/i386/kernel/cpu/cpufreq/speedstep-lib.c @@ -336,7 +336,7 @@ unsigned int speedstep_get_freqs(unsigned int processor, if (!prev_speed) return -EIO; - dprintk("previous seped is %u\n", prev_speed); + dprintk("previous speed is %u\n", prev_speed); local_irq_save(flags); @@ -348,7 +348,7 @@ unsigned int speedstep_get_freqs(unsigned int processor, goto out; } - dprintk("low seped is %u\n", *low_speed); + dprintk("low speed is %u\n", *low_speed); /* switch to high state */ set_state(SPEEDSTEP_HIGH); @@ -358,7 +358,7 @@ unsigned int speedstep_get_freqs(unsigned int processor, goto out; } - dprintk("high seped is %u\n", *high_speed); + dprintk("high speed is %u\n", *high_speed); if (*low_speed == *high_speed) { ret = -ENODEV; diff --git a/arch/i386/kernel/cpu/cpufreq/speedstep-smi.c b/arch/i386/kernel/cpu/cpufreq/speedstep-smi.c index 79440b3f087..b25fb6b635a 100644 --- a/arch/i386/kernel/cpu/cpufreq/speedstep-smi.c +++ b/arch/i386/kernel/cpu/cpufreq/speedstep-smi.c @@ -357,6 +357,9 @@ static int __init speedstep_init(void) case SPEEDSTEP_PROCESSOR_PIII_C: case SPEEDSTEP_PROCESSOR_PIII_C_EARLY: break; + case SPEEDSTEP_PROCESSOR_P4M: + printk(KERN_INFO "speedstep-smi: you're trying to use this cpufreq driver on a Pentium 4-based CPU. Most likely it will not work.\n"); + break; default: speedstep_processor = 0; } diff --git a/arch/i386/kernel/setup.c b/arch/i386/kernel/setup.c index 945ec73163c..2bfbddebdbf 100644 --- a/arch/i386/kernel/setup.c +++ b/arch/i386/kernel/setup.c @@ -1502,11 +1502,13 @@ void __init setup_arch(char **cmdline_p) if (efi_enabled) efi_map_memmap(); +#ifdef CONFIG_ACPI_BOOT /* * Parse the ACPI tables for possible boot-time SMP configuration. */ acpi_boot_table_init(); acpi_boot_init(); +#endif #ifdef CONFIG_X86_LOCAL_APIC if (smp_found_config) diff --git a/arch/i386/kernel/timers/common.c b/arch/i386/kernel/timers/common.c index f7f90005e22..8e201219f52 100644 --- a/arch/i386/kernel/timers/common.c +++ b/arch/i386/kernel/timers/common.c @@ -6,6 +6,7 @@ #include <linux/timex.h> #include <linux/errno.h> #include <linux/jiffies.h> +#include <linux/module.h> #include <asm/io.h> #include <asm/timer.h> @@ -24,7 +25,7 @@ #define CALIBRATE_TIME (5 * 1000020/HZ) -unsigned long __init calibrate_tsc(void) +unsigned long calibrate_tsc(void) { mach_prepare_counter(); @@ -139,7 +140,7 @@ bad_calibration: #endif /* calculate cpu_khz */ -void __init init_cpu_khz(void) +void init_cpu_khz(void) { if (cpu_has_tsc) { unsigned long tsc_quotient = calibrate_tsc(); @@ -158,3 +159,4 @@ void __init init_cpu_khz(void) } } } + diff --git a/arch/i386/kernel/timers/timer_tsc.c b/arch/i386/kernel/timers/timer_tsc.c index 7926d967be0..180444d8782 100644 --- a/arch/i386/kernel/timers/timer_tsc.c +++ b/arch/i386/kernel/timers/timer_tsc.c @@ -320,6 +320,26 @@ core_initcall(cpufreq_tsc); static inline void cpufreq_delayed_get(void) { return; } #endif +int recalibrate_cpu_khz(void) +{ +#ifndef CONFIG_SMP + unsigned long cpu_khz_old = cpu_khz; + + if (cpu_has_tsc) { + init_cpu_khz(); + cpu_data[0].loops_per_jiffy = + cpufreq_scale(cpu_data[0].loops_per_jiffy, + cpu_khz_old, + cpu_khz); + return 0; + } else + return -ENODEV; +#else + return -ENODEV; +#endif +} +EXPORT_SYMBOL(recalibrate_cpu_khz); + static void mark_offset_tsc(void) { unsigned long lost,delay; diff --git a/arch/ppc/syslib/prom_init.c b/arch/ppc/syslib/prom_init.c index 2cee87137f2..7f15136830f 100644 --- a/arch/ppc/syslib/prom_init.c +++ b/arch/ppc/syslib/prom_init.c @@ -626,8 +626,18 @@ inspect_node(phandle node, struct device_node *dad, l = call_prom("package-to-path", 3, 1, node, mem_start, mem_end - mem_start); if (l >= 0) { + char *p, *ep; + np->full_name = PTRUNRELOC((char *) mem_start); *(char *)(mem_start + l) = 0; + /* Fixup an Apple bug where they have bogus \0 chars in the + * middle of the path in some properties + */ + for (p = (char *)mem_start, ep = p + l; p < ep; p++) + if ((*p) == '\0') { + memmove(p, p+1, ep - p); + ep--; + } mem_start = ALIGNUL(mem_start + l + 1); } diff --git a/arch/ppc64/kernel/pSeries_reconfig.c b/arch/ppc64/kernel/pSeries_reconfig.c index cb5443f2e49..dc2a69d412a 100644 --- a/arch/ppc64/kernel/pSeries_reconfig.c +++ b/arch/ppc64/kernel/pSeries_reconfig.c @@ -47,14 +47,6 @@ static void remove_node_proc_entries(struct device_node *np) remove_proc_entry(pp->name, np->pde); pp = pp->next; } - - /* Assuming that symlinks have the same parent directory as - * np->pde. - */ - if (np->name_link) - remove_proc_entry(np->name_link->name, parent->pde); - if (np->addr_link) - remove_proc_entry(np->addr_link->name, parent->pde); if (np->pde) remove_proc_entry(np->pde->name, parent->pde); } diff --git a/arch/ppc64/kernel/prom_init.c b/arch/ppc64/kernel/prom_init.c index bc53967a864..3de950de367 100644 --- a/arch/ppc64/kernel/prom_init.c +++ b/arch/ppc64/kernel/prom_init.c @@ -1566,7 +1566,7 @@ static void __init scan_dt_build_struct(phandle node, unsigned long *mem_start, { int l, align; phandle child; - char *namep, *prev_name, *sstart; + char *namep, *prev_name, *sstart, *p, *ep; unsigned long soff; unsigned char *valp; unsigned long offset = reloc_offset(); @@ -1588,6 +1588,14 @@ static void __init scan_dt_build_struct(phandle node, unsigned long *mem_start, call_prom("package-to-path", 3, 1, node, namep, l); } namep[l] = '\0'; + /* Fixup an Apple bug where they have bogus \0 chars in the + * middle of the path in some properties + */ + for (p = namep, ep = namep + l; p < ep; p++) + if (*p == '\0') { + memmove(p, p+1, ep - p); + ep--; l--; + } *mem_start = _ALIGN(((unsigned long) namep) + strlen(namep) + 1, 4); } diff --git a/arch/ppc64/kernel/time.c b/arch/ppc64/kernel/time.c index 3d54745108c..33364a7d2cd 100644 --- a/arch/ppc64/kernel/time.c +++ b/arch/ppc64/kernel/time.c @@ -325,9 +325,7 @@ int timer_interrupt(struct pt_regs * regs) irq_enter(); -#ifndef CONFIG_PPC_ISERIES profile_tick(CPU_PROFILING, regs); -#endif lpaca->lppaca.int_dword.fields.decr_int = 0; diff --git a/arch/x86_64/Kconfig b/arch/x86_64/Kconfig index 0f430d9d363..289f448ac89 100644 --- a/arch/x86_64/Kconfig +++ b/arch/x86_64/Kconfig @@ -305,6 +305,7 @@ config HPET_TIMER config X86_PM_TIMER bool "PM timer" + depends on ACPI default y help Support the ACPI PM timer for time keeping. This is slow, diff --git a/arch/x86_64/kernel/io_apic.c b/arch/x86_64/kernel/io_apic.c index 80e9b498c44..afd87e64d0a 100644 --- a/arch/x86_64/kernel/io_apic.c +++ b/arch/x86_64/kernel/io_apic.c @@ -37,6 +37,7 @@ #include <asm/desc.h> #include <asm/proto.h> #include <asm/mach_apic.h> +#include <asm/acpi.h> #define __apicdebuginit __init diff --git a/arch/x86_64/kernel/mpparse.c b/arch/x86_64/kernel/mpparse.c index f86d9db94bf..61a63be6b29 100644 --- a/arch/x86_64/kernel/mpparse.c +++ b/arch/x86_64/kernel/mpparse.c @@ -30,6 +30,7 @@ #include <asm/pgalloc.h> #include <asm/io_apic.h> #include <asm/proto.h> +#include <asm/acpi.h> /* Have we found an MP table */ int smp_found_config; diff --git a/arch/x86_64/kernel/time.c b/arch/x86_64/kernel/time.c index 735b6767c8e..fb8c809b4cd 100644 --- a/arch/x86_64/kernel/time.c +++ b/arch/x86_64/kernel/time.c @@ -27,7 +27,9 @@ #include <linux/bcd.h> #include <linux/kallsyms.h> #include <linux/acpi.h> +#ifdef CONFIG_ACPI #include <acpi/achware.h> /* for PM timer frequency */ +#endif #include <asm/8253pit.h> #include <asm/pgtable.h> #include <asm/vsyscall.h> diff --git a/drivers/cpufreq/Kconfig b/drivers/cpufreq/Kconfig index 95882bb1950..60c9be99c6d 100644 --- a/drivers/cpufreq/Kconfig +++ b/drivers/cpufreq/Kconfig @@ -46,6 +46,10 @@ config CPU_FREQ_STAT_DETAILS This will show detail CPU frequency translation table in sysfs file system +# Note that it is not currently possible to set the other governors (such as ondemand) +# as the default, since if they fail to initialise, cpufreq will be +# left in an undefined state. + choice prompt "Default CPUFreq governor" default CPU_FREQ_DEFAULT_GOV_USERSPACE if CPU_FREQ_SA1100 || CPU_FREQ_SA1110 @@ -115,4 +119,24 @@ config CPU_FREQ_GOV_ONDEMAND If in doubt, say N. +config CPU_FREQ_GOV_CONSERVATIVE + tristate "'conservative' cpufreq governor" + depends on CPU_FREQ + help + 'conservative' - this driver is rather similar to the 'ondemand' + governor both in its source code and its purpose, the difference is + its optimisation for better suitability in a battery powered + environment. The frequency is gracefully increased and decreased + rather than jumping to 100% when speed is required. + + If you have a desktop machine then you should really be considering + the 'ondemand' governor instead, however if you are using a laptop, + PDA or even an AMD64 based computer (due to the unacceptable + step-by-step latency issues between the minimum and maximum frequency + transitions in the CPU) you will probably want to use this governor. + + For details, take a look at linux/Documentation/cpu-freq. + + If in doubt, say N. + endif # CPU_FREQ diff --git a/drivers/cpufreq/Makefile b/drivers/cpufreq/Makefile index 67b16e5a41a..71fc3b4173f 100644 --- a/drivers/cpufreq/Makefile +++ b/drivers/cpufreq/Makefile @@ -8,6 +8,7 @@ obj-$(CONFIG_CPU_FREQ_GOV_PERFORMANCE) += cpufreq_performance.o obj-$(CONFIG_CPU_FREQ_GOV_POWERSAVE) += cpufreq_powersave.o obj-$(CONFIG_CPU_FREQ_GOV_USERSPACE) += cpufreq_userspace.o obj-$(CONFIG_CPU_FREQ_GOV_ONDEMAND) += cpufreq_ondemand.o +obj-$(CONFIG_CPU_FREQ_GOV_CONSERVATIVE) += cpufreq_conservative.o # CPUfreq cross-arch helpers obj-$(CONFIG_CPU_FREQ_TABLE) += freq_table.o diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 8e561313d09..03b5fb2ddcf 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -258,7 +258,7 @@ void cpufreq_notify_transition(struct cpufreq_freqs *freqs, unsigned int state) (likely(cpufreq_cpu_data[freqs->cpu]->cur)) && (unlikely(freqs->old != cpufreq_cpu_data[freqs->cpu]->cur))) { - printk(KERN_WARNING "Warning: CPU frequency is %u, " + dprintk(KERN_WARNING "Warning: CPU frequency is %u, " "cpufreq assumed %u kHz.\n", freqs->old, cpufreq_cpu_data[freqs->cpu]->cur); freqs->old = cpufreq_cpu_data[freqs->cpu]->cur; } @@ -814,7 +814,7 @@ static void cpufreq_out_of_sync(unsigned int cpu, unsigned int old_freq, unsigne { struct cpufreq_freqs freqs; - printk(KERN_WARNING "Warning: CPU frequency out of sync: cpufreq and timing " + dprintk(KERN_WARNING "Warning: CPU frequency out of sync: cpufreq and timing " "core thinks of %u, is %u kHz.\n", old_freq, new_freq); freqs.cpu = cpu; @@ -923,7 +923,7 @@ static int cpufreq_suspend(struct sys_device * sysdev, u32 state) struct cpufreq_freqs freqs; if (!(cpufreq_driver->flags & CPUFREQ_PM_NO_WARN)) - printk(KERN_DEBUG "Warning: CPU frequency is %u, " + dprintk(KERN_DEBUG "Warning: CPU frequency is %u, " "cpufreq assumed %u kHz.\n", cur_freq, cpu_policy->cur); @@ -1004,7 +1004,7 @@ static int cpufreq_resume(struct sys_device * sysdev) struct cpufreq_freqs freqs; if (!(cpufreq_driver->flags & CPUFREQ_PM_NO_WARN)) - printk(KERN_WARNING "Warning: CPU frequency" + dprintk(KERN_WARNING "Warning: CPU frequency" "is %u, cpufreq assumed %u kHz.\n", cur_freq, cpu_policy->cur); diff --git a/drivers/cpufreq/cpufreq_conservative.c b/drivers/cpufreq/cpufreq_conservative.c new file mode 100644 index 00000000000..e1df376e709 --- /dev/null +++ b/drivers/cpufreq/cpufreq_conservative.c @@ -0,0 +1,586 @@ +/* + * drivers/cpufreq/cpufreq_conservative.c + * + * Copyright (C) 2001 Russell King + * (C) 2003 Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>. + * Jun Nakajima <jun.nakajima@intel.com> + * (C) 2004 Alexander Clouter <alex-kernel@digriz.org.uk> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/smp.h> +#include <linux/init.h> +#include <linux/interrupt.h> +#include <linux/ctype.h> +#include <linux/cpufreq.h> +#include <linux/sysctl.h> +#include <linux/types.h> +#include <linux/fs.h> +#include <linux/sysfs.h> +#include <linux/sched.h> +#include <linux/kmod.h> +#include <linux/workqueue.h> +#include <linux/jiffies.h> +#include <linux/kernel_stat.h> +#include <linux/percpu.h> + +/* + * dbs is used in this file as a shortform for demandbased switching + * It helps to keep variable names smaller, simpler + */ + +#define DEF_FREQUENCY_UP_THRESHOLD (80) +#define MIN_FREQUENCY_UP_THRESHOLD (0) +#define MAX_FREQUENCY_UP_THRESHOLD (100) + +#define DEF_FREQUENCY_DOWN_THRESHOLD (20) +#define MIN_FREQUENCY_DOWN_THRESHOLD (0) +#define MAX_FREQUENCY_DOWN_THRESHOLD (100) + +/* + * The polling frequency of this governor depends on the capability of + * the processor. Default polling frequency is 1000 times the transition + * latency of the processor. The governor will work on any processor with + * transition latency <= 10mS, using appropriate sampling + * rate. + * For CPUs with transition latency > 10mS (mostly drivers with CPUFREQ_ETERNAL) + * this governor will not work. + * All times here are in uS. + */ +static unsigned int def_sampling_rate; +#define MIN_SAMPLING_RATE (def_sampling_rate / 2) +#define MAX_SAMPLING_RATE (500 * def_sampling_rate) +#define DEF_SAMPLING_RATE_LATENCY_MULTIPLIER (100000) +#define DEF_SAMPLING_DOWN_FACTOR (5) +#define TRANSITION_LATENCY_LIMIT (10 * 1000) + +static void do_dbs_timer(void *data); + +struct cpu_dbs_info_s { + struct cpufreq_policy *cur_policy; + unsigned int prev_cpu_idle_up; + unsigned int prev_cpu_idle_down; + unsigned int enable; +}; +static DEFINE_PER_CPU(struct cpu_dbs_info_s, cpu_dbs_info); + +static unsigned int dbs_enable; /* number of CPUs using this policy */ + +static DECLARE_MUTEX (dbs_sem); +static DECLARE_WORK (dbs_work, do_dbs_timer, NULL); + +struct dbs_tuners { + unsigned int sampling_rate; + unsigned int sampling_down_factor; + unsigned int up_threshold; + unsigned int down_threshold; + unsigned int ignore_nice; + unsigned int freq_step; +}; + +static struct dbs_tuners dbs_tuners_ins = { + .up_threshold = DEF_FREQUENCY_UP_THRESHOLD, + .down_threshold = DEF_FREQUENCY_DOWN_THRESHOLD, + .sampling_down_factor = DEF_SAMPLING_DOWN_FACTOR, +}; + +static inline unsigned int get_cpu_idle_time(unsigned int cpu) +{ + return kstat_cpu(cpu).cpustat.idle + + kstat_cpu(cpu).cpustat.iowait + + ( !dbs_tuners_ins.ignore_nice ? + kstat_cpu(cpu).cpustat.nice : + 0); +} + +/************************** sysfs interface ************************/ +static ssize_t show_sampling_rate_max(struct cpufreq_policy *policy, char *buf) +{ + return sprintf (buf, "%u\n", MAX_SAMPLING_RATE); +} + +static ssize_t show_sampling_rate_min(struct cpufreq_policy *policy, char *buf) +{ + return sprintf (buf, "%u\n", MIN_SAMPLING_RATE); +} + +#define define_one_ro(_name) \ +static struct freq_attr _name = \ +__ATTR(_name, 0444, show_##_name, NULL) + +define_one_ro(sampling_rate_max); +define_one_ro(sampling_rate_min); + +/* cpufreq_conservative Governor Tunables */ +#define show_one(file_name, object) \ +static ssize_t show_##file_name \ +(struct cpufreq_policy *unused, char *buf) \ +{ \ + return sprintf(buf, "%u\n", dbs_tuners_ins.object); \ +} +show_one(sampling_rate, sampling_rate); +show_one(sampling_down_factor, sampling_down_factor); +show_one(up_threshold, up_threshold); +show_one(down_threshold, down_threshold); +show_one(ignore_nice, ignore_nice); +show_one(freq_step, freq_step); + +static ssize_t store_sampling_down_factor(struct cpufreq_policy *unused, + const char *buf, size_t count) +{ + unsigned int input; + int ret; + ret = sscanf (buf, "%u", &input); + if (ret != 1 ) + return -EINVAL; + + down(&dbs_sem); + dbs_tuners_ins.sampling_down_factor = input; + up(&dbs_sem); + + return count; +} + +static ssize_t store_sampling_rate(struct cpufreq_policy *unused, + const char *buf, size_t count) +{ + unsigned int input; + int ret; + ret = sscanf (buf, "%u", &input); + + down(&dbs_sem); + if (ret != 1 || input > MAX_SAMPLING_RATE || input < MIN_SAMPLING_RATE) { + up(&dbs_sem); + return -EINVAL; + } + + dbs_tuners_ins.sampling_rate = input; + up(&dbs_sem); + + return count; +} + +static ssize_t store_up_threshold(struct cpufreq_policy *unused, + const char *buf, size_t count) +{ + unsigned int input; + int ret; + ret = sscanf (buf, "%u", &input); + + down(&dbs_sem); + if (ret != 1 || input > MAX_FREQUENCY_UP_THRESHOLD || + input < MIN_FREQUENCY_UP_THRESHOLD || + input <= dbs_tuners_ins.down_threshold) { + up(&dbs_sem); + return -EINVAL; + } + + dbs_tuners_ins.up_threshold = input; + up(&dbs_sem); + + return count; +} + +static ssize_t store_down_threshold(struct cpufreq_policy *unused, + const char *buf, size_t count) +{ + unsigned int input; + int ret; + ret = sscanf (buf, "%u", &input); + + down(&dbs_sem); + if (ret != 1 || input > MAX_FREQUENCY_DOWN_THRESHOLD || + input < MIN_FREQUENCY_DOWN_THRESHOLD || + input >= dbs_tuners_ins.up_threshold) { + up(&dbs_sem); + return -EINVAL; + } + + dbs_tuners_ins.down_threshold = input; + up(&dbs_sem); + + return count; +} + +static ssize_t store_ignore_nice(struct cpufreq_policy *policy, + const char *buf, size_t count) +{ + unsigned int input; + int ret; + + unsigned int j; + + ret = sscanf (buf, "%u", &input); + if ( ret != 1 ) + return -EINVAL; + + if ( input > 1 ) + input = 1; + + down(&dbs_sem); + if ( input == dbs_tuners_ins.ignore_nice ) { /* nothing to do */ + up(&dbs_sem); + return count; + } + dbs_tuners_ins.ignore_nice = input; + + /* we need to re-evaluate prev_cpu_idle_up and prev_cpu_idle_down */ + for_each_online_cpu(j) { + struct cpu_dbs_info_s *j_dbs_info; + j_dbs_info = &per_cpu(cpu_dbs_info, j); + j_dbs_info->prev_cpu_idle_up = get_cpu_idle_time(j); + j_dbs_info->prev_cpu_idle_down = j_dbs_info->prev_cpu_idle_up; + } + up(&dbs_sem); + + return count; +} + +static ssize_t store_freq_step(struct cpufreq_policy *policy, + const char *buf, size_t count) +{ + unsigned int input; + int ret; + + ret = sscanf (buf, "%u", &input); + + if ( ret != 1 ) + return -EINVAL; + + if ( input > 100 ) + input = 100; + + /* no need to test here if freq_step is zero as the user might actually + * want this, they would be crazy though :) */ + down(&dbs_sem); + dbs_tuners_ins.freq_step = input; + up(&dbs_sem); + + return count; +} + +#define define_one_rw(_name) \ +static struct freq_attr _name = \ +__ATTR(_name, 0644, show_##_name, store_##_name) + +define_one_rw(sampling_rate); +define_one_rw(sampling_down_factor); +define_one_rw(up_threshold); +define_one_rw(down_threshold); +define_one_rw(ignore_nice); +define_one_rw(freq_step); + +static struct attribute * dbs_attributes[] = { + &sampling_rate_max.attr, + &sampling_rate_min.attr, + &sampling_rate.attr, + &sampling_down_factor.attr, + &up_threshold.attr, + &down_threshold.attr, + &ignore_nice.attr, + &freq_step.attr, + NULL +}; + +static struct attribute_group dbs_attr_group = { + .attrs = dbs_attributes, + .name = "conservative", +}; + +/************************** sysfs end ************************/ + +static void dbs_check_cpu(int cpu) +{ + unsigned int idle_ticks, up_idle_ticks, down_idle_ticks; + unsigned int freq_step; + unsigned int freq_down_sampling_rate; + static int down_skip[NR_CPUS]; + static int requested_freq[NR_CPUS]; + static unsigned short init_flag = 0; + struct cpu_dbs_info_s *this_dbs_info; + struct cpu_dbs_info_s *dbs_info; + + struct cpufreq_policy *policy; + unsigned int j; + + this_dbs_info = &per_cpu(cpu_dbs_info, cpu); + if (!this_dbs_info->enable) + return; + + policy = this_dbs_info->cur_policy; + + if ( init_flag == 0 ) { + for ( /* NULL */; init_flag < NR_CPUS; init_flag++ ) { + dbs_info = &per_cpu(cpu_dbs_info, init_flag); + requested_freq[cpu] = dbs_info->cur_policy->cur; + } + init_flag = 1; + } + + /* + * The default safe range is 20% to 80% + * Every sampling_rate, we check + * - If current idle time is less than 20%, then we try to + * increase frequency + * Every sampling_rate*sampling_down_factor, we check + * - If current idle time is more than 80%, then we try to + * decrease frequency + * + * Any frequency increase takes it to the maximum frequency. + * Frequency reduction happens at minimum steps of + * 5% (default) of max_frequency + */ + + /* Check for frequency increase */ + + idle_ticks = UINT_MAX; + for_each_cpu_mask(j, policy->cpus) { + unsigned int tmp_idle_ticks, total_idle_ticks; + struct cpu_dbs_info_s *j_dbs_info; + + j_dbs_info = &per_cpu(cpu_dbs_info, j); + /* Check for frequency increase */ + total_idle_ticks = get_cpu_idle_time(j); + tmp_idle_ticks = total_idle_ticks - + j_dbs_info->prev_cpu_idle_up; + j_dbs_info->prev_cpu_idle_up = total_idle_ticks; + + if (tmp_idle_ticks < idle_ticks) + idle_ticks = tmp_idle_ticks; + } + + /* Scale idle ticks by 100 and compare with up and down ticks */ + idle_ticks *= 100; + up_idle_ticks = (100 - dbs_tuners_ins.up_threshold) * + usecs_to_jiffies(dbs_tuners_ins.sampling_rate); + + if (idle_ticks < up_idle_ticks) { + down_skip[cpu] = 0; + for_each_cpu_mask(j, policy->cpus) { + struct cpu_dbs_info_s *j_dbs_info; + + j_dbs_info = &per_cpu(cpu_dbs_info, j); + j_dbs_info->prev_cpu_idle_down = + j_dbs_info->prev_cpu_idle_up; + } + /* if we are already at full speed then break out early */ + if (requested_freq[cpu] == policy->max) + return; + + freq_step = (dbs_tuners_ins.freq_step * policy->max) / 100; + + /* max freq cannot be less than 100. But who knows.... */ + if (unlikely(freq_step == 0)) + freq_step = 5; + + requested_freq[cpu] += freq_step; + if (requested_freq[cpu] > policy->max) + requested_freq[cpu] = policy->max; + + __cpufreq_driver_target(policy, requested_freq[cpu], + CPUFREQ_RELATION_H); + return; + } + + /* Check for frequency decrease */ + down_skip[cpu]++; + if (down_skip[cpu] < dbs_tuners_ins.sampling_down_factor) + return; + + idle_ticks = UINT_MAX; + for_each_cpu_mask(j, policy->cpus) { + unsigned int tmp_idle_ticks, total_idle_ticks; + struct cpu_dbs_info_s *j_dbs_info; + + j_dbs_info = &per_cpu(cpu_dbs_info, j); + total_idle_ticks = j_dbs_info->prev_cpu_idle_up; + tmp_idle_ticks = total_idle_ticks - + j_dbs_info->prev_cpu_idle_down; + j_dbs_info->prev_cpu_idle_down = total_idle_ticks; + + if (tmp_idle_ticks < idle_ticks) + idle_ticks = tmp_idle_ticks; + } + + /* Scale idle ticks by 100 and compare with up and down ticks */ + idle_ticks *= 100; + down_skip[cpu] = 0; + + freq_down_sampling_rate = dbs_tuners_ins.sampling_rate * + dbs_tuners_ins.sampling_down_factor; + down_idle_ticks = (100 - dbs_tuners_ins.down_threshold) * + usecs_to_jiffies(freq_down_sampling_rate); + + if (idle_ticks > down_idle_ticks) { + /* if we are already at the lowest speed then break out early + * or if we 'cannot' reduce the speed as the user might want + * freq_step to be zero */ + if (requested_freq[cpu] == policy->min + || dbs_tuners_ins.freq_step == 0) + return; + + freq_step = (dbs_tuners_ins.freq_step * policy->max) / 100; + + /* max freq cannot be less than 100. But who knows.... */ + if (unlikely(freq_step == 0)) + freq_step = 5; + + requested_freq[cpu] -= freq_step; + if (requested_freq[cpu] < policy->min) + requested_freq[cpu] = policy->min; + + __cpufreq_driver_target(policy, + requested_freq[cpu], + CPUFREQ_RELATION_H); + return; + } +} + +static void do_dbs_timer(void *data) +{ + int i; + down(&dbs_sem); + for_each_online_cpu(i) + dbs_check_cpu(i); + schedule_delayed_work(&dbs_work, + usecs_to_jiffies(dbs_tuners_ins.sampling_rate)); + up(&dbs_sem); +} + +static inline void dbs_timer_init(void) +{ + INIT_WORK(&dbs_work, do_dbs_timer, NULL); + schedule_delayed_work(&dbs_work, + usecs_to_jiffies(dbs_tuners_ins.sampling_rate)); + return; +} + +static inline void dbs_timer_exit(void) +{ + cancel_delayed_work(&dbs_work); + return; +} + +static int cpufreq_governor_dbs(struct cpufreq_policy *policy, + unsigned int event) +{ + unsigned int cpu = policy->cpu; + struct cpu_dbs_info_s *this_dbs_info; + unsigned int j; + + this_dbs_info = &per_cpu(cpu_dbs_info, cpu); + + switch (event) { + case CPUFREQ_GOV_START: + if ((!cpu_online(cpu)) || + (!policy->cur)) + return -EINVAL; + + if (policy->cpuinfo.transition_latency > + (TRANSITION_LATENCY_LIMIT * 1000)) + return -EINVAL; + if (this_dbs_info->enable) /* Already enabled */ + break; + + down(&dbs_sem); + for_each_cpu_mask(j, policy->cpus) { + struct cpu_dbs_info_s *j_dbs_info; + j_dbs_info = &per_cpu(cpu_dbs_info, j); + j_dbs_info->cur_policy = policy; + + j_dbs_info->prev_cpu_idle_up = get_cpu_idle_time(j); + j_dbs_info->prev_cpu_idle_down + = j_dbs_info->prev_cpu_idle_up; + } + this_dbs_info->enable = 1; + sysfs_create_group(&policy->kobj, &dbs_attr_group); + dbs_enable++; + /* + * Start the timerschedule work, when this governor + * is used for first time + */ + if (dbs_enable == 1) { + unsigned int latency; + /* policy latency is in nS. Convert it to uS first */ + + latency = policy->cpuinfo.transition_latency; + if (latency < 1000) + latency = 1000; + + def_sampling_rate = (latency / 1000) * + DEF_SAMPLING_RATE_LATENCY_MULTIPLIER; + dbs_tuners_ins.sampling_rate = def_sampling_rate; + dbs_tuners_ins.ignore_nice = 0; + dbs_tuners_ins.freq_step = 5; + + dbs_timer_init(); + } + + up(&dbs_sem); + break; + + case CPUFREQ_GOV_STOP: + down(&dbs_sem); + this_dbs_info->enable = 0; + sysfs_remove_group(&policy->kobj, &dbs_attr_group); + dbs_enable--; + /* + * Stop the timerschedule work, when this governor + * is used for first time + */ + if (dbs_enable == 0) + dbs_timer_exit(); + + up(&dbs_sem); + + break; + + case CPUFREQ_GOV_LIMITS: + down(&dbs_sem); + if (policy->max < this_dbs_info->cur_policy->cur) + __cpufreq_driver_target( + this_dbs_info->cur_policy, + policy->max, CPUFREQ_RELATION_H); + else if (policy->min > this_dbs_info->cur_policy->cur) + __cpufreq_driver_target( + this_dbs_info->cur_policy, + policy->min, CPUFREQ_RELATION_L); + up(&dbs_sem); + break; + } + return 0; +} + +static struct cpufreq_governor cpufreq_gov_dbs = { + .name = "conservative", + .governor = cpufreq_governor_dbs, + .owner = THIS_MODULE, +}; + +static int __init cpufreq_gov_dbs_init(void) +{ + return cpufreq_register_governor(&cpufreq_gov_dbs); +} + +static void __exit cpufreq_gov_dbs_exit(void) +{ + /* Make sure that the scheduled work is indeed not running */ + flush_scheduled_work(); + + cpufreq_unregister_governor(&cpufreq_gov_dbs); +} + + +MODULE_AUTHOR ("Alexander Clouter <alex-kernel@digriz.org.uk>"); +MODULE_DESCRIPTION ("'cpufreq_conservative' - A dynamic cpufreq governor for " + "Low Latency Frequency Transition capable processors " + "optimised for use in a battery environment"); +MODULE_LICENSE ("GPL"); + +module_init(cpufreq_gov_dbs_init); +module_exit(cpufreq_gov_dbs_exit); diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c index 8d83a21c647..c1fc9c62bb5 100644 --- a/drivers/cpufreq/cpufreq_ondemand.c +++ b/drivers/cpufreq/cpufreq_ondemand.c @@ -34,13 +34,9 @@ */ #define DEF_FREQUENCY_UP_THRESHOLD (80) -#define MIN_FREQUENCY_UP_THRESHOLD (0) +#define MIN_FREQUENCY_UP_THRESHOLD (11) #define MAX_FREQUENCY_UP_THRESHOLD (100) -#define DEF_FREQUENCY_DOWN_THRESHOLD (20) -#define MIN_FREQUENCY_DOWN_THRESHOLD (0) -#define MAX_FREQUENCY_DOWN_THRESHOLD (100) - /* * The polling frequency of this governor depends on the capability of * the processor. Default polling frequency is 1000 times the transition @@ -55,9 +51,9 @@ static unsigned int def_sampling_rate; #define MIN_SAMPLING_RATE (def_sampling_rate / 2) #define MAX_SAMPLING_RATE (500 * def_sampling_rate) #define DEF_SAMPLING_RATE_LATENCY_MULTIPLIER (1000) -#define DEF_SAMPLING_DOWN_FACTOR (10) +#define DEF_SAMPLING_DOWN_FACTOR (1) +#define MAX_SAMPLING_DOWN_FACTOR (10) #define TRANSITION_LATENCY_LIMIT (10 * 1000) -#define sampling_rate_in_HZ(x) (((x * HZ) < (1000 * 1000))?1:((x * HZ) / (1000 * 1000))) static void do_dbs_timer(void *data); @@ -78,15 +74,23 @@ struct dbs_tuners { unsigned int sampling_rate; unsigned int sampling_down_factor; unsigned int up_threshold; - unsigned int down_threshold; + unsigned int ignore_nice; }; static struct dbs_tuners dbs_tuners_ins = { .up_threshold = DEF_FREQUENCY_UP_THRESHOLD, - .down_threshold = DEF_FREQUENCY_DOWN_THRESHOLD, .sampling_down_factor = DEF_SAMPLING_DOWN_FACTOR, }; +static inline unsigned int get_cpu_idle_time(unsigned int cpu) +{ + return kstat_cpu(cpu).cpustat.idle + + kstat_cpu(cpu).cpustat.iowait + + ( !dbs_tuners_ins.ignore_nice ? + kstat_cpu(cpu).cpustat.nice : + 0); +} + /************************** sysfs interface ************************/ static ssize_t show_sampling_rate_max(struct cpufreq_policy *policy, char *buf) { @@ -115,7 +119,7 @@ static ssize_t show_##file_name \ show_one(sampling_rate, sampling_rate); show_one(sampling_down_factor, sampling_down_factor); show_one(up_threshold, up_threshold); -show_one(down_threshold, down_threshold); +show_one(ignore_nice, ignore_nice); static ssize_t store_sampling_down_factor(struct cpufreq_policy *unused, const char *buf, size_t count) @@ -126,6 +130,9 @@ static ssize_t store_sampling_down_factor(struct cpufreq_policy *unused, if (ret != 1 ) return -EINVAL; + if (input > MAX_SAMPLING_DOWN_FACTOR || input < 1) + return -EINVAL; + down(&dbs_sem); dbs_tuners_ins.sampling_down_factor = input; up(&dbs_sem); @@ -161,8 +168,7 @@ static ssize_t store_up_threshold(struct cpufreq_policy *unused, down(&dbs_sem); if (ret != 1 || input > MAX_FREQUENCY_UP_THRESHOLD || - input < MIN_FREQUENCY_UP_THRESHOLD || - input <= dbs_tuners_ins.down_threshold) { + input < MIN_FREQUENCY_UP_THRESHOLD) { up(&dbs_sem); return -EINVAL; } @@ -173,22 +179,35 @@ static ssize_t store_up_threshold(struct cpufreq_policy *unused, return count; } -static ssize_t store_down_threshold(struct cpufreq_policy *unused, +static ssize_t store_ignore_nice(struct cpufreq_policy *policy, const char *buf, size_t count) { unsigned int input; int ret; + + unsigned int j; + ret = sscanf (buf, "%u", &input); + if ( ret != 1 ) + return -EINVAL; + if ( input > 1 ) + input = 1; + down(&dbs_sem); - if (ret != 1 || input > MAX_FREQUENCY_DOWN_THRESHOLD || - input < MIN_FREQUENCY_DOWN_THRESHOLD || - input >= dbs_tuners_ins.up_threshold) { + if ( input == dbs_tuners_ins.ignore_nice ) { /* nothing to do */ up(&dbs_sem); - return -EINVAL; + return count; } + dbs_tuners_ins.ignore_nice = input; - dbs_tuners_ins.down_threshold = input; + /* we need to re-evaluate prev_cpu_idle_up and prev_cpu_idle_down */ + for_each_online_cpu(j) { + struct cpu_dbs_info_s *j_dbs_info; + j_dbs_info = &per_cpu(cpu_dbs_info, j); + j_dbs_info->prev_cpu_idle_up = get_cpu_idle_time(j); + j_dbs_info->prev_cpu_idle_down = j_dbs_info->prev_cpu_idle_up; + } up(&dbs_sem); return count; @@ -201,7 +220,7 @@ __ATTR(_name, 0644, show_##_name, store_##_name) define_one_rw(sampling_rate); define_one_rw(sampling_down_factor); define_one_rw(up_threshold); -define_one_rw(down_threshold); +define_one_rw(ignore_nice); static struct attribute * dbs_attributes[] = { &sampling_rate_max.attr, @@ -209,7 +228,7 @@ static struct attribute * dbs_attributes[] = { &sampling_rate.attr, &sampling_down_factor.attr, &up_threshold.attr, - &down_threshold.attr, + &ignore_nice.attr, NULL }; @@ -222,9 +241,8 @@ static struct attribute_group dbs_attr_group = { static void dbs_check_cpu(int cpu) { - unsigned int idle_ticks, up_idle_ticks, down_idle_ticks; - unsigned int total_idle_ticks; - unsigned int freq_down_step; + unsigned int idle_ticks, up_idle_ticks, total_ticks; + unsigned int freq_next; unsigned int freq_down_sampling_rate; static int down_skip[NR_CPUS]; struct cpu_dbs_info_s *this_dbs_info; @@ -238,38 +256,25 @@ static void dbs_check_cpu(int cpu) policy = this_dbs_info->cur_policy; /* - * The default safe range is 20% to 80% - * Every sampling_rate, we check - * - If current idle time is less than 20%, then we try to - * increase frequency - * Every sampling_rate*sampling_down_factor, we check - * - If current idle time is more than 80%, then we try to - * decrease frequency + * Every sampling_rate, we check, if current idle time is less + * than 20% (default), then we try to increase frequency + * Every sampling_rate*sampling_down_factor, we look for a the lowest + * frequency which can sustain the load while keeping idle time over + * 30%. If such a frequency exist, we try to decrease to this frequency. * * Any frequency increase takes it to the maximum frequency. * Frequency reduction happens at minimum steps of - * 5% of max_frequency + * 5% (default) of current frequency */ /* Check for frequency increase */ - total_idle_ticks = kstat_cpu(cpu).cpustat.idle + - kstat_cpu(cpu).cpustat.iowait; - idle_ticks = total_idle_ticks - - this_dbs_info->prev_cpu_idle_up; - this_dbs_info->prev_cpu_idle_up = total_idle_ticks; - - + idle_ticks = UINT_MAX; for_each_cpu_mask(j, policy->cpus) { - unsigned int tmp_idle_ticks; + unsigned int tmp_idle_ticks, total_idle_ticks; struct cpu_dbs_info_s *j_dbs_info; - if (j == cpu) - continue; - j_dbs_info = &per_cpu(cpu_dbs_info, j); - /* Check for frequency increase */ - total_idle_ticks = kstat_cpu(j).cpustat.idle + - kstat_cpu(j).cpustat.iowait; + total_idle_ticks = get_cpu_idle_time(j); tmp_idle_ticks = total_idle_ticks - j_dbs_info->prev_cpu_idle_up; j_dbs_info->prev_cpu_idle_up = total_idle_ticks; @@ -281,13 +286,23 @@ static void dbs_check_cpu(int cpu) /* Scale idle ticks by 100 and compare with up and down ticks */ idle_ticks *= 100; up_idle_ticks = (100 - dbs_tuners_ins.up_threshold) * - sampling_rate_in_HZ(dbs_tuners_ins.sampling_rate); + usecs_to_jiffies(dbs_tuners_ins.sampling_rate); if (idle_ticks < up_idle_ticks) { + down_skip[cpu] = 0; + for_each_cpu_mask(j, policy->cpus) { + struct cpu_dbs_info_s *j_dbs_info; + + j_dbs_info = &per_cpu(cpu_dbs_info, j); + j_dbs_info->prev_cpu_idle_down = + j_dbs_info->prev_cpu_idle_up; + } + /* if we are already at full speed then break out early */ + if (policy->cur == policy->max) + return; + __cpufreq_driver_target(policy, policy->max, CPUFREQ_RELATION_H); - down_skip[cpu] = 0; - this_dbs_info->prev_cpu_idle_down = total_idle_ticks; return; } @@ -296,23 +311,14 @@ static void dbs_check_cpu(int cpu) if (down_skip[cpu] < dbs_tuners_ins.sampling_down_factor) return; - total_idle_ticks = kstat_cpu(cpu).cpustat.idle + - kstat_cpu(cpu).cpustat.iowait; - idle_ticks = total_idle_ticks - - this_dbs_info->prev_cpu_idle_down; - this_dbs_info->prev_cpu_idle_down = total_idle_ticks; - + idle_ticks = UINT_MAX; for_each_cpu_mask(j, policy->cpus) { - unsigned int tmp_idle_ticks; + unsigned int tmp_idle_ticks, total_idle_ticks; struct cpu_dbs_info_s *j_dbs_info; - if (j == cpu) - continue; - j_dbs_info = &per_cpu(cpu_dbs_info, j); - /* Check for frequency increase */ - total_idle_ticks = kstat_cpu(j).cpustat.idle + - kstat_cpu(j).cpustat.iowait; + /* Check for frequency decrease */ + total_idle_ticks = j_dbs_info->prev_cpu_idle_up; tmp_idle_ticks = total_idle_ticks - j_dbs_info->prev_cpu_idle_down; j_dbs_info->prev_cpu_idle_down = total_idle_ticks; @@ -321,38 +327,37 @@ static void dbs_check_cpu(int cpu) idle_ticks = tmp_idle_ticks; } - /* Scale idle ticks by 100 and compare with up and down ticks */ - idle_ticks *= 100; down_skip[cpu] = 0; + /* if we cannot reduce the frequency anymore, break out early */ + if (policy->cur == policy->min) + return; + /* Compute how many ticks there are between two measurements */ freq_down_sampling_rate = dbs_tuners_ins.sampling_rate * dbs_tuners_ins.sampling_down_factor; - down_idle_ticks = (100 - dbs_tuners_ins.down_threshold) * - sampling_rate_in_HZ(freq_down_sampling_rate); + total_ticks = usecs_to_jiffies(freq_down_sampling_rate); - if (idle_ticks > down_idle_ticks ) { - freq_down_step = (5 * policy->max) / 100; - - /* max freq cannot be less than 100. But who knows.... */ - if (unlikely(freq_down_step == 0)) - freq_down_step = 5; + /* + * The optimal frequency is the frequency that is the lowest that + * can support the current CPU usage without triggering the up + * policy. To be safe, we focus 10 points under the threshold. + */ + freq_next = ((total_ticks - idle_ticks) * 100) / total_ticks; + freq_next = (freq_next * policy->cur) / + (dbs_tuners_ins.up_threshold - 10); - __cpufreq_driver_target(policy, - policy->cur - freq_down_step, - CPUFREQ_RELATION_H); - return; - } + if (freq_next <= ((policy->cur * 95) / 100)) + __cpufreq_driver_target(policy, freq_next, CPUFREQ_RELATION_L); } static void do_dbs_timer(void *data) { int i; down(&dbs_sem); - for (i = 0; i < NR_CPUS; i++) - if (cpu_online(i)) - dbs_check_cpu(i); + for_each_online_cpu(i) + dbs_check_cpu(i); schedule_delayed_work(&dbs_work, - sampling_rate_in_HZ(dbs_tuners_ins.sampling_rate)); + usecs_to_jiffies(dbs_tuners_ins.sampling_rate)); up(&dbs_sem); } @@ -360,7 +365,7 @@ static inline void dbs_timer_init(void) { INIT_WORK(&dbs_work, do_dbs_timer, NULL); schedule_delayed_work(&dbs_work, - sampling_rate_in_HZ(dbs_tuners_ins.sampling_rate)); + usecs_to_jiffies(dbs_tuners_ins.sampling_rate)); return; } @@ -397,12 +402,9 @@ static int cpufreq_governor_dbs(struct cpufreq_policy *policy, j_dbs_info = &per_cpu(cpu_dbs_info, j); j_dbs_info->cur_policy = policy; - j_dbs_info->prev_cpu_idle_up = - kstat_cpu(j).cpustat.idle + - kstat_cpu(j).cpustat.iowait; - j_dbs_info->prev_cpu_idle_down = - kstat_cpu(j).cpustat.idle + - kstat_cpu(j).cpustat.iowait; + j_dbs_info->prev_cpu_idle_up = get_cpu_idle_time(j); + j_dbs_info->prev_cpu_idle_down + = j_dbs_info->prev_cpu_idle_up; } this_dbs_info->enable = 1; sysfs_create_group(&policy->kobj, &dbs_attr_group); @@ -422,6 +424,7 @@ static int cpufreq_governor_dbs(struct cpufreq_policy *policy, def_sampling_rate = (latency / 1000) * DEF_SAMPLING_RATE_LATENCY_MULTIPLIER; dbs_tuners_ins.sampling_rate = def_sampling_rate; + dbs_tuners_ins.ignore_nice = 0; dbs_timer_init(); } @@ -461,12 +464,11 @@ static int cpufreq_governor_dbs(struct cpufreq_policy *policy, return 0; } -struct cpufreq_governor cpufreq_gov_dbs = { +static struct cpufreq_governor cpufreq_gov_dbs = { .name = "ondemand", .governor = cpufreq_governor_dbs, .owner = THIS_MODULE, }; -EXPORT_SYMBOL(cpufreq_gov_dbs); static int __init cpufreq_gov_dbs_init(void) { diff --git a/drivers/cpufreq/cpufreq_stats.c b/drivers/cpufreq/cpufreq_stats.c index 2084593937c..741b6b191e6 100644 --- a/drivers/cpufreq/cpufreq_stats.c +++ b/drivers/cpufreq/cpufreq_stats.c @@ -19,6 +19,7 @@ #include <linux/percpu.h> #include <linux/kobject.h> #include <linux/spinlock.h> +#include <asm/cputime.h> static spinlock_t cpufreq_stats_lock; @@ -29,20 +30,14 @@ static struct freq_attr _attr_##_name = {\ .show = _show,\ }; -static unsigned long -delta_time(unsigned long old, unsigned long new) -{ - return (old > new) ? (old - new): (new + ~old + 1); -} - struct cpufreq_stats { unsigned int cpu; unsigned int total_trans; - unsigned long long last_time; + unsigned long long last_time; unsigned int max_state; unsigned int state_num; unsigned int last_index; - unsigned long long *time_in_state; + cputime64_t *time_in_state; unsigned int *freq_table; #ifdef CONFIG_CPU_FREQ_STAT_DETAILS unsigned int *trans_table; @@ -60,12 +55,16 @@ static int cpufreq_stats_update (unsigned int cpu) { struct cpufreq_stats *stat; + unsigned long long cur_time; + + cur_time = get_jiffies_64(); spin_lock(&cpufreq_stats_lock); stat = cpufreq_stats_table[cpu]; if (stat->time_in_state) - stat->time_in_state[stat->last_index] += - delta_time(stat->last_time, jiffies); - stat->last_time = jiffies; + stat->time_in_state[stat->last_index] = + cputime64_add(stat->time_in_state[stat->last_index], + cputime_sub(cur_time, stat->last_time)); + stat->last_time = cur_time; spin_unlock(&cpufreq_stats_lock); return 0; } @@ -90,8 +89,8 @@ show_time_in_state(struct cpufreq_policy *policy, char *buf) return 0; cpufreq_stats_update(stat->cpu); for (i = 0; i < stat->state_num; i++) { - len += sprintf(buf + len, "%u %llu\n", - stat->freq_table[i], stat->time_in_state[i]); + len += sprintf(buf + len, "%u %llu\n", stat->freq_table[i], + (unsigned long long)cputime64_to_clock_t(stat->time_in_state[i])); } return len; } @@ -107,16 +106,30 @@ show_trans_table(struct cpufreq_policy *policy, char *buf) if(!stat) return 0; cpufreq_stats_update(stat->cpu); + len += snprintf(buf + len, PAGE_SIZE - len, " From : To\n"); + len += snprintf(buf + len, PAGE_SIZE - len, " : "); + for (i = 0; i < stat->state_num; i++) { + if (len >= PAGE_SIZE) + break; + len += snprintf(buf + len, PAGE_SIZE - len, "%9u ", + stat->freq_table[i]); + } + if (len >= PAGE_SIZE) + return len; + + len += snprintf(buf + len, PAGE_SIZE - len, "\n"); + for (i = 0; i < stat->state_num; i++) { if (len >= PAGE_SIZE) break; - len += snprintf(buf + len, PAGE_SIZE - len, "%9u:\t", + + len += snprintf(buf + len, PAGE_SIZE - len, "%9u: ", stat->freq_table[i]); for (j = 0; j < stat->state_num; j++) { if (len >= PAGE_SIZE) break; - len += snprintf(buf + len, PAGE_SIZE - len, "%u\t", + len += snprintf(buf + len, PAGE_SIZE - len, "%9u ", stat->trans_table[i*stat->max_state+j]); } len += snprintf(buf + len, PAGE_SIZE - len, "\n"); @@ -197,7 +210,7 @@ cpufreq_stats_create_table (struct cpufreq_policy *policy, count++; } - alloc_size = count * sizeof(int) + count * sizeof(long long); + alloc_size = count * sizeof(int) + count * sizeof(cputime64_t); #ifdef CONFIG_CPU_FREQ_STAT_DETAILS alloc_size += count * count * sizeof(int); @@ -224,7 +237,7 @@ cpufreq_stats_create_table (struct cpufreq_policy *policy, } stat->state_num = j; spin_lock(&cpufreq_stats_lock); - stat->last_time = jiffies; + stat->last_time = get_jiffies_64(); stat->last_index = freq_table_get_index(stat, policy->cur); spin_unlock(&cpufreq_stats_lock); cpufreq_cpu_put(data); diff --git a/drivers/firmware/pcdp.c b/drivers/firmware/pcdp.c index 6d5df6c2efa..df1b721154d 100644 --- a/drivers/firmware/pcdp.c +++ b/drivers/firmware/pcdp.c @@ -11,6 +11,7 @@ * published by the Free Software Foundation. */ +#include <linux/config.h> #include <linux/acpi.h> #include <linux/console.h> #include <linux/efi.h> diff --git a/fs/proc/proc_devtree.c b/fs/proc/proc_devtree.c index 67423c696c0..6fd57f15419 100644 --- a/fs/proc/proc_devtree.c +++ b/fs/proc/proc_devtree.c @@ -12,15 +12,8 @@ #include <asm/uaccess.h> #ifndef HAVE_ARCH_DEVTREE_FIXUPS -static inline void set_node_proc_entry(struct device_node *np, struct proc_dir_entry *de) -{ -} - -static void inline set_node_name_link(struct device_node *np, struct proc_dir_entry *de) -{ -} - -static void inline set_node_addr_link(struct device_node *np, struct proc_dir_entry *de) +static inline void set_node_proc_entry(struct device_node *np, + struct proc_dir_entry *de) { } #endif @@ -58,89 +51,67 @@ static int property_read_proc(char *page, char **start, off_t off, /* * Process a node, adding entries for its children and its properties. */ -void proc_device_tree_add_node(struct device_node *np, struct proc_dir_entry *de) +void proc_device_tree_add_node(struct device_node *np, + struct proc_dir_entry *de) { struct property *pp; struct proc_dir_entry *ent; - struct device_node *child, *sib; - const char *p, *at; - int l; - struct proc_dir_entry *list, **lastp, *al; + struct device_node *child; + struct proc_dir_entry *list = NULL, **lastp; + const char *p; set_node_proc_entry(np, de); lastp = &list; - for (pp = np->properties; pp != 0; pp = pp->next) { - /* - * Unfortunately proc_register puts each new entry - * at the beginning of the list. So we rearrange them. - */ - ent = create_proc_read_entry(pp->name, strncmp(pp->name, "security-", 9) ? - S_IRUGO : S_IRUSR, de, property_read_proc, pp); - if (ent == 0) - break; - if (!strncmp(pp->name, "security-", 9)) - ent->size = 0; /* don't leak number of password chars */ - else - ent->size = pp->length; - *lastp = ent; - lastp = &ent->next; - } - child = NULL; - while ((child = of_get_next_child(np, child))) { + for (child = NULL; (child = of_get_next_child(np, child));) { p = strrchr(child->full_name, '/'); if (!p) p = child->full_name; else ++p; - /* chop off '@0' if the name ends with that */ - l = strlen(p); - if (l > 2 && p[l-2] == '@' && p[l-1] == '0') - l -= 2; ent = proc_mkdir(p, de); if (ent == 0) break; *lastp = ent; + ent->next = NULL; lastp = &ent->next; proc_device_tree_add_node(child, ent); - - /* - * If we left the address part on the name, consider - * adding symlinks from the name and address parts. - */ - if (p[l] != 0 || (at = strchr(p, '@')) == 0) - continue; - + } + of_node_put(child); + for (pp = np->properties; pp != 0; pp = pp->next) { /* - * If this is the first node with a given name property, - * add a symlink with the name property as its name. + * Yet another Apple device-tree bogosity: on some machines, + * they have properties & nodes with the same name. Those + * properties are quite unimportant for us though, thus we + * simply "skip" them here, but we do have to check. */ - sib = NULL; - while ((sib = of_get_next_child(np, sib)) && sib != child) - if (sib->name && strcmp(sib->name, child->name) == 0) - break; - if (sib == child && strncmp(p, child->name, l) != 0) { - al = proc_symlink(child->name, de, ent->name); - if (al == 0) { - of_node_put(sib); + for (ent = list; ent != NULL; ent = ent->next) + if (!strcmp(ent->name, pp->name)) break; - } - set_node_name_link(child, al); - *lastp = al; - lastp = &al->next; + if (ent != NULL) { + printk(KERN_WARNING "device-tree: property \"%s\" name" + " conflicts with node in %s\n", pp->name, + np->full_name); + continue; } - of_node_put(sib); + /* - * Add another directory with the @address part as its name. + * Unfortunately proc_register puts each new entry + * at the beginning of the list. So we rearrange them. */ - al = proc_symlink(at, de, ent->name); - if (al == 0) + ent = create_proc_read_entry(pp->name, + strncmp(pp->name, "security-", 9) + ? S_IRUGO : S_IRUSR, de, + property_read_proc, pp); + if (ent == 0) break; - set_node_addr_link(child, al); - *lastp = al; - lastp = &al->next; + if (!strncmp(pp->name, "security-", 9)) + ent->size = 0; /* don't leak number of password chars */ + else + ent->size = pp->length; + ent->next = NULL; + *lastp = ent; + lastp = &ent->next; } - of_node_put(child); - *lastp = NULL; de->subdir = list; } diff --git a/fs/udf/udftime.c b/fs/udf/udftime.c index 457a8fe2857..85d8dbe843f 100644 --- a/fs/udf/udftime.c +++ b/fs/udf/udftime.c @@ -46,7 +46,7 @@ #endif /* How many days come before each month (0-12). */ -const unsigned short int __mon_yday[2][13] = +static const unsigned short int __mon_yday[2][13] = { /* Normal years. */ { 0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334, 365 }, diff --git a/include/asm-i386/timer.h b/include/asm-i386/timer.h index 40c54f69780..c3470984983 100644 --- a/include/asm-i386/timer.h +++ b/include/asm-i386/timer.h @@ -53,6 +53,7 @@ extern struct init_timer_opts timer_cyclone_init; extern unsigned long calibrate_tsc(void); extern void init_cpu_khz(void); +extern int recalibrate_cpu_khz(void); #ifdef CONFIG_HPET_TIMER extern struct init_timer_opts timer_hpet_init; extern unsigned long calibrate_tsc_hpet(unsigned long *tsc_hpet_quotient_ptr); diff --git a/include/asm-ppc64/prom.h b/include/asm-ppc64/prom.h index 2440a2c90ae..04b1a84f7ca 100644 --- a/include/asm-ppc64/prom.h +++ b/include/asm-ppc64/prom.h @@ -147,9 +147,7 @@ struct device_node { struct device_node *sibling; struct device_node *next; /* next device of same type */ struct device_node *allnext; /* next in list of all nodes */ - struct proc_dir_entry *pde; /* this node's proc directory */ - struct proc_dir_entry *name_link; /* name symlink */ - struct proc_dir_entry *addr_link; /* addr symlink */ + struct proc_dir_entry *pde; /* this node's proc directory */ struct kref kref; unsigned long _flags; }; @@ -174,15 +172,6 @@ static inline void set_node_proc_entry(struct device_node *dn, struct proc_dir_e dn->pde = de; } -static void inline set_node_name_link(struct device_node *dn, struct proc_dir_entry *de) -{ - dn->name_link = de; -} - -static void inline set_node_addr_link(struct device_node *dn, struct proc_dir_entry *de) -{ - dn->addr_link = de; -} /* OBSOLETE: Old stlye node lookup */ extern struct device_node *find_devices(const char *name); diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index f21af067d01..927daa86c9b 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -49,7 +49,7 @@ int cpufreq_unregister_notifier(struct notifier_block *nb, unsigned int list); /* Frequency values here are CPU kHz so that hardware which doesn't run * with some frequencies can complain without having to guess what per * cent / per mille means. - * Maximum transition latency is in microseconds - if it's unknown, + * Maximum transition latency is in nanoseconds - if it's unknown, * CPUFREQ_ETERNAL shall be used. */ diff --git a/kernel/module.c b/kernel/module.c index 5734ab09d3f..83b3d376708 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -1758,6 +1758,7 @@ sys_init_module(void __user *umod, const char __user *uargs) { struct module *mod; + mm_segment_t old_fs = get_fs(); int ret = 0; /* Must have permission */ @@ -1775,6 +1776,9 @@ sys_init_module(void __user *umod, return PTR_ERR(mod); } + /* flush the icache in correct context */ + set_fs(KERNEL_DS); + /* Flush the instruction cache, since we've played with text */ if (mod->module_init) flush_icache_range((unsigned long)mod->module_init, @@ -1783,6 +1787,8 @@ sys_init_module(void __user *umod, flush_icache_range((unsigned long)mod->module_core, (unsigned long)mod->module_core + mod->core_size); + set_fs(old_fs); + /* Now sew it into the lists. They won't access us, since strong_try_module_get() will fail. */ stop_machine_run(__link_module, mod, NR_CPUS); |