1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
|
#include <linux/sched.h>
#include <linux/clocksource.h>
#include <linux/workqueue.h>
#include <linux/delay.h>
#include <linux/cpufreq.h>
#include <linux/jiffies.h>
#include <linux/init.h>
#include <linux/dmi.h>
#include <linux/percpu.h>
#include <asm/delay.h>
#include <asm/tsc.h>
#include <asm/io.h>
#include <asm/timer.h>
#include "mach_timer.h"
extern int tsc_unstable;
extern int tsc_disabled;
/* Accelerators for sched_clock()
* convert from cycles(64bits) => nanoseconds (64bits)
* basic equation:
* ns = cycles / (freq / ns_per_sec)
* ns = cycles * (ns_per_sec / freq)
* ns = cycles * (10^9 / (cpu_khz * 10^3))
* ns = cycles * (10^6 / cpu_khz)
*
* Then we use scaling math (suggested by george@mvista.com) to get:
* ns = cycles * (10^6 * SC / cpu_khz) / SC
* ns = cycles * cyc2ns_scale / SC
*
* And since SC is a constant power of two, we can convert the div
* into a shift.
*
* We can use khz divisor instead of mhz to keep a better precision, since
* cyc2ns_scale is limited to 10^6 * 2^10, which fits in 32 bits.
* (mathieu.desnoyers@polymtl.ca)
*
* -johnstul@us.ibm.com "math is hard, lets go shopping!"
*/
DEFINE_PER_CPU(unsigned long, cyc2ns);
void set_cyc2ns_scale(unsigned long cpu_khz, int cpu)
{
unsigned long long tsc_now, ns_now;
unsigned long flags, *scale;
local_irq_save(flags);
sched_clock_idle_sleep_event();
scale = &per_cpu(cyc2ns, cpu);
rdtscll(tsc_now);
ns_now = __cycles_2_ns(tsc_now);
if (cpu_khz)
*scale = (NSEC_PER_MSEC << CYC2NS_SCALE_FACTOR)/cpu_khz;
/*
* Start smoothly with the new frequency:
*/
sched_clock_idle_wakeup_event(0);
local_irq_restore(flags);
}
#ifdef CONFIG_CPU_FREQ
/*
* if the CPU frequency is scaled, TSC-based delays will need a different
* loops_per_jiffy value to function properly.
*/
static unsigned int ref_freq;
static unsigned long loops_per_jiffy_ref;
static unsigned long cpu_khz_ref;
static int
time_cpufreq_notifier(struct notifier_block *nb, unsigned long val, void *data)
{
struct cpufreq_freqs *freq = data;
if (!ref_freq) {
if (!freq->old){
ref_freq = freq->new;
return 0;
}
ref_freq = freq->old;
loops_per_jiffy_ref = cpu_data(freq->cpu).loops_per_jiffy;
cpu_khz_ref = cpu_khz;
}
if ((val == CPUFREQ_PRECHANGE && freq->old < freq->new) ||
(val == CPUFREQ_POSTCHANGE && freq->old > freq->new) ||
(val == CPUFREQ_RESUMECHANGE)) {
if (!(freq->flags & CPUFREQ_CONST_LOOPS))
cpu_data(freq->cpu).loops_per_jiffy =
cpufreq_scale(loops_per_jiffy_ref,
ref_freq, freq->new);
if (cpu_khz) {
if (num_online_cpus() == 1)
cpu_khz = cpufreq_scale(cpu_khz_ref,
ref_freq, freq->new);
if (!(freq->flags & CPUFREQ_CONST_LOOPS)) {
tsc_khz = cpu_khz;
set_cyc2ns_scale(cpu_khz, freq->cpu);
/*
* TSC based sched_clock turns
* to junk w/ cpufreq
*/
mark_tsc_unstable("cpufreq changes");
}
}
}
return 0;
}
static struct notifier_block time_cpufreq_notifier_block = {
.notifier_call = time_cpufreq_notifier
};
static int __init cpufreq_tsc(void)
{
return cpufreq_register_notifier(&time_cpufreq_notifier_block,
CPUFREQ_TRANSITION_NOTIFIER);
}
core_initcall(cpufreq_tsc);
#endif
/* clock source code */
static struct clocksource clocksource_tsc;
/*
* We compare the TSC to the cycle_last value in the clocksource
* structure to avoid a nasty time-warp issue. This can be observed in
* a very small window right after one CPU updated cycle_last under
* xtime lock and the other CPU reads a TSC value which is smaller
* than the cycle_last reference value due to a TSC which is slighty
* behind. This delta is nowhere else observable, but in that case it
* results in a forward time jump in the range of hours due to the
* unsigned delta calculation of the time keeping core code, which is
* necessary to support wrapping clocksources like pm timer.
*/
static cycle_t read_tsc(void)
{
cycle_t ret;
rdtscll(ret);
return ret >= clocksource_tsc.cycle_last ?
ret : clocksource_tsc.cycle_last;
}
static struct clocksource clocksource_tsc = {
.name = "tsc",
.rating = 300,
.read = read_tsc,
.mask = CLOCKSOURCE_MASK(64),
.mult = 0, /* to be set */
.shift = 22,
.flags = CLOCK_SOURCE_IS_CONTINUOUS |
CLOCK_SOURCE_MUST_VERIFY,
};
void mark_tsc_unstable(char *reason)
{
if (!tsc_unstable) {
tsc_unstable = 1;
printk("Marking TSC unstable due to: %s.\n", reason);
/* Can be called before registration */
if (clocksource_tsc.mult)
clocksource_change_rating(&clocksource_tsc, 0);
else
clocksource_tsc.rating = 0;
}
}
EXPORT_SYMBOL_GPL(mark_tsc_unstable);
static int __init dmi_mark_tsc_unstable(const struct dmi_system_id *d)
{
printk(KERN_NOTICE "%s detected: marking TSC unstable.\n",
d->ident);
tsc_unstable = 1;
return 0;
}
/* List of systems that have known TSC problems */
static struct dmi_system_id __initdata bad_tsc_dmi_table[] = {
{
.callback = dmi_mark_tsc_unstable,
.ident = "IBM Thinkpad 380XD",
.matches = {
DMI_MATCH(DMI_BOARD_VENDOR, "IBM"),
DMI_MATCH(DMI_BOARD_NAME, "2635FA0"),
},
},
{}
};
/*
* Make an educated guess if the TSC is trustworthy and synchronized
* over all CPUs.
*/
__cpuinit int unsynchronized_tsc(void)
{
if (!cpu_has_tsc || tsc_unstable)
return 1;
/* Anything with constant TSC should be synchronized */
if (boot_cpu_has(X86_FEATURE_CONSTANT_TSC))
return 0;
/*
* Intel systems are normally all synchronized.
* Exceptions must mark TSC as unstable:
*/
if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) {
/* assume multi socket systems are not synchronized: */
if (num_possible_cpus() > 1)
tsc_unstable = 1;
}
return tsc_unstable;
}
/*
* Geode_LX - the OLPC CPU has a possibly a very reliable TSC
*/
#ifdef CONFIG_MGEODE_LX
/* RTSC counts during suspend */
#define RTSC_SUSP 0x100
static void __init check_geode_tsc_reliable(void)
{
unsigned long res_low, res_high;
rdmsr_safe(MSR_GEODE_BUSCONT_CONF0, &res_low, &res_high);
if (res_low & RTSC_SUSP)
clocksource_tsc.flags &= ~CLOCK_SOURCE_MUST_VERIFY;
}
#else
static inline void check_geode_tsc_reliable(void) { }
#endif
void __init tsc_init(void)
{
int cpu;
u64 lpj;
if (!cpu_has_tsc || tsc_disabled > 0)
return;
cpu_khz = calculate_cpu_khz();
tsc_khz = cpu_khz;
if (!cpu_khz) {
mark_tsc_unstable("could not calculate TSC khz");
return;
}
lpj = ((u64)tsc_khz * 1000);
do_div(lpj, HZ);
lpj_fine = lpj;
/* now allow native_sched_clock() to use rdtsc */
tsc_disabled = 0;
printk("Detected %lu.%03lu MHz processor.\n",
(unsigned long)cpu_khz / 1000,
(unsigned long)cpu_khz % 1000);
/*
* Secondary CPUs do not run through tsc_init(), so set up
* all the scale factors for all CPUs, assuming the same
* speed as the bootup CPU. (cpufreq notifiers will fix this
* up if their speed diverges)
*/
for_each_possible_cpu(cpu)
set_cyc2ns_scale(cpu_khz, cpu);
use_tsc_delay();
/* Check and install the TSC clocksource */
dmi_check_system(bad_tsc_dmi_table);
unsynchronized_tsc();
check_geode_tsc_reliable();
clocksource_tsc.mult = clocksource_khz2mult(tsc_khz,
clocksource_tsc.shift);
/* lower the rating if we already know its unstable: */
if (check_tsc_unstable()) {
clocksource_tsc.rating = 0;
clocksource_tsc.flags &= ~CLOCK_SOURCE_IS_CONTINUOUS;
}
clocksource_register(&clocksource_tsc);
}
|