/root/src/xen/xen/drivers/cpufreq/cpufreq_ondemand.c
Line | Count | Source (jump to first uncovered line) |
1 | | /* |
2 | | * xen/arch/x86/acpi/cpufreq/cpufreq_ondemand.c |
3 | | * |
4 | | * Copyright (C) 2001 Russell King |
5 | | * (C) 2003 Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>. |
6 | | * Jun Nakajima <jun.nakajima@intel.com> |
7 | | * Feb 2008 Liu Jinsong <jinsong.liu@intel.com> |
 * Porting cpufreq_ondemand.c from Linux 2.6.23 to Xen hypervisor
9 | | * |
10 | | * This program is free software; you can redistribute it and/or modify |
11 | | * it under the terms of the GNU General Public License version 2 as |
12 | | * published by the Free Software Foundation. |
13 | | */ |
14 | | |
15 | | #include <xen/types.h> |
16 | | #include <xen/percpu.h> |
17 | | #include <xen/cpumask.h> |
18 | | #include <xen/types.h> |
19 | | #include <xen/sched.h> |
20 | | #include <xen/timer.h> |
21 | | #include <acpi/cpufreq/cpufreq.h> |
22 | | |
/* Busy percentage above which the governor jumps straight to the max freq. */
#define DEF_FREQUENCY_UP_THRESHOLD (80)
#define MIN_FREQUENCY_UP_THRESHOLD (11)
#define MAX_FREQUENCY_UP_THRESHOLD (100)

/* Samples closer together than this are discarded as statistically useless. */
#define MIN_DBS_INTERVAL (MICROSECS(100))
#define MIN_SAMPLING_RATE_RATIO (2)
#define MIN_SAMPLING_MILLISECS (MIN_SAMPLING_RATE_RATIO * 10)
#define MIN_STAT_SAMPLING_RATE \
    (MIN_SAMPLING_MILLISECS * MILLISECS(1))
/* Lower/upper clamp for the user-selectable rate, relative to the default. */
#define MIN_SAMPLING_RATE \
    (def_sampling_rate / MIN_SAMPLING_RATE_RATIO)
#define MAX_SAMPLING_RATE (500 * def_sampling_rate)
#define DEF_SAMPLING_RATE_LATENCY_MULTIPLIER (1000)
/* Reject hardware whose transition latency exceeds this many microseconds. */
#define TRANSITION_LATENCY_LIMIT (10 * 1000 )

/* Default sampling period in ns, derived from the transition latency. */
static uint64_t def_sampling_rate;
/* Sampling period requested on the command line ("rate" option), in ns. */
static uint64_t usr_sampling_rate;

/* Sampling types */
enum {DBS_NORMAL_SAMPLE, DBS_SUB_SAMPLE};

/* Per-CPU bookkeeping (previous wall/idle timestamps, current policy). */
static DEFINE_PER_CPU(struct cpu_dbs_info_s, cpu_dbs_info);

static unsigned int dbs_enable;    /* number of CPUs using this policy */

/* Governor tunables, shared by all CPUs running the ondemand governor. */
static struct dbs_tuners {
    uint64_t sampling_rate;   /* sampling period in ns; 0 = not yet set */
    unsigned int up_threshold; /* busy %% that triggers a jump to max freq */
    unsigned int powersave_bias; /* 0..1000, set via "bias" option */
} dbs_tuners_ins = {
    .sampling_rate = 0,
    .up_threshold = DEF_FREQUENCY_UP_THRESHOLD,
    .powersave_bias = 0,
};

/* Per-CPU sampling timer driving do_dbs_timer(). */
static DEFINE_PER_CPU(struct timer, dbs_timer);
59 | | |
60 | | int write_ondemand_sampling_rate(unsigned int sampling_rate) |
61 | 0 | { |
62 | 0 | if ( (sampling_rate > MAX_SAMPLING_RATE / MICROSECS(1)) || |
63 | 0 | (sampling_rate < MIN_SAMPLING_RATE / MICROSECS(1)) ) |
64 | 0 | return -EINVAL; |
65 | 0 |
|
66 | 0 | dbs_tuners_ins.sampling_rate = sampling_rate * MICROSECS(1); |
67 | 0 | return 0; |
68 | 0 | } |
69 | | |
70 | | int write_ondemand_up_threshold(unsigned int up_threshold) |
71 | 0 | { |
72 | 0 | if ( (up_threshold > MAX_FREQUENCY_UP_THRESHOLD) || |
73 | 0 | (up_threshold < MIN_FREQUENCY_UP_THRESHOLD) ) |
74 | 0 | return -EINVAL; |
75 | 0 |
|
76 | 0 | dbs_tuners_ins.up_threshold = up_threshold; |
77 | 0 | return 0; |
78 | 0 | } |
79 | | |
80 | | int get_cpufreq_ondemand_para(uint32_t *sampling_rate_max, |
81 | | uint32_t *sampling_rate_min, |
82 | | uint32_t *sampling_rate, |
83 | | uint32_t *up_threshold) |
84 | 0 | { |
85 | 0 | if (!sampling_rate_max || !sampling_rate_min || |
86 | 0 | !sampling_rate || !up_threshold) |
87 | 0 | return -EINVAL; |
88 | 0 |
|
89 | 0 | *sampling_rate_max = MAX_SAMPLING_RATE/MICROSECS(1); |
90 | 0 | *sampling_rate_min = MIN_SAMPLING_RATE/MICROSECS(1); |
91 | 0 | *sampling_rate = dbs_tuners_ins.sampling_rate / MICROSECS(1); |
92 | 0 | *up_threshold = dbs_tuners_ins.up_threshold; |
93 | 0 |
|
94 | 0 | return 0; |
95 | 0 | } |
96 | | |
97 | | static void dbs_check_cpu(struct cpu_dbs_info_s *this_dbs_info) |
98 | 0 | { |
99 | 0 | uint64_t cur_ns, total_ns; |
100 | 0 | uint64_t max_load_freq = 0; |
101 | 0 | struct cpufreq_policy *policy; |
102 | 0 | unsigned int max; |
103 | 0 | unsigned int j; |
104 | 0 |
|
105 | 0 | if (!this_dbs_info->enable) |
106 | 0 | return; |
107 | 0 |
|
108 | 0 | policy = this_dbs_info->cur_policy; |
109 | 0 | max = policy->max; |
110 | 0 |
|
111 | 0 | if (unlikely(policy->resume)) { |
112 | 0 | __cpufreq_driver_target(policy, max,CPUFREQ_RELATION_H); |
113 | 0 | return; |
114 | 0 | } |
115 | 0 |
|
116 | 0 | cur_ns = NOW(); |
117 | 0 | total_ns = cur_ns - this_dbs_info->prev_cpu_wall; |
118 | 0 | this_dbs_info->prev_cpu_wall = NOW(); |
119 | 0 |
|
120 | 0 | if (total_ns < MIN_DBS_INTERVAL) |
121 | 0 | return; |
122 | 0 |
|
123 | 0 | /* Get Idle Time */ |
124 | 0 | for_each_cpu(j, policy->cpus) { |
125 | 0 | uint64_t idle_ns, total_idle_ns; |
126 | 0 | uint64_t load, load_freq, freq_avg; |
127 | 0 | struct cpu_dbs_info_s *j_dbs_info; |
128 | 0 |
|
129 | 0 | j_dbs_info = &per_cpu(cpu_dbs_info, j); |
130 | 0 | total_idle_ns = get_cpu_idle_time(j); |
131 | 0 | idle_ns = total_idle_ns - j_dbs_info->prev_cpu_idle; |
132 | 0 | j_dbs_info->prev_cpu_idle = total_idle_ns; |
133 | 0 |
|
134 | 0 | if (unlikely(total_ns < idle_ns)) |
135 | 0 | continue; |
136 | 0 |
|
137 | 0 | load = 100 * (total_ns - idle_ns) / total_ns; |
138 | 0 |
|
139 | 0 | freq_avg = cpufreq_driver_getavg(j, GOV_GETAVG); |
140 | 0 |
|
141 | 0 | load_freq = load * freq_avg; |
142 | 0 | if (load_freq > max_load_freq) |
143 | 0 | max_load_freq = load_freq; |
144 | 0 | } |
145 | 0 |
|
146 | 0 | /* Check for frequency increase */ |
147 | 0 | if (max_load_freq > (uint64_t) dbs_tuners_ins.up_threshold * policy->cur) { |
148 | 0 | /* if we are already at full speed then break out early */ |
149 | 0 | if (policy->cur == max) |
150 | 0 | return; |
151 | 0 | __cpufreq_driver_target(policy, max, CPUFREQ_RELATION_H); |
152 | 0 | return; |
153 | 0 | } |
154 | 0 |
|
155 | 0 | /* Check for frequency decrease */ |
156 | 0 | /* if we cannot reduce the frequency anymore, break out early */ |
157 | 0 | if (policy->cur == policy->min) |
158 | 0 | return; |
159 | 0 |
|
160 | 0 | /* |
161 | 0 | * The optimal frequency is the frequency that is the lowest that |
162 | 0 | * can support the current CPU usage without triggering the up |
163 | 0 | * policy. To be safe, we focus 10 points under the threshold. |
164 | 0 | */ |
165 | 0 | if (max_load_freq |
166 | 0 | < (uint64_t) (dbs_tuners_ins.up_threshold - 10) * policy->cur) { |
167 | 0 | uint64_t freq_next; |
168 | 0 |
|
169 | 0 | freq_next = max_load_freq / (dbs_tuners_ins.up_threshold - 10); |
170 | 0 |
|
171 | 0 | __cpufreq_driver_target(policy, freq_next, CPUFREQ_RELATION_L); |
172 | 0 | } |
173 | 0 | } |
174 | | |
175 | | static void do_dbs_timer(void *dbs) |
176 | 0 | { |
177 | 0 | struct cpu_dbs_info_s *dbs_info = (struct cpu_dbs_info_s *)dbs; |
178 | 0 |
|
179 | 0 | if (!dbs_info->enable) |
180 | 0 | return; |
181 | 0 |
|
182 | 0 | dbs_check_cpu(dbs_info); |
183 | 0 |
|
184 | 0 | set_timer(&per_cpu(dbs_timer, dbs_info->cpu), |
185 | 0 | align_timer(NOW() , dbs_tuners_ins.sampling_rate)); |
186 | 0 | } |
187 | | |
188 | | static void dbs_timer_init(struct cpu_dbs_info_s *dbs_info) |
189 | 0 | { |
190 | 0 | dbs_info->enable = 1; |
191 | 0 |
|
192 | 0 | init_timer(&per_cpu(dbs_timer, dbs_info->cpu), do_dbs_timer, |
193 | 0 | (void *)dbs_info, dbs_info->cpu); |
194 | 0 |
|
195 | 0 | set_timer(&per_cpu(dbs_timer, dbs_info->cpu), NOW()+dbs_tuners_ins.sampling_rate); |
196 | 0 |
|
197 | 0 | if ( processor_pminfo[dbs_info->cpu]->perf.shared_type |
198 | 0 | == CPUFREQ_SHARED_TYPE_HW ) |
199 | 0 | { |
200 | 0 | dbs_info->stoppable = 1; |
201 | 0 | } |
202 | 0 | } |
203 | | |
204 | | static void dbs_timer_exit(struct cpu_dbs_info_s *dbs_info) |
205 | 0 | { |
206 | 0 | dbs_info->enable = 0; |
207 | 0 | dbs_info->stoppable = 0; |
208 | 0 | kill_timer(&per_cpu(dbs_timer, dbs_info->cpu)); |
209 | 0 | } |
210 | | |
211 | | int cpufreq_governor_dbs(struct cpufreq_policy *policy, unsigned int event) |
212 | 0 | { |
213 | 0 | unsigned int cpu = policy->cpu; |
214 | 0 | struct cpu_dbs_info_s *this_dbs_info; |
215 | 0 | unsigned int j; |
216 | 0 |
|
217 | 0 | this_dbs_info = &per_cpu(cpu_dbs_info, cpu); |
218 | 0 |
|
219 | 0 | switch (event) { |
220 | 0 | case CPUFREQ_GOV_START: |
221 | 0 | if ((!cpu_online(cpu)) || (!policy->cur)) |
222 | 0 | return -EINVAL; |
223 | 0 |
|
224 | 0 | if (policy->cpuinfo.transition_latency > |
225 | 0 | (TRANSITION_LATENCY_LIMIT * 1000)) { |
226 | 0 | printk(KERN_WARNING "ondemand governor failed to load " |
227 | 0 | "due to too long transition latency\n"); |
228 | 0 | return -EINVAL; |
229 | 0 | } |
230 | 0 | if (this_dbs_info->enable) |
231 | 0 | /* Already enabled */ |
232 | 0 | break; |
233 | 0 |
|
234 | 0 | dbs_enable++; |
235 | 0 |
|
236 | 0 | for_each_cpu(j, policy->cpus) { |
237 | 0 | struct cpu_dbs_info_s *j_dbs_info; |
238 | 0 | j_dbs_info = &per_cpu(cpu_dbs_info, j); |
239 | 0 | j_dbs_info->cur_policy = policy; |
240 | 0 |
|
241 | 0 | j_dbs_info->prev_cpu_idle = get_cpu_idle_time(j); |
242 | 0 | j_dbs_info->prev_cpu_wall = NOW(); |
243 | 0 | } |
244 | 0 | this_dbs_info->cpu = cpu; |
245 | 0 | /* |
246 | 0 | * Start the timerschedule work, when this governor |
247 | 0 | * is used for first time |
248 | 0 | */ |
249 | 0 | if ((dbs_enable == 1) && !dbs_tuners_ins.sampling_rate) { |
250 | 0 | def_sampling_rate = (uint64_t) policy->cpuinfo.transition_latency * |
251 | 0 | DEF_SAMPLING_RATE_LATENCY_MULTIPLIER; |
252 | 0 |
|
253 | 0 | if (def_sampling_rate < MIN_STAT_SAMPLING_RATE) |
254 | 0 | def_sampling_rate = MIN_STAT_SAMPLING_RATE; |
255 | 0 |
|
256 | 0 | if (!usr_sampling_rate) |
257 | 0 | dbs_tuners_ins.sampling_rate = def_sampling_rate; |
258 | 0 | else if (usr_sampling_rate < MIN_SAMPLING_RATE) { |
259 | 0 | printk(KERN_WARNING "cpufreq/ondemand: " |
260 | 0 | "specified sampling rate too low, using %"PRIu64"\n", |
261 | 0 | MIN_SAMPLING_RATE); |
262 | 0 | dbs_tuners_ins.sampling_rate = MIN_SAMPLING_RATE; |
263 | 0 | } else if (usr_sampling_rate > MAX_SAMPLING_RATE) { |
264 | 0 | printk(KERN_WARNING "cpufreq/ondemand: " |
265 | 0 | "specified sampling rate too high, using %"PRIu64"\n", |
266 | 0 | MAX_SAMPLING_RATE); |
267 | 0 | dbs_tuners_ins.sampling_rate = MAX_SAMPLING_RATE; |
268 | 0 | } else |
269 | 0 | dbs_tuners_ins.sampling_rate = usr_sampling_rate; |
270 | 0 | } |
271 | 0 | dbs_timer_init(this_dbs_info); |
272 | 0 |
|
273 | 0 | break; |
274 | 0 |
|
275 | 0 | case CPUFREQ_GOV_STOP: |
276 | 0 | if ( !this_dbs_info->enable ) |
277 | 0 | /* Already not enabled */ |
278 | 0 | break; |
279 | 0 |
|
280 | 0 | dbs_timer_exit(this_dbs_info); |
281 | 0 | dbs_enable--; |
282 | 0 |
|
283 | 0 | break; |
284 | 0 |
|
285 | 0 | case CPUFREQ_GOV_LIMITS: |
286 | 0 | if ( this_dbs_info->cur_policy == NULL ) |
287 | 0 | { |
288 | 0 | printk(KERN_WARNING "CPU%d ondemand governor not started yet," |
289 | 0 | "unable to GOV_LIMIT\n", cpu); |
290 | 0 | return -EINVAL; |
291 | 0 | } |
292 | 0 | if (policy->max < this_dbs_info->cur_policy->cur) |
293 | 0 | __cpufreq_driver_target(this_dbs_info->cur_policy, |
294 | 0 | policy->max, CPUFREQ_RELATION_H); |
295 | 0 | else if (policy->min > this_dbs_info->cur_policy->cur) |
296 | 0 | __cpufreq_driver_target(this_dbs_info->cur_policy, |
297 | 0 | policy->min, CPUFREQ_RELATION_L); |
298 | 0 | break; |
299 | 0 | } |
300 | 0 | return 0; |
301 | 0 | } |
302 | | |
303 | | static bool_t __init cpufreq_dbs_handle_option(const char *name, const char *val) |
304 | 0 | { |
305 | 0 | if ( !strcmp(name, "rate") && val ) |
306 | 0 | { |
307 | 0 | usr_sampling_rate = simple_strtoull(val, NULL, 0) * MICROSECS(1); |
308 | 0 | } |
309 | 0 | else if ( !strcmp(name, "up_threshold") && val ) |
310 | 0 | { |
311 | 0 | unsigned long tmp = simple_strtoul(val, NULL, 0); |
312 | 0 |
|
313 | 0 | if ( tmp < MIN_FREQUENCY_UP_THRESHOLD ) |
314 | 0 | { |
315 | 0 | printk(XENLOG_WARNING "cpufreq/ondemand: " |
316 | 0 | "specified threshold too low, using %d\n", |
317 | 0 | MIN_FREQUENCY_UP_THRESHOLD); |
318 | 0 | tmp = MIN_FREQUENCY_UP_THRESHOLD; |
319 | 0 | } |
320 | 0 | else if ( tmp > MAX_FREQUENCY_UP_THRESHOLD ) |
321 | 0 | { |
322 | 0 | printk(XENLOG_WARNING "cpufreq/ondemand: " |
323 | 0 | "specified threshold too high, using %d\n", |
324 | 0 | MAX_FREQUENCY_UP_THRESHOLD); |
325 | 0 | tmp = MAX_FREQUENCY_UP_THRESHOLD; |
326 | 0 | } |
327 | 0 | dbs_tuners_ins.up_threshold = tmp; |
328 | 0 | } |
329 | 0 | else if ( !strcmp(name, "bias") && val ) |
330 | 0 | { |
331 | 0 | unsigned long tmp = simple_strtoul(val, NULL, 0); |
332 | 0 |
|
333 | 0 | if ( tmp > 1000 ) |
334 | 0 | { |
335 | 0 | printk(XENLOG_WARNING "cpufreq/ondemand: " |
336 | 0 | "specified bias too high, using 1000\n"); |
337 | 0 | tmp = 1000; |
338 | 0 | } |
339 | 0 | dbs_tuners_ins.powersave_bias = tmp; |
340 | 0 | } |
341 | 0 | else |
342 | 0 | return 0; |
343 | 0 | return 1; |
344 | 0 | } |
345 | | |
/* Ondemand governor descriptor registered with the cpufreq core. */
struct cpufreq_governor cpufreq_gov_dbs = {
    .name = "ondemand",
    .governor = cpufreq_governor_dbs,
    .handle_option = cpufreq_dbs_handle_option
};
351 | | |
/* Register the ondemand governor with the cpufreq core at boot. */
static int __init cpufreq_gov_dbs_init(void)
{
    return cpufreq_register_governor(&cpufreq_gov_dbs);
}
__initcall(cpufreq_gov_dbs_init);
357 | | |
358 | | void cpufreq_dbs_timer_suspend(void) |
359 | 1.86M | { |
360 | 1.86M | int cpu; |
361 | 1.86M | |
362 | 1.86M | cpu = smp_processor_id(); |
363 | 1.86M | |
364 | 1.86M | if ( per_cpu(cpu_dbs_info,cpu).stoppable ) |
365 | 0 | { |
366 | 0 | stop_timer( &per_cpu(dbs_timer, cpu) ); |
367 | 0 | } |
368 | 1.86M | } |
369 | | |
370 | | void cpufreq_dbs_timer_resume(void) |
371 | 1.79M | { |
372 | 1.79M | int cpu; |
373 | 1.79M | struct timer* t; |
374 | 1.79M | s_time_t now; |
375 | 1.79M | |
376 | 1.79M | cpu = smp_processor_id(); |
377 | 1.79M | |
378 | 1.79M | if ( per_cpu(cpu_dbs_info,cpu).stoppable ) |
379 | 0 | { |
380 | 0 | now = NOW(); |
381 | 0 | t = &per_cpu(dbs_timer, cpu); |
382 | 0 | if (t->expires <= now) |
383 | 0 | { |
384 | 0 | t->function(t->data); |
385 | 0 | } |
386 | 0 | else |
387 | 0 | { |
388 | 0 | set_timer(t, align_timer(now , dbs_tuners_ins.sampling_rate)); |
389 | 0 | } |
390 | 0 | } |
391 | 1.79M | } |