linux-2.6-xen-sparse/arch/xen/i386/kernel/time.c
annotated at changeset 6774:4d899a738d59 (xen-vtx-unstable)
author  cl349@firebug.cl.cam.ac.uk
date    Tue Sep 13 15:05:49 2005 +0000 (2005-09-13)
/*
 *  linux/arch/i386/kernel/time.c
 *
 *  Copyright (C) 1991, 1992, 1995  Linus Torvalds
 *
 * This file contains the PC-specific time handling details:
 * reading the RTC at bootup, etc.
 * 1994-07-02    Alan Modra
 *      fixed set_rtc_mmss, fixed time.year for >= 2000, new mktime
 * 1995-03-26    Markus Kuhn
 *      fixed 500 ms bug at call to set_rtc_mmss, fixed DS12887
 *      precision CMOS clock update
 * 1996-05-03    Ingo Molnar
 *      fixed time warps in do_[slow|fast]_gettimeoffset()
 * 1997-09-10    Updated NTP code according to technical memorandum Jan '96
 *      "A Kernel Model for Precision Timekeeping" by Dave Mills
 * 1998-09-05    (Various)
 *      More robust do_fast_gettimeoffset() algorithm implemented
 *      (works with APM, Cyrix 6x86MX and Centaur C6),
 *      monotonic gettimeofday() with fast_get_timeoffset(),
 *      drift-proof precision TSC calibration on boot
 *      (C. Scott Ananian <cananian@alumni.princeton.edu>, Andrew D.
 *      Balsa <andrebalsa@altern.org>, Philip Gladstone <philip@raptor.com>;
 *      ported from 2.0.35 Jumbo-9 by Michael Krause <m.krause@tu-harburg.de>).
 * 1998-12-16    Andrea Arcangeli
 *      Fixed Jumbo-9 code in 2.1.131: do_gettimeofday was missing 1 jiffy
 *      because it was not accounting for lost_ticks.
 * 1998-12-24    Copyright (C) 1998  Andrea Arcangeli
 *      Fixed an xtime SMP race (we need the xtime_lock rw spinlock to
 *      serialize accesses to xtime/lost_ticks).
 */

#include <linux/errno.h>
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/param.h>
#include <linux/string.h>
#include <linux/mm.h>
#include <linux/interrupt.h>
#include <linux/time.h>
#include <linux/delay.h>
#include <linux/init.h>
#include <linux/smp.h>
#include <linux/module.h>
#include <linux/sysdev.h>
#include <linux/bcd.h>
#include <linux/efi.h>
#include <linux/mca.h>
#include <linux/sysctl.h>
#include <linux/percpu.h>

#include <asm/io.h>
#include <asm/smp.h>
#include <asm/irq.h>
#include <asm/msr.h>
#include <asm/delay.h>
#include <asm/mpspec.h>
#include <asm/uaccess.h>
#include <asm/processor.h>
#include <asm/timer.h>

#include "mach_time.h"

#include <linux/timex.h>
#include <linux/config.h>

#include <asm/hpet.h>

#include <asm/arch_hooks.h>

#include "io_ports.h"

#include <asm-xen/evtchn.h>

extern spinlock_t i8259A_lock;
int pit_latch_buggy;            /* extern */

u64 jiffies_64 = INITIAL_JIFFIES;

EXPORT_SYMBOL(jiffies_64);

#if defined(__x86_64__)
unsigned long vxtime_hz = PIT_TICK_RATE;
struct vxtime_data __vxtime __section_vxtime;   /* for vsyscalls */
volatile unsigned long __jiffies __section_jiffies = INITIAL_JIFFIES;
unsigned long __wall_jiffies __section_wall_jiffies = INITIAL_JIFFIES;
struct timespec __xtime __section_xtime;
struct timezone __sys_tz __section_sys_tz;
#endif

#if defined(__x86_64__)
unsigned int cpu_khz;           /* Detected as we calibrate the TSC */
#else
unsigned long cpu_khz;          /* Detected as we calibrate the TSC */
#endif

extern unsigned long wall_jiffies;

DEFINE_SPINLOCK(rtc_lock);

DEFINE_SPINLOCK(i8253_lock);
EXPORT_SYMBOL(i8253_lock);

extern struct init_timer_opts timer_tsc_init;
extern struct timer_opts timer_tsc;
struct timer_opts *cur_timer = &timer_tsc;

/* These are periodically updated in shared_info, and then copied here. */
struct shadow_time_info {
        u64 tsc_timestamp;     /* TSC at last update of time vals.  */
        u64 system_timestamp;  /* Time, in nanosecs, since boot.    */
        u32 tsc_to_nsec_mul;
        u32 tsc_to_usec_mul;
        int tsc_shift;
        u32 version;
};
static DEFINE_PER_CPU(struct shadow_time_info, shadow_time);
static struct timespec shadow_tv;
static u32 shadow_tv_version;

/* Keep track of last time we did processing/updating of jiffies and xtime. */
static u64 processed_system_time;   /* System time (ns) at last processing. */
static DEFINE_PER_CPU(u64, processed_system_time);

#define NS_PER_TICK (1000000000L/HZ)

static inline void __normalize_time(time_t *sec, s64 *nsec)
{
        while (*nsec >= NSEC_PER_SEC) {
                (*nsec) -= NSEC_PER_SEC;
                (*sec)++;
        }
        while (*nsec < 0) {
                (*nsec) += NSEC_PER_SEC;
                (*sec)--;
        }
}

/* Does this guest OS track Xen time, or set its wall clock independently? */
static int independent_wallclock = 0;
static int __init __independent_wallclock(char *str)
{
        independent_wallclock = 1;
        return 1;
}
__setup("independent_wallclock", __independent_wallclock);
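
/*
 * Usage note (illustrative, not from the original source): with the
 * __setup() hook above, booting the guest kernel with
 * "independent_wallclock" on its command line sets the flag, so
 * settimeofday() updates only this domain's wall clock instead of
 * tracking (or, in the initial domain, driving) Xen's wall clock.
 */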

int tsc_disable __initdata = 0;

static void delay_tsc(unsigned long loops)
{
        unsigned long bclock, now;

        rdtscl(bclock);
        do {
                rep_nop();
                rdtscl(now);
        } while ((now - bclock) < loops);
}

struct timer_opts timer_tsc = {
        .name = "tsc",
        .delay = delay_tsc,
};

/*
 * Scale a 64-bit delta by shifting it and then multiplying by a 32-bit
 * fraction, yielding a 64-bit result.
 */
static inline u64 scale_delta(u64 delta, u32 mul_frac, int shift)
{
        u64 product;
#ifdef __i386__
        u32 tmp1, tmp2;
#endif

        if (shift < 0)
                delta >>= -shift;
        else
                delta <<= shift;

#ifdef __i386__
        __asm__ (
                "mul  %5       ; "
                "mov  %4,%%eax ; "
                "mov  %%edx,%4 ; "
                "mul  %5       ; "
                "add  %4,%%eax ; "
                "xor  %5,%5    ; "
                "adc  %5,%%edx ; "
                : "=A" (product), "=r" (tmp1), "=r" (tmp2)
                : "a" ((u32)delta), "1" ((u32)(delta >> 32)), "2" (mul_frac) );
#else
        __asm__ (
                "mul %%rdx ; shrd $32,%%rdx,%%rax"
                : "=a" (product) : "0" (delta), "d" ((u64)mul_frac) );
#endif

        return product;
}
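
/*
 * For reference, scale_delta() computes
 *
 *      ((delta << shift) * mul_frac) >> 32
 *
 * with a full-width intermediate product. A plain-C sketch, assuming a
 * compiler that provides an unsigned 128-bit type (the asm above exists
 * precisely to avoid that requirement):
 *
 *      return ((unsigned __int128)delta * mul_frac) >> 32;
 *
 * (applied after the shift has already been folded into delta).
 */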

void init_cpu_khz(void)
{
        u64 __cpu_khz = 1000000ULL << 32;
        struct vcpu_time_info *info = &HYPERVISOR_shared_info->vcpu_time[0];
        do_div(__cpu_khz, info->tsc_to_system_mul);
        if (info->tsc_shift < 0)
                cpu_khz = __cpu_khz << -info->tsc_shift;
        else
                cpu_khz = __cpu_khz >> info->tsc_shift;
}
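
/*
 * Why the above yields kHz: Xen's (tsc_to_system_mul, tsc_shift) pair
 * converts TSC ticks to nanoseconds as ns = ((ticks << shift) * mul) >> 32,
 * so ns-per-tick = 2^shift * mul / 2^32 and hence
 * cpu_khz = 10^6 * 2^32 / mul, corrected by tsc_shift in the opposite
 * direction. (A derivation from the code, not from Xen documentation.)
 */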

static u64 get_nsec_offset(struct shadow_time_info *shadow)
{
        u64 now, delta;
        rdtscll(now);
        delta = now - shadow->tsc_timestamp;
        return scale_delta(delta, shadow->tsc_to_nsec_mul, shadow->tsc_shift);
}

static unsigned long get_usec_offset(struct shadow_time_info *shadow)
{
        u64 now, delta;
        rdtscll(now);
        delta = now - shadow->tsc_timestamp;
        return scale_delta(delta, shadow->tsc_to_usec_mul, shadow->tsc_shift);
}

static void __update_wallclock(time_t sec, long nsec)
{
        long wtm_nsec, xtime_nsec;
        time_t wtm_sec, xtime_sec;
        u64 tmp, wc_nsec;

        /* Adjust wall-clock time base based on wall_jiffies ticks. */
        wc_nsec = processed_system_time;
        wc_nsec += (u64)sec * 1000000000ULL;
        wc_nsec += (u64)nsec;
        wc_nsec -= (jiffies - wall_jiffies) * (u64)(NSEC_PER_SEC / HZ);

        /* Split wallclock base into seconds and nanoseconds. */
        tmp = wc_nsec;
        xtime_nsec = do_div(tmp, 1000000000);
        xtime_sec = (time_t)tmp;

        wtm_sec  = wall_to_monotonic.tv_sec + (xtime.tv_sec - xtime_sec);
        wtm_nsec = wall_to_monotonic.tv_nsec + (xtime.tv_nsec - xtime_nsec);

        set_normalized_timespec(&xtime, xtime_sec, xtime_nsec);
        set_normalized_timespec(&wall_to_monotonic, wtm_sec, wtm_nsec);

        time_adjust = 0;                /* stop active adjtime() */
        time_status |= STA_UNSYNC;
        time_maxerror = NTP_PHASE_LIMIT;
        time_esterror = NTP_PHASE_LIMIT;
}
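
/*
 * A reading of the computation above: the new wall clock is
 *
 *      xtime = (sec, nsec) + processed_system_time
 *              - (jiffies - wall_jiffies) * NS_PER_TICK
 *
 * where (sec, nsec) is understood as the wall-clock time at Xen system
 * time zero; processed_system_time carries it forward to the last tick
 * we accounted, and the wall_jiffies term backs out ticks not yet folded
 * into xtime by the generic timer code.
 */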

static void update_wallclock(void)
{
        shared_info_t *s = HYPERVISOR_shared_info;

        do {
                shadow_tv_version = s->wc_version;
                rmb();
                shadow_tv.tv_sec  = s->wc_sec;
                shadow_tv.tv_nsec = s->wc_nsec;
                rmb();
        }
        while ((s->wc_version & 1) | (shadow_tv_version ^ s->wc_version));

        if (!independent_wallclock)
                __update_wallclock(shadow_tv.tv_sec, shadow_tv.tv_nsec);
}

/*
 * Reads a consistent set of time-base values from Xen, into a shadow data
 * area.
 */
static void get_time_values_from_xen(void)
{
        shared_info_t *s = HYPERVISOR_shared_info;
        struct vcpu_time_info *src;
        struct shadow_time_info *dst;

        src = &s->vcpu_time[smp_processor_id()];
        dst = &per_cpu(shadow_time, smp_processor_id());

        do {
                dst->version = src->version;
                rmb();
                dst->tsc_timestamp    = src->tsc_timestamp;
                dst->system_timestamp = src->system_time;
                dst->tsc_to_nsec_mul  = src->tsc_to_system_mul;
                dst->tsc_shift        = src->tsc_shift;
                rmb();
        }
        while ((src->version & 1) | (dst->version ^ src->version));

        dst->tsc_to_usec_mul = dst->tsc_to_nsec_mul / 1000;
}
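
/*
 * The do/while loops above and in update_wallclock() are the reader side
 * of a seqlock-style protocol: the producer (Xen) is assumed to make the
 * version odd before updating the fields and to advance it to a new even
 * value afterwards. A reader retries while the version is odd (an update
 * is in progress) or has changed since the copy began (an update raced
 * with us); either condition makes the copied snapshot inconsistent.
 */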

static inline int time_values_up_to_date(int cpu)
{
        struct vcpu_time_info *src;
        struct shadow_time_info *dst;

        src = &HYPERVISOR_shared_info->vcpu_time[cpu];
        dst = &per_cpu(shadow_time, cpu);

        return (dst->version == src->version);
}

/*
 * This is a special lock that is owned by the CPU and holds the index
 * register we are working with.  It is required for NMI access to the
 * CMOS/RTC registers.  See include/asm-i386/mc146818rtc.h for details.
 */
volatile unsigned long cmos_lock = 0;
EXPORT_SYMBOL(cmos_lock);

/* Routines for accessing the CMOS RAM/RTC. */
unsigned char rtc_cmos_read(unsigned char addr)
{
        unsigned char val;
        lock_cmos_prefix(addr);
        outb_p(addr, RTC_PORT(0));
        val = inb_p(RTC_PORT(1));
        lock_cmos_suffix(addr);
        return val;
}
EXPORT_SYMBOL(rtc_cmos_read);

void rtc_cmos_write(unsigned char val, unsigned char addr)
{
        lock_cmos_prefix(addr);
        outb_p(addr, RTC_PORT(0));
        outb_p(val, RTC_PORT(1));
        lock_cmos_suffix(addr);
}
EXPORT_SYMBOL(rtc_cmos_write);

/*
 * This version of gettimeofday has microsecond resolution
 * and better than microsecond precision on fast x86 machines with TSC.
 */
void do_gettimeofday(struct timeval *tv)
{
        unsigned long seq;
        unsigned long usec, sec;
        unsigned long max_ntp_tick;
        s64 nsec;
        unsigned int cpu;
        struct shadow_time_info *shadow;
        u32 local_time_version;

        cpu = get_cpu();
        shadow = &per_cpu(shadow_time, cpu);

        do {
                unsigned long lost;

                local_time_version = shadow->version;
                seq = read_seqbegin(&xtime_lock);

                usec = get_usec_offset(shadow);
                lost = jiffies - wall_jiffies;

                /*
                 * If time_adjust is negative then NTP is slowing the clock,
                 * so make sure not to go into the next possible interval.
                 * Better to lose some accuracy than have time go backwards.
                 */
                if (unlikely(time_adjust < 0)) {
                        max_ntp_tick = (USEC_PER_SEC / HZ) - tickadj;
                        usec = min(usec, max_ntp_tick);

                        if (lost)
                                usec += lost * max_ntp_tick;
                }
                else if (unlikely(lost))
                        usec += lost * (USEC_PER_SEC / HZ);

                sec = xtime.tv_sec;
                usec += (xtime.tv_nsec / NSEC_PER_USEC);

                nsec = shadow->system_timestamp - processed_system_time;
                __normalize_time(&sec, &nsec);
                usec += (long)nsec / NSEC_PER_USEC;

                if (unlikely(!time_values_up_to_date(cpu))) {
                        /*
                         * We may have blocked for a long time,
                         * rendering our calculations invalid
                         * (e.g. the time delta may have
                         * overflowed). Detect that and recalculate
                         * with fresh values.
                         */
                        get_time_values_from_xen();
                        continue;
                }
        } while (read_seqretry(&xtime_lock, seq) ||
                 (local_time_version != shadow->version));

        put_cpu();

        while (usec >= USEC_PER_SEC) {
                usec -= USEC_PER_SEC;
                sec++;
        }

        tv->tv_sec = sec;
        tv->tv_usec = usec;
}

EXPORT_SYMBOL(do_gettimeofday);

int do_settimeofday(struct timespec *tv)
{
        time_t sec;
        s64 nsec;
        unsigned int cpu;
        struct shadow_time_info *shadow;
        dom0_op_t op;

        if ((unsigned long)tv->tv_nsec >= NSEC_PER_SEC)
                return -EINVAL;

        cpu = get_cpu();
        shadow = &per_cpu(shadow_time, cpu);

        write_seqlock_irq(&xtime_lock);

        /*
         * If we were blocked for a long time our time delta may have
         * overflowed, leaving the shadow time values stale. Retry with
         * fresh values until they are up to date.
         */
        for ( ; ; ) {
                nsec = (s64)tv->tv_nsec - (s64)get_nsec_offset(shadow);
                if (time_values_up_to_date(cpu))
                        break;
                get_time_values_from_xen();
        }
        sec = tv->tv_sec;
        __normalize_time(&sec, &nsec);

        if ((xen_start_info->flags & SIF_INITDOMAIN) &&
            !independent_wallclock) {
                op.cmd = DOM0_SETTIME;
                op.u.settime.secs        = sec;
                op.u.settime.nsecs       = nsec;
                op.u.settime.system_time = shadow->system_timestamp;
                HYPERVISOR_dom0_op(&op);
                update_wallclock();
        } else if (independent_wallclock) {
                nsec -= shadow->system_timestamp;
                __normalize_time(&sec, &nsec);
                __update_wallclock(sec, nsec);
        }

        write_sequnlock_irq(&xtime_lock);

        put_cpu();

        clock_was_set();
        return 0;
}

EXPORT_SYMBOL(do_settimeofday);

#ifdef CONFIG_XEN_PRIVILEGED_GUEST
static int set_rtc_mmss(unsigned long nowtime)
{
        int retval;

        WARN_ON(irqs_disabled());

        if (!(xen_start_info->flags & SIF_INITDOMAIN))
                return 0;

        /* gets recalled with irq locally disabled */
        spin_lock_irq(&rtc_lock);
        if (efi_enabled)
                retval = efi_set_rtc_mmss(nowtime);
        else
                retval = mach_set_rtc_mmss(nowtime);
        spin_unlock_irq(&rtc_lock);

        return retval;
}
#else
static int set_rtc_mmss(unsigned long nowtime)
{
        return 0;
}
#endif

/*
 * monotonic_clock(): returns # of nanoseconds passed since time_init()
 * Note: This function is required to return accurate
 * time even in the absence of multiple timer ticks.
 */
unsigned long long monotonic_clock(void)
{
        int cpu = get_cpu();
        struct shadow_time_info *shadow = &per_cpu(shadow_time, cpu);
        u64 time;
        u32 local_time_version;

        do {
                local_time_version = shadow->version;
                smp_rmb();
                time = shadow->system_timestamp + get_nsec_offset(shadow);
                if (!time_values_up_to_date(cpu))
                        get_time_values_from_xen();
                smp_rmb();
        } while (local_time_version != shadow->version);

        put_cpu();

        return time;
}
EXPORT_SYMBOL(monotonic_clock);

unsigned long long sched_clock(void)
{
        return monotonic_clock();
}

#if defined(CONFIG_SMP) && defined(CONFIG_FRAME_POINTER)
unsigned long profile_pc(struct pt_regs *regs)
{
        unsigned long pc = instruction_pointer(regs);

        if (in_lock_functions(pc))
                return *(unsigned long *)(regs->ebp + 4);

        return pc;
}
EXPORT_SYMBOL(profile_pc);
#endif

irqreturn_t timer_interrupt(int irq, void *dev_id, struct pt_regs *regs)
{
        s64 delta, delta_cpu;
        int i, cpu = smp_processor_id();
        struct shadow_time_info *shadow = &per_cpu(shadow_time, cpu);

        write_seqlock(&xtime_lock);

        do {
                get_time_values_from_xen();

                delta = delta_cpu =
                        shadow->system_timestamp + get_nsec_offset(shadow);
                delta     -= processed_system_time;
                delta_cpu -= per_cpu(processed_system_time, cpu);
        }
        while (!time_values_up_to_date(cpu));

        if (unlikely(delta < (s64)-1000000) || unlikely(delta_cpu < 0)) {
                printk("Timer ISR/%d: Time went backwards: "
                       "delta=%lld cpu_delta=%lld shadow=%lld "
                       "off=%lld processed=%lld cpu_processed=%lld\n",
                       cpu, delta, delta_cpu, shadow->system_timestamp,
                       (s64)get_nsec_offset(shadow),
                       processed_system_time,
                       per_cpu(processed_system_time, cpu));
                for (i = 0; i < num_online_cpus(); i++)
                        printk(" %d: %lld\n", i,
                               per_cpu(processed_system_time, i));
        }

        /* System-wide jiffy work. */
        while (delta >= NS_PER_TICK) {
                delta -= NS_PER_TICK;
                processed_system_time += NS_PER_TICK;
                do_timer(regs);
        }

        if (shadow_tv_version != HYPERVISOR_shared_info->wc_version) {
                update_wallclock();
                clock_was_set();
        }

        write_sequnlock(&xtime_lock);

        /*
         * Local CPU jiffy work. No need to hold xtime_lock, and I'm not sure
         * if there is risk of deadlock if we do (since update_process_times
         * may do scheduler rebalancing work and thus acquire runqueue locks).
         */
        while (delta_cpu >= NS_PER_TICK) {
                delta_cpu -= NS_PER_TICK;
                per_cpu(processed_system_time, cpu) += NS_PER_TICK;
                update_process_times(user_mode(regs));
                profile_tick(CPU_PROFILING, regs);
        }

        return IRQ_HANDLED;
}

/* not static: needed by APM */
unsigned long get_cmos_time(void)
{
        unsigned long retval;

        spin_lock(&rtc_lock);

        if (efi_enabled)
                retval = efi_get_time();
        else
                retval = mach_get_cmos_time();

        spin_unlock(&rtc_lock);

        return retval;
}

static void sync_cmos_clock(unsigned long dummy);

static struct timer_list sync_cmos_timer =
        TIMER_INITIALIZER(sync_cmos_clock, 0, 0);

static void sync_cmos_clock(unsigned long dummy)
{
        struct timeval now, next;
        int fail = 1;

        /*
         * If we have an externally synchronized Linux clock, then update the
         * CMOS clock accordingly every ~11 minutes. set_rtc_mmss() has to be
         * called as close as possible to 500 ms before the new second starts.
         * This code is run on a timer.  If the clock is set, that timer
         * may not expire at the correct time.  Thus, we adjust...
         */
        if ((time_status & STA_UNSYNC) != 0)
                /*
                 * Not synced, exit, do not restart a timer (if one is
                 * running, let it run out).
                 */
                return;

        do_gettimeofday(&now);
        if (now.tv_usec >= USEC_AFTER - ((unsigned) TICK_SIZE) / 2 &&
            now.tv_usec <= USEC_BEFORE + ((unsigned) TICK_SIZE) / 2)
                fail = set_rtc_mmss(now.tv_sec);

        next.tv_usec = USEC_AFTER - now.tv_usec;
        if (next.tv_usec <= 0)
                next.tv_usec += USEC_PER_SEC;

        if (!fail)
                next.tv_sec = 659;
        else
                next.tv_sec = 0;

        if (next.tv_usec >= USEC_PER_SEC) {
                next.tv_sec++;
                next.tv_usec -= USEC_PER_SEC;
        }
        mod_timer(&sync_cmos_timer, jiffies + timeval_to_jiffies(&next));
}

void notify_arch_cmos_timer(void)
{
        mod_timer(&sync_cmos_timer, jiffies + 1);
}

static long clock_cmos_diff, sleep_start;

static int timer_suspend(struct sys_device *dev, pm_message_t state)
{
        /*
         * Estimate time zone so that set_time can update the clock
         */
        clock_cmos_diff = -get_cmos_time();
        clock_cmos_diff += get_seconds();
        sleep_start = get_cmos_time();
        return 0;
}

static int timer_resume(struct sys_device *dev)
{
        unsigned long flags;
        unsigned long sec;
        unsigned long sleep_length;

#ifdef CONFIG_HPET_TIMER
        if (is_hpet_enabled())
                hpet_reenable();
#endif
        sec = get_cmos_time() + clock_cmos_diff;
        sleep_length = (get_cmos_time() - sleep_start) * HZ;
        write_seqlock_irqsave(&xtime_lock, flags);
        xtime.tv_sec = sec;
        xtime.tv_nsec = 0;
        write_sequnlock_irqrestore(&xtime_lock, flags);
        jiffies += sleep_length;
        wall_jiffies += sleep_length;
        return 0;
}

static struct sysdev_class timer_sysclass = {
        .resume  = timer_resume,
        .suspend = timer_suspend,
        set_kset_name("timer"),
};

/* XXX this driverfs stuff should probably go elsewhere later -john */
static struct sys_device device_timer = {
        .id  = 0,
        .cls = &timer_sysclass,
};

static int time_init_device(void)
{
        int error = sysdev_class_register(&timer_sysclass);
        if (!error)
                error = sysdev_register(&device_timer);
        return error;
}

device_initcall(time_init_device);

#ifdef CONFIG_HPET_TIMER
extern void (*late_time_init)(void);
/* Duplicate of time_init() below, with hpet_enable part added */
static void __init hpet_time_init(void)
{
        xtime.tv_sec = get_cmos_time();
        xtime.tv_nsec = (INITIAL_JIFFIES % HZ) * (NSEC_PER_SEC / HZ);
        set_normalized_timespec(&wall_to_monotonic,
                                -xtime.tv_sec, -xtime.tv_nsec);

        if ((hpet_enable() >= 0) && hpet_use_timer) {
                printk("Using HPET for base-timer\n");
        }

        cur_timer = select_timer();
        printk(KERN_INFO "Using %s for high-res timesource\n", cur_timer->name);

        time_init_hook();
}
#endif

/* Dynamically-mapped IRQ. */
DEFINE_PER_CPU(int, timer_irq);

static struct irqaction irq_timer = {
        timer_interrupt, SA_INTERRUPT, CPU_MASK_NONE, "timer0",
        NULL, NULL
};

void __init time_init(void)
{
#ifdef CONFIG_HPET_TIMER
        if (is_hpet_capable()) {
                /*
                 * HPET initialization needs to do memory-mapped io. So, let
                 * us do a late initialization after mem_init().
                 */
                late_time_init = hpet_time_init;
                return;
        }
#endif
        get_time_values_from_xen();

        processed_system_time = per_cpu(shadow_time, 0).system_timestamp;
        per_cpu(processed_system_time, 0) = processed_system_time;

        update_wallclock();

        init_cpu_khz();
        printk(KERN_INFO "Xen reported: %lu.%03lu MHz processor.\n",
               cpu_khz / 1000, cpu_khz % 1000);

#if defined(__x86_64__)
        vxtime.mode = VXTIME_TSC;
        vxtime.quot = (1000000L << 32) / vxtime_hz;
        vxtime.tsc_quot = (1000L << 32) / cpu_khz;
        vxtime.hz = vxtime_hz;
        sync_core();
        rdtscll(vxtime.last_tsc);
#endif

        per_cpu(timer_irq, 0) = bind_virq_to_irq(VIRQ_TIMER);
        (void)setup_irq(per_cpu(timer_irq, 0), &irq_timer);
}

/* Convert jiffies to system time. */
static inline u64 jiffies_to_st(unsigned long j)
{
        unsigned long seq;
        long delta;
        u64 st;

        do {
                seq = read_seqbegin(&xtime_lock);
                delta = j - jiffies;
                /* NB. The next check can trigger in some wrap-around cases,
                 * but that's ok: we'll just end up with a shorter timeout. */
                if (delta < 1)
                        delta = 1;
                st = processed_system_time + (delta * NS_PER_TICK);
        } while (read_seqretry(&xtime_lock, seq));

        return st;
}
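
/*
 * Illustrative example: with HZ=100, NS_PER_TICK is 10^7 ns, so a wakeup
 * target of j = jiffies + 5 maps to processed_system_time + 5 * 10^7 ns
 * of Xen system time -- the value stop_hz_timer() below hands to
 * HYPERVISOR_set_timer_op().
 */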

/*
 * stop_hz_timer / start_hz_timer - enter/exit 'tickless mode' on an idle cpu
 * These functions are based on implementations from arch/s390/kernel/time.c
 */
void stop_hz_timer(void)
{
        unsigned int cpu = smp_processor_id();
        unsigned long j;

        /* s390 does this /before/ checking rcu_pending(). We copy that. */
        cpu_set(cpu, nohz_cpu_mask);

        /* Leave ourselves in 'tick mode' if rcu or softirq pending. */
        if (rcu_pending(cpu) || local_softirq_pending()) {
                cpu_clear(cpu, nohz_cpu_mask);
                j = jiffies + 1;
        } else {
                j = next_timer_interrupt();
        }

        BUG_ON(HYPERVISOR_set_timer_op(jiffies_to_st(j)) != 0);
}

void start_hz_timer(void)
{
        cpu_clear(smp_processor_id(), nohz_cpu_mask);
}

void time_suspend(void)
{
        teardown_irq(per_cpu(timer_irq, 0), &irq_timer);
        unbind_virq_from_irq(VIRQ_TIMER);
}

/* No locking required. We are the only CPU running, and interrupts are off. */
void time_resume(void)
{
        init_cpu_khz();

        get_time_values_from_xen();

        processed_system_time = per_cpu(shadow_time, 0).system_timestamp;
        per_cpu(processed_system_time, 0) = processed_system_time;

        update_wallclock();

        per_cpu(timer_irq, 0) = bind_virq_to_irq(VIRQ_TIMER);
        (void)setup_irq(per_cpu(timer_irq, 0), &irq_timer);
}

#ifdef CONFIG_SMP
static char timer_name[NR_CPUS][15];

void local_setup_timer_irq(void)
{
        int cpu = smp_processor_id();

        if (cpu == 0)
                return;
        per_cpu(timer_irq, cpu) = bind_virq_to_irq(VIRQ_TIMER);
        sprintf(timer_name[cpu], "timer%d", cpu);
        BUG_ON(request_irq(per_cpu(timer_irq, cpu), timer_interrupt,
                           SA_INTERRUPT, timer_name[cpu], NULL));
}

void local_setup_timer(void)
{
        int seq, cpu = smp_processor_id();

        do {
                seq = read_seqbegin(&xtime_lock);
                per_cpu(processed_system_time, cpu) =
                        per_cpu(shadow_time, cpu).system_timestamp;
        } while (read_seqretry(&xtime_lock, seq));

        local_setup_timer_irq();
}

void local_teardown_timer_irq(void)
{
        int cpu = smp_processor_id();

        if (cpu == 0)
                return;
        free_irq(per_cpu(timer_irq, cpu), NULL);
        unbind_virq_from_irq(VIRQ_TIMER);
}
#endif

/*
 * /proc/sys/xen: This really belongs in another file. It can stay here for
 * now however.
 */
static ctl_table xen_subtable[] = {
        {1, "independent_wallclock", &independent_wallclock,
         sizeof(independent_wallclock), 0644, NULL, proc_dointvec},
        {0}
};
static ctl_table xen_table[] = {
        {123, "xen", NULL, 0, 0555, xen_subtable},
        {0}
};
static int __init xen_sysctl_init(void)
{
        (void)register_sysctl_table(xen_table, 0);
        return 0;
}
__initcall(xen_sysctl_init);
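
/*
 * With the table above registered, the flag can also be flipped at run
 * time (an illustrative example, not taken from the original source):
 *
 *      echo 1 > /proc/sys/xen/independent_wallclock
 *
 * with the same effect as booting with the "independent_wallclock"
 * parameter.
 */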

/*
 * Local variables:
 *  c-file-style: "linux"
 *  indent-tabs-mode: t
 *  c-indent-level: 8
 *  c-basic-offset: 8
 *  tab-width: 8
 * End:
 */