/*
 * Copyright 2006 Andi Kleen, SUSE Labs.
 * Subject to the GNU Public License, v.2
 *
 * Fast user context implementation of clock_gettime, gettimeofday, and time.
 *
 * 32 Bit compat layer by Stefani Seibold <stefani@seibold.net>
 *  sponsored by Rohde & Schwarz GmbH & Co. KG Munich/Germany
 *
 * The code should have no internal unresolved relocations.
 * Check with readelf after changing.
 */

#include <uapi/linux/time.h>
#include <asm/vgtod.h>
#include <asm/hpet.h>
#include <asm/vvar.h>
#include <asm/unistd.h>
#include <asm/msr.h>
#include <linux/math64.h>
#include <linux/time.h>

#define gtod (&VVAR(vsyscall_gtod_data))

extern int __vdso_clock_gettime(clockid_t clock, struct timespec *ts);
extern int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz);
extern time_t __vdso_time(time_t *t);

#ifdef CONFIG_HPET_TIMER
extern u8 hpet_page
	__attribute__((visibility("hidden")));

static notrace cycle_t vread_hpet(void)
{
	return *(const volatile u32 *)(&hpet_page + HPET_COUNTER);
}
#endif

#ifndef BUILD_VDSO32

#include <linux/kernel.h>
#include <asm/vsyscall.h>
#include <asm/fixmap.h>
#include <asm/pvclock.h>

notrace static long vdso_fallback_gettime(long clock, struct timespec *ts)
{
	long ret;
	asm("syscall" : "=a" (ret) :
	    "0" (__NR_clock_gettime), "D" (clock), "S" (ts) : "memory");
	return ret;
}

notrace static long vdso_fallback_gtod(struct timeval *tv, struct timezone *tz)
{
	long ret;

	asm("syscall" : "=a" (ret) :
	    "0" (__NR_gettimeofday), "D" (tv), "S" (tz) : "memory");
	return ret;
}

#ifdef CONFIG_PARAVIRT_CLOCK

static notrace const struct pvclock_vsyscall_time_info *get_pvti(int cpu)
{
	const struct pvclock_vsyscall_time_info *pvti_base;
	int idx = cpu / (PAGE_SIZE/PVTI_SIZE);
	int offset = cpu % (PAGE_SIZE/PVTI_SIZE);

	BUG_ON(PVCLOCK_FIXMAP_BEGIN + idx > PVCLOCK_FIXMAP_END);

	pvti_base = (struct pvclock_vsyscall_time_info *)
		    __fix_to_virt(PVCLOCK_FIXMAP_BEGIN+idx);

	return &pvti_base[offset];
}

static notrace cycle_t vread_pvclock(int *mode)
{
	const struct pvclock_vsyscall_time_info *pvti;
	cycle_t ret;
	u64 last;
	u32 version;
	u8 flags;
	unsigned cpu, cpu1;

	/*
	 * Note: hypervisor must guarantee that:
	 * 1. cpu ID number maps 1:1 to per-CPU pvclock time info.
	 * 2. that per-CPU pvclock time info is updated if the
	 *    underlying CPU changes.
	 * 3. that version is increased whenever underlying CPU
	 *    changes.
	 */
	do {
		cpu = __getcpu() & VGETCPU_CPU_MASK;
		/* TODO: We can put vcpu id into higher bits of pvti.version.
		 * This will save a couple of cycles by getting rid of
		 * __getcpu() calls (Gleb).
		 */

		pvti = get_pvti(cpu);

		version = __pvclock_read_cycles(&pvti->pvti, &ret, &flags);

		/*
		 * Test we're still on the cpu as well as the version.
		 * We could have been migrated just after the first
		 * vgetcpu but before fetching the version, so we
		 * wouldn't notice a version change.
		 */
		cpu1 = __getcpu() & VGETCPU_CPU_MASK;
	} while (unlikely(cpu != cpu1 ||
			  (pvti->pvti.version & 1) ||
			  pvti->pvti.version != version));

	if (unlikely(!(flags & PVCLOCK_TSC_STABLE_BIT)))
		*mode = VCLOCK_NONE;

	/* refer to tsc.c read_tsc() comment for rationale */
	last = gtod->cycle_last;

	if (likely(ret >= last))
		return ret;

	return last;
}
#endif

#else

/*
 * The 32-bit fallbacks go through __kernel_vsyscall.  The first syscall
 * argument lives in %ebx, but %ebx may be reserved as the PIC register
 * and cannot be named in the constraint list, so save it in %edx, load
 * the argument by hand and restore it after the call.
 */
notrace static long vdso_fallback_gettime(long clock, struct timespec *ts)
{
	long ret;

	asm(
		"mov %%ebx, %%edx \n"
		"mov %2, %%ebx \n"
		"call __kernel_vsyscall \n"
		"mov %%edx, %%ebx \n"
		: "=a" (ret)
		: "0" (__NR_clock_gettime), "g" (clock), "c" (ts)
		: "memory", "edx");
	return ret;
}

notrace static long vdso_fallback_gtod(struct timeval *tv, struct timezone *tz)
{
	long ret;

	asm(
		"mov %%ebx, %%edx \n"
		"mov %2, %%ebx \n"
		"call __kernel_vsyscall \n"
		"mov %%edx, %%ebx \n"
		: "=a" (ret)
		: "0" (__NR_gettimeofday), "g" (tv), "c" (tz)
		: "memory", "edx");
	return ret;
}

#ifdef CONFIG_PARAVIRT_CLOCK

static notrace cycle_t vread_pvclock(int *mode)
{
	*mode = VCLOCK_NONE;
	return 0;
}
#endif

#endif

notrace static cycle_t vread_tsc(void)
{
	cycle_t ret;
	u64 last;

	/*
	 * Empirically, a fence (of type that depends on the CPU)
	 * before rdtsc is enough to ensure that rdtsc is ordered
	 * with respect to loads.  The various CPU manuals are unclear
	 * as to whether rdtsc can be reordered with later loads,
	 * but no one has ever seen it happen.
	 */
	rdtsc_barrier();
	ret = (cycle_t)__native_read_tsc();

	last = gtod->cycle_last;

	if (likely(ret >= last))
		return ret;

	/*
	 * GCC likes to generate cmov here, but this branch is extremely
	 * predictable (it's just a function of time and the likely is
	 * very likely) and there's a data dependence, so force GCC
	 * to generate a branch instead.  I don't barrier() because
	 * we don't actually need a barrier, and if this function
	 * ever gets inlined it will generate worse code.
	 */
	asm volatile ("");
	return last;
}

/* Return the mult-scaled (not yet shifted) delta since gtod->cycle_last. */
notrace static inline u64 vgetsns(int *mode)
{
	u64 v;
	cycles_t cycles;

	if (gtod->vclock_mode == VCLOCK_TSC)
		cycles = vread_tsc();
#ifdef CONFIG_HPET_TIMER
	else if (gtod->vclock_mode == VCLOCK_HPET)
		cycles = vread_hpet();
#endif
#ifdef CONFIG_PARAVIRT_CLOCK
	else if (gtod->vclock_mode == VCLOCK_PVCLOCK)
		cycles = vread_pvclock(mode);
#endif
	else
		return 0;
	v = (cycles - gtod->cycle_last) & gtod->mask;
	return v * gtod->mult;
}

/* Code size doesn't matter (vdso is 4k anyway) and this is faster. */
notrace static int __always_inline do_realtime(struct timespec *ts)
{
	unsigned long seq;
	u64 ns;
	int mode;

	do {
		seq = gtod_read_begin(gtod);
		mode = gtod->vclock_mode;
		ts->tv_sec = gtod->wall_time_sec;
		ns = gtod->wall_time_snsec;
		ns += vgetsns(&mode);
		ns >>= gtod->shift;
	} while (unlikely(gtod_read_retry(gtod, seq)));

	ts->tv_sec += __iter_div_u64_rem(ns, NSEC_PER_SEC, &ns);
	ts->tv_nsec = ns;

	return mode;
}

notrace static int __always_inline do_monotonic(struct timespec *ts)
{
	unsigned long seq;
	u64 ns;
	int mode;

	do {
		seq = gtod_read_begin(gtod);
		mode = gtod->vclock_mode;
		ts->tv_sec = gtod->monotonic_time_sec;
		ns = gtod->monotonic_time_snsec;
		ns += vgetsns(&mode);
		ns >>= gtod->shift;
	} while (unlikely(gtod_read_retry(gtod, seq)));

	ts->tv_sec += __iter_div_u64_rem(ns, NSEC_PER_SEC, &ns);
	ts->tv_nsec = ns;

	return mode;
}

notrace static void do_realtime_coarse(struct timespec *ts)
{
	unsigned long seq;
	do {
		seq = gtod_read_begin(gtod);
		ts->tv_sec = gtod->wall_time_coarse_sec;
		ts->tv_nsec = gtod->wall_time_coarse_nsec;
	} while (unlikely(gtod_read_retry(gtod, seq)));
}

notrace static void do_monotonic_coarse(struct timespec *ts)
{
	unsigned long seq;
	do {
		seq = gtod_read_begin(gtod);
		ts->tv_sec = gtod->monotonic_time_coarse_sec;
		ts->tv_nsec = gtod->monotonic_time_coarse_nsec;
	} while (unlikely(gtod_read_retry(gtod, seq)));
}

notrace int __vdso_clock_gettime(clockid_t clock, struct timespec *ts)
{
	switch (clock) {
	case CLOCK_REALTIME:
		if (do_realtime(ts) == VCLOCK_NONE)
			goto fallback;
		break;
	case CLOCK_MONOTONIC:
		if (do_monotonic(ts) == VCLOCK_NONE)
			goto fallback;
		break;
	case CLOCK_REALTIME_COARSE:
		do_realtime_coarse(ts);
		break;
	case CLOCK_MONOTONIC_COARSE:
		do_monotonic_coarse(ts);
		break;
	default:
		goto fallback;
	}

	return 0;
fallback:
	return vdso_fallback_gettime(clock, ts);
}
int clock_gettime(clockid_t, struct timespec *)
	__attribute__((weak, alias("__vdso_clock_gettime")));

notrace int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz)
{
	if (likely(tv != NULL)) {
		if (unlikely(do_realtime((struct timespec *)tv) == VCLOCK_NONE))
			return vdso_fallback_gtod(tv, tz);
		tv->tv_usec /= 1000;
	}
	if (unlikely(tz != NULL)) {
		tz->tz_minuteswest = gtod->tz_minuteswest;
		tz->tz_dsttime = gtod->tz_dsttime;
	}

	return 0;
}
int gettimeofday(struct timeval *, struct timezone *)
	__attribute__((weak, alias("__vdso_gettimeofday")));

/*
 * This will break when the xtime seconds get inaccurate, but that is
 * unlikely
 */
notrace time_t __vdso_time(time_t *t)
{
	/* This is atomic on x86 so we don't need any locks. */
	time_t result = ACCESS_ONCE(gtod->wall_time_sec);

	if (t)
		*t = result;
	return result;
}
int time(time_t *t)
	__attribute__((weak, alias("__vdso_time")));
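
/*
 * Usage sketch (illustrative only; not built as part of the vDSO): a
 * plain userspace program calling clock_gettime().  Assuming a vDSO-aware
 * libc such as glibc, the call resolves to __vdso_clock_gettime() above
 * and, while a usable clocksource (TSC, HPET or pvclock) is exported,
 * returns without entering the kernel; otherwise the syscall fallback
 * path is taken.
 *
 *	#include <stdio.h>
 *	#include <time.h>
 *
 *	int main(void)
 *	{
 *		struct timespec ts;
 *
 *		if (clock_gettime(CLOCK_MONOTONIC, &ts))
 *			return 1;
 *		printf("%lld.%09ld\n", (long long)ts.tv_sec, ts.tv_nsec);
 *		return 0;
 *	}
 */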