/*
 * Copyright 2006 Andi Kleen, SUSE Labs.
 * Subject to the GNU Public License, v.2
 *
 * Fast user context implementation of clock_gettime, gettimeofday, and time.
 *
 * 32 Bit compat layer by Stefani Seibold <stefani@seibold.net>
 *  sponsored by Rohde & Schwarz GmbH & Co. KG Munich/Germany
 *
 * The code should have no internal unresolved relocations.
 * Check with readelf after changing.
 */

#include <uapi/linux/time.h>
#include <asm/vgtod.h>
#include <asm/hpet.h>
#include <asm/vvar.h>
#include <asm/unistd.h>
#include <asm/msr.h>
#include <linux/math64.h>
#include <linux/time.h>

#define gtod (&VVAR(vsyscall_gtod_data))

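/*
 * Entry points exported by the vDSO.  The C library is expected to
 * resolve clock_gettime()/gettimeofday()/time() to these (or to the
 * weak aliases at the bottom of this file), so the common case never
 * enters the kernel.
 */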
extern int __vdso_clock_gettime(clockid_t clock, struct timespec *ts);
extern int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz);
extern time_t __vdso_time(time_t *t);

#ifdef CONFIG_HPET_TIMER
extern u8 hpet_page
	__attribute__((visibility("hidden")));

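/*
 * Read the 32-bit HPET main counter through the hpet_page mapping set up
 * by the vDSO; the hidden visibility keeps the reference free of
 * run-time relocations.
 */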
static notrace cycle_t vread_hpet(void)
{
	return *(const volatile u32 *)(&hpet_page + HPET_COUNTER);
}
#endif

#ifndef BUILD_VDSO32

#include <linux/kernel.h>
#include <asm/vsyscall.h>
#include <asm/fixmap.h>
#include <asm/pvclock.h>

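/*
 * Fall back to the real system calls, issued directly with the "syscall"
 * instruction, when no usable vclock source is available or the clock id
 * is not handled in the vDSO.
 */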
notrace static long vdso_fallback_gettime(long clock, struct timespec *ts)
{
	long ret;
	asm("syscall" : "=a" (ret) :
	    "0" (__NR_clock_gettime), "D" (clock), "S" (ts) : "memory");
	return ret;
}

notrace static long vdso_fallback_gtod(struct timeval *tv, struct timezone *tz)
{
	long ret;

	asm("syscall" : "=a" (ret) :
	    "0" (__NR_gettimeofday), "D" (tv), "S" (tz) : "memory");
	return ret;
}

#ifdef CONFIG_PARAVIRT_CLOCK

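/*
 * Look up the pvclock time info for a given CPU: the per-CPU
 * pvclock_vsyscall_time_info structures are packed PAGE_SIZE/PVTI_SIZE
 * per page in the PVCLOCK fixmap range.
 */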
static notrace const struct pvclock_vsyscall_time_info *get_pvti(int cpu)
{
	const struct pvclock_vsyscall_time_info *pvti_base;
	int idx = cpu / (PAGE_SIZE/PVTI_SIZE);
	int offset = cpu % (PAGE_SIZE/PVTI_SIZE);

	BUG_ON(PVCLOCK_FIXMAP_BEGIN + idx > PVCLOCK_FIXMAP_END);

	pvti_base = (struct pvclock_vsyscall_time_info *)
		    __fix_to_virt(PVCLOCK_FIXMAP_BEGIN+idx);

	return &pvti_base[offset];
}

static notrace cycle_t vread_pvclock(int *mode)
{
	const struct pvclock_vsyscall_time_info *pvti;
	cycle_t ret;
	u64 last;
	u32 version;
	u8 flags;
	unsigned cpu, cpu1;

	/*
	 * Note: the hypervisor must guarantee that:
	 * 1. the CPU ID number maps 1:1 to per-CPU pvclock time info;
	 * 2. the per-CPU pvclock time info is updated if the
	 *    underlying CPU changes;
	 * 3. the version is increased whenever the underlying CPU
	 *    changes.
	 */
	do {
		cpu = __getcpu() & VGETCPU_CPU_MASK;
		/*
		 * TODO: We can put vcpu id into higher bits of pvti.version.
		 * This will save a couple of cycles by getting rid of
		 * __getcpu() calls (Gleb).
		 */

		pvti = get_pvti(cpu);

		version = __pvclock_read_cycles(&pvti->pvti, &ret, &flags);

		/*
		 * Test we're still on the cpu as well as the version.
		 * We could have been migrated just after the first
		 * vgetcpu but before fetching the version, so we
		 * wouldn't notice a version change.
		 */
		cpu1 = __getcpu() & VGETCPU_CPU_MASK;
	} while (unlikely(cpu != cpu1 ||
			  (pvti->pvti.version & 1) ||
			  pvti->pvti.version != version));

	if (unlikely(!(flags & PVCLOCK_TSC_STABLE_BIT)))
		*mode = VCLOCK_NONE;

	/* refer to tsc.c read_tsc() comment for rationale */
	last = gtod->cycle_last;

	if (likely(ret >= last))
		return ret;

	return last;
}
#endif

#else

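/*
 * 32-bit fallbacks go through __kernel_vsyscall.  %ebx may be in use as
 * the PIC register, so rather than naming it in the asm constraints it
 * is saved by hand in %edx (which is clobbered anyway), loaded with the
 * first argument, and restored after the call.
 */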
notrace static long vdso_fallback_gettime(long clock, struct timespec *ts)
{
	long ret;

	asm(
		"mov %%ebx, %%edx \n"
		"mov %2, %%ebx \n"
		"call __kernel_vsyscall \n"
		"mov %%edx, %%ebx \n"
		: "=a" (ret)
		: "0" (__NR_clock_gettime), "g" (clock), "c" (ts)
		: "memory", "edx");
	return ret;
}

notrace static long vdso_fallback_gtod(struct timeval *tv, struct timezone *tz)
{
	long ret;

	asm(
		"mov %%ebx, %%edx \n"
		"mov %2, %%ebx \n"
		"call __kernel_vsyscall \n"
		"mov %%edx, %%ebx \n"
		: "=a" (ret)
		: "0" (__NR_gettimeofday), "g" (tv), "c" (tz)
		: "memory", "edx");
	return ret;
}

#ifdef CONFIG_PARAVIRT_CLOCK

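/* The 32-bit vDSO has no pvclock mapping; force the syscall fallback. */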
static notrace cycle_t vread_pvclock(int *mode)
{
	*mode = VCLOCK_NONE;
	return 0;
}
#endif

#endif

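/*
 * Read the TSC, but never return a value below gtod->cycle_last, so that
 * time cannot appear to go backwards across CPUs whose TSCs are slightly
 * offset.
 */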
notrace static cycle_t vread_tsc(void)
{
	cycle_t ret;
	u64 last;

	/*
	 * Empirically, a fence (of type that depends on the CPU)
	 * before rdtsc is enough to ensure that rdtsc is ordered
	 * with respect to loads.  The various CPU manuals are unclear
	 * as to whether rdtsc can be reordered with later loads,
	 * but no one has ever seen it happen.
	 */
	rdtsc_barrier();
	ret = (cycle_t)__native_read_tsc();

	last = gtod->cycle_last;

	if (likely(ret >= last))
		return ret;

	/*
	 * GCC likes to generate cmov here, but this branch is extremely
	 * predictable (it's just a function of time and the likely is
	 * very likely) and there's a data dependence, so force GCC
	 * to generate a branch instead.  I don't barrier() because
	 * we don't actually need a barrier, and if this function
	 * ever gets inlined it will generate worse code.
	 */
	asm volatile ("");
	return last;
}

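/*
 * Nanosecond delta since gtod->cycle_last, still scaled by gtod->mult;
 * the caller applies gtod->shift.  *mode is downgraded to VCLOCK_NONE if
 * the pvclock read turns out to be unusable.
 */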
notrace static inline u64 vgetsns(int *mode)
{
	u64 v;
	cycles_t cycles;

	if (gtod->vclock_mode == VCLOCK_TSC)
		cycles = vread_tsc();
#ifdef CONFIG_HPET_TIMER
	else if (gtod->vclock_mode == VCLOCK_HPET)
		cycles = vread_hpet();
#endif
#ifdef CONFIG_PARAVIRT_CLOCK
	else if (gtod->vclock_mode == VCLOCK_PVCLOCK)
		cycles = vread_pvclock(mode);
#endif
	else
		return 0;
	v = (cycles - gtod->cycle_last) & gtod->mask;
	return v * gtod->mult;
}

/* Code size doesn't matter (vdso is 4k anyway) and this is faster. */
notrace static int __always_inline do_realtime(struct timespec *ts)
{
	unsigned long seq;
	u64 ns;
	int mode;

	do {
		seq = gtod_read_begin(gtod);
		mode = gtod->vclock_mode;
		ts->tv_sec = gtod->wall_time_sec;
		ns = gtod->wall_time_snsec;
		ns += vgetsns(&mode);
		ns >>= gtod->shift;
	} while (unlikely(gtod_read_retry(gtod, seq)));

	ts->tv_sec += __iter_div_u64_rem(ns, NSEC_PER_SEC, &ns);
	ts->tv_nsec = ns;

	return mode;
}

notrace static int __always_inline do_monotonic(struct timespec *ts)
{
	unsigned long seq;
	u64 ns;
	int mode;

	do {
		seq = gtod_read_begin(gtod);
		mode = gtod->vclock_mode;
		ts->tv_sec = gtod->monotonic_time_sec;
		ns = gtod->monotonic_time_snsec;
		ns += vgetsns(&mode);
		ns >>= gtod->shift;
	} while (unlikely(gtod_read_retry(gtod, seq)));

	ts->tv_sec += __iter_div_u64_rem(ns, NSEC_PER_SEC, &ns);
	ts->tv_nsec = ns;

	return mode;
}

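/*
 * The coarse clocks just return the timestamp captured at the last
 * timekeeping update, with no clocksource read: cheaper, but only
 * tick granularity.
 */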
notrace static void do_realtime_coarse(struct timespec *ts)
{
	unsigned long seq;
	do {
		seq = gtod_read_begin(gtod);
		ts->tv_sec = gtod->wall_time_coarse_sec;
		ts->tv_nsec = gtod->wall_time_coarse_nsec;
	} while (unlikely(gtod_read_retry(gtod, seq)));
}

notrace static void do_monotonic_coarse(struct timespec *ts)
{
	unsigned long seq;
	do {
		seq = gtod_read_begin(gtod);
		ts->tv_sec = gtod->monotonic_time_coarse_sec;
		ts->tv_nsec = gtod->monotonic_time_coarse_nsec;
	} while (unlikely(gtod_read_retry(gtod, seq)));
}

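/*
 * Dispatch on the clock id; unsupported clocks, and any clock whose
 * vclock source turns out to be unusable, fall back to the real syscall.
 */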
notrace int __vdso_clock_gettime(clockid_t clock, struct timespec *ts)
{
	switch (clock) {
	case CLOCK_REALTIME:
		if (do_realtime(ts) == VCLOCK_NONE)
			goto fallback;
		break;
	case CLOCK_MONOTONIC:
		if (do_monotonic(ts) == VCLOCK_NONE)
			goto fallback;
		break;
	case CLOCK_REALTIME_COARSE:
		do_realtime_coarse(ts);
		break;
	case CLOCK_MONOTONIC_COARSE:
		do_monotonic_coarse(ts);
		break;
	default:
		goto fallback;
	}

	return 0;
fallback:
	return vdso_fallback_gettime(clock, ts);
}
int clock_gettime(clockid_t, struct timespec *)
	__attribute__((weak, alias("__vdso_clock_gettime")));

notrace int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz)
{
	if (likely(tv != NULL)) {
		if (unlikely(do_realtime((struct timespec *)tv) == VCLOCK_NONE))
			return vdso_fallback_gtod(tv, tz);
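		/*
		 * do_realtime() treated tv as a timespec and left
		 * nanoseconds in tv_usec; scale down to microseconds.
		 */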
		tv->tv_usec /= 1000;
	}
	if (unlikely(tz != NULL)) {
		tz->tz_minuteswest = gtod->tz_minuteswest;
		tz->tz_dsttime = gtod->tz_dsttime;
	}

	return 0;
}
int gettimeofday(struct timeval *, struct timezone *)
	__attribute__((weak, alias("__vdso_gettimeofday")));

/*
 * This will break when the xtime seconds get inaccurate, but that is
 * unlikely.
 */
notrace time_t __vdso_time(time_t *t)
{
	/* This is atomic on x86 so we don't need any locks. */
	time_t result = ACCESS_ONCE(gtod->wall_time_sec);

	if (t)
		*t = result;
	return result;
}
int time(time_t *t)
	__attribute__((weak, alias("__vdso_time")));