#include <linux/bitops.h>
#include <linux/types.h>
#include <linux/slab.h>

#include <asm/perf_event.h>
#include <asm/insn.h>

#include "perf_event.h"

/* The size of a BTS record in bytes: */
#define BTS_RECORD_SIZE		24

#define BTS_BUFFER_SIZE		(PAGE_SIZE << 4)
#define PEBS_BUFFER_SIZE	PAGE_SIZE
#define PEBS_FIXUP_SIZE		PAGE_SIZE

/*
 * pebs_record_32 for p4 and core not supported

struct pebs_record_32 {
	u32 flags, ip;
	u32 ax, bx, cx, dx;
	u32 si, di, bp, sp;
};

 */

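/*
 * Layout of the PEBS data source word reported by the load latency
 * and precise store facilities.  The same value is decoded either
 * through the ld_* view (load latency) or the st_* view (precise
 * stores), see load_latency_data() and precise_store_data() below.
 */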
union intel_x86_pebs_dse {
	u64 val;
	struct {
		unsigned int ld_dse:4;
		unsigned int ld_stlb_miss:1;
		unsigned int ld_locked:1;
		unsigned int ld_reserved:26;
	};
	struct {
		unsigned int st_l1d_hit:1;
		unsigned int st_reserved1:3;
		unsigned int st_stlb_miss:1;
		unsigned int st_locked:1;
		unsigned int st_reserved2:26;
	};
};


/*
 * Map PEBS Load Latency Data Source encodings to generic
 * memory data source information
 */
#define P(a, b) PERF_MEM_S(a, b)
#define OP_LH (P(OP, LOAD) | P(LVL, HIT))
#define SNOOP_NONE_MISS (P(SNOOP, NONE) | P(SNOOP, MISS))

static const u64 pebs_data_source[] = {
	P(OP, LOAD) | P(LVL, MISS) | P(LVL, L3) | P(SNOOP, NA),/* 0x00:ukn L3 */
	OP_LH | P(LVL, L1)  | P(SNOOP, NONE),	/* 0x01: L1 local */
	OP_LH | P(LVL, LFB) | P(SNOOP, NONE),	/* 0x02: LFB hit */
	OP_LH | P(LVL, L2)  | P(SNOOP, NONE),	/* 0x03: L2 hit */
	OP_LH | P(LVL, L3)  | P(SNOOP, NONE),	/* 0x04: L3 hit */
	OP_LH | P(LVL, L3)  | P(SNOOP, MISS),	/* 0x05: L3 hit, snoop miss */
	OP_LH | P(LVL, L3)  | P(SNOOP, HIT),	/* 0x06: L3 hit, snoop hit */
	OP_LH | P(LVL, L3)  | P(SNOOP, HITM),	/* 0x07: L3 hit, snoop hitm */
	OP_LH | P(LVL, REM_CCE1) | P(SNOOP, HIT),  /* 0x08: L3 miss snoop hit */
	OP_LH | P(LVL, REM_CCE1) | P(SNOOP, HITM), /* 0x09: L3 miss snoop hitm*/
	OP_LH | P(LVL, LOC_RAM)  | P(SNOOP, HIT),  /* 0x0a: L3 miss, shared */
	OP_LH | P(LVL, REM_RAM1) | P(SNOOP, HIT),  /* 0x0b: L3 miss, shared */
	OP_LH | P(LVL, LOC_RAM)  | SNOOP_NONE_MISS,/* 0x0c: L3 miss, excl */
	OP_LH | P(LVL, REM_RAM1) | SNOOP_NONE_MISS,/* 0x0d: L3 miss, excl */
	OP_LH | P(LVL, IO)  | P(SNOOP, NONE), /* 0x0e: I/O */
	OP_LH | P(LVL, UNC) | P(SNOOP, NONE), /* 0x0f: uncached */
};

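/*
 * Decode the data source word of a precise store PEBS record into a
 * generic perf_mem_data_src value: bit 4 gives the STLB hit/miss,
 * bit 0 the L1D hit/miss and bit 5 the locked prefix.
 */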
static u64 precise_store_data(u64 status)
{
	union intel_x86_pebs_dse dse;
	u64 val = P(OP, STORE) | P(SNOOP, NA) | P(LVL, L1) | P(TLB, L2);

	dse.val = status;

	/*
	 * bit 4: TLB access
	 * 1 = store missed 2nd level TLB
	 *
	 * so the store either hit the page walker or the OS,
	 * otherwise it hit the 2nd level TLB
	 */
	if (dse.st_stlb_miss)
		val |= P(TLB, MISS);
	else
		val |= P(TLB, HIT);

	/*
	 * bit 0: hit L1 data cache
	 * if not set, then all we know is that
	 * it missed L1D
	 */
	if (dse.st_l1d_hit)
		val |= P(LVL, HIT);
	else
		val |= P(LVL, MISS);

	/*
	 * bit 5: Locked prefix
	 */
	if (dse.st_locked)
		val |= P(LOCK, LOCKED);

	return val;
}

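/*
 * Haswell data-address profiling only tells us whether the access hit
 * L1 (low bit of the status word), and only for the store events
 * listed in the comment below; everything else stays PERF_MEM_NA.
 */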
static u64 precise_datala_hsw(struct perf_event *event, u64 status)
{
	union perf_mem_data_src dse;

	dse.val = PERF_MEM_NA;

	if (event->hw.flags & PERF_X86_EVENT_PEBS_ST_HSW)
		dse.mem_op = PERF_MEM_OP_STORE;
	else if (event->hw.flags & PERF_X86_EVENT_PEBS_LD_HSW)
		dse.mem_op = PERF_MEM_OP_LOAD;

	/*
	 * L1 info only valid for following events:
	 *
	 * MEM_UOPS_RETIRED.STLB_MISS_STORES
	 * MEM_UOPS_RETIRED.LOCK_STORES
	 * MEM_UOPS_RETIRED.SPLIT_STORES
	 * MEM_UOPS_RETIRED.ALL_STORES
	 */
	if (event->hw.flags & PERF_X86_EVENT_PEBS_ST_HSW) {
		if (status & 1)
			dse.mem_lvl = PERF_MEM_LVL_L1 | PERF_MEM_LVL_HIT;
		else
			dse.mem_lvl = PERF_MEM_LVL_L1 | PERF_MEM_LVL_MISS;
	}
	return dse.val;
}

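/*
 * Translate the load latency data source into a generic
 * perf_mem_data_src value: bits 0-3 index pebs_data_source[], bit 4
 * carries the STLB miss and bit 5 the locked prefix.  Nehalem does
 * not fill in the TLB/lock bits, so report them as not available
 * there.
 */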
static u64 load_latency_data(u64 status)
{
	union intel_x86_pebs_dse dse;
	u64 val;
	int model = boot_cpu_data.x86_model;
	int fam = boot_cpu_data.x86;

	dse.val = status;

	/*
	 * use the mapping table for bits 0-3
	 */
	val = pebs_data_source[dse.ld_dse];

	/*
	 * Nehalem models do not provide TLB or Lock information
	 */
	if (fam == 0x6 && (model == 26 || model == 30
	    || model == 31 || model == 46)) {
		val |= P(TLB, NA) | P(LOCK, NA);
		return val;
	}
	/*
	 * bit 4: TLB access
	 * 0 = did not miss 2nd level TLB
	 * 1 = missed 2nd level TLB
	 */
	if (dse.ld_stlb_miss)
		val |= P(TLB, MISS) | P(TLB, L2);
	else
		val |= P(TLB, HIT) | P(TLB, L1) | P(TLB, L2);

	/*
	 * bit 5: locked prefix
	 */
	if (dse.ld_locked)
		val |= P(LOCK, LOCKED);

	return val;
}

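/*
 * PEBS record layouts, one per record format: format 0 (core),
 * format 1 (nhm) which adds status/dla/dse/lat, and format 2 (hsw)
 * which additionally reports the eventing IP and TSX tuning data.
 * The format is selected in intel_ds_init().
 */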
struct pebs_record_core {
	u64 flags, ip;
	u64 ax, bx, cx, dx;
	u64 si, di, bp, sp;
	u64 r8,  r9,  r10, r11;
	u64 r12, r13, r14, r15;
};

struct pebs_record_nhm {
	u64 flags, ip;
	u64 ax, bx, cx, dx;
	u64 si, di, bp, sp;
	u64 r8,  r9,  r10, r11;
	u64 r12, r13, r14, r15;
	u64 status, dla, dse, lat;
};

/*
 * Same as pebs_record_nhm, with two additional fields.
 */
struct pebs_record_hsw {
	u64 flags, ip;
	u64 ax, bx, cx, dx;
	u64 si, di, bp, sp;
	u64 r8,  r9,  r10, r11;
	u64 r12, r13, r14, r15;
	u64 status, dla, dse, lat;
	u64 real_ip, tsx_tuning;
};

union hsw_tsx_tuning {
	struct {
		u32 cycles_last_block     : 32,
		    hle_abort		  : 1,
		    rtm_abort		  : 1,
		    instruction_abort     : 1,
		    non_instruction_abort : 1,
		    retry		  : 1,
		    data_conflict	  : 1,
		    capacity_writes	  : 1,
		    capacity_reads	  : 1;
	};
	u64	    value;
};

#define PEBS_HSW_TSX_FLAGS	0xff00000000ULL

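/*
 * Point MSR_IA32_DS_AREA of the given CPU at its debug store, if one
 * has been allocated; the address is passed as two 32-bit halves
 * because that is what wrmsr_on_cpu() expects.
 */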
void init_debug_store_on_cpu(int cpu)
{
	struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;

	if (!ds)
		return;

	wrmsr_on_cpu(cpu, MSR_IA32_DS_AREA,
		     (u32)((u64)(unsigned long)ds),
		     (u32)((u64)(unsigned long)ds >> 32));
}

void fini_debug_store_on_cpu(int cpu)
{
	if (!per_cpu(cpu_hw_events, cpu).ds)
		return;

	wrmsr_on_cpu(cpu, MSR_IA32_DS_AREA, 0, 0);
}

static DEFINE_PER_CPU(void *, insn_buffer);

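/*
 * Allocate the per-cpu PEBS buffer and, for pre-Haswell parts, the
 * scratch page used by intel_pmu_pebs_fixup_ip() to copy user text.
 * The interrupt threshold is left at a single record, so every PEBS
 * write is followed by a PMI.
 */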
static int alloc_pebs_buffer(int cpu)
{
	struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
	int node = cpu_to_node(cpu);
	int max, thresh = 1; /* always use a single PEBS record */
	void *buffer, *ibuffer;

	if (!x86_pmu.pebs)
		return 0;

	buffer = kzalloc_node(PEBS_BUFFER_SIZE, GFP_KERNEL, node);
	if (unlikely(!buffer))
		return -ENOMEM;

	/*
	 * HSW+ already provides the eventing IP, so there is no need to
	 * allocate the fixup buffer there.
	 */
	if (x86_pmu.intel_cap.pebs_format < 2) {
		ibuffer = kzalloc_node(PEBS_FIXUP_SIZE, GFP_KERNEL, node);
		if (!ibuffer) {
			kfree(buffer);
			return -ENOMEM;
		}
		per_cpu(insn_buffer, cpu) = ibuffer;
	}

	max = PEBS_BUFFER_SIZE / x86_pmu.pebs_record_size;

	ds->pebs_buffer_base = (u64)(unsigned long)buffer;
	ds->pebs_index = ds->pebs_buffer_base;
	ds->pebs_absolute_maximum = ds->pebs_buffer_base +
		max * x86_pmu.pebs_record_size;

	ds->pebs_interrupt_threshold = ds->pebs_buffer_base +
		thresh * x86_pmu.pebs_record_size;

	return 0;
}

static void release_pebs_buffer(int cpu)
{
	struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;

	if (!ds || !x86_pmu.pebs)
		return;

	kfree(per_cpu(insn_buffer, cpu));
	per_cpu(insn_buffer, cpu) = NULL;

	kfree((void *)(unsigned long)ds->pebs_buffer_base);
	ds->pebs_buffer_base = 0;
}

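/*
 * Allocate the per-cpu BTS buffer.  The interrupt threshold is set a
 * sixteenth of the buffer below its end, which should leave enough
 * headroom to drain the buffer before the hardware runs out of space.
 */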
static int alloc_bts_buffer(int cpu)
{
	struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
	int node = cpu_to_node(cpu);
	int max, thresh;
	void *buffer;

	if (!x86_pmu.bts)
		return 0;

	buffer = kzalloc_node(BTS_BUFFER_SIZE, GFP_KERNEL | __GFP_NOWARN, node);
	if (unlikely(!buffer)) {
		WARN_ONCE(1, "%s: BTS buffer allocation failure\n", __func__);
		return -ENOMEM;
	}

	max = BTS_BUFFER_SIZE / BTS_RECORD_SIZE;
	thresh = max / 16;

	ds->bts_buffer_base = (u64)(unsigned long)buffer;
	ds->bts_index = ds->bts_buffer_base;
	ds->bts_absolute_maximum = ds->bts_buffer_base +
		max * BTS_RECORD_SIZE;
	ds->bts_interrupt_threshold = ds->bts_absolute_maximum -
		thresh * BTS_RECORD_SIZE;

	return 0;
}

static void release_bts_buffer(int cpu)
{
	struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;

	if (!ds || !x86_pmu.bts)
		return;

	kfree((void *)(unsigned long)ds->bts_buffer_base);
	ds->bts_buffer_base = 0;
}

static int alloc_ds_buffer(int cpu)
{
	int node = cpu_to_node(cpu);
	struct debug_store *ds;

	ds = kzalloc_node(sizeof(*ds), GFP_KERNEL, node);
	if (unlikely(!ds))
		return -ENOMEM;

	per_cpu(cpu_hw_events, cpu).ds = ds;

	return 0;
}

static void release_ds_buffer(int cpu)
{
	struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;

	if (!ds)
		return;

	per_cpu(cpu_hw_events, cpu).ds = NULL;
	kfree(ds);
}

void release_ds_buffers(void)
{
	int cpu;

	if (!x86_pmu.bts && !x86_pmu.pebs)
		return;

	get_online_cpus();
	for_each_online_cpu(cpu)
		fini_debug_store_on_cpu(cpu);

	for_each_possible_cpu(cpu) {
		release_pebs_buffer(cpu);
		release_bts_buffer(cpu);
		release_ds_buffer(cpu);
	}
	put_online_cpus();
}

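/*
 * Allocate DS, BTS and PEBS buffers for every possible CPU.  BTS and
 * PEBS failures are tracked separately so that one facility can still
 * be activated if only the other ran out of memory; if both fail, the
 * debug store itself is released again.
 */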
void reserve_ds_buffers(void)
{
	int bts_err = 0, pebs_err = 0;
	int cpu;

	x86_pmu.bts_active = 0;
	x86_pmu.pebs_active = 0;

	if (!x86_pmu.bts && !x86_pmu.pebs)
		return;

	if (!x86_pmu.bts)
		bts_err = 1;

	if (!x86_pmu.pebs)
		pebs_err = 1;

	get_online_cpus();

	for_each_possible_cpu(cpu) {
		if (alloc_ds_buffer(cpu)) {
			bts_err = 1;
			pebs_err = 1;
		}

		if (!bts_err && alloc_bts_buffer(cpu))
			bts_err = 1;

		if (!pebs_err && alloc_pebs_buffer(cpu))
			pebs_err = 1;

		if (bts_err && pebs_err)
			break;
	}

	if (bts_err) {
		for_each_possible_cpu(cpu)
			release_bts_buffer(cpu);
	}

	if (pebs_err) {
		for_each_possible_cpu(cpu)
			release_pebs_buffer(cpu);
	}

	if (bts_err && pebs_err) {
		for_each_possible_cpu(cpu)
			release_ds_buffer(cpu);
	} else {
		if (x86_pmu.bts && !bts_err)
			x86_pmu.bts_active = 1;

		if (x86_pmu.pebs && !pebs_err)
			x86_pmu.pebs_active = 1;

		for_each_online_cpu(cpu)
			init_debug_store_on_cpu(cpu);
	}

	put_online_cpus();
}

/*
 * BTS
 */

struct event_constraint bts_constraint =
	EVENT_CONSTRAINT(0, 1ULL << INTEL_PMC_IDX_FIXED_BTS, 0);

void intel_pmu_enable_bts(u64 config)
{
	unsigned long debugctlmsr;

	debugctlmsr = get_debugctlmsr();

	debugctlmsr |= DEBUGCTLMSR_TR;
	debugctlmsr |= DEBUGCTLMSR_BTS;
	if (config & ARCH_PERFMON_EVENTSEL_INT)
		debugctlmsr |= DEBUGCTLMSR_BTINT;

	if (!(config & ARCH_PERFMON_EVENTSEL_OS))
		debugctlmsr |= DEBUGCTLMSR_BTS_OFF_OS;

	if (!(config & ARCH_PERFMON_EVENTSEL_USR))
		debugctlmsr |= DEBUGCTLMSR_BTS_OFF_USR;

	update_debugctlmsr(debugctlmsr);
}

void intel_pmu_disable_bts(void)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
	unsigned long debugctlmsr;

	if (!cpuc->ds)
		return;

	debugctlmsr = get_debugctlmsr();

	debugctlmsr &=
		~(DEBUGCTLMSR_TR | DEBUGCTLMSR_BTS | DEBUGCTLMSR_BTINT |
		  DEBUGCTLMSR_BTS_OFF_OS | DEBUGCTLMSR_BTS_OFF_USR);

	update_debugctlmsr(debugctlmsr);
}

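/*
 * Flush the BTS buffer: emit one sample per branch record, using the
 * branch source as the sample IP and the branch target as the sample
 * address, then reset the buffer to its base.
 */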
int intel_pmu_drain_bts_buffer(void)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
	struct debug_store *ds = cpuc->ds;
	struct bts_record {
		u64	from;
		u64	to;
		u64	flags;
	};
	struct perf_event *event = cpuc->events[INTEL_PMC_IDX_FIXED_BTS];
	struct bts_record *at, *top;
	struct perf_output_handle handle;
	struct perf_event_header header;
	struct perf_sample_data data;
	struct pt_regs regs;

	if (!event)
		return 0;

	if (!x86_pmu.bts_active)
		return 0;

	at  = (struct bts_record *)(unsigned long)ds->bts_buffer_base;
	top = (struct bts_record *)(unsigned long)ds->bts_index;

	if (top <= at)
		return 0;

	memset(&regs, 0, sizeof(regs));

	ds->bts_index = ds->bts_buffer_base;

	perf_sample_data_init(&data, 0, event->hw.last_period);

	/*
	 * Prepare a generic sample, i.e. fill in the invariant fields.
	 * We will overwrite the from and to address before we output
	 * the sample.
	 */
	perf_prepare_sample(&header, &data, event, &regs);

	if (perf_output_begin(&handle, event, header.size * (top - at)))
		return 1;

	for (; at < top; at++) {
		data.ip		= at->from;
		data.addr	= at->to;

		perf_output_sample(&handle, &header, &data, event);
	}

	perf_output_end(&handle);

	/* There's new data available. */
	event->hw.interrupts++;
	event->pending_kill = POLL_IN;
	return 1;
}

/*
 * PEBS
 */
struct event_constraint intel_core2_pebs_event_constraints[] = {
	INTEL_FLAGS_UEVENT_CONSTRAINT(0x00c0, 0x1), /* INST_RETIRED.ANY */
	INTEL_FLAGS_UEVENT_CONSTRAINT(0xfec1, 0x1), /* X87_OPS_RETIRED.ANY */
	INTEL_FLAGS_UEVENT_CONSTRAINT(0x00c5, 0x1), /* BR_INST_RETIRED.MISPRED */
	INTEL_FLAGS_UEVENT_CONSTRAINT(0x1fc7, 0x1), /* SIMD_INST_RETIRED.ANY */
	INTEL_FLAGS_EVENT_CONSTRAINT(0xcb, 0x1),    /* MEM_LOAD_RETIRED.* */
	/* INST_RETIRED.ANY_P, inv=1, cmask=16 (cycles:p). */
	INTEL_FLAGS_EVENT_CONSTRAINT(0x108000c0, 0x01),
	EVENT_CONSTRAINT_END
};

struct event_constraint intel_atom_pebs_event_constraints[] = {
	INTEL_FLAGS_UEVENT_CONSTRAINT(0x00c0, 0x1), /* INST_RETIRED.ANY */
	INTEL_FLAGS_UEVENT_CONSTRAINT(0x00c5, 0x1), /* MISPREDICTED_BRANCH_RETIRED */
	INTEL_FLAGS_EVENT_CONSTRAINT(0xcb, 0x1),    /* MEM_LOAD_RETIRED.* */
	/* INST_RETIRED.ANY_P, inv=1, cmask=16 (cycles:p). */
	INTEL_FLAGS_EVENT_CONSTRAINT(0x108000c0, 0x01),
	EVENT_CONSTRAINT_END
};

struct event_constraint intel_slm_pebs_event_constraints[] = {
	/* INST_RETIRED.ANY_P, inv=1, cmask=16 (cycles:p). */
	INTEL_FLAGS_EVENT_CONSTRAINT(0x108000c0, 0x1),
	/* Allow all events as PEBS with no flags */
	INTEL_ALL_EVENT_CONSTRAINT(0, 0x1),
	EVENT_CONSTRAINT_END
};

struct event_constraint intel_nehalem_pebs_event_constraints[] = {
	INTEL_PLD_CONSTRAINT(0x100b, 0xf),      /* MEM_INST_RETIRED.* */
	INTEL_FLAGS_EVENT_CONSTRAINT(0x0f, 0xf),    /* MEM_UNCORE_RETIRED.* */
	INTEL_FLAGS_UEVENT_CONSTRAINT(0x010c, 0xf), /* MEM_STORE_RETIRED.DTLB_MISS */
	INTEL_FLAGS_EVENT_CONSTRAINT(0xc0, 0xf),    /* INST_RETIRED.ANY */
	INTEL_EVENT_CONSTRAINT(0xc2, 0xf),    /* UOPS_RETIRED.* */
	INTEL_FLAGS_EVENT_CONSTRAINT(0xc4, 0xf),    /* BR_INST_RETIRED.* */
	INTEL_FLAGS_UEVENT_CONSTRAINT(0x02c5, 0xf), /* BR_MISP_RETIRED.NEAR_CALL */
	INTEL_FLAGS_EVENT_CONSTRAINT(0xc7, 0xf),    /* SSEX_UOPS_RETIRED.* */
	INTEL_FLAGS_UEVENT_CONSTRAINT(0x20c8, 0xf), /* ITLB_MISS_RETIRED */
	INTEL_FLAGS_EVENT_CONSTRAINT(0xcb, 0xf),    /* MEM_LOAD_RETIRED.* */
	INTEL_FLAGS_EVENT_CONSTRAINT(0xf7, 0xf),    /* FP_ASSIST.* */
	/* INST_RETIRED.ANY_P, inv=1, cmask=16 (cycles:p). */
	INTEL_FLAGS_EVENT_CONSTRAINT(0x108000c0, 0x0f),
	EVENT_CONSTRAINT_END
};

struct event_constraint intel_westmere_pebs_event_constraints[] = {
	INTEL_PLD_CONSTRAINT(0x100b, 0xf),      /* MEM_INST_RETIRED.* */
	INTEL_FLAGS_EVENT_CONSTRAINT(0x0f, 0xf),    /* MEM_UNCORE_RETIRED.* */
	INTEL_FLAGS_UEVENT_CONSTRAINT(0x010c, 0xf), /* MEM_STORE_RETIRED.DTLB_MISS */
	INTEL_FLAGS_EVENT_CONSTRAINT(0xc0, 0xf),    /* INSTR_RETIRED.* */
	INTEL_EVENT_CONSTRAINT(0xc2, 0xf),    /* UOPS_RETIRED.* */
	INTEL_FLAGS_EVENT_CONSTRAINT(0xc4, 0xf),    /* BR_INST_RETIRED.* */
	INTEL_FLAGS_EVENT_CONSTRAINT(0xc5, 0xf),    /* BR_MISP_RETIRED.* */
	INTEL_FLAGS_EVENT_CONSTRAINT(0xc7, 0xf),    /* SSEX_UOPS_RETIRED.* */
	INTEL_FLAGS_UEVENT_CONSTRAINT(0x20c8, 0xf), /* ITLB_MISS_RETIRED */
	INTEL_FLAGS_EVENT_CONSTRAINT(0xcb, 0xf),    /* MEM_LOAD_RETIRED.* */
	INTEL_FLAGS_EVENT_CONSTRAINT(0xf7, 0xf),    /* FP_ASSIST.* */
	/* INST_RETIRED.ANY_P, inv=1, cmask=16 (cycles:p). */
	INTEL_FLAGS_EVENT_CONSTRAINT(0x108000c0, 0x0f),
	EVENT_CONSTRAINT_END
};

struct event_constraint intel_snb_pebs_event_constraints[] = {
	INTEL_FLAGS_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PRECDIST */
	INTEL_PLD_CONSTRAINT(0x01cd, 0x8),    /* MEM_TRANS_RETIRED.LAT_ABOVE_THR */
	INTEL_PST_CONSTRAINT(0x02cd, 0x8),    /* MEM_TRANS_RETIRED.PRECISE_STORES */
	/* UOPS_RETIRED.ALL, inv=1, cmask=16 (cycles:p). */
	INTEL_FLAGS_EVENT_CONSTRAINT(0x108001c2, 0xf),
	INTEL_EXCLEVT_CONSTRAINT(0xd0, 0xf),    /* MEM_UOP_RETIRED.* */
	INTEL_EXCLEVT_CONSTRAINT(0xd1, 0xf),    /* MEM_LOAD_UOPS_RETIRED.* */
	INTEL_EXCLEVT_CONSTRAINT(0xd2, 0xf),    /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */
	INTEL_EXCLEVT_CONSTRAINT(0xd3, 0xf),    /* MEM_LOAD_UOPS_LLC_MISS_RETIRED.* */
	/* Allow all events as PEBS with no flags */
	INTEL_ALL_EVENT_CONSTRAINT(0, 0xf),
	EVENT_CONSTRAINT_END
};

struct event_constraint intel_ivb_pebs_event_constraints[] = {
	INTEL_FLAGS_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PRECDIST */
	INTEL_PLD_CONSTRAINT(0x01cd, 0x8),    /* MEM_TRANS_RETIRED.LAT_ABOVE_THR */
	INTEL_PST_CONSTRAINT(0x02cd, 0x8),    /* MEM_TRANS_RETIRED.PRECISE_STORES */
	/* UOPS_RETIRED.ALL, inv=1, cmask=16 (cycles:p). */
	INTEL_FLAGS_EVENT_CONSTRAINT(0x108001c2, 0xf),
	INTEL_EXCLEVT_CONSTRAINT(0xd0, 0xf),    /* MEM_UOP_RETIRED.* */
	INTEL_EXCLEVT_CONSTRAINT(0xd1, 0xf),    /* MEM_LOAD_UOPS_RETIRED.* */
	INTEL_EXCLEVT_CONSTRAINT(0xd2, 0xf),    /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */
	INTEL_EXCLEVT_CONSTRAINT(0xd3, 0xf),    /* MEM_LOAD_UOPS_LLC_MISS_RETIRED.* */
	/* Allow all events as PEBS with no flags */
	INTEL_ALL_EVENT_CONSTRAINT(0, 0xf),
	EVENT_CONSTRAINT_END
};

struct event_constraint intel_hsw_pebs_event_constraints[] = {
	INTEL_FLAGS_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PRECDIST */
	INTEL_PLD_CONSTRAINT(0x01cd, 0xf),    /* MEM_TRANS_RETIRED.* */
	/* UOPS_RETIRED.ALL, inv=1, cmask=16 (cycles:p). */
	INTEL_FLAGS_EVENT_CONSTRAINT(0x108001c2, 0xf),
	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_NA(0x01c2, 0xf), /* UOPS_RETIRED.ALL */
	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_XLD(0x11d0, 0xf), /* MEM_UOPS_RETIRED.STLB_MISS_LOADS */
	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_XLD(0x21d0, 0xf), /* MEM_UOPS_RETIRED.LOCK_LOADS */
	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_XLD(0x41d0, 0xf), /* MEM_UOPS_RETIRED.SPLIT_LOADS */
	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_XLD(0x81d0, 0xf), /* MEM_UOPS_RETIRED.ALL_LOADS */
	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_XST(0x12d0, 0xf), /* MEM_UOPS_RETIRED.STLB_MISS_STORES */
	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_XST(0x42d0, 0xf), /* MEM_UOPS_RETIRED.SPLIT_STORES */
	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_XST(0x82d0, 0xf), /* MEM_UOPS_RETIRED.ALL_STORES */
	INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_XLD(0xd1, 0xf),    /* MEM_LOAD_UOPS_RETIRED.* */
	INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_XLD(0xd2, 0xf),    /* MEM_LOAD_UOPS_L3_HIT_RETIRED.* */
	INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_XLD(0xd3, 0xf),    /* MEM_LOAD_UOPS_L3_MISS_RETIRED.* */
	/* Allow all events as PEBS with no flags */
	INTEL_ALL_EVENT_CONSTRAINT(0, 0xf),
	EVENT_CONSTRAINT_END
};

struct event_constraint *intel_pebs_constraints(struct perf_event *event)
{
	struct event_constraint *c;

	if (!event->attr.precise_ip)
		return NULL;

	if (x86_pmu.pebs_constraints) {
		for_each_event_constraint(c, x86_pmu.pebs_constraints) {
			if ((event->hw.config & c->cmask) == c->code) {
				event->hw.flags |= c->flags;
				return c;
			}
		}
	}

	return &emptyconstraint;
}

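/*
 * MSR_IA32_PEBS_ENABLE bit usage below: bit 'idx' arms PEBS for
 * counter 'idx', bit 'idx + 32' additionally enables load latency
 * capture and bit 63 precise stores, on models providing those
 * facilities.  The MSR itself is written by
 * intel_pmu_pebs_enable_all() when the PMU is enabled.
 */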
void intel_pmu_pebs_enable(struct perf_event *event)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
	struct hw_perf_event *hwc = &event->hw;

	hwc->config &= ~ARCH_PERFMON_EVENTSEL_INT;

	cpuc->pebs_enabled |= 1ULL << hwc->idx;

	if (event->hw.flags & PERF_X86_EVENT_PEBS_LDLAT)
		cpuc->pebs_enabled |= 1ULL << (hwc->idx + 32);
	else if (event->hw.flags & PERF_X86_EVENT_PEBS_ST)
		cpuc->pebs_enabled |= 1ULL << 63;
}

void intel_pmu_pebs_disable(struct perf_event *event)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
	struct hw_perf_event *hwc = &event->hw;

	cpuc->pebs_enabled &= ~(1ULL << hwc->idx);

	if (event->hw.flags & PERF_X86_EVENT_PEBS_LDLAT)
		cpuc->pebs_enabled &= ~(1ULL << (hwc->idx + 32));
	else if (event->hw.flags & PERF_X86_EVENT_PEBS_ST)
		cpuc->pebs_enabled &= ~(1ULL << 63);

	if (cpuc->enabled)
		wrmsrl(MSR_IA32_PEBS_ENABLE, cpuc->pebs_enabled);

	hwc->config |= ARCH_PERFMON_EVENTSEL_INT;
}

void intel_pmu_pebs_enable_all(void)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);

	if (cpuc->pebs_enabled)
		wrmsrl(MSR_IA32_PEBS_ENABLE, cpuc->pebs_enabled);
}

void intel_pmu_pebs_disable_all(void)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);

	if (cpuc->pebs_enabled)
		wrmsrl(MSR_IA32_PEBS_ENABLE, 0);
}

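/*
 * With a trap-like PEBS assist the reported IP points to the
 * instruction after the one that caused the event.  Rewind by taking
 * the target of the last branch (LBR entry 0) as the start of the
 * basic block and decoding forward until the reported IP is reached;
 * the previously decoded instruction is the real sample IP.  Returns
 * 1 when regs->ip was fixed up (or needed no fixup), 0 otherwise.
 */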
static int intel_pmu_pebs_fixup_ip(struct pt_regs *regs)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
	unsigned long from = cpuc->lbr_entries[0].from;
	unsigned long old_to, to = cpuc->lbr_entries[0].to;
	unsigned long ip = regs->ip;
	int is_64bit = 0;
	void *kaddr;
	int size;

	/*
	 * We don't need to fixup if the PEBS assist is fault-like
	 */
	if (!x86_pmu.intel_cap.pebs_trap)
		return 1;

	/*
	 * No LBR entry, no basic block, no rewinding
	 */
	if (!cpuc->lbr_stack.nr || !from || !to)
		return 0;

	/*
	 * Basic blocks should never cross user/kernel boundaries
	 */
	if (kernel_ip(ip) != kernel_ip(to))
		return 0;

	/*
	 * unsigned math, either ip is before the start (impossible) or
	 * the basic block is larger than 1 page (sanity)
	 */
	if ((ip - to) > PEBS_FIXUP_SIZE)
		return 0;

	/*
	 * We sampled a branch insn, rewind using the LBR stack
	 */
	if (ip == to) {
		set_linear_ip(regs, from);
		return 1;
	}

	size = ip - to;
	if (!kernel_ip(ip)) {
		int bytes;
		u8 *buf = this_cpu_read(insn_buffer);

		/* 'size' must fit our buffer, see above */
		bytes = copy_from_user_nmi(buf, (void __user *)to, size);
		if (bytes != 0)
			return 0;

		kaddr = buf;
	} else {
		kaddr = (void *)to;
	}

	do {
		struct insn insn;

		old_to = to;

#ifdef CONFIG_X86_64
		is_64bit = kernel_ip(to) || !test_thread_flag(TIF_IA32);
#endif
		insn_init(&insn, kaddr, size, is_64bit);
		insn_get_length(&insn);
		/*
		 * Make sure there was not a problem decoding the
		 * instruction and getting the length.  This is
		 * doubly important because we would loop forever
		 * if insn.length == 0.
		 */
		if (!insn.length)
			break;

		to += insn.length;
		kaddr += insn.length;
		size -= insn.length;
	} while (to < ip);

	if (to == ip) {
		set_linear_ip(regs, old_to);
		return 1;
	}

	/*
	 * Even though we decoded the basic block, the instruction stream
	 * never matched the given IP, either the TO or the IP got corrupted.
	 */
	return 0;
}

static inline u64 intel_hsw_weight(struct pebs_record_hsw *pebs)
{
	if (pebs->tsx_tuning) {
		union hsw_tsx_tuning tsx = { .value = pebs->tsx_tuning };
		return tsx.cycles_last_block;
	}
	return 0;
}

static inline u64 intel_hsw_transaction(struct pebs_record_hsw *pebs)
{
	u64 txn = (pebs->tsx_tuning & PEBS_HSW_TSX_FLAGS) >> 32;

	/* For RTM XABORTs also log the abort code from AX */
	if ((txn & PERF_TXN_TRANSACTION) && (pebs->ax & 1))
		txn |= ((pebs->ax >> 24) & 0xff) << PERF_TXN_ABORT_SHIFT;
	return txn;
}

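/*
 * Convert a single PEBS record into a perf sample: fill in weight,
 * data source, register state and the TSX fields as far as the record
 * format and the event's sample_type allow, then hand the sample to
 * the generic overflow handler.
 */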
static void __intel_pmu_pebs_event(struct perf_event *event,
				   struct pt_regs *iregs, void *__pebs)
{
#define PERF_X86_EVENT_PEBS_HSW_PREC \
		(PERF_X86_EVENT_PEBS_ST_HSW | \
		 PERF_X86_EVENT_PEBS_LD_HSW | \
		 PERF_X86_EVENT_PEBS_NA_HSW)
	/*
	 * We cast to the biggest pebs_record but are careful not to
	 * unconditionally access the 'extra' entries.
	 */
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
	struct pebs_record_hsw *pebs = __pebs;
	struct perf_sample_data data;
	struct pt_regs regs;
	u64 sample_type;
	int fll, fst, dsrc;
	int fl = event->hw.flags;

	if (!intel_pmu_save_and_restart(event))
		return;

	sample_type = event->attr.sample_type;
	dsrc = sample_type & PERF_SAMPLE_DATA_SRC;

	fll = fl & PERF_X86_EVENT_PEBS_LDLAT;
	fst = fl & (PERF_X86_EVENT_PEBS_ST | PERF_X86_EVENT_PEBS_HSW_PREC);

	perf_sample_data_init(&data, 0, event->hw.last_period);

	data.period = event->hw.last_period;

	/*
	 * Use latency for weight (only avail with PEBS-LL)
	 */
	if (fll && (sample_type & PERF_SAMPLE_WEIGHT))
		data.weight = pebs->lat;

	/*
	 * data.data_src encodes the data source
	 */
	if (dsrc) {
		u64 val = PERF_MEM_NA;
		if (fll)
			val = load_latency_data(pebs->dse);
		else if (fst && (fl & PERF_X86_EVENT_PEBS_HSW_PREC))
			val = precise_datala_hsw(event, pebs->dse);
		else if (fst)
			val = precise_store_data(pebs->dse);
		data.data_src.val = val;
	}

	/*
	 * We use the interrupt regs as a base because the PEBS record
	 * does not contain a full regs set, specifically it seems to
	 * lack segment descriptors, which get used by things like
	 * user_mode().
	 *
	 * In the simple case fix up only the IP and BP,SP regs, for
	 * PERF_SAMPLE_IP and PERF_SAMPLE_CALLCHAIN to function properly.
	 * A possible PERF_SAMPLE_REGS will have to transfer all regs.
	 */
	regs = *iregs;
	regs.flags = pebs->flags;
	set_linear_ip(&regs, pebs->ip);
	regs.bp = pebs->bp;
	regs.sp = pebs->sp;

	if (sample_type & PERF_SAMPLE_REGS_INTR) {
		regs.ax = pebs->ax;
		regs.bx = pebs->bx;
		regs.cx = pebs->cx;
		regs.dx = pebs->dx;
		regs.si = pebs->si;
		regs.di = pebs->di;
		regs.bp = pebs->bp;
		regs.sp = pebs->sp;

		regs.flags = pebs->flags;
#ifndef CONFIG_X86_32
		regs.r8 = pebs->r8;
		regs.r9 = pebs->r9;
		regs.r10 = pebs->r10;
		regs.r11 = pebs->r11;
		regs.r12 = pebs->r12;
		regs.r13 = pebs->r13;
		regs.r14 = pebs->r14;
		regs.r15 = pebs->r15;
#endif
	}

	if (event->attr.precise_ip > 1 && x86_pmu.intel_cap.pebs_format >= 2) {
		regs.ip = pebs->real_ip;
		regs.flags |= PERF_EFLAGS_EXACT;
	} else if (event->attr.precise_ip > 1 && intel_pmu_pebs_fixup_ip(&regs))
		regs.flags |= PERF_EFLAGS_EXACT;
	else
		regs.flags &= ~PERF_EFLAGS_EXACT;

	if ((sample_type & PERF_SAMPLE_ADDR) &&
	    x86_pmu.intel_cap.pebs_format >= 1)
		data.addr = pebs->dla;

	if (x86_pmu.intel_cap.pebs_format >= 2) {
		/* Only set the TSX weight when no memory weight. */
		if ((sample_type & PERF_SAMPLE_WEIGHT) && !fll)
			data.weight = intel_hsw_weight(pebs);

		if (sample_type & PERF_SAMPLE_TRANSACTION)
			data.txn = intel_hsw_transaction(pebs);
	}

	if (has_branch_stack(event))
		data.br_stack = &cpuc->lbr_stack;

	if (perf_event_overflow(event, &data, &regs))
		x86_pmu_stop(event, 0);
}

static void intel_pmu_drain_pebs_core(struct pt_regs *iregs)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
	struct debug_store *ds = cpuc->ds;
	struct perf_event *event = cpuc->events[0]; /* PMC0 only */
	struct pebs_record_core *at, *top;
	int n;

	if (!x86_pmu.pebs_active)
		return;

	at  = (struct pebs_record_core *)(unsigned long)ds->pebs_buffer_base;
	top = (struct pebs_record_core *)(unsigned long)ds->pebs_index;

	/*
	 * Whatever else happens, drain the thing
	 */
	ds->pebs_index = ds->pebs_buffer_base;

	if (!test_bit(0, cpuc->active_mask))
		return;

	WARN_ON_ONCE(!event);

	if (!event->attr.precise_ip)
		return;

	n = top - at;
	if (n <= 0)
		return;

	/*
	 * Should not happen, we program the threshold at 1 and do not
	 * set a reset value.
	 */
	WARN_ONCE(n > 1, "bad leftover pebs %d\n", n);
	at += n - 1;

	__intel_pmu_pebs_event(event, iregs, at);
}

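/*
 * Drain the PEBS buffer for record format 1 and later: each record
 * carries a status bitmask of the counters that overflowed and is
 * attributed to the first active precise event whose bit is set and
 * that has not already been handed a record in this batch.
 */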
static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
	struct debug_store *ds = cpuc->ds;
	struct perf_event *event = NULL;
	void *at, *top;
	u64 status = 0;
	int bit;

	if (!x86_pmu.pebs_active)
		return;

	at  = (struct pebs_record_nhm *)(unsigned long)ds->pebs_buffer_base;
	top = (struct pebs_record_nhm *)(unsigned long)ds->pebs_index;

	ds->pebs_index = ds->pebs_buffer_base;

	if (unlikely(at > top))
		return;

	/*
	 * Should not happen, we program the threshold at 1 and do not
	 * set a reset value.
	 */
	WARN_ONCE(top - at > x86_pmu.max_pebs_events * x86_pmu.pebs_record_size,
		  "Unexpected number of pebs records %ld\n",
		  (long)(top - at) / x86_pmu.pebs_record_size);

	for (; at < top; at += x86_pmu.pebs_record_size) {
		struct pebs_record_nhm *p = at;

		for_each_set_bit(bit, (unsigned long *)&p->status,
				 x86_pmu.max_pebs_events) {
			event = cpuc->events[bit];
			if (!test_bit(bit, cpuc->active_mask))
				continue;

			WARN_ON_ONCE(!event);

			if (!event->attr.precise_ip)
				continue;

			if (__test_and_set_bit(bit, (unsigned long *)&status))
				continue;

			break;
		}

		if (!event || bit >= x86_pmu.max_pebs_events)
			continue;

		__intel_pmu_pebs_event(event, iregs, at);
	}
}

/*
 * BTS, PEBS probe and setup
 */

void __init intel_ds_init(void)
{
	/*
	 * No support for 32-bit formats
	 */
	if (!boot_cpu_has(X86_FEATURE_DTES64))
		return;

	x86_pmu.bts  = boot_cpu_has(X86_FEATURE_BTS);
	x86_pmu.pebs = boot_cpu_has(X86_FEATURE_PEBS);
	if (x86_pmu.pebs) {
		char pebs_type = x86_pmu.intel_cap.pebs_trap ?  '+' : '-';
		int format = x86_pmu.intel_cap.pebs_format;

		switch (format) {
		case 0:
			printk(KERN_CONT "PEBS fmt0%c, ", pebs_type);
			x86_pmu.pebs_record_size = sizeof(struct pebs_record_core);
			x86_pmu.drain_pebs = intel_pmu_drain_pebs_core;
			break;

		case 1:
			printk(KERN_CONT "PEBS fmt1%c, ", pebs_type);
			x86_pmu.pebs_record_size = sizeof(struct pebs_record_nhm);
			x86_pmu.drain_pebs = intel_pmu_drain_pebs_nhm;
			break;

		case 2:
			pr_cont("PEBS fmt2%c, ", pebs_type);
			x86_pmu.pebs_record_size = sizeof(struct pebs_record_hsw);
			x86_pmu.drain_pebs = intel_pmu_drain_pebs_nhm;
			break;

		default:
			printk(KERN_CONT "no PEBS fmt%d%c, ", format, pebs_type);
			x86_pmu.pebs = 0;
		}
	}
}

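/*
 * Re-point MSR_IA32_DS_AREA at this CPU's debug store, typically after
 * the MSR contents have been lost across a suspend/resume or similar
 * power transition.
 */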
void perf_restore_debug_store(void)
{
	struct debug_store *ds = __this_cpu_read(cpu_hw_events.ds);

	if (!x86_pmu.bts && !x86_pmu.pebs)
		return;

	wrmsrl(MSR_IA32_DS_AREA, (unsigned long)ds);
}