1/* 2 * Per core/cpu state 3 * 4 * Used to coordinate shared registers between HT threads or 5 * among events on a single PMU. 6 */ 7 8#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 9 10#include <linux/stddef.h> 11#include <linux/types.h> 12#include <linux/init.h> 13#include <linux/slab.h> 14#include <linux/export.h> 15#include <linux/watchdog.h> 16 17#include <asm/cpufeature.h> 18#include <asm/hardirq.h> 19#include <asm/apic.h> 20 21#include "perf_event.h" 22 23/* 24 * Intel PerfMon, used on Core and later. 25 */ 26static u64 intel_perfmon_event_map[PERF_COUNT_HW_MAX] __read_mostly = 27{ 28 [PERF_COUNT_HW_CPU_CYCLES] = 0x003c, 29 [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0, 30 [PERF_COUNT_HW_CACHE_REFERENCES] = 0x4f2e, 31 [PERF_COUNT_HW_CACHE_MISSES] = 0x412e, 32 [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c4, 33 [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c5, 34 [PERF_COUNT_HW_BUS_CYCLES] = 0x013c, 35 [PERF_COUNT_HW_REF_CPU_CYCLES] = 0x0300, /* pseudo-encoding */ 36}; 37 38static struct event_constraint intel_core_event_constraints[] __read_mostly = 39{ 40 INTEL_EVENT_CONSTRAINT(0x11, 0x2), /* FP_ASSIST */ 41 INTEL_EVENT_CONSTRAINT(0x12, 0x2), /* MUL */ 42 INTEL_EVENT_CONSTRAINT(0x13, 0x2), /* DIV */ 43 INTEL_EVENT_CONSTRAINT(0x14, 0x1), /* CYCLES_DIV_BUSY */ 44 INTEL_EVENT_CONSTRAINT(0x19, 0x2), /* DELAYED_BYPASS */ 45 INTEL_EVENT_CONSTRAINT(0xc1, 0x1), /* FP_COMP_INSTR_RET */ 46 EVENT_CONSTRAINT_END 47}; 48 49static struct event_constraint intel_core2_event_constraints[] __read_mostly = 50{ 51 FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ 52 FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ 53 FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */ 54 INTEL_EVENT_CONSTRAINT(0x10, 0x1), /* FP_COMP_OPS_EXE */ 55 INTEL_EVENT_CONSTRAINT(0x11, 0x2), /* FP_ASSIST */ 56 INTEL_EVENT_CONSTRAINT(0x12, 0x2), /* MUL */ 57 INTEL_EVENT_CONSTRAINT(0x13, 0x2), /* DIV */ 58 INTEL_EVENT_CONSTRAINT(0x14, 0x1), /* CYCLES_DIV_BUSY */ 59 INTEL_EVENT_CONSTRAINT(0x18, 0x1), /* IDLE_DURING_DIV */ 60 INTEL_EVENT_CONSTRAINT(0x19, 0x2), /* DELAYED_BYPASS */ 61 INTEL_EVENT_CONSTRAINT(0xa1, 0x1), /* RS_UOPS_DISPATCH_CYCLES */ 62 INTEL_EVENT_CONSTRAINT(0xc9, 0x1), /* ITLB_MISS_RETIRED (T30-9) */ 63 INTEL_EVENT_CONSTRAINT(0xcb, 0x1), /* MEM_LOAD_RETIRED */ 64 EVENT_CONSTRAINT_END 65}; 66 67static struct event_constraint intel_nehalem_event_constraints[] __read_mostly = 68{ 69 FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ 70 FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ 71 FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */ 72 INTEL_EVENT_CONSTRAINT(0x40, 0x3), /* L1D_CACHE_LD */ 73 INTEL_EVENT_CONSTRAINT(0x41, 0x3), /* L1D_CACHE_ST */ 74 INTEL_EVENT_CONSTRAINT(0x42, 0x3), /* L1D_CACHE_LOCK */ 75 INTEL_EVENT_CONSTRAINT(0x43, 0x3), /* L1D_ALL_REF */ 76 INTEL_EVENT_CONSTRAINT(0x48, 0x3), /* L1D_PEND_MISS */ 77 INTEL_EVENT_CONSTRAINT(0x4e, 0x3), /* L1D_PREFETCH */ 78 INTEL_EVENT_CONSTRAINT(0x51, 0x3), /* L1D */ 79 INTEL_EVENT_CONSTRAINT(0x63, 0x3), /* CACHE_LOCK_CYCLES */ 80 EVENT_CONSTRAINT_END 81}; 82 83static struct extra_reg intel_nehalem_extra_regs[] __read_mostly = 84{ 85 /* must define OFFCORE_RSP_X first, see intel_fixup_er() */ 86 INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0xffff, RSP_0), 87 INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x100b), 88 EVENT_EXTRA_END 89}; 90 91static struct event_constraint intel_westmere_event_constraints[] __read_mostly = 92{ 93 FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ 94 FIXED_EVENT_CONSTRAINT(0x003c, 1), /* 
CPU_CLK_UNHALTED.CORE */ 95 FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */ 96 INTEL_EVENT_CONSTRAINT(0x51, 0x3), /* L1D */ 97 INTEL_EVENT_CONSTRAINT(0x60, 0x1), /* OFFCORE_REQUESTS_OUTSTANDING */ 98 INTEL_EVENT_CONSTRAINT(0x63, 0x3), /* CACHE_LOCK_CYCLES */ 99 INTEL_EVENT_CONSTRAINT(0xb3, 0x1), /* SNOOPQ_REQUEST_OUTSTANDING */ 100 EVENT_CONSTRAINT_END 101}; 102 103static struct event_constraint intel_snb_event_constraints[] __read_mostly = 104{ 105 FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ 106 FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ 107 FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */ 108 INTEL_UEVENT_CONSTRAINT(0x04a3, 0xf), /* CYCLE_ACTIVITY.CYCLES_NO_DISPATCH */ 109 INTEL_UEVENT_CONSTRAINT(0x05a3, 0xf), /* CYCLE_ACTIVITY.STALLS_L2_PENDING */ 110 INTEL_UEVENT_CONSTRAINT(0x02a3, 0x4), /* CYCLE_ACTIVITY.CYCLES_L1D_PENDING */ 111 INTEL_UEVENT_CONSTRAINT(0x06a3, 0x4), /* CYCLE_ACTIVITY.STALLS_L1D_PENDING */ 112 INTEL_EVENT_CONSTRAINT(0x48, 0x4), /* L1D_PEND_MISS.PENDING */ 113 INTEL_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PREC_DIST */ 114 INTEL_EVENT_CONSTRAINT(0xcd, 0x8), /* MEM_TRANS_RETIRED.LOAD_LATENCY */ 115 INTEL_UEVENT_CONSTRAINT(0x04a3, 0xf), /* CYCLE_ACTIVITY.CYCLES_NO_DISPATCH */ 116 INTEL_UEVENT_CONSTRAINT(0x02a3, 0x4), /* CYCLE_ACTIVITY.CYCLES_L1D_PENDING */ 117 118 INTEL_EXCLEVT_CONSTRAINT(0xd0, 0xf), /* MEM_UOPS_RETIRED.* */ 119 INTEL_EXCLEVT_CONSTRAINT(0xd1, 0xf), /* MEM_LOAD_UOPS_RETIRED.* */ 120 INTEL_EXCLEVT_CONSTRAINT(0xd2, 0xf), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */ 121 INTEL_EXCLEVT_CONSTRAINT(0xd3, 0xf), /* MEM_LOAD_UOPS_LLC_MISS_RETIRED.* */ 122 123 EVENT_CONSTRAINT_END 124}; 125 126static struct event_constraint intel_ivb_event_constraints[] __read_mostly = 127{ 128 FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ 129 FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ 130 FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */ 131 INTEL_UEVENT_CONSTRAINT(0x0148, 0x4), /* L1D_PEND_MISS.PENDING */ 132 INTEL_UEVENT_CONSTRAINT(0x0279, 0xf), /* IDQ.EMTPY */ 133 INTEL_UEVENT_CONSTRAINT(0x019c, 0xf), /* IDQ_UOPS_NOT_DELIVERED.CORE */ 134 INTEL_UEVENT_CONSTRAINT(0x02a3, 0xf), /* CYCLE_ACTIVITY.CYCLES_LDM_PENDING */ 135 INTEL_UEVENT_CONSTRAINT(0x04a3, 0xf), /* CYCLE_ACTIVITY.CYCLES_NO_EXECUTE */ 136 INTEL_UEVENT_CONSTRAINT(0x05a3, 0xf), /* CYCLE_ACTIVITY.STALLS_L2_PENDING */ 137 INTEL_UEVENT_CONSTRAINT(0x06a3, 0xf), /* CYCLE_ACTIVITY.STALLS_LDM_PENDING */ 138 INTEL_UEVENT_CONSTRAINT(0x08a3, 0x4), /* CYCLE_ACTIVITY.CYCLES_L1D_PENDING */ 139 INTEL_UEVENT_CONSTRAINT(0x0ca3, 0x4), /* CYCLE_ACTIVITY.STALLS_L1D_PENDING */ 140 INTEL_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PREC_DIST */ 141 142 INTEL_EXCLEVT_CONSTRAINT(0xd0, 0xf), /* MEM_UOPS_RETIRED.* */ 143 INTEL_EXCLEVT_CONSTRAINT(0xd1, 0xf), /* MEM_LOAD_UOPS_RETIRED.* */ 144 INTEL_EXCLEVT_CONSTRAINT(0xd2, 0xf), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */ 145 INTEL_EXCLEVT_CONSTRAINT(0xd3, 0xf), /* MEM_LOAD_UOPS_LLC_MISS_RETIRED.* */ 146 147 EVENT_CONSTRAINT_END 148}; 149 150static struct extra_reg intel_westmere_extra_regs[] __read_mostly = 151{ 152 /* must define OFFCORE_RSP_X first, see intel_fixup_er() */ 153 INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0xffff, RSP_0), 154 INTEL_UEVENT_EXTRA_REG(0x01bb, MSR_OFFCORE_RSP_1, 0xffff, RSP_1), 155 INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x100b), 156 EVENT_EXTRA_END 157}; 158 159static struct event_constraint intel_v1_event_constraints[] __read_mostly = 160{ 161 EVENT_CONSTRAINT_END 
162}; 163 164static struct event_constraint intel_gen_event_constraints[] __read_mostly = 165{ 166 FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ 167 FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ 168 FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */ 169 EVENT_CONSTRAINT_END 170}; 171 172static struct event_constraint intel_slm_event_constraints[] __read_mostly = 173{ 174 FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ 175 FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ 176 FIXED_EVENT_CONSTRAINT(0x0300, 2), /* pseudo CPU_CLK_UNHALTED.REF */ 177 EVENT_CONSTRAINT_END 178}; 179 180static struct extra_reg intel_snb_extra_regs[] __read_mostly = { 181 /* must define OFFCORE_RSP_X first, see intel_fixup_er() */ 182 INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0x3f807f8fffull, RSP_0), 183 INTEL_UEVENT_EXTRA_REG(0x01bb, MSR_OFFCORE_RSP_1, 0x3f807f8fffull, RSP_1), 184 INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x01cd), 185 EVENT_EXTRA_END 186}; 187 188static struct extra_reg intel_snbep_extra_regs[] __read_mostly = { 189 /* must define OFFCORE_RSP_X first, see intel_fixup_er() */ 190 INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0x3fffff8fffull, RSP_0), 191 INTEL_UEVENT_EXTRA_REG(0x01bb, MSR_OFFCORE_RSP_1, 0x3fffff8fffull, RSP_1), 192 INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x01cd), 193 EVENT_EXTRA_END 194}; 195 196EVENT_ATTR_STR(mem-loads, mem_ld_nhm, "event=0x0b,umask=0x10,ldlat=3"); 197EVENT_ATTR_STR(mem-loads, mem_ld_snb, "event=0xcd,umask=0x1,ldlat=3"); 198EVENT_ATTR_STR(mem-stores, mem_st_snb, "event=0xcd,umask=0x2"); 199 200struct attribute *nhm_events_attrs[] = { 201 EVENT_PTR(mem_ld_nhm), 202 NULL, 203}; 204 205struct attribute *snb_events_attrs[] = { 206 EVENT_PTR(mem_ld_snb), 207 EVENT_PTR(mem_st_snb), 208 NULL, 209}; 210 211static struct event_constraint intel_hsw_event_constraints[] = { 212 FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ 213 FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ 214 FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */ 215 INTEL_EVENT_CONSTRAINT(0x48, 0x4), /* L1D_PEND_MISS.* */ 216 INTEL_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PREC_DIST */ 217 INTEL_EVENT_CONSTRAINT(0xcd, 0x8), /* MEM_TRANS_RETIRED.LOAD_LATENCY */ 218 /* CYCLE_ACTIVITY.CYCLES_L1D_PENDING */ 219 INTEL_UEVENT_CONSTRAINT(0x08a3, 0x4), 220 /* CYCLE_ACTIVITY.STALLS_L1D_PENDING */ 221 INTEL_UEVENT_CONSTRAINT(0x0ca3, 0x4), 222 /* CYCLE_ACTIVITY.CYCLES_NO_EXECUTE */ 223 INTEL_UEVENT_CONSTRAINT(0x04a3, 0xf), 224 225 INTEL_EXCLEVT_CONSTRAINT(0xd0, 0xf), /* MEM_UOPS_RETIRED.* */ 226 INTEL_EXCLEVT_CONSTRAINT(0xd1, 0xf), /* MEM_LOAD_UOPS_RETIRED.* */ 227 INTEL_EXCLEVT_CONSTRAINT(0xd2, 0xf), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */ 228 INTEL_EXCLEVT_CONSTRAINT(0xd3, 0xf), /* MEM_LOAD_UOPS_LLC_MISS_RETIRED.* */ 229 230 EVENT_CONSTRAINT_END 231}; 232 233struct event_constraint intel_bdw_event_constraints[] = { 234 FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ 235 FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ 236 FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */ 237 INTEL_UEVENT_CONSTRAINT(0x148, 0x4), /* L1D_PEND_MISS.PENDING */ 238 INTEL_EVENT_CONSTRAINT(0xa3, 0x4), /* CYCLE_ACTIVITY.* */ 239 EVENT_CONSTRAINT_END 240}; 241 242static u64 intel_pmu_event_map(int hw_event) 243{ 244 return intel_perfmon_event_map[hw_event]; 245} 246 247#define SNB_DMND_DATA_RD (1ULL << 0) 248#define SNB_DMND_RFO (1ULL << 1) 249#define SNB_DMND_IFETCH (1ULL << 2) 250#define SNB_DMND_WB (1ULL 
<< 3) 251#define SNB_PF_DATA_RD (1ULL << 4) 252#define SNB_PF_RFO (1ULL << 5) 253#define SNB_PF_IFETCH (1ULL << 6) 254#define SNB_LLC_DATA_RD (1ULL << 7) 255#define SNB_LLC_RFO (1ULL << 8) 256#define SNB_LLC_IFETCH (1ULL << 9) 257#define SNB_BUS_LOCKS (1ULL << 10) 258#define SNB_STRM_ST (1ULL << 11) 259#define SNB_OTHER (1ULL << 15) 260#define SNB_RESP_ANY (1ULL << 16) 261#define SNB_NO_SUPP (1ULL << 17) 262#define SNB_LLC_HITM (1ULL << 18) 263#define SNB_LLC_HITE (1ULL << 19) 264#define SNB_LLC_HITS (1ULL << 20) 265#define SNB_LLC_HITF (1ULL << 21) 266#define SNB_LOCAL (1ULL << 22) 267#define SNB_REMOTE (0xffULL << 23) 268#define SNB_SNP_NONE (1ULL << 31) 269#define SNB_SNP_NOT_NEEDED (1ULL << 32) 270#define SNB_SNP_MISS (1ULL << 33) 271#define SNB_NO_FWD (1ULL << 34) 272#define SNB_SNP_FWD (1ULL << 35) 273#define SNB_HITM (1ULL << 36) 274#define SNB_NON_DRAM (1ULL << 37) 275 276#define SNB_DMND_READ (SNB_DMND_DATA_RD|SNB_LLC_DATA_RD) 277#define SNB_DMND_WRITE (SNB_DMND_RFO|SNB_LLC_RFO) 278#define SNB_DMND_PREFETCH (SNB_PF_DATA_RD|SNB_PF_RFO) 279 280#define SNB_SNP_ANY (SNB_SNP_NONE|SNB_SNP_NOT_NEEDED| \ 281 SNB_SNP_MISS|SNB_NO_FWD|SNB_SNP_FWD| \ 282 SNB_HITM) 283 284#define SNB_DRAM_ANY (SNB_LOCAL|SNB_REMOTE|SNB_SNP_ANY) 285#define SNB_DRAM_REMOTE (SNB_REMOTE|SNB_SNP_ANY) 286 287#define SNB_L3_ACCESS SNB_RESP_ANY 288#define SNB_L3_MISS (SNB_DRAM_ANY|SNB_NON_DRAM) 289 290static __initconst const u64 snb_hw_cache_extra_regs 291 [PERF_COUNT_HW_CACHE_MAX] 292 [PERF_COUNT_HW_CACHE_OP_MAX] 293 [PERF_COUNT_HW_CACHE_RESULT_MAX] = 294{ 295 [ C(LL ) ] = { 296 [ C(OP_READ) ] = { 297 [ C(RESULT_ACCESS) ] = SNB_DMND_READ|SNB_L3_ACCESS, 298 [ C(RESULT_MISS) ] = SNB_DMND_READ|SNB_L3_MISS, 299 }, 300 [ C(OP_WRITE) ] = { 301 [ C(RESULT_ACCESS) ] = SNB_DMND_WRITE|SNB_L3_ACCESS, 302 [ C(RESULT_MISS) ] = SNB_DMND_WRITE|SNB_L3_MISS, 303 }, 304 [ C(OP_PREFETCH) ] = { 305 [ C(RESULT_ACCESS) ] = SNB_DMND_PREFETCH|SNB_L3_ACCESS, 306 [ C(RESULT_MISS) ] = SNB_DMND_PREFETCH|SNB_L3_MISS, 307 }, 308 }, 309 [ C(NODE) ] = { 310 [ C(OP_READ) ] = { 311 [ C(RESULT_ACCESS) ] = SNB_DMND_READ|SNB_DRAM_ANY, 312 [ C(RESULT_MISS) ] = SNB_DMND_READ|SNB_DRAM_REMOTE, 313 }, 314 [ C(OP_WRITE) ] = { 315 [ C(RESULT_ACCESS) ] = SNB_DMND_WRITE|SNB_DRAM_ANY, 316 [ C(RESULT_MISS) ] = SNB_DMND_WRITE|SNB_DRAM_REMOTE, 317 }, 318 [ C(OP_PREFETCH) ] = { 319 [ C(RESULT_ACCESS) ] = SNB_DMND_PREFETCH|SNB_DRAM_ANY, 320 [ C(RESULT_MISS) ] = SNB_DMND_PREFETCH|SNB_DRAM_REMOTE, 321 }, 322 }, 323}; 324 325static __initconst const u64 snb_hw_cache_event_ids 326 [PERF_COUNT_HW_CACHE_MAX] 327 [PERF_COUNT_HW_CACHE_OP_MAX] 328 [PERF_COUNT_HW_CACHE_RESULT_MAX] = 329{ 330 [ C(L1D) ] = { 331 [ C(OP_READ) ] = { 332 [ C(RESULT_ACCESS) ] = 0xf1d0, /* MEM_UOP_RETIRED.LOADS */ 333 [ C(RESULT_MISS) ] = 0x0151, /* L1D.REPLACEMENT */ 334 }, 335 [ C(OP_WRITE) ] = { 336 [ C(RESULT_ACCESS) ] = 0xf2d0, /* MEM_UOP_RETIRED.STORES */ 337 [ C(RESULT_MISS) ] = 0x0851, /* L1D.ALL_M_REPLACEMENT */ 338 }, 339 [ C(OP_PREFETCH) ] = { 340 [ C(RESULT_ACCESS) ] = 0x0, 341 [ C(RESULT_MISS) ] = 0x024e, /* HW_PRE_REQ.DL1_MISS */ 342 }, 343 }, 344 [ C(L1I ) ] = { 345 [ C(OP_READ) ] = { 346 [ C(RESULT_ACCESS) ] = 0x0, 347 [ C(RESULT_MISS) ] = 0x0280, /* ICACHE.MISSES */ 348 }, 349 [ C(OP_WRITE) ] = { 350 [ C(RESULT_ACCESS) ] = -1, 351 [ C(RESULT_MISS) ] = -1, 352 }, 353 [ C(OP_PREFETCH) ] = { 354 [ C(RESULT_ACCESS) ] = 0x0, 355 [ C(RESULT_MISS) ] = 0x0, 356 }, 357 }, 358 [ C(LL ) ] = { 359 [ C(OP_READ) ] = { 360 /* OFFCORE_RESPONSE.ANY_DATA.LOCAL_CACHE */ 361 [ C(RESULT_ACCESS) ] = 
0x01b7, 362 /* OFFCORE_RESPONSE.ANY_DATA.ANY_LLC_MISS */ 363 [ C(RESULT_MISS) ] = 0x01b7, 364 }, 365 [ C(OP_WRITE) ] = { 366 /* OFFCORE_RESPONSE.ANY_RFO.LOCAL_CACHE */ 367 [ C(RESULT_ACCESS) ] = 0x01b7, 368 /* OFFCORE_RESPONSE.ANY_RFO.ANY_LLC_MISS */ 369 [ C(RESULT_MISS) ] = 0x01b7, 370 }, 371 [ C(OP_PREFETCH) ] = { 372 /* OFFCORE_RESPONSE.PREFETCH.LOCAL_CACHE */ 373 [ C(RESULT_ACCESS) ] = 0x01b7, 374 /* OFFCORE_RESPONSE.PREFETCH.ANY_LLC_MISS */ 375 [ C(RESULT_MISS) ] = 0x01b7, 376 }, 377 }, 378 [ C(DTLB) ] = { 379 [ C(OP_READ) ] = { 380 [ C(RESULT_ACCESS) ] = 0x81d0, /* MEM_UOP_RETIRED.ALL_LOADS */ 381 [ C(RESULT_MISS) ] = 0x0108, /* DTLB_LOAD_MISSES.CAUSES_A_WALK */ 382 }, 383 [ C(OP_WRITE) ] = { 384 [ C(RESULT_ACCESS) ] = 0x82d0, /* MEM_UOP_RETIRED.ALL_STORES */ 385 [ C(RESULT_MISS) ] = 0x0149, /* DTLB_STORE_MISSES.MISS_CAUSES_A_WALK */ 386 }, 387 [ C(OP_PREFETCH) ] = { 388 [ C(RESULT_ACCESS) ] = 0x0, 389 [ C(RESULT_MISS) ] = 0x0, 390 }, 391 }, 392 [ C(ITLB) ] = { 393 [ C(OP_READ) ] = { 394 [ C(RESULT_ACCESS) ] = 0x1085, /* ITLB_MISSES.STLB_HIT */ 395 [ C(RESULT_MISS) ] = 0x0185, /* ITLB_MISSES.CAUSES_A_WALK */ 396 }, 397 [ C(OP_WRITE) ] = { 398 [ C(RESULT_ACCESS) ] = -1, 399 [ C(RESULT_MISS) ] = -1, 400 }, 401 [ C(OP_PREFETCH) ] = { 402 [ C(RESULT_ACCESS) ] = -1, 403 [ C(RESULT_MISS) ] = -1, 404 }, 405 }, 406 [ C(BPU ) ] = { 407 [ C(OP_READ) ] = { 408 [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ALL_BRANCHES */ 409 [ C(RESULT_MISS) ] = 0x00c5, /* BR_MISP_RETIRED.ALL_BRANCHES */ 410 }, 411 [ C(OP_WRITE) ] = { 412 [ C(RESULT_ACCESS) ] = -1, 413 [ C(RESULT_MISS) ] = -1, 414 }, 415 [ C(OP_PREFETCH) ] = { 416 [ C(RESULT_ACCESS) ] = -1, 417 [ C(RESULT_MISS) ] = -1, 418 }, 419 }, 420 [ C(NODE) ] = { 421 [ C(OP_READ) ] = { 422 [ C(RESULT_ACCESS) ] = 0x01b7, 423 [ C(RESULT_MISS) ] = 0x01b7, 424 }, 425 [ C(OP_WRITE) ] = { 426 [ C(RESULT_ACCESS) ] = 0x01b7, 427 [ C(RESULT_MISS) ] = 0x01b7, 428 }, 429 [ C(OP_PREFETCH) ] = { 430 [ C(RESULT_ACCESS) ] = 0x01b7, 431 [ C(RESULT_MISS) ] = 0x01b7, 432 }, 433 }, 434 435}; 436 437/* 438 * Notes on the events: 439 * - data reads do not include code reads (comparable to earlier tables) 440 * - data counts include speculative execution (except L1 write, dtlb, bpu) 441 * - remote node access includes remote memory, remote cache, remote mmio. 442 * - prefetches are not included in the counts because they are not 443 * reliably counted. 
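 * - the 0x01b7 OFFCORE_RESPONSE entries take their request/response bit
 *   selection from the matching *_hw_cache_extra_regs table.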
444 */ 445 446#define HSW_DEMAND_DATA_RD BIT_ULL(0) 447#define HSW_DEMAND_RFO BIT_ULL(1) 448#define HSW_ANY_RESPONSE BIT_ULL(16) 449#define HSW_SUPPLIER_NONE BIT_ULL(17) 450#define HSW_L3_MISS_LOCAL_DRAM BIT_ULL(22) 451#define HSW_L3_MISS_REMOTE_HOP0 BIT_ULL(27) 452#define HSW_L3_MISS_REMOTE_HOP1 BIT_ULL(28) 453#define HSW_L3_MISS_REMOTE_HOP2P BIT_ULL(29) 454#define HSW_L3_MISS (HSW_L3_MISS_LOCAL_DRAM| \ 455 HSW_L3_MISS_REMOTE_HOP0|HSW_L3_MISS_REMOTE_HOP1| \ 456 HSW_L3_MISS_REMOTE_HOP2P) 457#define HSW_SNOOP_NONE BIT_ULL(31) 458#define HSW_SNOOP_NOT_NEEDED BIT_ULL(32) 459#define HSW_SNOOP_MISS BIT_ULL(33) 460#define HSW_SNOOP_HIT_NO_FWD BIT_ULL(34) 461#define HSW_SNOOP_HIT_WITH_FWD BIT_ULL(35) 462#define HSW_SNOOP_HITM BIT_ULL(36) 463#define HSW_SNOOP_NON_DRAM BIT_ULL(37) 464#define HSW_ANY_SNOOP (HSW_SNOOP_NONE| \ 465 HSW_SNOOP_NOT_NEEDED|HSW_SNOOP_MISS| \ 466 HSW_SNOOP_HIT_NO_FWD|HSW_SNOOP_HIT_WITH_FWD| \ 467 HSW_SNOOP_HITM|HSW_SNOOP_NON_DRAM) 468#define HSW_SNOOP_DRAM (HSW_ANY_SNOOP & ~HSW_SNOOP_NON_DRAM) 469#define HSW_DEMAND_READ HSW_DEMAND_DATA_RD 470#define HSW_DEMAND_WRITE HSW_DEMAND_RFO 471#define HSW_L3_MISS_REMOTE (HSW_L3_MISS_REMOTE_HOP0|\ 472 HSW_L3_MISS_REMOTE_HOP1|HSW_L3_MISS_REMOTE_HOP2P) 473#define HSW_LLC_ACCESS HSW_ANY_RESPONSE 474 475#define BDW_L3_MISS_LOCAL BIT(26) 476#define BDW_L3_MISS (BDW_L3_MISS_LOCAL| \ 477 HSW_L3_MISS_REMOTE_HOP0|HSW_L3_MISS_REMOTE_HOP1| \ 478 HSW_L3_MISS_REMOTE_HOP2P) 479 480 481static __initconst const u64 hsw_hw_cache_event_ids 482 [PERF_COUNT_HW_CACHE_MAX] 483 [PERF_COUNT_HW_CACHE_OP_MAX] 484 [PERF_COUNT_HW_CACHE_RESULT_MAX] = 485{ 486 [ C(L1D ) ] = { 487 [ C(OP_READ) ] = { 488 [ C(RESULT_ACCESS) ] = 0x81d0, /* MEM_UOPS_RETIRED.ALL_LOADS */ 489 [ C(RESULT_MISS) ] = 0x151, /* L1D.REPLACEMENT */ 490 }, 491 [ C(OP_WRITE) ] = { 492 [ C(RESULT_ACCESS) ] = 0x82d0, /* MEM_UOPS_RETIRED.ALL_STORES */ 493 [ C(RESULT_MISS) ] = 0x0, 494 }, 495 [ C(OP_PREFETCH) ] = { 496 [ C(RESULT_ACCESS) ] = 0x0, 497 [ C(RESULT_MISS) ] = 0x0, 498 }, 499 }, 500 [ C(L1I ) ] = { 501 [ C(OP_READ) ] = { 502 [ C(RESULT_ACCESS) ] = 0x0, 503 [ C(RESULT_MISS) ] = 0x280, /* ICACHE.MISSES */ 504 }, 505 [ C(OP_WRITE) ] = { 506 [ C(RESULT_ACCESS) ] = -1, 507 [ C(RESULT_MISS) ] = -1, 508 }, 509 [ C(OP_PREFETCH) ] = { 510 [ C(RESULT_ACCESS) ] = 0x0, 511 [ C(RESULT_MISS) ] = 0x0, 512 }, 513 }, 514 [ C(LL ) ] = { 515 [ C(OP_READ) ] = { 516 [ C(RESULT_ACCESS) ] = 0x1b7, /* OFFCORE_RESPONSE */ 517 [ C(RESULT_MISS) ] = 0x1b7, /* OFFCORE_RESPONSE */ 518 }, 519 [ C(OP_WRITE) ] = { 520 [ C(RESULT_ACCESS) ] = 0x1b7, /* OFFCORE_RESPONSE */ 521 [ C(RESULT_MISS) ] = 0x1b7, /* OFFCORE_RESPONSE */ 522 }, 523 [ C(OP_PREFETCH) ] = { 524 [ C(RESULT_ACCESS) ] = 0x0, 525 [ C(RESULT_MISS) ] = 0x0, 526 }, 527 }, 528 [ C(DTLB) ] = { 529 [ C(OP_READ) ] = { 530 [ C(RESULT_ACCESS) ] = 0x81d0, /* MEM_UOPS_RETIRED.ALL_LOADS */ 531 [ C(RESULT_MISS) ] = 0x108, /* DTLB_LOAD_MISSES.MISS_CAUSES_A_WALK */ 532 }, 533 [ C(OP_WRITE) ] = { 534 [ C(RESULT_ACCESS) ] = 0x82d0, /* MEM_UOPS_RETIRED.ALL_STORES */ 535 [ C(RESULT_MISS) ] = 0x149, /* DTLB_STORE_MISSES.MISS_CAUSES_A_WALK */ 536 }, 537 [ C(OP_PREFETCH) ] = { 538 [ C(RESULT_ACCESS) ] = 0x0, 539 [ C(RESULT_MISS) ] = 0x0, 540 }, 541 }, 542 [ C(ITLB) ] = { 543 [ C(OP_READ) ] = { 544 [ C(RESULT_ACCESS) ] = 0x6085, /* ITLB_MISSES.STLB_HIT */ 545 [ C(RESULT_MISS) ] = 0x185, /* ITLB_MISSES.MISS_CAUSES_A_WALK */ 546 }, 547 [ C(OP_WRITE) ] = { 548 [ C(RESULT_ACCESS) ] = -1, 549 [ C(RESULT_MISS) ] = -1, 550 }, 551 [ C(OP_PREFETCH) ] = { 552 [ C(RESULT_ACCESS) ] = -1, 553 [ 
C(RESULT_MISS) ] = -1, 554 }, 555 }, 556 [ C(BPU ) ] = { 557 [ C(OP_READ) ] = { 558 [ C(RESULT_ACCESS) ] = 0xc4, /* BR_INST_RETIRED.ALL_BRANCHES */ 559 [ C(RESULT_MISS) ] = 0xc5, /* BR_MISP_RETIRED.ALL_BRANCHES */ 560 }, 561 [ C(OP_WRITE) ] = { 562 [ C(RESULT_ACCESS) ] = -1, 563 [ C(RESULT_MISS) ] = -1, 564 }, 565 [ C(OP_PREFETCH) ] = { 566 [ C(RESULT_ACCESS) ] = -1, 567 [ C(RESULT_MISS) ] = -1, 568 }, 569 }, 570 [ C(NODE) ] = { 571 [ C(OP_READ) ] = { 572 [ C(RESULT_ACCESS) ] = 0x1b7, /* OFFCORE_RESPONSE */ 573 [ C(RESULT_MISS) ] = 0x1b7, /* OFFCORE_RESPONSE */ 574 }, 575 [ C(OP_WRITE) ] = { 576 [ C(RESULT_ACCESS) ] = 0x1b7, /* OFFCORE_RESPONSE */ 577 [ C(RESULT_MISS) ] = 0x1b7, /* OFFCORE_RESPONSE */ 578 }, 579 [ C(OP_PREFETCH) ] = { 580 [ C(RESULT_ACCESS) ] = 0x0, 581 [ C(RESULT_MISS) ] = 0x0, 582 }, 583 }, 584}; 585 586static __initconst const u64 hsw_hw_cache_extra_regs 587 [PERF_COUNT_HW_CACHE_MAX] 588 [PERF_COUNT_HW_CACHE_OP_MAX] 589 [PERF_COUNT_HW_CACHE_RESULT_MAX] = 590{ 591 [ C(LL ) ] = { 592 [ C(OP_READ) ] = { 593 [ C(RESULT_ACCESS) ] = HSW_DEMAND_READ| 594 HSW_LLC_ACCESS, 595 [ C(RESULT_MISS) ] = HSW_DEMAND_READ| 596 HSW_L3_MISS|HSW_ANY_SNOOP, 597 }, 598 [ C(OP_WRITE) ] = { 599 [ C(RESULT_ACCESS) ] = HSW_DEMAND_WRITE| 600 HSW_LLC_ACCESS, 601 [ C(RESULT_MISS) ] = HSW_DEMAND_WRITE| 602 HSW_L3_MISS|HSW_ANY_SNOOP, 603 }, 604 [ C(OP_PREFETCH) ] = { 605 [ C(RESULT_ACCESS) ] = 0x0, 606 [ C(RESULT_MISS) ] = 0x0, 607 }, 608 }, 609 [ C(NODE) ] = { 610 [ C(OP_READ) ] = { 611 [ C(RESULT_ACCESS) ] = HSW_DEMAND_READ| 612 HSW_L3_MISS_LOCAL_DRAM| 613 HSW_SNOOP_DRAM, 614 [ C(RESULT_MISS) ] = HSW_DEMAND_READ| 615 HSW_L3_MISS_REMOTE| 616 HSW_SNOOP_DRAM, 617 }, 618 [ C(OP_WRITE) ] = { 619 [ C(RESULT_ACCESS) ] = HSW_DEMAND_WRITE| 620 HSW_L3_MISS_LOCAL_DRAM| 621 HSW_SNOOP_DRAM, 622 [ C(RESULT_MISS) ] = HSW_DEMAND_WRITE| 623 HSW_L3_MISS_REMOTE| 624 HSW_SNOOP_DRAM, 625 }, 626 [ C(OP_PREFETCH) ] = { 627 [ C(RESULT_ACCESS) ] = 0x0, 628 [ C(RESULT_MISS) ] = 0x0, 629 }, 630 }, 631}; 632 633static __initconst const u64 westmere_hw_cache_event_ids 634 [PERF_COUNT_HW_CACHE_MAX] 635 [PERF_COUNT_HW_CACHE_OP_MAX] 636 [PERF_COUNT_HW_CACHE_RESULT_MAX] = 637{ 638 [ C(L1D) ] = { 639 [ C(OP_READ) ] = { 640 [ C(RESULT_ACCESS) ] = 0x010b, /* MEM_INST_RETIRED.LOADS */ 641 [ C(RESULT_MISS) ] = 0x0151, /* L1D.REPL */ 642 }, 643 [ C(OP_WRITE) ] = { 644 [ C(RESULT_ACCESS) ] = 0x020b, /* MEM_INST_RETURED.STORES */ 645 [ C(RESULT_MISS) ] = 0x0251, /* L1D.M_REPL */ 646 }, 647 [ C(OP_PREFETCH) ] = { 648 [ C(RESULT_ACCESS) ] = 0x014e, /* L1D_PREFETCH.REQUESTS */ 649 [ C(RESULT_MISS) ] = 0x024e, /* L1D_PREFETCH.MISS */ 650 }, 651 }, 652 [ C(L1I ) ] = { 653 [ C(OP_READ) ] = { 654 [ C(RESULT_ACCESS) ] = 0x0380, /* L1I.READS */ 655 [ C(RESULT_MISS) ] = 0x0280, /* L1I.MISSES */ 656 }, 657 [ C(OP_WRITE) ] = { 658 [ C(RESULT_ACCESS) ] = -1, 659 [ C(RESULT_MISS) ] = -1, 660 }, 661 [ C(OP_PREFETCH) ] = { 662 [ C(RESULT_ACCESS) ] = 0x0, 663 [ C(RESULT_MISS) ] = 0x0, 664 }, 665 }, 666 [ C(LL ) ] = { 667 [ C(OP_READ) ] = { 668 /* OFFCORE_RESPONSE.ANY_DATA.LOCAL_CACHE */ 669 [ C(RESULT_ACCESS) ] = 0x01b7, 670 /* OFFCORE_RESPONSE.ANY_DATA.ANY_LLC_MISS */ 671 [ C(RESULT_MISS) ] = 0x01b7, 672 }, 673 /* 674 * Use RFO, not WRITEBACK, because a write miss would typically occur 675 * on RFO. 
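	 * (RFO = read-for-ownership: the read a core issues to gain exclusive
	 *  ownership of a cache line before storing to it, so it stands in
	 *  for writes here.)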
676 */ 677 [ C(OP_WRITE) ] = { 678 /* OFFCORE_RESPONSE.ANY_RFO.LOCAL_CACHE */ 679 [ C(RESULT_ACCESS) ] = 0x01b7, 680 /* OFFCORE_RESPONSE.ANY_RFO.ANY_LLC_MISS */ 681 [ C(RESULT_MISS) ] = 0x01b7, 682 }, 683 [ C(OP_PREFETCH) ] = { 684 /* OFFCORE_RESPONSE.PREFETCH.LOCAL_CACHE */ 685 [ C(RESULT_ACCESS) ] = 0x01b7, 686 /* OFFCORE_RESPONSE.PREFETCH.ANY_LLC_MISS */ 687 [ C(RESULT_MISS) ] = 0x01b7, 688 }, 689 }, 690 [ C(DTLB) ] = { 691 [ C(OP_READ) ] = { 692 [ C(RESULT_ACCESS) ] = 0x010b, /* MEM_INST_RETIRED.LOADS */ 693 [ C(RESULT_MISS) ] = 0x0108, /* DTLB_LOAD_MISSES.ANY */ 694 }, 695 [ C(OP_WRITE) ] = { 696 [ C(RESULT_ACCESS) ] = 0x020b, /* MEM_INST_RETURED.STORES */ 697 [ C(RESULT_MISS) ] = 0x010c, /* MEM_STORE_RETIRED.DTLB_MISS */ 698 }, 699 [ C(OP_PREFETCH) ] = { 700 [ C(RESULT_ACCESS) ] = 0x0, 701 [ C(RESULT_MISS) ] = 0x0, 702 }, 703 }, 704 [ C(ITLB) ] = { 705 [ C(OP_READ) ] = { 706 [ C(RESULT_ACCESS) ] = 0x01c0, /* INST_RETIRED.ANY_P */ 707 [ C(RESULT_MISS) ] = 0x0185, /* ITLB_MISSES.ANY */ 708 }, 709 [ C(OP_WRITE) ] = { 710 [ C(RESULT_ACCESS) ] = -1, 711 [ C(RESULT_MISS) ] = -1, 712 }, 713 [ C(OP_PREFETCH) ] = { 714 [ C(RESULT_ACCESS) ] = -1, 715 [ C(RESULT_MISS) ] = -1, 716 }, 717 }, 718 [ C(BPU ) ] = { 719 [ C(OP_READ) ] = { 720 [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ALL_BRANCHES */ 721 [ C(RESULT_MISS) ] = 0x03e8, /* BPU_CLEARS.ANY */ 722 }, 723 [ C(OP_WRITE) ] = { 724 [ C(RESULT_ACCESS) ] = -1, 725 [ C(RESULT_MISS) ] = -1, 726 }, 727 [ C(OP_PREFETCH) ] = { 728 [ C(RESULT_ACCESS) ] = -1, 729 [ C(RESULT_MISS) ] = -1, 730 }, 731 }, 732 [ C(NODE) ] = { 733 [ C(OP_READ) ] = { 734 [ C(RESULT_ACCESS) ] = 0x01b7, 735 [ C(RESULT_MISS) ] = 0x01b7, 736 }, 737 [ C(OP_WRITE) ] = { 738 [ C(RESULT_ACCESS) ] = 0x01b7, 739 [ C(RESULT_MISS) ] = 0x01b7, 740 }, 741 [ C(OP_PREFETCH) ] = { 742 [ C(RESULT_ACCESS) ] = 0x01b7, 743 [ C(RESULT_MISS) ] = 0x01b7, 744 }, 745 }, 746}; 747 748/* 749 * Nehalem/Westmere MSR_OFFCORE_RESPONSE bits; 750 * See IA32 SDM Vol 3B 30.6.1.3 751 */ 752 753#define NHM_DMND_DATA_RD (1 << 0) 754#define NHM_DMND_RFO (1 << 1) 755#define NHM_DMND_IFETCH (1 << 2) 756#define NHM_DMND_WB (1 << 3) 757#define NHM_PF_DATA_RD (1 << 4) 758#define NHM_PF_DATA_RFO (1 << 5) 759#define NHM_PF_IFETCH (1 << 6) 760#define NHM_OFFCORE_OTHER (1 << 7) 761#define NHM_UNCORE_HIT (1 << 8) 762#define NHM_OTHER_CORE_HIT_SNP (1 << 9) 763#define NHM_OTHER_CORE_HITM (1 << 10) 764 /* reserved */ 765#define NHM_REMOTE_CACHE_FWD (1 << 12) 766#define NHM_REMOTE_DRAM (1 << 13) 767#define NHM_LOCAL_DRAM (1 << 14) 768#define NHM_NON_DRAM (1 << 15) 769 770#define NHM_LOCAL (NHM_LOCAL_DRAM|NHM_REMOTE_CACHE_FWD) 771#define NHM_REMOTE (NHM_REMOTE_DRAM) 772 773#define NHM_DMND_READ (NHM_DMND_DATA_RD) 774#define NHM_DMND_WRITE (NHM_DMND_RFO|NHM_DMND_WB) 775#define NHM_DMND_PREFETCH (NHM_PF_DATA_RD|NHM_PF_DATA_RFO) 776 777#define NHM_L3_HIT (NHM_UNCORE_HIT|NHM_OTHER_CORE_HIT_SNP|NHM_OTHER_CORE_HITM) 778#define NHM_L3_MISS (NHM_NON_DRAM|NHM_LOCAL_DRAM|NHM_REMOTE_DRAM|NHM_REMOTE_CACHE_FWD) 779#define NHM_L3_ACCESS (NHM_L3_HIT|NHM_L3_MISS) 780 781static __initconst const u64 nehalem_hw_cache_extra_regs 782 [PERF_COUNT_HW_CACHE_MAX] 783 [PERF_COUNT_HW_CACHE_OP_MAX] 784 [PERF_COUNT_HW_CACHE_RESULT_MAX] = 785{ 786 [ C(LL ) ] = { 787 [ C(OP_READ) ] = { 788 [ C(RESULT_ACCESS) ] = NHM_DMND_READ|NHM_L3_ACCESS, 789 [ C(RESULT_MISS) ] = NHM_DMND_READ|NHM_L3_MISS, 790 }, 791 [ C(OP_WRITE) ] = { 792 [ C(RESULT_ACCESS) ] = NHM_DMND_WRITE|NHM_L3_ACCESS, 793 [ C(RESULT_MISS) ] = NHM_DMND_WRITE|NHM_L3_MISS, 794 }, 795 [ 
C(OP_PREFETCH) ] = { 796 [ C(RESULT_ACCESS) ] = NHM_DMND_PREFETCH|NHM_L3_ACCESS, 797 [ C(RESULT_MISS) ] = NHM_DMND_PREFETCH|NHM_L3_MISS, 798 }, 799 }, 800 [ C(NODE) ] = { 801 [ C(OP_READ) ] = { 802 [ C(RESULT_ACCESS) ] = NHM_DMND_READ|NHM_LOCAL|NHM_REMOTE, 803 [ C(RESULT_MISS) ] = NHM_DMND_READ|NHM_REMOTE, 804 }, 805 [ C(OP_WRITE) ] = { 806 [ C(RESULT_ACCESS) ] = NHM_DMND_WRITE|NHM_LOCAL|NHM_REMOTE, 807 [ C(RESULT_MISS) ] = NHM_DMND_WRITE|NHM_REMOTE, 808 }, 809 [ C(OP_PREFETCH) ] = { 810 [ C(RESULT_ACCESS) ] = NHM_DMND_PREFETCH|NHM_LOCAL|NHM_REMOTE, 811 [ C(RESULT_MISS) ] = NHM_DMND_PREFETCH|NHM_REMOTE, 812 }, 813 }, 814}; 815 816static __initconst const u64 nehalem_hw_cache_event_ids 817 [PERF_COUNT_HW_CACHE_MAX] 818 [PERF_COUNT_HW_CACHE_OP_MAX] 819 [PERF_COUNT_HW_CACHE_RESULT_MAX] = 820{ 821 [ C(L1D) ] = { 822 [ C(OP_READ) ] = { 823 [ C(RESULT_ACCESS) ] = 0x010b, /* MEM_INST_RETIRED.LOADS */ 824 [ C(RESULT_MISS) ] = 0x0151, /* L1D.REPL */ 825 }, 826 [ C(OP_WRITE) ] = { 827 [ C(RESULT_ACCESS) ] = 0x020b, /* MEM_INST_RETURED.STORES */ 828 [ C(RESULT_MISS) ] = 0x0251, /* L1D.M_REPL */ 829 }, 830 [ C(OP_PREFETCH) ] = { 831 [ C(RESULT_ACCESS) ] = 0x014e, /* L1D_PREFETCH.REQUESTS */ 832 [ C(RESULT_MISS) ] = 0x024e, /* L1D_PREFETCH.MISS */ 833 }, 834 }, 835 [ C(L1I ) ] = { 836 [ C(OP_READ) ] = { 837 [ C(RESULT_ACCESS) ] = 0x0380, /* L1I.READS */ 838 [ C(RESULT_MISS) ] = 0x0280, /* L1I.MISSES */ 839 }, 840 [ C(OP_WRITE) ] = { 841 [ C(RESULT_ACCESS) ] = -1, 842 [ C(RESULT_MISS) ] = -1, 843 }, 844 [ C(OP_PREFETCH) ] = { 845 [ C(RESULT_ACCESS) ] = 0x0, 846 [ C(RESULT_MISS) ] = 0x0, 847 }, 848 }, 849 [ C(LL ) ] = { 850 [ C(OP_READ) ] = { 851 /* OFFCORE_RESPONSE.ANY_DATA.LOCAL_CACHE */ 852 [ C(RESULT_ACCESS) ] = 0x01b7, 853 /* OFFCORE_RESPONSE.ANY_DATA.ANY_LLC_MISS */ 854 [ C(RESULT_MISS) ] = 0x01b7, 855 }, 856 /* 857 * Use RFO, not WRITEBACK, because a write miss would typically occur 858 * on RFO. 
859 */ 860 [ C(OP_WRITE) ] = { 861 /* OFFCORE_RESPONSE.ANY_RFO.LOCAL_CACHE */ 862 [ C(RESULT_ACCESS) ] = 0x01b7, 863 /* OFFCORE_RESPONSE.ANY_RFO.ANY_LLC_MISS */ 864 [ C(RESULT_MISS) ] = 0x01b7, 865 }, 866 [ C(OP_PREFETCH) ] = { 867 /* OFFCORE_RESPONSE.PREFETCH.LOCAL_CACHE */ 868 [ C(RESULT_ACCESS) ] = 0x01b7, 869 /* OFFCORE_RESPONSE.PREFETCH.ANY_LLC_MISS */ 870 [ C(RESULT_MISS) ] = 0x01b7, 871 }, 872 }, 873 [ C(DTLB) ] = { 874 [ C(OP_READ) ] = { 875 [ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI (alias) */ 876 [ C(RESULT_MISS) ] = 0x0108, /* DTLB_LOAD_MISSES.ANY */ 877 }, 878 [ C(OP_WRITE) ] = { 879 [ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI (alias) */ 880 [ C(RESULT_MISS) ] = 0x010c, /* MEM_STORE_RETIRED.DTLB_MISS */ 881 }, 882 [ C(OP_PREFETCH) ] = { 883 [ C(RESULT_ACCESS) ] = 0x0, 884 [ C(RESULT_MISS) ] = 0x0, 885 }, 886 }, 887 [ C(ITLB) ] = { 888 [ C(OP_READ) ] = { 889 [ C(RESULT_ACCESS) ] = 0x01c0, /* INST_RETIRED.ANY_P */ 890 [ C(RESULT_MISS) ] = 0x20c8, /* ITLB_MISS_RETIRED */ 891 }, 892 [ C(OP_WRITE) ] = { 893 [ C(RESULT_ACCESS) ] = -1, 894 [ C(RESULT_MISS) ] = -1, 895 }, 896 [ C(OP_PREFETCH) ] = { 897 [ C(RESULT_ACCESS) ] = -1, 898 [ C(RESULT_MISS) ] = -1, 899 }, 900 }, 901 [ C(BPU ) ] = { 902 [ C(OP_READ) ] = { 903 [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ALL_BRANCHES */ 904 [ C(RESULT_MISS) ] = 0x03e8, /* BPU_CLEARS.ANY */ 905 }, 906 [ C(OP_WRITE) ] = { 907 [ C(RESULT_ACCESS) ] = -1, 908 [ C(RESULT_MISS) ] = -1, 909 }, 910 [ C(OP_PREFETCH) ] = { 911 [ C(RESULT_ACCESS) ] = -1, 912 [ C(RESULT_MISS) ] = -1, 913 }, 914 }, 915 [ C(NODE) ] = { 916 [ C(OP_READ) ] = { 917 [ C(RESULT_ACCESS) ] = 0x01b7, 918 [ C(RESULT_MISS) ] = 0x01b7, 919 }, 920 [ C(OP_WRITE) ] = { 921 [ C(RESULT_ACCESS) ] = 0x01b7, 922 [ C(RESULT_MISS) ] = 0x01b7, 923 }, 924 [ C(OP_PREFETCH) ] = { 925 [ C(RESULT_ACCESS) ] = 0x01b7, 926 [ C(RESULT_MISS) ] = 0x01b7, 927 }, 928 }, 929}; 930 931static __initconst const u64 core2_hw_cache_event_ids 932 [PERF_COUNT_HW_CACHE_MAX] 933 [PERF_COUNT_HW_CACHE_OP_MAX] 934 [PERF_COUNT_HW_CACHE_RESULT_MAX] = 935{ 936 [ C(L1D) ] = { 937 [ C(OP_READ) ] = { 938 [ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI */ 939 [ C(RESULT_MISS) ] = 0x0140, /* L1D_CACHE_LD.I_STATE */ 940 }, 941 [ C(OP_WRITE) ] = { 942 [ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI */ 943 [ C(RESULT_MISS) ] = 0x0141, /* L1D_CACHE_ST.I_STATE */ 944 }, 945 [ C(OP_PREFETCH) ] = { 946 [ C(RESULT_ACCESS) ] = 0x104e, /* L1D_PREFETCH.REQUESTS */ 947 [ C(RESULT_MISS) ] = 0, 948 }, 949 }, 950 [ C(L1I ) ] = { 951 [ C(OP_READ) ] = { 952 [ C(RESULT_ACCESS) ] = 0x0080, /* L1I.READS */ 953 [ C(RESULT_MISS) ] = 0x0081, /* L1I.MISSES */ 954 }, 955 [ C(OP_WRITE) ] = { 956 [ C(RESULT_ACCESS) ] = -1, 957 [ C(RESULT_MISS) ] = -1, 958 }, 959 [ C(OP_PREFETCH) ] = { 960 [ C(RESULT_ACCESS) ] = 0, 961 [ C(RESULT_MISS) ] = 0, 962 }, 963 }, 964 [ C(LL ) ] = { 965 [ C(OP_READ) ] = { 966 [ C(RESULT_ACCESS) ] = 0x4f29, /* L2_LD.MESI */ 967 [ C(RESULT_MISS) ] = 0x4129, /* L2_LD.ISTATE */ 968 }, 969 [ C(OP_WRITE) ] = { 970 [ C(RESULT_ACCESS) ] = 0x4f2A, /* L2_ST.MESI */ 971 [ C(RESULT_MISS) ] = 0x412A, /* L2_ST.ISTATE */ 972 }, 973 [ C(OP_PREFETCH) ] = { 974 [ C(RESULT_ACCESS) ] = 0, 975 [ C(RESULT_MISS) ] = 0, 976 }, 977 }, 978 [ C(DTLB) ] = { 979 [ C(OP_READ) ] = { 980 [ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI (alias) */ 981 [ C(RESULT_MISS) ] = 0x0208, /* DTLB_MISSES.MISS_LD */ 982 }, 983 [ C(OP_WRITE) ] = { 984 [ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI (alias) */ 985 [ C(RESULT_MISS) ] = 0x0808, 
/* DTLB_MISSES.MISS_ST */ 986 }, 987 [ C(OP_PREFETCH) ] = { 988 [ C(RESULT_ACCESS) ] = 0, 989 [ C(RESULT_MISS) ] = 0, 990 }, 991 }, 992 [ C(ITLB) ] = { 993 [ C(OP_READ) ] = { 994 [ C(RESULT_ACCESS) ] = 0x00c0, /* INST_RETIRED.ANY_P */ 995 [ C(RESULT_MISS) ] = 0x1282, /* ITLBMISSES */ 996 }, 997 [ C(OP_WRITE) ] = { 998 [ C(RESULT_ACCESS) ] = -1, 999 [ C(RESULT_MISS) ] = -1, 1000 }, 1001 [ C(OP_PREFETCH) ] = { 1002 [ C(RESULT_ACCESS) ] = -1, 1003 [ C(RESULT_MISS) ] = -1, 1004 }, 1005 }, 1006 [ C(BPU ) ] = { 1007 [ C(OP_READ) ] = { 1008 [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ANY */ 1009 [ C(RESULT_MISS) ] = 0x00c5, /* BP_INST_RETIRED.MISPRED */ 1010 }, 1011 [ C(OP_WRITE) ] = { 1012 [ C(RESULT_ACCESS) ] = -1, 1013 [ C(RESULT_MISS) ] = -1, 1014 }, 1015 [ C(OP_PREFETCH) ] = { 1016 [ C(RESULT_ACCESS) ] = -1, 1017 [ C(RESULT_MISS) ] = -1, 1018 }, 1019 }, 1020}; 1021 1022static __initconst const u64 atom_hw_cache_event_ids 1023 [PERF_COUNT_HW_CACHE_MAX] 1024 [PERF_COUNT_HW_CACHE_OP_MAX] 1025 [PERF_COUNT_HW_CACHE_RESULT_MAX] = 1026{ 1027 [ C(L1D) ] = { 1028 [ C(OP_READ) ] = { 1029 [ C(RESULT_ACCESS) ] = 0x2140, /* L1D_CACHE.LD */ 1030 [ C(RESULT_MISS) ] = 0, 1031 }, 1032 [ C(OP_WRITE) ] = { 1033 [ C(RESULT_ACCESS) ] = 0x2240, /* L1D_CACHE.ST */ 1034 [ C(RESULT_MISS) ] = 0, 1035 }, 1036 [ C(OP_PREFETCH) ] = { 1037 [ C(RESULT_ACCESS) ] = 0x0, 1038 [ C(RESULT_MISS) ] = 0, 1039 }, 1040 }, 1041 [ C(L1I ) ] = { 1042 [ C(OP_READ) ] = { 1043 [ C(RESULT_ACCESS) ] = 0x0380, /* L1I.READS */ 1044 [ C(RESULT_MISS) ] = 0x0280, /* L1I.MISSES */ 1045 }, 1046 [ C(OP_WRITE) ] = { 1047 [ C(RESULT_ACCESS) ] = -1, 1048 [ C(RESULT_MISS) ] = -1, 1049 }, 1050 [ C(OP_PREFETCH) ] = { 1051 [ C(RESULT_ACCESS) ] = 0, 1052 [ C(RESULT_MISS) ] = 0, 1053 }, 1054 }, 1055 [ C(LL ) ] = { 1056 [ C(OP_READ) ] = { 1057 [ C(RESULT_ACCESS) ] = 0x4f29, /* L2_LD.MESI */ 1058 [ C(RESULT_MISS) ] = 0x4129, /* L2_LD.ISTATE */ 1059 }, 1060 [ C(OP_WRITE) ] = { 1061 [ C(RESULT_ACCESS) ] = 0x4f2A, /* L2_ST.MESI */ 1062 [ C(RESULT_MISS) ] = 0x412A, /* L2_ST.ISTATE */ 1063 }, 1064 [ C(OP_PREFETCH) ] = { 1065 [ C(RESULT_ACCESS) ] = 0, 1066 [ C(RESULT_MISS) ] = 0, 1067 }, 1068 }, 1069 [ C(DTLB) ] = { 1070 [ C(OP_READ) ] = { 1071 [ C(RESULT_ACCESS) ] = 0x2140, /* L1D_CACHE_LD.MESI (alias) */ 1072 [ C(RESULT_MISS) ] = 0x0508, /* DTLB_MISSES.MISS_LD */ 1073 }, 1074 [ C(OP_WRITE) ] = { 1075 [ C(RESULT_ACCESS) ] = 0x2240, /* L1D_CACHE_ST.MESI (alias) */ 1076 [ C(RESULT_MISS) ] = 0x0608, /* DTLB_MISSES.MISS_ST */ 1077 }, 1078 [ C(OP_PREFETCH) ] = { 1079 [ C(RESULT_ACCESS) ] = 0, 1080 [ C(RESULT_MISS) ] = 0, 1081 }, 1082 }, 1083 [ C(ITLB) ] = { 1084 [ C(OP_READ) ] = { 1085 [ C(RESULT_ACCESS) ] = 0x00c0, /* INST_RETIRED.ANY_P */ 1086 [ C(RESULT_MISS) ] = 0x0282, /* ITLB.MISSES */ 1087 }, 1088 [ C(OP_WRITE) ] = { 1089 [ C(RESULT_ACCESS) ] = -1, 1090 [ C(RESULT_MISS) ] = -1, 1091 }, 1092 [ C(OP_PREFETCH) ] = { 1093 [ C(RESULT_ACCESS) ] = -1, 1094 [ C(RESULT_MISS) ] = -1, 1095 }, 1096 }, 1097 [ C(BPU ) ] = { 1098 [ C(OP_READ) ] = { 1099 [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ANY */ 1100 [ C(RESULT_MISS) ] = 0x00c5, /* BP_INST_RETIRED.MISPRED */ 1101 }, 1102 [ C(OP_WRITE) ] = { 1103 [ C(RESULT_ACCESS) ] = -1, 1104 [ C(RESULT_MISS) ] = -1, 1105 }, 1106 [ C(OP_PREFETCH) ] = { 1107 [ C(RESULT_ACCESS) ] = -1, 1108 [ C(RESULT_MISS) ] = -1, 1109 }, 1110 }, 1111}; 1112 1113static struct extra_reg intel_slm_extra_regs[] __read_mostly = 1114{ 1115 /* must define OFFCORE_RSP_X first, see intel_fixup_er() */ 1116 INTEL_UEVENT_EXTRA_REG(0x01b7, 
MSR_OFFCORE_RSP_0, 0x768005ffffull, RSP_0), 1117 INTEL_UEVENT_EXTRA_REG(0x02b7, MSR_OFFCORE_RSP_1, 0x768005ffffull, RSP_1), 1118 EVENT_EXTRA_END 1119}; 1120 1121#define SLM_DMND_READ SNB_DMND_DATA_RD 1122#define SLM_DMND_WRITE SNB_DMND_RFO 1123#define SLM_DMND_PREFETCH (SNB_PF_DATA_RD|SNB_PF_RFO) 1124 1125#define SLM_SNP_ANY (SNB_SNP_NONE|SNB_SNP_MISS|SNB_NO_FWD|SNB_HITM) 1126#define SLM_LLC_ACCESS SNB_RESP_ANY 1127#define SLM_LLC_MISS (SLM_SNP_ANY|SNB_NON_DRAM) 1128 1129static __initconst const u64 slm_hw_cache_extra_regs 1130 [PERF_COUNT_HW_CACHE_MAX] 1131 [PERF_COUNT_HW_CACHE_OP_MAX] 1132 [PERF_COUNT_HW_CACHE_RESULT_MAX] = 1133{ 1134 [ C(LL ) ] = { 1135 [ C(OP_READ) ] = { 1136 [ C(RESULT_ACCESS) ] = SLM_DMND_READ|SLM_LLC_ACCESS, 1137 [ C(RESULT_MISS) ] = 0, 1138 }, 1139 [ C(OP_WRITE) ] = { 1140 [ C(RESULT_ACCESS) ] = SLM_DMND_WRITE|SLM_LLC_ACCESS, 1141 [ C(RESULT_MISS) ] = SLM_DMND_WRITE|SLM_LLC_MISS, 1142 }, 1143 [ C(OP_PREFETCH) ] = { 1144 [ C(RESULT_ACCESS) ] = SLM_DMND_PREFETCH|SLM_LLC_ACCESS, 1145 [ C(RESULT_MISS) ] = SLM_DMND_PREFETCH|SLM_LLC_MISS, 1146 }, 1147 }, 1148}; 1149 1150static __initconst const u64 slm_hw_cache_event_ids 1151 [PERF_COUNT_HW_CACHE_MAX] 1152 [PERF_COUNT_HW_CACHE_OP_MAX] 1153 [PERF_COUNT_HW_CACHE_RESULT_MAX] = 1154{ 1155 [ C(L1D) ] = { 1156 [ C(OP_READ) ] = { 1157 [ C(RESULT_ACCESS) ] = 0, 1158 [ C(RESULT_MISS) ] = 0x0104, /* LD_DCU_MISS */ 1159 }, 1160 [ C(OP_WRITE) ] = { 1161 [ C(RESULT_ACCESS) ] = 0, 1162 [ C(RESULT_MISS) ] = 0, 1163 }, 1164 [ C(OP_PREFETCH) ] = { 1165 [ C(RESULT_ACCESS) ] = 0, 1166 [ C(RESULT_MISS) ] = 0, 1167 }, 1168 }, 1169 [ C(L1I ) ] = { 1170 [ C(OP_READ) ] = { 1171 [ C(RESULT_ACCESS) ] = 0x0380, /* ICACHE.ACCESSES */ 1172 [ C(RESULT_MISS) ] = 0x0280, /* ICACGE.MISSES */ 1173 }, 1174 [ C(OP_WRITE) ] = { 1175 [ C(RESULT_ACCESS) ] = -1, 1176 [ C(RESULT_MISS) ] = -1, 1177 }, 1178 [ C(OP_PREFETCH) ] = { 1179 [ C(RESULT_ACCESS) ] = 0, 1180 [ C(RESULT_MISS) ] = 0, 1181 }, 1182 }, 1183 [ C(LL ) ] = { 1184 [ C(OP_READ) ] = { 1185 /* OFFCORE_RESPONSE.ANY_DATA.LOCAL_CACHE */ 1186 [ C(RESULT_ACCESS) ] = 0x01b7, 1187 [ C(RESULT_MISS) ] = 0, 1188 }, 1189 [ C(OP_WRITE) ] = { 1190 /* OFFCORE_RESPONSE.ANY_RFO.LOCAL_CACHE */ 1191 [ C(RESULT_ACCESS) ] = 0x01b7, 1192 /* OFFCORE_RESPONSE.ANY_RFO.ANY_LLC_MISS */ 1193 [ C(RESULT_MISS) ] = 0x01b7, 1194 }, 1195 [ C(OP_PREFETCH) ] = { 1196 /* OFFCORE_RESPONSE.PREFETCH.LOCAL_CACHE */ 1197 [ C(RESULT_ACCESS) ] = 0x01b7, 1198 /* OFFCORE_RESPONSE.PREFETCH.ANY_LLC_MISS */ 1199 [ C(RESULT_MISS) ] = 0x01b7, 1200 }, 1201 }, 1202 [ C(DTLB) ] = { 1203 [ C(OP_READ) ] = { 1204 [ C(RESULT_ACCESS) ] = 0, 1205 [ C(RESULT_MISS) ] = 0x0804, /* LD_DTLB_MISS */ 1206 }, 1207 [ C(OP_WRITE) ] = { 1208 [ C(RESULT_ACCESS) ] = 0, 1209 [ C(RESULT_MISS) ] = 0, 1210 }, 1211 [ C(OP_PREFETCH) ] = { 1212 [ C(RESULT_ACCESS) ] = 0, 1213 [ C(RESULT_MISS) ] = 0, 1214 }, 1215 }, 1216 [ C(ITLB) ] = { 1217 [ C(OP_READ) ] = { 1218 [ C(RESULT_ACCESS) ] = 0x00c0, /* INST_RETIRED.ANY_P */ 1219 [ C(RESULT_MISS) ] = 0x40205, /* PAGE_WALKS.I_SIDE_WALKS */ 1220 }, 1221 [ C(OP_WRITE) ] = { 1222 [ C(RESULT_ACCESS) ] = -1, 1223 [ C(RESULT_MISS) ] = -1, 1224 }, 1225 [ C(OP_PREFETCH) ] = { 1226 [ C(RESULT_ACCESS) ] = -1, 1227 [ C(RESULT_MISS) ] = -1, 1228 }, 1229 }, 1230 [ C(BPU ) ] = { 1231 [ C(OP_READ) ] = { 1232 [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ANY */ 1233 [ C(RESULT_MISS) ] = 0x00c5, /* BP_INST_RETIRED.MISPRED */ 1234 }, 1235 [ C(OP_WRITE) ] = { 1236 [ C(RESULT_ACCESS) ] = -1, 1237 [ C(RESULT_MISS) ] = -1, 1238 }, 1239 [ C(OP_PREFETCH) 
] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS) ] = -1,
	},
 },
};

/*
 * Use from PMIs where the LBRs are already disabled.
 */
static void __intel_pmu_disable_all(void)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);

	wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0);

	if (test_bit(INTEL_PMC_IDX_FIXED_BTS, cpuc->active_mask))
		intel_pmu_disable_bts();
	else
		intel_bts_disable_local();

	intel_pmu_pebs_disable_all();
}

static void intel_pmu_disable_all(void)
{
	__intel_pmu_disable_all();
	intel_pmu_lbr_disable_all();
}

static void __intel_pmu_enable_all(int added, bool pmi)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);

	intel_pmu_pebs_enable_all();
	intel_pmu_lbr_enable_all(pmi);
	wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL,
	       x86_pmu.intel_ctrl & ~cpuc->intel_ctrl_guest_mask);

	if (test_bit(INTEL_PMC_IDX_FIXED_BTS, cpuc->active_mask)) {
		struct perf_event *event =
			cpuc->events[INTEL_PMC_IDX_FIXED_BTS];

		if (WARN_ON_ONCE(!event))
			return;

		intel_pmu_enable_bts(event->hw.config);
	} else
		intel_bts_enable_local();
}

static void intel_pmu_enable_all(int added)
{
	__intel_pmu_enable_all(added, false);
}

/*
 * Workaround for:
 *   Intel Errata AAK100 (model 26)
 *   Intel Errata AAP53  (model 30)
 *   Intel Errata BD53   (model 44)
 *
 * The official story:
 * These chips need to be 'reset' when adding counters by programming the
 * magic three (non-counting) events 0x4300B5, 0x4300D2, and 0x4300B1 either
 * in sequence on the same PMC or on different PMCs.
 *
 * In practice it appears some of these events do in fact count, and
 * we need to program all 4 events.
 */
static void intel_pmu_nhm_workaround(void)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
	static const unsigned long nhm_magic[4] = {
		0x4300B5,
		0x4300D2,
		0x4300B1,
		0x4300B1
	};
	struct perf_event *event;
	int i;

	/*
	 * The errata requires the following steps:
	 * 1) Clear MSR_IA32_PEBS_ENABLE and MSR_CORE_PERF_GLOBAL_CTRL;
	 * 2) Configure 4 PERFEVTSELx with the magic events and clear
	 *    the corresponding PMCx;
	 * 3) set bit0~bit3 of MSR_CORE_PERF_GLOBAL_CTRL;
	 * 4) Clear MSR_CORE_PERF_GLOBAL_CTRL;
	 * 5) Clear 4 pairs of PERFEVTSELx and PMCx;
	 */

	/*
	 * The real steps we choose are a little different from above.
	 * A) To reduce MSR operations, we don't run step 1) as they
	 *    are already cleared before this function is called;
	 * B) Call x86_perf_event_update to save PMCx before configuring
	 *    PERFEVTSELx with magic number;
	 * C) With step 5), we do clear only when the PERFEVTSELx is
	 *    not used currently.
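	 *    (a PERFEVTSELx that is still in use is reprogrammed with its
	 *    real configuration in step D instead of being cleared);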
1339 * D) Call x86_perf_event_set_period to restore PMCx; 1340 */ 1341 1342 /* We always operate 4 pairs of PERF Counters */ 1343 for (i = 0; i < 4; i++) { 1344 event = cpuc->events[i]; 1345 if (event) 1346 x86_perf_event_update(event); 1347 } 1348 1349 for (i = 0; i < 4; i++) { 1350 wrmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + i, nhm_magic[i]); 1351 wrmsrl(MSR_ARCH_PERFMON_PERFCTR0 + i, 0x0); 1352 } 1353 1354 wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0xf); 1355 wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0x0); 1356 1357 for (i = 0; i < 4; i++) { 1358 event = cpuc->events[i]; 1359 1360 if (event) { 1361 x86_perf_event_set_period(event); 1362 __x86_pmu_enable_event(&event->hw, 1363 ARCH_PERFMON_EVENTSEL_ENABLE); 1364 } else 1365 wrmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + i, 0x0); 1366 } 1367} 1368 1369static void intel_pmu_nhm_enable_all(int added) 1370{ 1371 if (added) 1372 intel_pmu_nhm_workaround(); 1373 intel_pmu_enable_all(added); 1374} 1375 1376static inline u64 intel_pmu_get_status(void) 1377{ 1378 u64 status; 1379 1380 rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status); 1381 1382 return status; 1383} 1384 1385static inline void intel_pmu_ack_status(u64 ack) 1386{ 1387 wrmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, ack); 1388} 1389 1390static void intel_pmu_disable_fixed(struct hw_perf_event *hwc) 1391{ 1392 int idx = hwc->idx - INTEL_PMC_IDX_FIXED; 1393 u64 ctrl_val, mask; 1394 1395 mask = 0xfULL << (idx * 4); 1396 1397 rdmsrl(hwc->config_base, ctrl_val); 1398 ctrl_val &= ~mask; 1399 wrmsrl(hwc->config_base, ctrl_val); 1400} 1401 1402static inline bool event_is_checkpointed(struct perf_event *event) 1403{ 1404 return (event->hw.config & HSW_IN_TX_CHECKPOINTED) != 0; 1405} 1406 1407static void intel_pmu_disable_event(struct perf_event *event) 1408{ 1409 struct hw_perf_event *hwc = &event->hw; 1410 struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); 1411 1412 if (unlikely(hwc->idx == INTEL_PMC_IDX_FIXED_BTS)) { 1413 intel_pmu_disable_bts(); 1414 intel_pmu_drain_bts_buffer(); 1415 return; 1416 } 1417 1418 cpuc->intel_ctrl_guest_mask &= ~(1ull << hwc->idx); 1419 cpuc->intel_ctrl_host_mask &= ~(1ull << hwc->idx); 1420 cpuc->intel_cp_status &= ~(1ull << hwc->idx); 1421 1422 /* 1423 * must disable before any actual event 1424 * because any event may be combined with LBR 1425 */ 1426 if (needs_branch_stack(event)) 1427 intel_pmu_lbr_disable(event); 1428 1429 if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) { 1430 intel_pmu_disable_fixed(hwc); 1431 return; 1432 } 1433 1434 x86_pmu_disable_event(event); 1435 1436 if (unlikely(event->attr.precise_ip)) 1437 intel_pmu_pebs_disable(event); 1438} 1439 1440static void intel_pmu_enable_fixed(struct hw_perf_event *hwc) 1441{ 1442 int idx = hwc->idx - INTEL_PMC_IDX_FIXED; 1443 u64 ctrl_val, bits, mask; 1444 1445 /* 1446 * Enable IRQ generation (0x8), 1447 * and enable ring-3 counting (0x2) and ring-0 counting (0x1) 1448 * if requested: 1449 */ 1450 bits = 0x8ULL; 1451 if (hwc->config & ARCH_PERFMON_EVENTSEL_USR) 1452 bits |= 0x2; 1453 if (hwc->config & ARCH_PERFMON_EVENTSEL_OS) 1454 bits |= 0x1; 1455 1456 /* 1457 * ANY bit is supported in v3 and up 1458 */ 1459 if (x86_pmu.version > 2 && hwc->config & ARCH_PERFMON_EVENTSEL_ANY) 1460 bits |= 0x4; 1461 1462 bits <<= (idx * 4); 1463 mask = 0xfULL << (idx * 4); 1464 1465 rdmsrl(hwc->config_base, ctrl_val); 1466 ctrl_val &= ~mask; 1467 ctrl_val |= bits; 1468 wrmsrl(hwc->config_base, ctrl_val); 1469} 1470 1471static void intel_pmu_enable_event(struct perf_event *event) 1472{ 1473 struct hw_perf_event *hwc = &event->hw; 1474 struct 
cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);

	if (unlikely(hwc->idx == INTEL_PMC_IDX_FIXED_BTS)) {
		if (!__this_cpu_read(cpu_hw_events.enabled))
			return;

		intel_pmu_enable_bts(hwc->config);
		return;
	}
	/*
	 * must be enabled before any actual event
	 * because any event may be combined with LBR
	 */
	if (needs_branch_stack(event))
		intel_pmu_lbr_enable(event);

	if (event->attr.exclude_host)
		cpuc->intel_ctrl_guest_mask |= (1ull << hwc->idx);
	if (event->attr.exclude_guest)
		cpuc->intel_ctrl_host_mask |= (1ull << hwc->idx);

	if (unlikely(event_is_checkpointed(event)))
		cpuc->intel_cp_status |= (1ull << hwc->idx);

	if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) {
		intel_pmu_enable_fixed(hwc);
		return;
	}

	if (unlikely(event->attr.precise_ip))
		intel_pmu_pebs_enable(event);

	__x86_pmu_enable_event(hwc, ARCH_PERFMON_EVENTSEL_ENABLE);
}

/*
 * Save and restart an expired event. Called by NMI contexts,
 * so it has to be careful about preempting normal event ops:
 */
int intel_pmu_save_and_restart(struct perf_event *event)
{
	x86_perf_event_update(event);
	/*
	 * For a checkpointed counter always reset back to 0. This
	 * avoids a situation where the counter overflows, aborts the
	 * transaction and is then set back to shortly before the
	 * overflow, and overflows and aborts again.
	 */
	if (unlikely(event_is_checkpointed(event))) {
		/* No race with NMIs because the counter should not be armed */
		wrmsrl(event->hw.event_base, 0);
		local64_set(&event->hw.prev_count, 0);
	}
	return x86_perf_event_set_period(event);
}

static void intel_pmu_reset(void)
{
	struct debug_store *ds = __this_cpu_read(cpu_hw_events.ds);
	unsigned long flags;
	int idx;

	if (!x86_pmu.num_counters)
		return;

	local_irq_save(flags);

	pr_info("clearing PMU state on CPU#%d\n", smp_processor_id());

	for (idx = 0; idx < x86_pmu.num_counters; idx++) {
		wrmsrl_safe(x86_pmu_config_addr(idx), 0ull);
		wrmsrl_safe(x86_pmu_event_addr(idx), 0ull);
	}
	for (idx = 0; idx < x86_pmu.num_counters_fixed; idx++)
		wrmsrl_safe(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, 0ull);

	if (ds)
		ds->bts_index = ds->bts_buffer_base;

	/* Ack all overflows and disable fixed counters */
	if (x86_pmu.version >= 2) {
		intel_pmu_ack_status(intel_pmu_get_status());
		wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0);
	}

	/* Reset LBRs and LBR freezing */
	if (x86_pmu.lbr_nr) {
		update_debugctlmsr(get_debugctlmsr() &
			~(DEBUGCTLMSR_FREEZE_LBRS_ON_PMI|DEBUGCTLMSR_LBR));
	}

	local_irq_restore(flags);
}

/*
 * This handler is triggered by the local APIC, so the APIC IRQ handling
 * rules apply:
 */
static int intel_pmu_handle_irq(struct pt_regs *regs)
{
	struct perf_sample_data data;
	struct cpu_hw_events *cpuc;
	int bit, loops;
	u64 status;
	int handled;

	cpuc = this_cpu_ptr(&cpu_hw_events);

	/*
	 * No known reason to not always do late ACK,
	 * but just in case do it opt-in.
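	 * (when late_ack is set, unmasking the LVTPC is deferred until the
	 *  end of this handler, after the overflowed counters have been
	 *  handled)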
1585 */ 1586 if (!x86_pmu.late_ack) 1587 apic_write(APIC_LVTPC, APIC_DM_NMI); 1588 __intel_pmu_disable_all(); 1589 handled = intel_pmu_drain_bts_buffer(); 1590 handled += intel_bts_interrupt(); 1591 status = intel_pmu_get_status(); 1592 if (!status) 1593 goto done; 1594 1595 loops = 0; 1596again: 1597 intel_pmu_ack_status(status); 1598 if (++loops > 100) { 1599 static bool warned = false; 1600 if (!warned) { 1601 WARN(1, "perfevents: irq loop stuck!\n"); 1602 perf_event_print_debug(); 1603 warned = true; 1604 } 1605 intel_pmu_reset(); 1606 goto done; 1607 } 1608 1609 inc_irq_stat(apic_perf_irqs); 1610 1611 intel_pmu_lbr_read(); 1612 1613 /* 1614 * CondChgd bit 63 doesn't mean any overflow status. Ignore 1615 * and clear the bit. 1616 */ 1617 if (__test_and_clear_bit(63, (unsigned long *)&status)) { 1618 if (!status) 1619 goto done; 1620 } 1621 1622 /* 1623 * PEBS overflow sets bit 62 in the global status register 1624 */ 1625 if (__test_and_clear_bit(62, (unsigned long *)&status)) { 1626 handled++; 1627 x86_pmu.drain_pebs(regs); 1628 } 1629 1630 /* 1631 * Intel PT 1632 */ 1633 if (__test_and_clear_bit(55, (unsigned long *)&status)) { 1634 handled++; 1635 intel_pt_interrupt(); 1636 } 1637 1638 /* 1639 * Checkpointed counters can lead to 'spurious' PMIs because the 1640 * rollback caused by the PMI will have cleared the overflow status 1641 * bit. Therefore always force probe these counters. 1642 */ 1643 status |= cpuc->intel_cp_status; 1644 1645 for_each_set_bit(bit, (unsigned long *)&status, X86_PMC_IDX_MAX) { 1646 struct perf_event *event = cpuc->events[bit]; 1647 1648 handled++; 1649 1650 if (!test_bit(bit, cpuc->active_mask)) 1651 continue; 1652 1653 if (!intel_pmu_save_and_restart(event)) 1654 continue; 1655 1656 perf_sample_data_init(&data, 0, event->hw.last_period); 1657 1658 if (has_branch_stack(event)) 1659 data.br_stack = &cpuc->lbr_stack; 1660 1661 if (perf_event_overflow(event, &data, regs)) 1662 x86_pmu_stop(event, 0); 1663 } 1664 1665 /* 1666 * Repeat if there is more work to be done: 1667 */ 1668 status = intel_pmu_get_status(); 1669 if (status) 1670 goto again; 1671 1672done: 1673 __intel_pmu_enable_all(0, true); 1674 /* 1675 * Only unmask the NMI after the overflow counters 1676 * have been reset. This avoids spurious NMIs on 1677 * Haswell CPUs. 
	 */
	if (x86_pmu.late_ack)
		apic_write(APIC_LVTPC, APIC_DM_NMI);
	return handled;
}

static struct event_constraint *
intel_bts_constraints(struct perf_event *event)
{
	struct hw_perf_event *hwc = &event->hw;
	unsigned int hw_event, bts_event;

	if (event->attr.freq)
		return NULL;

	hw_event = hwc->config & INTEL_ARCH_EVENT_MASK;
	bts_event = x86_pmu.event_map(PERF_COUNT_HW_BRANCH_INSTRUCTIONS);

	if (unlikely(hw_event == bts_event && hwc->sample_period == 1))
		return &bts_constraint;

	return NULL;
}

static int intel_alt_er(int idx)
{
	if (!(x86_pmu.flags & PMU_FL_HAS_RSP_1))
		return idx;

	if (idx == EXTRA_REG_RSP_0)
		return EXTRA_REG_RSP_1;

	if (idx == EXTRA_REG_RSP_1)
		return EXTRA_REG_RSP_0;

	return idx;
}

static void intel_fixup_er(struct perf_event *event, int idx)
{
	event->hw.extra_reg.idx = idx;

	if (idx == EXTRA_REG_RSP_0) {
		event->hw.config &= ~INTEL_ARCH_EVENT_MASK;
		event->hw.config |= x86_pmu.extra_regs[EXTRA_REG_RSP_0].event;
		event->hw.extra_reg.reg = MSR_OFFCORE_RSP_0;
	} else if (idx == EXTRA_REG_RSP_1) {
		event->hw.config &= ~INTEL_ARCH_EVENT_MASK;
		event->hw.config |= x86_pmu.extra_regs[EXTRA_REG_RSP_1].event;
		event->hw.extra_reg.reg = MSR_OFFCORE_RSP_1;
	}
}

/*
 * manage allocation of shared extra MSRs for certain events
 *
 * sharing can be:
 * per-cpu: to be shared between the various events on a single PMU
 * per-core: per-cpu + shared by HT threads
 */
static struct event_constraint *
__intel_shared_reg_get_constraints(struct cpu_hw_events *cpuc,
				   struct perf_event *event,
				   struct hw_perf_event_extra *reg)
{
	struct event_constraint *c = &emptyconstraint;
	struct er_account *era;
	unsigned long flags;
	int idx = reg->idx;

	/*
	 * reg->alloc can be set due to existing state, so for fake cpuc we
	 * need to ignore this, otherwise we might fail to allocate proper fake
	 * state for this extra reg constraint. Also see the comment below.
	 */
	if (reg->alloc && !cpuc->is_fake)
		return NULL; /* call x86_get_event_constraint() */

again:
	era = &cpuc->shared_regs->regs[idx];
	/*
	 * we use raw_spin_lock_irqsave() to avoid lockdep issues when
	 * passing a fake cpuc
	 */
	raw_spin_lock_irqsave(&era->lock, flags);

	if (!atomic_read(&era->ref) || era->config == reg->config) {

		/*
		 * If it's a fake cpuc -- as per validate_{group,event}() we
		 * shouldn't touch event state and we can avoid doing so
		 * since both will only call get_event_constraints() once
		 * on each event, this avoids the need for reg->alloc.
		 *
		 * Not doing the ER fixup will only result in era->reg being
		 * wrong, but since we won't actually try and program hardware
		 * this isn't a problem either.
		 */
		if (!cpuc->is_fake) {
			if (idx != reg->idx)
				intel_fixup_er(event, idx);

			/*
			 * x86_schedule_events() can call get_event_constraints()
			 * multiple times on events in the case of incremental
			 * scheduling. reg->alloc ensures we only do the ER
			 * allocation once.
1785 */ 1786 reg->alloc = 1; 1787 } 1788 1789 /* lock in msr value */ 1790 era->config = reg->config; 1791 era->reg = reg->reg; 1792 1793 /* one more user */ 1794 atomic_inc(&era->ref); 1795 1796 /* 1797 * need to call x86_get_event_constraint() 1798 * to check if associated event has constraints 1799 */ 1800 c = NULL; 1801 } else { 1802 idx = intel_alt_er(idx); 1803 if (idx != reg->idx) { 1804 raw_spin_unlock_irqrestore(&era->lock, flags); 1805 goto again; 1806 } 1807 } 1808 raw_spin_unlock_irqrestore(&era->lock, flags); 1809 1810 return c; 1811} 1812 1813static void 1814__intel_shared_reg_put_constraints(struct cpu_hw_events *cpuc, 1815 struct hw_perf_event_extra *reg) 1816{ 1817 struct er_account *era; 1818 1819 /* 1820 * Only put constraint if extra reg was actually allocated. Also takes 1821 * care of event which do not use an extra shared reg. 1822 * 1823 * Also, if this is a fake cpuc we shouldn't touch any event state 1824 * (reg->alloc) and we don't care about leaving inconsistent cpuc state 1825 * either since it'll be thrown out. 1826 */ 1827 if (!reg->alloc || cpuc->is_fake) 1828 return; 1829 1830 era = &cpuc->shared_regs->regs[reg->idx]; 1831 1832 /* one fewer user */ 1833 atomic_dec(&era->ref); 1834 1835 /* allocate again next time */ 1836 reg->alloc = 0; 1837} 1838 1839static struct event_constraint * 1840intel_shared_regs_constraints(struct cpu_hw_events *cpuc, 1841 struct perf_event *event) 1842{ 1843 struct event_constraint *c = NULL, *d; 1844 struct hw_perf_event_extra *xreg, *breg; 1845 1846 xreg = &event->hw.extra_reg; 1847 if (xreg->idx != EXTRA_REG_NONE) { 1848 c = __intel_shared_reg_get_constraints(cpuc, event, xreg); 1849 if (c == &emptyconstraint) 1850 return c; 1851 } 1852 breg = &event->hw.branch_reg; 1853 if (breg->idx != EXTRA_REG_NONE) { 1854 d = __intel_shared_reg_get_constraints(cpuc, event, breg); 1855 if (d == &emptyconstraint) { 1856 __intel_shared_reg_put_constraints(cpuc, xreg); 1857 c = d; 1858 } 1859 } 1860 return c; 1861} 1862 1863struct event_constraint * 1864x86_get_event_constraints(struct cpu_hw_events *cpuc, int idx, 1865 struct perf_event *event) 1866{ 1867 struct event_constraint *c; 1868 1869 if (x86_pmu.event_constraints) { 1870 for_each_event_constraint(c, x86_pmu.event_constraints) { 1871 if ((event->hw.config & c->cmask) == c->code) { 1872 event->hw.flags |= c->flags; 1873 return c; 1874 } 1875 } 1876 } 1877 1878 return &unconstrained; 1879} 1880 1881static struct event_constraint * 1882__intel_get_event_constraints(struct cpu_hw_events *cpuc, int idx, 1883 struct perf_event *event) 1884{ 1885 struct event_constraint *c; 1886 1887 c = intel_bts_constraints(event); 1888 if (c) 1889 return c; 1890 1891 c = intel_shared_regs_constraints(cpuc, event); 1892 if (c) 1893 return c; 1894 1895 c = intel_pebs_constraints(event); 1896 if (c) 1897 return c; 1898 1899 return x86_get_event_constraints(cpuc, idx, event); 1900} 1901 1902static void 1903intel_start_scheduling(struct cpu_hw_events *cpuc) 1904{ 1905 struct intel_excl_cntrs *excl_cntrs = cpuc->excl_cntrs; 1906 struct intel_excl_states *xl, *xlo; 1907 int tid = cpuc->excl_thread_id; 1908 int o_tid = 1 - tid; /* sibling thread */ 1909 1910 /* 1911 * nothing needed if in group validation mode 1912 */ 1913 if (cpuc->is_fake || !is_ht_workaround_enabled()) 1914 return; 1915 1916 /* 1917 * no exclusion needed 1918 */ 1919 if (!excl_cntrs) 1920 return; 1921 1922 xlo = &excl_cntrs->states[o_tid]; 1923 xl = &excl_cntrs->states[tid]; 1924 1925 xl->sched_started = true; 1926 /* 1927 * lock shared state 
until we are done scheduling 1928 * in stop_event_scheduling() 1929 * makes scheduling appear as a transaction 1930 */ 1931 WARN_ON_ONCE(!irqs_disabled()); 1932 raw_spin_lock(&excl_cntrs->lock); 1933 1934 /* 1935 * save initial state of sibling thread 1936 */ 1937 memcpy(xlo->init_state, xlo->state, sizeof(xlo->init_state)); 1938} 1939 1940static void 1941intel_stop_scheduling(struct cpu_hw_events *cpuc) 1942{ 1943 struct intel_excl_cntrs *excl_cntrs = cpuc->excl_cntrs; 1944 struct intel_excl_states *xl, *xlo; 1945 int tid = cpuc->excl_thread_id; 1946 int o_tid = 1 - tid; /* sibling thread */ 1947 1948 /* 1949 * nothing needed if in group validation mode 1950 */ 1951 if (cpuc->is_fake || !is_ht_workaround_enabled()) 1952 return; 1953 /* 1954 * no exclusion needed 1955 */ 1956 if (!excl_cntrs) 1957 return; 1958 1959 xlo = &excl_cntrs->states[o_tid]; 1960 xl = &excl_cntrs->states[tid]; 1961 1962 /* 1963 * make new sibling thread state visible 1964 */ 1965 memcpy(xlo->state, xlo->init_state, sizeof(xlo->state)); 1966 1967 xl->sched_started = false; 1968 /* 1969 * release shared state lock (acquired in intel_start_scheduling()) 1970 */ 1971 raw_spin_unlock(&excl_cntrs->lock); 1972} 1973 1974static struct event_constraint * 1975intel_get_excl_constraints(struct cpu_hw_events *cpuc, struct perf_event *event, 1976 int idx, struct event_constraint *c) 1977{ 1978 struct event_constraint *cx; 1979 struct intel_excl_cntrs *excl_cntrs = cpuc->excl_cntrs; 1980 struct intel_excl_states *xl, *xlo; 1981 int is_excl, i; 1982 int tid = cpuc->excl_thread_id; 1983 int o_tid = 1 - tid; /* alternate */ 1984 1985 /* 1986 * validating a group does not require 1987 * enforcing cross-thread exclusion 1988 */ 1989 if (cpuc->is_fake || !is_ht_workaround_enabled()) 1990 return c; 1991 1992 /* 1993 * no exclusion needed 1994 */ 1995 if (!excl_cntrs) 1996 return c; 1997 /* 1998 * event requires exclusive counter access 1999 * across HT threads 2000 */ 2001 is_excl = c->flags & PERF_X86_EVENT_EXCL; 2002 if (is_excl && !(event->hw.flags & PERF_X86_EVENT_EXCL_ACCT)) { 2003 event->hw.flags |= PERF_X86_EVENT_EXCL_ACCT; 2004 if (!cpuc->n_excl++) 2005 WRITE_ONCE(excl_cntrs->has_exclusive[tid], 1); 2006 } 2007 2008 /* 2009 * xl = state of current HT 2010 * xlo = state of sibling HT 2011 */ 2012 xl = &excl_cntrs->states[tid]; 2013 xlo = &excl_cntrs->states[o_tid]; 2014 2015 cx = c; 2016 2017 /* 2018 * because we modify the constraint, we need 2019 * to make a copy. Static constraints come 2020 * from static const tables. 2021 * 2022 * only needed when constraint has not yet 2023 * been cloned (marked dynamic) 2024 */ 2025 if (!(c->flags & PERF_X86_EVENT_DYNAMIC)) { 2026 2027 /* sanity check */ 2028 if (idx < 0) 2029 return &emptyconstraint; 2030 2031 /* 2032 * grab pre-allocated constraint entry 2033 */ 2034 cx = &cpuc->constraint_list[idx]; 2035 2036 /* 2037 * initialize dynamic constraint 2038 * with static constraint 2039 */ 2040 memcpy(cx, c, sizeof(*cx)); 2041 2042 /* 2043 * mark constraint as dynamic, so we 2044 * can free it later on 2045 */ 2046 cx->flags |= PERF_X86_EVENT_DYNAMIC; 2047 } 2048 2049 /* 2050 * From here on, the constraint is dynamic. 
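 *
 * (Illustration of the counter-mask filtering performed just below, not
 *  from the original source; values are assumed. If the static constraint
 *  allows counters 0-3 (idxmsk64 == 0xf) and the per-thread state reports
 *  counters 0 and 2 as INTEL_EXCL_EXCLUSIVE, i.e. the sibling is measuring
 *  an exclusive event there, the loop clears those bits:
 *
 *	cx->idxmsk64 = 0xf;
 *	__clear_bit(0, cx->idxmsk);		// sibling counter 0 exclusive
 *	__clear_bit(2, cx->idxmsk);		// sibling counter 2 exclusive
 *	cx->weight = hweight64(cx->idxmsk64);	// 0xa -> weight 2
 *
 *  leaving only counters 1 and 3 available to this thread.)
 *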
2051 * Either it was just allocated above, or it 2052 * was allocated during a earlier invocation 2053 * of this function 2054 */ 2055 2056 /* 2057 * Modify static constraint with current dynamic 2058 * state of thread 2059 * 2060 * EXCLUSIVE: sibling counter measuring exclusive event 2061 * SHARED : sibling counter measuring non-exclusive event 2062 * UNUSED : sibling counter unused 2063 */ 2064 for_each_set_bit(i, cx->idxmsk, X86_PMC_IDX_MAX) { 2065 /* 2066 * exclusive event in sibling counter 2067 * our corresponding counter cannot be used 2068 * regardless of our event 2069 */ 2070 if (xl->state[i] == INTEL_EXCL_EXCLUSIVE) 2071 __clear_bit(i, cx->idxmsk); 2072 /* 2073 * if measuring an exclusive event, sibling 2074 * measuring non-exclusive, then counter cannot 2075 * be used 2076 */ 2077 if (is_excl && xl->state[i] == INTEL_EXCL_SHARED) 2078 __clear_bit(i, cx->idxmsk); 2079 } 2080 2081 /* 2082 * recompute actual bit weight for scheduling algorithm 2083 */ 2084 cx->weight = hweight64(cx->idxmsk64); 2085 2086 /* 2087 * if we return an empty mask, then switch 2088 * back to static empty constraint to avoid 2089 * the cost of freeing later on 2090 */ 2091 if (cx->weight == 0) 2092 cx = &emptyconstraint; 2093 2094 return cx; 2095} 2096 2097static struct event_constraint * 2098intel_get_event_constraints(struct cpu_hw_events *cpuc, int idx, 2099 struct perf_event *event) 2100{ 2101 struct event_constraint *c1 = NULL; 2102 struct event_constraint *c2; 2103 2104 if (idx >= 0) /* fake does < 0 */ 2105 c1 = cpuc->event_constraint[idx]; 2106 2107 /* 2108 * first time only 2109 * - static constraint: no change across incremental scheduling calls 2110 * - dynamic constraint: handled by intel_get_excl_constraints() 2111 */ 2112 c2 = __intel_get_event_constraints(cpuc, idx, event); 2113 if (c1 && (c1->flags & PERF_X86_EVENT_DYNAMIC)) { 2114 bitmap_copy(c1->idxmsk, c2->idxmsk, X86_PMC_IDX_MAX); 2115 c1->weight = c2->weight; 2116 c2 = c1; 2117 } 2118 2119 if (cpuc->excl_cntrs) 2120 return intel_get_excl_constraints(cpuc, event, idx, c2); 2121 2122 return c2; 2123} 2124 2125static void intel_put_excl_constraints(struct cpu_hw_events *cpuc, 2126 struct perf_event *event) 2127{ 2128 struct hw_perf_event *hwc = &event->hw; 2129 struct intel_excl_cntrs *excl_cntrs = cpuc->excl_cntrs; 2130 struct intel_excl_states *xlo, *xl; 2131 unsigned long flags = 0; /* keep compiler happy */ 2132 int tid = cpuc->excl_thread_id; 2133 int o_tid = 1 - tid; 2134 2135 /* 2136 * nothing needed if in group validation mode 2137 */ 2138 if (cpuc->is_fake) 2139 return; 2140 2141 WARN_ON_ONCE(!excl_cntrs); 2142 2143 if (!excl_cntrs) 2144 return; 2145 2146 xl = &excl_cntrs->states[tid]; 2147 xlo = &excl_cntrs->states[o_tid]; 2148 if (hwc->flags & PERF_X86_EVENT_EXCL_ACCT) { 2149 hwc->flags &= ~PERF_X86_EVENT_EXCL_ACCT; 2150 if (!--cpuc->n_excl) 2151 WRITE_ONCE(excl_cntrs->has_exclusive[tid], 0); 2152 } 2153 2154 /* 2155 * put_constraint may be called from x86_schedule_events() 2156 * which already has the lock held so here make locking 2157 * conditional 2158 */ 2159 if (!xl->sched_started) 2160 raw_spin_lock_irqsave(&excl_cntrs->lock, flags); 2161 2162 /* 2163 * if event was actually assigned, then mark the 2164 * counter state as unused now 2165 */ 2166 if (hwc->idx >= 0) 2167 xlo->state[hwc->idx] = INTEL_EXCL_UNUSED; 2168 2169 if (!xl->sched_started) 2170 raw_spin_unlock_irqrestore(&excl_cntrs->lock, flags); 2171} 2172 2173static void 2174intel_put_shared_regs_event_constraints(struct cpu_hw_events *cpuc, 2175 struct perf_event 
*event) 2176{ 2177 struct hw_perf_event_extra *reg; 2178 2179 reg = &event->hw.extra_reg; 2180 if (reg->idx != EXTRA_REG_NONE) 2181 __intel_shared_reg_put_constraints(cpuc, reg); 2182 2183 reg = &event->hw.branch_reg; 2184 if (reg->idx != EXTRA_REG_NONE) 2185 __intel_shared_reg_put_constraints(cpuc, reg); 2186} 2187 2188static void intel_put_event_constraints(struct cpu_hw_events *cpuc, 2189 struct perf_event *event) 2190{ 2191 intel_put_shared_regs_event_constraints(cpuc, event); 2192 2193 /* 2194 * is PMU has exclusive counter restrictions, then 2195 * all events are subject to and must call the 2196 * put_excl_constraints() routine 2197 */ 2198 if (cpuc->excl_cntrs) 2199 intel_put_excl_constraints(cpuc, event); 2200} 2201 2202static void intel_commit_scheduling(struct cpu_hw_events *cpuc, int idx, int cntr) 2203{ 2204 struct intel_excl_cntrs *excl_cntrs = cpuc->excl_cntrs; 2205 struct event_constraint *c = cpuc->event_constraint[idx]; 2206 struct intel_excl_states *xlo, *xl; 2207 int tid = cpuc->excl_thread_id; 2208 int o_tid = 1 - tid; 2209 int is_excl; 2210 2211 if (cpuc->is_fake || !c) 2212 return; 2213 2214 is_excl = c->flags & PERF_X86_EVENT_EXCL; 2215 2216 if (!(c->flags & PERF_X86_EVENT_DYNAMIC)) 2217 return; 2218 2219 WARN_ON_ONCE(!excl_cntrs); 2220 2221 if (!excl_cntrs) 2222 return; 2223 2224 xl = &excl_cntrs->states[tid]; 2225 xlo = &excl_cntrs->states[o_tid]; 2226 2227 WARN_ON_ONCE(!raw_spin_is_locked(&excl_cntrs->lock)); 2228 2229 if (cntr >= 0) { 2230 if (is_excl) 2231 xlo->init_state[cntr] = INTEL_EXCL_EXCLUSIVE; 2232 else 2233 xlo->init_state[cntr] = INTEL_EXCL_SHARED; 2234 } 2235} 2236 2237static void intel_pebs_aliases_core2(struct perf_event *event) 2238{ 2239 if ((event->hw.config & X86_RAW_EVENT_MASK) == 0x003c) { 2240 /* 2241 * Use an alternative encoding for CPU_CLK_UNHALTED.THREAD_P 2242 * (0x003c) so that we can use it with PEBS. 2243 * 2244 * The regular CPU_CLK_UNHALTED.THREAD_P event (0x003c) isn't 2245 * PEBS capable. However we can use INST_RETIRED.ANY_P 2246 * (0x00c0), which is a PEBS capable event, to get the same 2247 * count. 2248 * 2249 * INST_RETIRED.ANY_P counts the number of cycles that retires 2250 * CNTMASK instructions. By setting CNTMASK to a value (16) 2251 * larger than the maximum number of instructions that can be 2252 * retired per cycle (4) and then inverting the condition, we 2253 * count all cycles that retire 16 or less instructions, which 2254 * is every cycle. 2255 * 2256 * Thereby we gain a PEBS capable cycle counter. 2257 */ 2258 u64 alt_config = X86_CONFIG(.event=0xc0, .inv=1, .cmask=16); 2259 2260 alt_config |= (event->hw.config & ~X86_RAW_EVENT_MASK); 2261 event->hw.config = alt_config; 2262 } 2263} 2264 2265static void intel_pebs_aliases_snb(struct perf_event *event) 2266{ 2267 if ((event->hw.config & X86_RAW_EVENT_MASK) == 0x003c) { 2268 /* 2269 * Use an alternative encoding for CPU_CLK_UNHALTED.THREAD_P 2270 * (0x003c) so that we can use it with PEBS. 2271 * 2272 * The regular CPU_CLK_UNHALTED.THREAD_P event (0x003c) isn't 2273 * PEBS capable. However we can use UOPS_RETIRED.ALL 2274 * (0x01c2), which is a PEBS capable event, to get the same 2275 * count. 2276 * 2277 * UOPS_RETIRED.ALL counts the number of cycles that retires 2278 * CNTMASK micro-ops. By setting CNTMASK to a value (16) 2279 * larger than the maximum number of micro-ops that can be 2280 * retired per cycle (4) and then inverting the condition, we 2281 * count all cycles that retire 16 or less micro-ops, which 2282 * is every cycle. 
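 *
 * For illustration (worked out from the format bits exported later in
 * this file: event config:0-7, umask config:8-15, inv config:23,
 * cmask config:24-31), the replacement encoding used here evaluates to:
 *
 *	X86_CONFIG(.event=0xc2, .umask=0x01, .inv=1, .cmask=16)
 *		== 0xc2 | (0x01 << 8) | (1 << 23) | (16 << 24)
 *		== 0x108001c2
 *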
2283 * 2284 * Thereby we gain a PEBS capable cycle counter. 2285 */ 2286 u64 alt_config = X86_CONFIG(.event=0xc2, .umask=0x01, .inv=1, .cmask=16); 2287 2288 alt_config |= (event->hw.config & ~X86_RAW_EVENT_MASK); 2289 event->hw.config = alt_config; 2290 } 2291} 2292 2293static int intel_pmu_hw_config(struct perf_event *event) 2294{ 2295 int ret = x86_pmu_hw_config(event); 2296 2297 if (ret) 2298 return ret; 2299 2300 if (event->attr.precise_ip && x86_pmu.pebs_aliases) 2301 x86_pmu.pebs_aliases(event); 2302 2303 if (needs_branch_stack(event)) { 2304 ret = intel_pmu_setup_lbr_filter(event); 2305 if (ret) 2306 return ret; 2307 2308 /* 2309 * BTS is set up earlier in this path, so don't account twice 2310 */ 2311 if (!intel_pmu_has_bts(event)) { 2312 /* disallow lbr if conflicting events are present */ 2313 if (x86_add_exclusive(x86_lbr_exclusive_lbr)) 2314 return -EBUSY; 2315 2316 event->destroy = hw_perf_lbr_event_destroy; 2317 } 2318 } 2319 2320 if (event->attr.type != PERF_TYPE_RAW) 2321 return 0; 2322 2323 if (!(event->attr.config & ARCH_PERFMON_EVENTSEL_ANY)) 2324 return 0; 2325 2326 if (x86_pmu.version < 3) 2327 return -EINVAL; 2328 2329 if (perf_paranoid_cpu() && !capable(CAP_SYS_ADMIN)) 2330 return -EACCES; 2331 2332 event->hw.config |= ARCH_PERFMON_EVENTSEL_ANY; 2333 2334 return 0; 2335} 2336 2337struct perf_guest_switch_msr *perf_guest_get_msrs(int *nr) 2338{ 2339 if (x86_pmu.guest_get_msrs) 2340 return x86_pmu.guest_get_msrs(nr); 2341 *nr = 0; 2342 return NULL; 2343} 2344EXPORT_SYMBOL_GPL(perf_guest_get_msrs); 2345 2346static struct perf_guest_switch_msr *intel_guest_get_msrs(int *nr) 2347{ 2348 struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); 2349 struct perf_guest_switch_msr *arr = cpuc->guest_switch_msrs; 2350 2351 arr[0].msr = MSR_CORE_PERF_GLOBAL_CTRL; 2352 arr[0].host = x86_pmu.intel_ctrl & ~cpuc->intel_ctrl_guest_mask; 2353 arr[0].guest = x86_pmu.intel_ctrl & ~cpuc->intel_ctrl_host_mask; 2354 /* 2355 * If PMU counter has PEBS enabled it is not enough to disable counter 2356 * on a guest entry since PEBS memory write can overshoot guest entry 2357 * and corrupt guest memory. Disabling PEBS solves the problem. 
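 *
 * Illustrative example (the value is assumed, not from the original
 * source): with cpuc->pebs_enabled == 0x1, the MSR switch entry built
 * below ends up as:
 *
 *	arr[1].msr   = MSR_IA32_PEBS_ENABLE
 *	arr[1].host  = 0x1	// restore PEBS when returning to the host
 *	arr[1].guest = 0x0	// PEBS fully disabled while in the guest
 *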
2358 */ 2359 arr[1].msr = MSR_IA32_PEBS_ENABLE; 2360 arr[1].host = cpuc->pebs_enabled; 2361 arr[1].guest = 0; 2362 2363 *nr = 2; 2364 return arr; 2365} 2366 2367static struct perf_guest_switch_msr *core_guest_get_msrs(int *nr) 2368{ 2369 struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); 2370 struct perf_guest_switch_msr *arr = cpuc->guest_switch_msrs; 2371 int idx; 2372 2373 for (idx = 0; idx < x86_pmu.num_counters; idx++) { 2374 struct perf_event *event = cpuc->events[idx]; 2375 2376 arr[idx].msr = x86_pmu_config_addr(idx); 2377 arr[idx].host = arr[idx].guest = 0; 2378 2379 if (!test_bit(idx, cpuc->active_mask)) 2380 continue; 2381 2382 arr[idx].host = arr[idx].guest = 2383 event->hw.config | ARCH_PERFMON_EVENTSEL_ENABLE; 2384 2385 if (event->attr.exclude_host) 2386 arr[idx].host &= ~ARCH_PERFMON_EVENTSEL_ENABLE; 2387 else if (event->attr.exclude_guest) 2388 arr[idx].guest &= ~ARCH_PERFMON_EVENTSEL_ENABLE; 2389 } 2390 2391 *nr = x86_pmu.num_counters; 2392 return arr; 2393} 2394 2395static void core_pmu_enable_event(struct perf_event *event) 2396{ 2397 if (!event->attr.exclude_host) 2398 x86_pmu_enable_event(event); 2399} 2400 2401static void core_pmu_enable_all(int added) 2402{ 2403 struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); 2404 int idx; 2405 2406 for (idx = 0; idx < x86_pmu.num_counters; idx++) { 2407 struct hw_perf_event *hwc = &cpuc->events[idx]->hw; 2408 2409 if (!test_bit(idx, cpuc->active_mask) || 2410 cpuc->events[idx]->attr.exclude_host) 2411 continue; 2412 2413 __x86_pmu_enable_event(hwc, ARCH_PERFMON_EVENTSEL_ENABLE); 2414 } 2415} 2416 2417static int hsw_hw_config(struct perf_event *event) 2418{ 2419 int ret = intel_pmu_hw_config(event); 2420 2421 if (ret) 2422 return ret; 2423 if (!boot_cpu_has(X86_FEATURE_RTM) && !boot_cpu_has(X86_FEATURE_HLE)) 2424 return 0; 2425 event->hw.config |= event->attr.config & (HSW_IN_TX|HSW_IN_TX_CHECKPOINTED); 2426 2427 /* 2428 * IN_TX/IN_TX-CP filters are not supported by the Haswell PMU with 2429 * PEBS or in ANY thread mode. Since the results are non-sensical forbid 2430 * this combination. 2431 */ 2432 if ((event->hw.config & (HSW_IN_TX|HSW_IN_TX_CHECKPOINTED)) && 2433 ((event->hw.config & ARCH_PERFMON_EVENTSEL_ANY) || 2434 event->attr.precise_ip > 0)) 2435 return -EOPNOTSUPP; 2436 2437 if (event_is_checkpointed(event)) { 2438 /* 2439 * Sampling of checkpointed events can cause situations where 2440 * the CPU constantly aborts because of a overflow, which is 2441 * then checkpointed back and ignored. Forbid checkpointing 2442 * for sampling. 2443 * 2444 * But still allow a long sampling period, so that perf stat 2445 * from KVM works. 
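 *
 * Concretely (illustration only, periods chosen as examples), the check
 * below sorts sample periods as:
 *
 *	sample_period == 0		-> allowed  (pure counting mode)
 *	sample_period == 100003		-> rejected (-EOPNOTSUPP)
 *	sample_period >= 0x7fffffff	-> allowed  (long period, e.g. the
 *					    perf-stat-from-KVM case above)
 *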
2446 */ 2447 if (event->attr.sample_period > 0 && 2448 event->attr.sample_period < 0x7fffffff) 2449 return -EOPNOTSUPP; 2450 } 2451 return 0; 2452} 2453 2454static struct event_constraint counter2_constraint = 2455 EVENT_CONSTRAINT(0, 0x4, 0); 2456 2457static struct event_constraint * 2458hsw_get_event_constraints(struct cpu_hw_events *cpuc, int idx, 2459 struct perf_event *event) 2460{ 2461 struct event_constraint *c; 2462 2463 c = intel_get_event_constraints(cpuc, idx, event); 2464 2465 /* Handle special quirk on in_tx_checkpointed only in counter 2 */ 2466 if (event->hw.config & HSW_IN_TX_CHECKPOINTED) { 2467 if (c->idxmsk64 & (1U << 2)) 2468 return &counter2_constraint; 2469 return &emptyconstraint; 2470 } 2471 2472 return c; 2473} 2474 2475/* 2476 * Broadwell: 2477 * 2478 * The INST_RETIRED.ALL period always needs to have lowest 6 bits cleared 2479 * (BDM55) and it must not use a period smaller than 100 (BDM11). We combine 2480 * the two to enforce a minimum period of 128 (the smallest value that has bits 2481 * 0-5 cleared and >= 100). 2482 * 2483 * Because of how the code in x86_perf_event_set_period() works, the truncation 2484 * of the lower 6 bits is 'harmless' as we'll occasionally add a longer period 2485 * to make up for the 'lost' events due to carrying the 'error' in period_left. 2486 * 2487 * Therefore the effective (average) period matches the requested period, 2488 * despite coarser hardware granularity. 2489 */ 2490static unsigned bdw_limit_period(struct perf_event *event, unsigned left) 2491{ 2492 if ((event->hw.config & INTEL_ARCH_EVENT_MASK) == 2493 X86_CONFIG(.event=0xc0, .umask=0x01)) { 2494 if (left < 128) 2495 left = 128; 2496 left &= ~0x3fu; 2497 } 2498 return left; 2499} 2500 2501PMU_FORMAT_ATTR(event, "config:0-7" ); 2502PMU_FORMAT_ATTR(umask, "config:8-15" ); 2503PMU_FORMAT_ATTR(edge, "config:18" ); 2504PMU_FORMAT_ATTR(pc, "config:19" ); 2505PMU_FORMAT_ATTR(any, "config:21" ); /* v3 + */ 2506PMU_FORMAT_ATTR(inv, "config:23" ); 2507PMU_FORMAT_ATTR(cmask, "config:24-31" ); 2508PMU_FORMAT_ATTR(in_tx, "config:32"); 2509PMU_FORMAT_ATTR(in_tx_cp, "config:33"); 2510 2511static struct attribute *intel_arch_formats_attr[] = { 2512 &format_attr_event.attr, 2513 &format_attr_umask.attr, 2514 &format_attr_edge.attr, 2515 &format_attr_pc.attr, 2516 &format_attr_inv.attr, 2517 &format_attr_cmask.attr, 2518 NULL, 2519}; 2520 2521ssize_t intel_event_sysfs_show(char *page, u64 config) 2522{ 2523 u64 event = (config & ARCH_PERFMON_EVENTSEL_EVENT); 2524 2525 return x86_event_sysfs_show(page, config, event); 2526} 2527 2528struct intel_shared_regs *allocate_shared_regs(int cpu) 2529{ 2530 struct intel_shared_regs *regs; 2531 int i; 2532 2533 regs = kzalloc_node(sizeof(struct intel_shared_regs), 2534 GFP_KERNEL, cpu_to_node(cpu)); 2535 if (regs) { 2536 /* 2537 * initialize the locks to keep lockdep happy 2538 */ 2539 for (i = 0; i < EXTRA_REG_MAX; i++) 2540 raw_spin_lock_init(®s->regs[i].lock); 2541 2542 regs->core_id = -1; 2543 } 2544 return regs; 2545} 2546 2547static struct intel_excl_cntrs *allocate_excl_cntrs(int cpu) 2548{ 2549 struct intel_excl_cntrs *c; 2550 int i; 2551 2552 c = kzalloc_node(sizeof(struct intel_excl_cntrs), 2553 GFP_KERNEL, cpu_to_node(cpu)); 2554 if (c) { 2555 raw_spin_lock_init(&c->lock); 2556 for (i = 0; i < X86_PMC_IDX_MAX; i++) { 2557 c->states[0].state[i] = INTEL_EXCL_UNUSED; 2558 c->states[0].init_state[i] = INTEL_EXCL_UNUSED; 2559 2560 c->states[1].state[i] = INTEL_EXCL_UNUSED; 2561 c->states[1].init_state[i] = INTEL_EXCL_UNUSED; 2562 } 2563 
c->core_id = -1; 2564 } 2565 return c; 2566} 2567 2568static int intel_pmu_cpu_prepare(int cpu) 2569{ 2570 struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu); 2571 2572 if (x86_pmu.extra_regs || x86_pmu.lbr_sel_map) { 2573 cpuc->shared_regs = allocate_shared_regs(cpu); 2574 if (!cpuc->shared_regs) 2575 return NOTIFY_BAD; 2576 } 2577 2578 if (x86_pmu.flags & PMU_FL_EXCL_CNTRS) { 2579 size_t sz = X86_PMC_IDX_MAX * sizeof(struct event_constraint); 2580 2581 cpuc->constraint_list = kzalloc(sz, GFP_KERNEL); 2582 if (!cpuc->constraint_list) 2583 return NOTIFY_BAD; 2584 2585 cpuc->excl_cntrs = allocate_excl_cntrs(cpu); 2586 if (!cpuc->excl_cntrs) { 2587 kfree(cpuc->constraint_list); 2588 kfree(cpuc->shared_regs); 2589 return NOTIFY_BAD; 2590 } 2591 cpuc->excl_thread_id = 0; 2592 } 2593 2594 return NOTIFY_OK; 2595} 2596 2597static void intel_pmu_cpu_starting(int cpu) 2598{ 2599 struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu); 2600 int core_id = topology_core_id(cpu); 2601 int i; 2602 2603 init_debug_store_on_cpu(cpu); 2604 /* 2605 * Deal with CPUs that don't clear their LBRs on power-up. 2606 */ 2607 intel_pmu_lbr_reset(); 2608 2609 cpuc->lbr_sel = NULL; 2610 2611 if (!cpuc->shared_regs) 2612 return; 2613 2614 if (!(x86_pmu.flags & PMU_FL_NO_HT_SHARING)) { 2615 void **onln = &cpuc->kfree_on_online[X86_PERF_KFREE_SHARED]; 2616 2617 for_each_cpu(i, topology_thread_cpumask(cpu)) { 2618 struct intel_shared_regs *pc; 2619 2620 pc = per_cpu(cpu_hw_events, i).shared_regs; 2621 if (pc && pc->core_id == core_id) { 2622 *onln = cpuc->shared_regs; 2623 cpuc->shared_regs = pc; 2624 break; 2625 } 2626 } 2627 cpuc->shared_regs->core_id = core_id; 2628 cpuc->shared_regs->refcnt++; 2629 } 2630 2631 if (x86_pmu.lbr_sel_map) 2632 cpuc->lbr_sel = &cpuc->shared_regs->regs[EXTRA_REG_LBR]; 2633 2634 if (x86_pmu.flags & PMU_FL_EXCL_CNTRS) { 2635 for_each_cpu(i, topology_thread_cpumask(cpu)) { 2636 struct intel_excl_cntrs *c; 2637 2638 c = per_cpu(cpu_hw_events, i).excl_cntrs; 2639 if (c && c->core_id == core_id) { 2640 cpuc->kfree_on_online[1] = cpuc->excl_cntrs; 2641 cpuc->excl_cntrs = c; 2642 cpuc->excl_thread_id = 1; 2643 break; 2644 } 2645 } 2646 cpuc->excl_cntrs->core_id = core_id; 2647 cpuc->excl_cntrs->refcnt++; 2648 } 2649} 2650 2651static void free_excl_cntrs(int cpu) 2652{ 2653 struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu); 2654 struct intel_excl_cntrs *c; 2655 2656 c = cpuc->excl_cntrs; 2657 if (c) { 2658 if (c->core_id == -1 || --c->refcnt == 0) 2659 kfree(c); 2660 cpuc->excl_cntrs = NULL; 2661 kfree(cpuc->constraint_list); 2662 cpuc->constraint_list = NULL; 2663 } 2664} 2665 2666static void intel_pmu_cpu_dying(int cpu) 2667{ 2668 struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu); 2669 struct intel_shared_regs *pc; 2670 2671 pc = cpuc->shared_regs; 2672 if (pc) { 2673 if (pc->core_id == -1 || --pc->refcnt == 0) 2674 kfree(pc); 2675 cpuc->shared_regs = NULL; 2676 } 2677 2678 free_excl_cntrs(cpu); 2679 2680 fini_debug_store_on_cpu(cpu); 2681} 2682 2683PMU_FORMAT_ATTR(offcore_rsp, "config1:0-63"); 2684 2685PMU_FORMAT_ATTR(ldlat, "config1:0-15"); 2686 2687static struct attribute *intel_arch3_formats_attr[] = { 2688 &format_attr_event.attr, 2689 &format_attr_umask.attr, 2690 &format_attr_edge.attr, 2691 &format_attr_pc.attr, 2692 &format_attr_any.attr, 2693 &format_attr_inv.attr, 2694 &format_attr_cmask.attr, 2695 &format_attr_in_tx.attr, 2696 &format_attr_in_tx_cp.attr, 2697 2698 &format_attr_offcore_rsp.attr, /* XXX do NHM/WSM + SNB breakout */ 2699 &format_attr_ldlat.attr, 
/* PEBS load latency */ 2700 NULL, 2701}; 2702 2703static __initconst const struct x86_pmu core_pmu = { 2704 .name = "core", 2705 .handle_irq = x86_pmu_handle_irq, 2706 .disable_all = x86_pmu_disable_all, 2707 .enable_all = core_pmu_enable_all, 2708 .enable = core_pmu_enable_event, 2709 .disable = x86_pmu_disable_event, 2710 .hw_config = x86_pmu_hw_config, 2711 .schedule_events = x86_schedule_events, 2712 .eventsel = MSR_ARCH_PERFMON_EVENTSEL0, 2713 .perfctr = MSR_ARCH_PERFMON_PERFCTR0, 2714 .event_map = intel_pmu_event_map, 2715 .max_events = ARRAY_SIZE(intel_perfmon_event_map), 2716 .apic = 1, 2717 /* 2718 * Intel PMCs cannot be accessed sanely above 32-bit width, 2719 * so we install an artificial 1<<31 period regardless of 2720 * the generic event period: 2721 */ 2722 .max_period = (1ULL<<31) - 1, 2723 .get_event_constraints = intel_get_event_constraints, 2724 .put_event_constraints = intel_put_event_constraints, 2725 .event_constraints = intel_core_event_constraints, 2726 .guest_get_msrs = core_guest_get_msrs, 2727 .format_attrs = intel_arch_formats_attr, 2728 .events_sysfs_show = intel_event_sysfs_show, 2729 2730 /* 2731 * Virtual (or funny metal) CPU can define x86_pmu.extra_regs 2732 * together with PMU version 1 and thus be using core_pmu with 2733 * shared_regs. We need following callbacks here to allocate 2734 * it properly. 2735 */ 2736 .cpu_prepare = intel_pmu_cpu_prepare, 2737 .cpu_starting = intel_pmu_cpu_starting, 2738 .cpu_dying = intel_pmu_cpu_dying, 2739}; 2740 2741static __initconst const struct x86_pmu intel_pmu = { 2742 .name = "Intel", 2743 .handle_irq = intel_pmu_handle_irq, 2744 .disable_all = intel_pmu_disable_all, 2745 .enable_all = intel_pmu_enable_all, 2746 .enable = intel_pmu_enable_event, 2747 .disable = intel_pmu_disable_event, 2748 .hw_config = intel_pmu_hw_config, 2749 .schedule_events = x86_schedule_events, 2750 .eventsel = MSR_ARCH_PERFMON_EVENTSEL0, 2751 .perfctr = MSR_ARCH_PERFMON_PERFCTR0, 2752 .event_map = intel_pmu_event_map, 2753 .max_events = ARRAY_SIZE(intel_perfmon_event_map), 2754 .apic = 1, 2755 /* 2756 * Intel PMCs cannot be accessed sanely above 32 bit width, 2757 * so we install an artificial 1<<31 period regardless of 2758 * the generic event period: 2759 */ 2760 .max_period = (1ULL << 31) - 1, 2761 .get_event_constraints = intel_get_event_constraints, 2762 .put_event_constraints = intel_put_event_constraints, 2763 .pebs_aliases = intel_pebs_aliases_core2, 2764 2765 .format_attrs = intel_arch3_formats_attr, 2766 .events_sysfs_show = intel_event_sysfs_show, 2767 2768 .cpu_prepare = intel_pmu_cpu_prepare, 2769 .cpu_starting = intel_pmu_cpu_starting, 2770 .cpu_dying = intel_pmu_cpu_dying, 2771 .guest_get_msrs = intel_guest_get_msrs, 2772 .sched_task = intel_pmu_lbr_sched_task, 2773}; 2774 2775static __init void intel_clovertown_quirk(void) 2776{ 2777 /* 2778 * PEBS is unreliable due to: 2779 * 2780 * AJ67 - PEBS may experience CPL leaks 2781 * AJ68 - PEBS PMI may be delayed by one event 2782 * AJ69 - GLOBAL_STATUS[62] will only be set when DEBUGCTL[12] 2783 * AJ106 - FREEZE_LBRS_ON_PMI doesn't work in combination with PEBS 2784 * 2785 * AJ67 could be worked around by restricting the OS/USR flags. 2786 * AJ69 could be worked around by setting PMU_FREEZE_ON_PMI. 2787 * 2788 * AJ106 could possibly be worked around by not allowing LBR 2789 * usage from PEBS, including the fixup. 2790 * AJ68 could possibly be worked around by always programming 2791 * a pebs_event_reset[0] value and coping with the lost events. 
2792 * 2793 * But taken together it might just make sense to not enable PEBS on 2794 * these chips. 2795 */ 2796 pr_warn("PEBS disabled due to CPU errata\n"); 2797 x86_pmu.pebs = 0; 2798 x86_pmu.pebs_constraints = NULL; 2799} 2800 2801static int intel_snb_pebs_broken(int cpu) 2802{ 2803 u32 rev = UINT_MAX; /* default to broken for unknown models */ 2804 2805 switch (cpu_data(cpu).x86_model) { 2806 case 42: /* SNB */ 2807 rev = 0x28; 2808 break; 2809 2810 case 45: /* SNB-EP */ 2811 switch (cpu_data(cpu).x86_mask) { 2812 case 6: rev = 0x618; break; 2813 case 7: rev = 0x70c; break; 2814 } 2815 } 2816 2817 return (cpu_data(cpu).microcode < rev); 2818} 2819 2820static void intel_snb_check_microcode(void) 2821{ 2822 int pebs_broken = 0; 2823 int cpu; 2824 2825 get_online_cpus(); 2826 for_each_online_cpu(cpu) { 2827 if ((pebs_broken = intel_snb_pebs_broken(cpu))) 2828 break; 2829 } 2830 put_online_cpus(); 2831 2832 if (pebs_broken == x86_pmu.pebs_broken) 2833 return; 2834 2835 /* 2836 * Serialized by the microcode lock.. 2837 */ 2838 if (x86_pmu.pebs_broken) { 2839 pr_info("PEBS enabled due to microcode update\n"); 2840 x86_pmu.pebs_broken = 0; 2841 } else { 2842 pr_info("PEBS disabled due to CPU errata, please upgrade microcode\n"); 2843 x86_pmu.pebs_broken = 1; 2844 } 2845} 2846 2847/* 2848 * Under certain circumstances, access certain MSR may cause #GP. 2849 * The function tests if the input MSR can be safely accessed. 2850 */ 2851static bool check_msr(unsigned long msr, u64 mask) 2852{ 2853 u64 val_old, val_new, val_tmp; 2854 2855 /* 2856 * Read the current value, change it and read it back to see if it 2857 * matches, this is needed to detect certain hardware emulators 2858 * (qemu/kvm) that don't trap on the MSR access and always return 0s. 2859 */ 2860 if (rdmsrl_safe(msr, &val_old)) 2861 return false; 2862 2863 /* 2864 * Only change the bits which can be updated by wrmsrl. 2865 */ 2866 val_tmp = val_old ^ mask; 2867 if (wrmsrl_safe(msr, val_tmp) || 2868 rdmsrl_safe(msr, &val_new)) 2869 return false; 2870 2871 if (val_new != val_tmp) 2872 return false; 2873 2874 /* Here it's sure that the MSR can be safely accessed. 2875 * Restore the old value and return. 
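 *
 * For reference, this helper is used later in intel_pmu_init(), e.g.:
 *
 *	if (x86_pmu.lbr_nr && !check_msr(x86_pmu.lbr_tos, 0x3UL))
 *		x86_pmu.lbr_nr = 0;
 *
 * so an environment that does not really implement the MSR (such as an
 * emulator that always returns 0) is detected and the feature is
 * disabled up front instead of misbehaving at runtime.
 *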
 */
	wrmsrl(msr, val_old);

	return true;
}

static __init void intel_sandybridge_quirk(void)
{
	x86_pmu.check_microcode = intel_snb_check_microcode;
	intel_snb_check_microcode();
}

static const struct { int id; char *name; } intel_arch_events_map[] __initconst = {
	{ PERF_COUNT_HW_CPU_CYCLES, "cpu cycles" },
	{ PERF_COUNT_HW_INSTRUCTIONS, "instructions" },
	{ PERF_COUNT_HW_BUS_CYCLES, "bus cycles" },
	{ PERF_COUNT_HW_CACHE_REFERENCES, "cache references" },
	{ PERF_COUNT_HW_CACHE_MISSES, "cache misses" },
	{ PERF_COUNT_HW_BRANCH_INSTRUCTIONS, "branch instructions" },
	{ PERF_COUNT_HW_BRANCH_MISSES, "branch misses" },
};

static __init void intel_arch_events_quirk(void)
{
	int bit;

	/* disable events that CPUID reports as not present */
	for_each_set_bit(bit, x86_pmu.events_mask, ARRAY_SIZE(intel_arch_events_map)) {
		intel_perfmon_event_map[intel_arch_events_map[bit].id] = 0;
		pr_warn("CPUID marked event: \'%s\' unavailable\n",
			intel_arch_events_map[bit].name);
	}
}

static __init void intel_nehalem_quirk(void)
{
	union cpuid10_ebx ebx;

	ebx.full = x86_pmu.events_maskl;
	if (ebx.split.no_branch_misses_retired) {
		/*
		 * Erratum AAJ80 detected, we work it around by using
		 * the BR_MISP_EXEC.ANY event. This will over-count
		 * branch-misses, but it's still much better than the
		 * architectural event which is often completely bogus:
		 */
		intel_perfmon_event_map[PERF_COUNT_HW_BRANCH_MISSES] = 0x7f89;
		ebx.split.no_branch_misses_retired = 0;
		x86_pmu.events_maskl = ebx.full;
		pr_info("CPU erratum AAJ80 worked around\n");
	}
}

/*
 * Enable the software workaround for the errata:
 * SNB: BJ122
 * IVB: BV98
 * HSW: HSD29
 *
 * Only needed when HT is enabled. However, detecting
 * whether HT is enabled is difficult (model specific).
So instead, 2937 * we enable the workaround in the early boot, and verify if 2938 * it is needed in a later initcall phase once we have valid 2939 * topology information to check if HT is actually enabled 2940 */ 2941static __init void intel_ht_bug(void) 2942{ 2943 x86_pmu.flags |= PMU_FL_EXCL_CNTRS | PMU_FL_EXCL_ENABLED; 2944 2945 x86_pmu.commit_scheduling = intel_commit_scheduling; 2946 x86_pmu.start_scheduling = intel_start_scheduling; 2947 x86_pmu.stop_scheduling = intel_stop_scheduling; 2948} 2949 2950EVENT_ATTR_STR(mem-loads, mem_ld_hsw, "event=0xcd,umask=0x1,ldlat=3"); 2951EVENT_ATTR_STR(mem-stores, mem_st_hsw, "event=0xd0,umask=0x82") 2952 2953/* Haswell special events */ 2954EVENT_ATTR_STR(tx-start, tx_start, "event=0xc9,umask=0x1"); 2955EVENT_ATTR_STR(tx-commit, tx_commit, "event=0xc9,umask=0x2"); 2956EVENT_ATTR_STR(tx-abort, tx_abort, "event=0xc9,umask=0x4"); 2957EVENT_ATTR_STR(tx-capacity, tx_capacity, "event=0x54,umask=0x2"); 2958EVENT_ATTR_STR(tx-conflict, tx_conflict, "event=0x54,umask=0x1"); 2959EVENT_ATTR_STR(el-start, el_start, "event=0xc8,umask=0x1"); 2960EVENT_ATTR_STR(el-commit, el_commit, "event=0xc8,umask=0x2"); 2961EVENT_ATTR_STR(el-abort, el_abort, "event=0xc8,umask=0x4"); 2962EVENT_ATTR_STR(el-capacity, el_capacity, "event=0x54,umask=0x2"); 2963EVENT_ATTR_STR(el-conflict, el_conflict, "event=0x54,umask=0x1"); 2964EVENT_ATTR_STR(cycles-t, cycles_t, "event=0x3c,in_tx=1"); 2965EVENT_ATTR_STR(cycles-ct, cycles_ct, "event=0x3c,in_tx=1,in_tx_cp=1"); 2966 2967static struct attribute *hsw_events_attrs[] = { 2968 EVENT_PTR(tx_start), 2969 EVENT_PTR(tx_commit), 2970 EVENT_PTR(tx_abort), 2971 EVENT_PTR(tx_capacity), 2972 EVENT_PTR(tx_conflict), 2973 EVENT_PTR(el_start), 2974 EVENT_PTR(el_commit), 2975 EVENT_PTR(el_abort), 2976 EVENT_PTR(el_capacity), 2977 EVENT_PTR(el_conflict), 2978 EVENT_PTR(cycles_t), 2979 EVENT_PTR(cycles_ct), 2980 EVENT_PTR(mem_ld_hsw), 2981 EVENT_PTR(mem_st_hsw), 2982 NULL 2983}; 2984 2985__init int intel_pmu_init(void) 2986{ 2987 union cpuid10_edx edx; 2988 union cpuid10_eax eax; 2989 union cpuid10_ebx ebx; 2990 struct event_constraint *c; 2991 unsigned int unused; 2992 struct extra_reg *er; 2993 int version, i; 2994 2995 if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) { 2996 switch (boot_cpu_data.x86) { 2997 case 0x6: 2998 return p6_pmu_init(); 2999 case 0xb: 3000 return knc_pmu_init(); 3001 case 0xf: 3002 return p4_pmu_init(); 3003 } 3004 return -ENODEV; 3005 } 3006 3007 /* 3008 * Check whether the Architectural PerfMon supports 3009 * Branch Misses Retired hw_event or not. 
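 *
 * For orientation (summarised from the cpuid10_* unions used below, not
 * an authoritative SDM quote), CPUID leaf 0xa reports roughly:
 *
 *	EAX[ 7: 0]  architectural PerfMon version
 *	EAX[15: 8]  number of general-purpose counters
 *	EAX[23:16]  counter bit width
 *	EAX[31:24]  length of the EBX event-availability mask
 *	EBX         a set bit means that architectural event is NOT available
 *	EDX[ 4: 0]  number of fixed-function counters (version >= 2)
 *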
3010 */ 3011 cpuid(10, &eax.full, &ebx.full, &unused, &edx.full); 3012 if (eax.split.mask_length < ARCH_PERFMON_EVENTS_COUNT) 3013 return -ENODEV; 3014 3015 version = eax.split.version_id; 3016 if (version < 2) 3017 x86_pmu = core_pmu; 3018 else 3019 x86_pmu = intel_pmu; 3020 3021 x86_pmu.version = version; 3022 x86_pmu.num_counters = eax.split.num_counters; 3023 x86_pmu.cntval_bits = eax.split.bit_width; 3024 x86_pmu.cntval_mask = (1ULL << eax.split.bit_width) - 1; 3025 3026 x86_pmu.events_maskl = ebx.full; 3027 x86_pmu.events_mask_len = eax.split.mask_length; 3028 3029 x86_pmu.max_pebs_events = min_t(unsigned, MAX_PEBS_EVENTS, x86_pmu.num_counters); 3030 3031 /* 3032 * Quirk: v2 perfmon does not report fixed-purpose events, so 3033 * assume at least 3 events: 3034 */ 3035 if (version > 1) 3036 x86_pmu.num_counters_fixed = max((int)edx.split.num_counters_fixed, 3); 3037 3038 if (boot_cpu_has(X86_FEATURE_PDCM)) { 3039 u64 capabilities; 3040 3041 rdmsrl(MSR_IA32_PERF_CAPABILITIES, capabilities); 3042 x86_pmu.intel_cap.capabilities = capabilities; 3043 } 3044 3045 intel_ds_init(); 3046 3047 x86_add_quirk(intel_arch_events_quirk); /* Install first, so it runs last */ 3048 3049 /* 3050 * Install the hw-cache-events table: 3051 */ 3052 switch (boot_cpu_data.x86_model) { 3053 case 14: /* 65nm Core "Yonah" */ 3054 pr_cont("Core events, "); 3055 break; 3056 3057 case 15: /* 65nm Core2 "Merom" */ 3058 x86_add_quirk(intel_clovertown_quirk); 3059 case 22: /* 65nm Core2 "Merom-L" */ 3060 case 23: /* 45nm Core2 "Penryn" */ 3061 case 29: /* 45nm Core2 "Dunnington (MP) */ 3062 memcpy(hw_cache_event_ids, core2_hw_cache_event_ids, 3063 sizeof(hw_cache_event_ids)); 3064 3065 intel_pmu_lbr_init_core(); 3066 3067 x86_pmu.event_constraints = intel_core2_event_constraints; 3068 x86_pmu.pebs_constraints = intel_core2_pebs_event_constraints; 3069 pr_cont("Core2 events, "); 3070 break; 3071 3072 case 30: /* 45nm Nehalem */ 3073 case 26: /* 45nm Nehalem-EP */ 3074 case 46: /* 45nm Nehalem-EX */ 3075 memcpy(hw_cache_event_ids, nehalem_hw_cache_event_ids, 3076 sizeof(hw_cache_event_ids)); 3077 memcpy(hw_cache_extra_regs, nehalem_hw_cache_extra_regs, 3078 sizeof(hw_cache_extra_regs)); 3079 3080 intel_pmu_lbr_init_nhm(); 3081 3082 x86_pmu.event_constraints = intel_nehalem_event_constraints; 3083 x86_pmu.pebs_constraints = intel_nehalem_pebs_event_constraints; 3084 x86_pmu.enable_all = intel_pmu_nhm_enable_all; 3085 x86_pmu.extra_regs = intel_nehalem_extra_regs; 3086 3087 x86_pmu.cpu_events = nhm_events_attrs; 3088 3089 /* UOPS_ISSUED.STALLED_CYCLES */ 3090 intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 3091 X86_CONFIG(.event=0x0e, .umask=0x01, .inv=1, .cmask=1); 3092 /* UOPS_EXECUTED.CORE_ACTIVE_CYCLES,c=1,i=1 */ 3093 intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = 3094 X86_CONFIG(.event=0xb1, .umask=0x3f, .inv=1, .cmask=1); 3095 3096 x86_add_quirk(intel_nehalem_quirk); 3097 3098 pr_cont("Nehalem events, "); 3099 break; 3100 3101 case 28: /* 45nm Atom "Pineview" */ 3102 case 38: /* 45nm Atom "Lincroft" */ 3103 case 39: /* 32nm Atom "Penwell" */ 3104 case 53: /* 32nm Atom "Cloverview" */ 3105 case 54: /* 32nm Atom "Cedarview" */ 3106 memcpy(hw_cache_event_ids, atom_hw_cache_event_ids, 3107 sizeof(hw_cache_event_ids)); 3108 3109 intel_pmu_lbr_init_atom(); 3110 3111 x86_pmu.event_constraints = intel_gen_event_constraints; 3112 x86_pmu.pebs_constraints = intel_atom_pebs_event_constraints; 3113 pr_cont("Atom events, "); 3114 break; 3115 3116 case 55: /* 22nm Atom "Silvermont" */ 3117 case 
76: /* 14nm Atom "Airmont" */ 3118 case 77: /* 22nm Atom "Silvermont Avoton/Rangely" */ 3119 memcpy(hw_cache_event_ids, slm_hw_cache_event_ids, 3120 sizeof(hw_cache_event_ids)); 3121 memcpy(hw_cache_extra_regs, slm_hw_cache_extra_regs, 3122 sizeof(hw_cache_extra_regs)); 3123 3124 intel_pmu_lbr_init_atom(); 3125 3126 x86_pmu.event_constraints = intel_slm_event_constraints; 3127 x86_pmu.pebs_constraints = intel_slm_pebs_event_constraints; 3128 x86_pmu.extra_regs = intel_slm_extra_regs; 3129 x86_pmu.flags |= PMU_FL_HAS_RSP_1; 3130 pr_cont("Silvermont events, "); 3131 break; 3132 3133 case 37: /* 32nm Westmere */ 3134 case 44: /* 32nm Westmere-EP */ 3135 case 47: /* 32nm Westmere-EX */ 3136 memcpy(hw_cache_event_ids, westmere_hw_cache_event_ids, 3137 sizeof(hw_cache_event_ids)); 3138 memcpy(hw_cache_extra_regs, nehalem_hw_cache_extra_regs, 3139 sizeof(hw_cache_extra_regs)); 3140 3141 intel_pmu_lbr_init_nhm(); 3142 3143 x86_pmu.event_constraints = intel_westmere_event_constraints; 3144 x86_pmu.enable_all = intel_pmu_nhm_enable_all; 3145 x86_pmu.pebs_constraints = intel_westmere_pebs_event_constraints; 3146 x86_pmu.extra_regs = intel_westmere_extra_regs; 3147 x86_pmu.flags |= PMU_FL_HAS_RSP_1; 3148 3149 x86_pmu.cpu_events = nhm_events_attrs; 3150 3151 /* UOPS_ISSUED.STALLED_CYCLES */ 3152 intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 3153 X86_CONFIG(.event=0x0e, .umask=0x01, .inv=1, .cmask=1); 3154 /* UOPS_EXECUTED.CORE_ACTIVE_CYCLES,c=1,i=1 */ 3155 intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = 3156 X86_CONFIG(.event=0xb1, .umask=0x3f, .inv=1, .cmask=1); 3157 3158 pr_cont("Westmere events, "); 3159 break; 3160 3161 case 42: /* 32nm SandyBridge */ 3162 case 45: /* 32nm SandyBridge-E/EN/EP */ 3163 x86_add_quirk(intel_sandybridge_quirk); 3164 x86_add_quirk(intel_ht_bug); 3165 memcpy(hw_cache_event_ids, snb_hw_cache_event_ids, 3166 sizeof(hw_cache_event_ids)); 3167 memcpy(hw_cache_extra_regs, snb_hw_cache_extra_regs, 3168 sizeof(hw_cache_extra_regs)); 3169 3170 intel_pmu_lbr_init_snb(); 3171 3172 x86_pmu.event_constraints = intel_snb_event_constraints; 3173 x86_pmu.pebs_constraints = intel_snb_pebs_event_constraints; 3174 x86_pmu.pebs_aliases = intel_pebs_aliases_snb; 3175 if (boot_cpu_data.x86_model == 45) 3176 x86_pmu.extra_regs = intel_snbep_extra_regs; 3177 else 3178 x86_pmu.extra_regs = intel_snb_extra_regs; 3179 3180 3181 /* all extra regs are per-cpu when HT is on */ 3182 x86_pmu.flags |= PMU_FL_HAS_RSP_1; 3183 x86_pmu.flags |= PMU_FL_NO_HT_SHARING; 3184 3185 x86_pmu.cpu_events = snb_events_attrs; 3186 3187 /* UOPS_ISSUED.ANY,c=1,i=1 to count stall cycles */ 3188 intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 3189 X86_CONFIG(.event=0x0e, .umask=0x01, .inv=1, .cmask=1); 3190 /* UOPS_DISPATCHED.THREAD,c=1,i=1 to count stall cycles*/ 3191 intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = 3192 X86_CONFIG(.event=0xb1, .umask=0x01, .inv=1, .cmask=1); 3193 3194 pr_cont("SandyBridge events, "); 3195 break; 3196 3197 case 58: /* 22nm IvyBridge */ 3198 case 62: /* 22nm IvyBridge-EP/EX */ 3199 x86_add_quirk(intel_ht_bug); 3200 memcpy(hw_cache_event_ids, snb_hw_cache_event_ids, 3201 sizeof(hw_cache_event_ids)); 3202 /* dTLB-load-misses on IVB is different than SNB */ 3203 hw_cache_event_ids[C(DTLB)][C(OP_READ)][C(RESULT_MISS)] = 0x8108; /* DTLB_LOAD_MISSES.DEMAND_LD_MISS_CAUSES_A_WALK */ 3204 3205 memcpy(hw_cache_extra_regs, snb_hw_cache_extra_regs, 3206 sizeof(hw_cache_extra_regs)); 3207 3208 intel_pmu_lbr_init_snb(); 3209 3210 
x86_pmu.event_constraints = intel_ivb_event_constraints; 3211 x86_pmu.pebs_constraints = intel_ivb_pebs_event_constraints; 3212 x86_pmu.pebs_aliases = intel_pebs_aliases_snb; 3213 if (boot_cpu_data.x86_model == 62) 3214 x86_pmu.extra_regs = intel_snbep_extra_regs; 3215 else 3216 x86_pmu.extra_regs = intel_snb_extra_regs; 3217 /* all extra regs are per-cpu when HT is on */ 3218 x86_pmu.flags |= PMU_FL_HAS_RSP_1; 3219 x86_pmu.flags |= PMU_FL_NO_HT_SHARING; 3220 3221 x86_pmu.cpu_events = snb_events_attrs; 3222 3223 /* UOPS_ISSUED.ANY,c=1,i=1 to count stall cycles */ 3224 intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 3225 X86_CONFIG(.event=0x0e, .umask=0x01, .inv=1, .cmask=1); 3226 3227 pr_cont("IvyBridge events, "); 3228 break; 3229 3230 3231 case 60: /* 22nm Haswell Core */ 3232 case 63: /* 22nm Haswell Server */ 3233 case 69: /* 22nm Haswell ULT */ 3234 case 70: /* 22nm Haswell + GT3e (Intel Iris Pro graphics) */ 3235 x86_add_quirk(intel_ht_bug); 3236 x86_pmu.late_ack = true; 3237 memcpy(hw_cache_event_ids, hsw_hw_cache_event_ids, sizeof(hw_cache_event_ids)); 3238 memcpy(hw_cache_extra_regs, hsw_hw_cache_extra_regs, sizeof(hw_cache_extra_regs)); 3239 3240 intel_pmu_lbr_init_hsw(); 3241 3242 x86_pmu.event_constraints = intel_hsw_event_constraints; 3243 x86_pmu.pebs_constraints = intel_hsw_pebs_event_constraints; 3244 x86_pmu.extra_regs = intel_snbep_extra_regs; 3245 x86_pmu.pebs_aliases = intel_pebs_aliases_snb; 3246 /* all extra regs are per-cpu when HT is on */ 3247 x86_pmu.flags |= PMU_FL_HAS_RSP_1; 3248 x86_pmu.flags |= PMU_FL_NO_HT_SHARING; 3249 3250 x86_pmu.hw_config = hsw_hw_config; 3251 x86_pmu.get_event_constraints = hsw_get_event_constraints; 3252 x86_pmu.cpu_events = hsw_events_attrs; 3253 x86_pmu.lbr_double_abort = true; 3254 pr_cont("Haswell events, "); 3255 break; 3256 3257 case 61: /* 14nm Broadwell Core-M */ 3258 case 86: /* 14nm Broadwell Xeon D */ 3259 case 71: /* 14nm Broadwell + GT3e (Intel Iris Pro graphics) */ 3260 case 79: /* 14nm Broadwell Server */ 3261 x86_pmu.late_ack = true; 3262 memcpy(hw_cache_event_ids, hsw_hw_cache_event_ids, sizeof(hw_cache_event_ids)); 3263 memcpy(hw_cache_extra_regs, hsw_hw_cache_extra_regs, sizeof(hw_cache_extra_regs)); 3264 3265 /* L3_MISS_LOCAL_DRAM is BIT(26) in Broadwell */ 3266 hw_cache_extra_regs[C(LL)][C(OP_READ)][C(RESULT_MISS)] = HSW_DEMAND_READ | 3267 BDW_L3_MISS|HSW_SNOOP_DRAM; 3268 hw_cache_extra_regs[C(LL)][C(OP_WRITE)][C(RESULT_MISS)] = HSW_DEMAND_WRITE|BDW_L3_MISS| 3269 HSW_SNOOP_DRAM; 3270 hw_cache_extra_regs[C(NODE)][C(OP_READ)][C(RESULT_ACCESS)] = HSW_DEMAND_READ| 3271 BDW_L3_MISS_LOCAL|HSW_SNOOP_DRAM; 3272 hw_cache_extra_regs[C(NODE)][C(OP_WRITE)][C(RESULT_ACCESS)] = HSW_DEMAND_WRITE| 3273 BDW_L3_MISS_LOCAL|HSW_SNOOP_DRAM; 3274 3275 intel_pmu_lbr_init_hsw(); 3276 3277 x86_pmu.event_constraints = intel_bdw_event_constraints; 3278 x86_pmu.pebs_constraints = intel_hsw_pebs_event_constraints; 3279 x86_pmu.extra_regs = intel_snbep_extra_regs; 3280 x86_pmu.pebs_aliases = intel_pebs_aliases_snb; 3281 /* all extra regs are per-cpu when HT is on */ 3282 x86_pmu.flags |= PMU_FL_HAS_RSP_1; 3283 x86_pmu.flags |= PMU_FL_NO_HT_SHARING; 3284 3285 x86_pmu.hw_config = hsw_hw_config; 3286 x86_pmu.get_event_constraints = hsw_get_event_constraints; 3287 x86_pmu.cpu_events = hsw_events_attrs; 3288 x86_pmu.limit_period = bdw_limit_period; 3289 pr_cont("Broadwell events, "); 3290 break; 3291 3292 default: 3293 switch (x86_pmu.version) { 3294 case 1: 3295 x86_pmu.event_constraints = intel_v1_event_constraints; 3296 
			pr_cont("generic architected perfmon v1, ");
			break;
		default:
			/*
			 * default constraints for v2 and up
			 */
			x86_pmu.event_constraints = intel_gen_event_constraints;
			pr_cont("generic architected perfmon, ");
			break;
		}
	}

	if (x86_pmu.num_counters > INTEL_PMC_MAX_GENERIC) {
		WARN(1, KERN_ERR "hw perf events %d > max(%d), clipping!",
		     x86_pmu.num_counters, INTEL_PMC_MAX_GENERIC);
		x86_pmu.num_counters = INTEL_PMC_MAX_GENERIC;
	}
	x86_pmu.intel_ctrl = (1 << x86_pmu.num_counters) - 1;

	if (x86_pmu.num_counters_fixed > INTEL_PMC_MAX_FIXED) {
		WARN(1, KERN_ERR "hw perf events fixed %d > max(%d), clipping!",
		     x86_pmu.num_counters_fixed, INTEL_PMC_MAX_FIXED);
		x86_pmu.num_counters_fixed = INTEL_PMC_MAX_FIXED;
	}

	x86_pmu.intel_ctrl |=
		((1LL << x86_pmu.num_counters_fixed)-1) << INTEL_PMC_IDX_FIXED;

	if (x86_pmu.event_constraints) {
		/*
		 * The event on fixed counter 2 (REF_CYCLES) only works on
		 * that counter, so do not extend its mask to the generic
		 * counters.
		 */
		for_each_event_constraint(c, x86_pmu.event_constraints) {
			if (c->cmask == FIXED_EVENT_FLAGS
			    && c->idxmsk64 != INTEL_PMC_MSK_FIXED_REF_CYCLES) {
				c->idxmsk64 |= (1ULL << x86_pmu.num_counters) - 1;
			}
			c->idxmsk64 &=
				~(~0UL << (INTEL_PMC_IDX_FIXED + x86_pmu.num_counters_fixed));
			c->weight = hweight64(c->idxmsk64);
		}
	}

	/*
	 * Accessing the LBR MSRs may cause a #GP under certain circumstances,
	 * e.g. KVM doesn't support the LBR MSRs.
	 * Check all LBR MSRs here.
	 * Disable LBR access if any LBR MSR cannot be accessed.
	 */
	if (x86_pmu.lbr_nr && !check_msr(x86_pmu.lbr_tos, 0x3UL))
		x86_pmu.lbr_nr = 0;
	for (i = 0; i < x86_pmu.lbr_nr; i++) {
		if (!(check_msr(x86_pmu.lbr_from + i, 0xffffUL) &&
		      check_msr(x86_pmu.lbr_to + i, 0xffffUL)))
			x86_pmu.lbr_nr = 0;
	}

	/*
	 * Accessing an extra-reg MSR may cause a #GP under certain
	 * circumstances, e.g. KVM doesn't support the offcore response MSRs.
	 * Check all extra_regs here.
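 *
 * Illustrative scenario (assumed, not from the original source): under a
 * hypervisor that does not emulate MSR_OFFCORE_RSP_0, check_msr() fails,
 * er->extra_msr_access is cleared below, and offcore-response events are
 * then expected to be refused when the event is set up rather than
 * #GP-ing on a raw wrmsrl() later.
 *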
3358 */ 3359 if (x86_pmu.extra_regs) { 3360 for (er = x86_pmu.extra_regs; er->msr; er++) { 3361 er->extra_msr_access = check_msr(er->msr, 0x1ffUL); 3362 /* Disable LBR select mapping */ 3363 if ((er->idx == EXTRA_REG_LBR) && !er->extra_msr_access) 3364 x86_pmu.lbr_sel_map = NULL; 3365 } 3366 } 3367 3368 /* Support full width counters using alternative MSR range */ 3369 if (x86_pmu.intel_cap.full_width_write) { 3370 x86_pmu.max_period = x86_pmu.cntval_mask; 3371 x86_pmu.perfctr = MSR_IA32_PMC0; 3372 pr_cont("full-width counters, "); 3373 } 3374 3375 return 0; 3376} 3377 3378/* 3379 * HT bug: phase 2 init 3380 * Called once we have valid topology information to check 3381 * whether or not HT is enabled 3382 * If HT is off, then we disable the workaround 3383 */ 3384static __init int fixup_ht_bug(void) 3385{ 3386 int cpu = smp_processor_id(); 3387 int w, c; 3388 /* 3389 * problem not present on this CPU model, nothing to do 3390 */ 3391 if (!(x86_pmu.flags & PMU_FL_EXCL_ENABLED)) 3392 return 0; 3393 3394 w = cpumask_weight(topology_thread_cpumask(cpu)); 3395 if (w > 1) { 3396 pr_info("PMU erratum BJ122, BV98, HSD29 worked around, HT is on\n"); 3397 return 0; 3398 } 3399 3400 watchdog_nmi_disable_all(); 3401 3402 x86_pmu.flags &= ~(PMU_FL_EXCL_CNTRS | PMU_FL_EXCL_ENABLED); 3403 3404 x86_pmu.commit_scheduling = NULL; 3405 x86_pmu.start_scheduling = NULL; 3406 x86_pmu.stop_scheduling = NULL; 3407 3408 watchdog_nmi_enable_all(); 3409 3410 get_online_cpus(); 3411 3412 for_each_online_cpu(c) { 3413 free_excl_cntrs(c); 3414 } 3415 3416 put_online_cpus(); 3417 pr_info("PMU erratum BJ122, BV98, HSD29 workaround disabled, HT off\n"); 3418 return 0; 3419} 3420subsys_initcall(fixup_ht_bug) 3421
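
/*
 * Illustrative userspace sketch (not part of this driver; the event
 * encoding and values below are examples only): it shows how the format
 * bits exported above (event config:0-7, umask config:8-15, inv config:23,
 * cmask config:24-31) compose a raw config value, here the PEBS-friendly
 * cycles alias built by intel_pebs_aliases_snb(), and how such a raw event
 * could be opened and read via perf_event_open(2). Kept inside #if 0 so it
 * is never built as part of the kernel.
 */
#if 0
#include <linux/perf_event.h>
#include <sys/syscall.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <string.h>
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	struct perf_event_attr attr;
	uint64_t count;
	int fd;

	/* event=0xc2, umask=0x01, inv=1, cmask=16 -> 0x108001c2 */
	uint64_t config = 0xc2 | (0x01ULL << 8) | (1ULL << 23) | (16ULL << 24);

	memset(&attr, 0, sizeof(attr));
	attr.type = PERF_TYPE_RAW;
	attr.size = sizeof(attr);
	attr.config = config;
	attr.disabled = 1;
	attr.exclude_kernel = 1;

	/* measure the calling thread, on any CPU */
	fd = syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
	if (fd < 0) {
		perror("perf_event_open");
		return 1;
	}

	ioctl(fd, PERF_EVENT_IOC_RESET, 0);
	ioctl(fd, PERF_EVENT_IOC_ENABLE, 0);
	/* ... workload under test would run here ... */
	ioctl(fd, PERF_EVENT_IOC_DISABLE, 0);

	if (read(fd, &count, sizeof(count)) == sizeof(count))
		printf("raw 0x%llx counted %llu\n",
		       (unsigned long long)config, (unsigned long long)count);

	close(fd);
	return 0;
}
#endif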