root/arch/x86/events/intel/bts.c

DEFINITIONS

This source file includes the following definitions:
  1. buf_nr_pages
  2. buf_size
  3. bts_buffer_setup_aux
  4. bts_buffer_free_aux
  5. bts_buffer_offset
  6. bts_config_buffer
  7. bts_buffer_pad_out
  8. bts_update
  9. __bts_event_start
  10. bts_event_start
  11. __bts_event_stop
  12. bts_event_stop
  13. intel_bts_enable_local
  14. intel_bts_disable_local
  15. bts_buffer_reset
  16. intel_bts_interrupt
  17. bts_event_del
  18. bts_event_add
  19. bts_event_destroy
  20. bts_event_init
  21. bts_event_read
  22. bts_init

// SPDX-License-Identifier: GPL-2.0-only
/*
 * BTS PMU driver for perf
 * Copyright (c) 2013-2014, Intel Corporation.
 */

#undef DEBUG

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/bitops.h>
#include <linux/types.h>
#include <linux/slab.h>
#include <linux/debugfs.h>
#include <linux/device.h>
#include <linux/coredump.h>

#include <linux/sizes.h>
#include <asm/perf_event.h>

#include "../perf_event.h"

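/*
 * Per-CPU BTS context: couples the perf AUX output handle with a backup
 * of the debug store (DS) fields we clobber and a small state machine
 * (BTS_STATE_*) that orders the PMU callbacks against the PMI handler.
 */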
struct bts_ctx {
        struct perf_output_handle       handle;
        struct debug_store              ds_back;
        int                             state;
};

/* BTS context states: */
enum {
        /* no ongoing AUX transactions */
        BTS_STATE_STOPPED = 0,
        /* AUX transaction is on, BTS tracing is disabled */
        BTS_STATE_INACTIVE,
        /* AUX transaction is on, BTS tracing is running */
        BTS_STATE_ACTIVE,
};

static DEFINE_PER_CPU(struct bts_ctx, bts_ctx);

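/*
 * In the 64-bit DS format a BTS record is three 8-byte fields (from, to,
 * flags), hence 24 bytes; the safety margin is the headroom left between
 * the PMI threshold and the absolute maximum of the buffer.
 */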
#define BTS_RECORD_SIZE         24
#define BTS_SAFETY_MARGIN       4080

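/*
 * One physically contiguous chunk of the AUX buffer:
 * @page:         first page of the (possibly high-order) allocation
 * @size:         usable bytes, trimmed to a multiple of BTS_RECORD_SIZE
 * @offset:       offset of this chunk within the AUX buffer
 * @displacement: bytes skipped at the start to preserve record alignment
 */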
struct bts_phys {
        struct page     *page;
        unsigned long   size;
        unsigned long   offset;
        unsigned long   displacement;
};

struct bts_buffer {
        size_t          real_size;      /* multiple of BTS_RECORD_SIZE */
        unsigned int    nr_pages;
        unsigned int    nr_bufs;
        unsigned int    cur_buf;
        bool            snapshot;
        local_t         data_size;
        local_t         head;
        unsigned long   end;
        void            **data_pages;
        struct bts_phys buf[];
};

static struct pmu bts_pmu;

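/*
 * High-order AUX allocations carry their page order in page_private();
 * plain pages have PG_private clear and count as a single page.
 */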
static int buf_nr_pages(struct page *page)
{
        if (!PagePrivate(page))
                return 1;

        return 1 << page_private(page);
}

static size_t buf_size(struct page *page)
{
        return buf_nr_pages(page) * PAGE_SIZE;
}

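/*
 * Carve the page array perf hands us into one bts_phys chunk per physical
 * allocation, trimming each chunk to a multiple of BTS_RECORD_SIZE. When a
 * chunk ends mid-record, the leftover plus the next chunk's displacement
 * add up to exactly one record, so the zero-padded gap at a boundary never
 * breaks record alignment.
 */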
static void *
bts_buffer_setup_aux(struct perf_event *event, void **pages,
                     int nr_pages, bool overwrite)
{
        struct bts_buffer *buf;
        struct page *page;
        int cpu = event->cpu;
        int node = (cpu == -1) ? cpu : cpu_to_node(cpu);
        unsigned long offset;
        size_t size = nr_pages << PAGE_SHIFT;
        int pg, nbuf, pad;

        /* count all the high order buffers */
        for (pg = 0, nbuf = 0; pg < nr_pages;) {
                page = virt_to_page(pages[pg]);
                pg += buf_nr_pages(page);
                nbuf++;
        }

        /*
         * to avoid interrupts in overwrite mode, only allow one physical buffer
         */
        if (overwrite && nbuf > 1)
                return NULL;

        buf = kzalloc_node(offsetof(struct bts_buffer, buf[nbuf]), GFP_KERNEL, node);
        if (!buf)
                return NULL;

        buf->nr_pages = nr_pages;
        buf->nr_bufs = nbuf;
        buf->snapshot = overwrite;
        buf->data_pages = pages;
        buf->real_size = size - size % BTS_RECORD_SIZE;

        for (pg = 0, nbuf = 0, offset = 0, pad = 0; nbuf < buf->nr_bufs; nbuf++) {
                unsigned int __nr_pages;

                page = virt_to_page(pages[pg]);
                __nr_pages = buf_nr_pages(page);
                buf->buf[nbuf].page = page;
                buf->buf[nbuf].offset = offset;
                buf->buf[nbuf].displacement = (pad ? BTS_RECORD_SIZE - pad : 0);
                buf->buf[nbuf].size = buf_size(page) - buf->buf[nbuf].displacement;
                pad = buf->buf[nbuf].size % BTS_RECORD_SIZE;
                buf->buf[nbuf].size -= pad;

                pg += __nr_pages;
                offset += __nr_pages << PAGE_SHIFT;
        }

        return buf;
}

static void bts_buffer_free_aux(void *data)
{
        kfree(data);
}

static unsigned long bts_buffer_offset(struct bts_buffer *buf, unsigned int idx)
{
        return buf->buf[idx].offset + buf->buf[idx].displacement;
}

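/*
 * Program the DS area for the current chunk. In non-snapshot mode the
 * interrupt threshold sits BTS_SAFETY_MARGIN bytes (or, failing that, one
 * record) before the end so the PMI fires before the window fills up; in
 * snapshot mode the threshold is pushed past the absolute maximum so it
 * never fires.
 */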
static void
bts_config_buffer(struct bts_buffer *buf)
{
        int cpu = raw_smp_processor_id();
        struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
        struct bts_phys *phys = &buf->buf[buf->cur_buf];
        unsigned long index, thresh = 0, end = phys->size;
        struct page *page = phys->page;

        index = local_read(&buf->head);

        if (!buf->snapshot) {
                if (buf->end < phys->offset + buf_size(page))
                        end = buf->end - phys->offset - phys->displacement;

                index -= phys->offset + phys->displacement;

                if (end - index > BTS_SAFETY_MARGIN)
                        thresh = end - BTS_SAFETY_MARGIN;
                else if (end - index > BTS_RECORD_SIZE)
                        thresh = end - BTS_RECORD_SIZE;
                else
                        thresh = end;
        }

        ds->bts_buffer_base = (u64)(long)page_address(page) + phys->displacement;
        ds->bts_index = ds->bts_buffer_base + index;
        ds->bts_absolute_maximum = ds->bts_buffer_base + end;
        ds->bts_interrupt_threshold = !buf->snapshot
                ? ds->bts_buffer_base + thresh
                : ds->bts_absolute_maximum + BTS_RECORD_SIZE;
}

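/* Zero-fill the unused tail of a chunk so consumers only ever see padding. */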
static void bts_buffer_pad_out(struct bts_phys *phys, unsigned long head)
{
        unsigned long index = head - phys->offset;

        memset(page_address(phys->page) + index, 0, phys->size - index);
}

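/*
 * Fold the hardware write pointer from the DS area back into buf->head and
 * account the freshly written bytes. If the pointer ran into the absolute
 * maximum, records were dropped and the transaction is flagged truncated.
 */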
static void bts_update(struct bts_ctx *bts)
{
        int cpu = raw_smp_processor_id();
        struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
        struct bts_buffer *buf = perf_get_aux(&bts->handle);
        unsigned long index = ds->bts_index - ds->bts_buffer_base, old, head;

        if (!buf)
                return;

        head = index + bts_buffer_offset(buf, buf->cur_buf);
        old = local_xchg(&buf->head, head);

        if (!buf->snapshot) {
                if (old == head)
                        return;

                if (ds->bts_index >= ds->bts_absolute_maximum)
                        perf_aux_output_flag(&bts->handle,
                                             PERF_AUX_FLAG_TRUNCATED);

                /*
                 * old and head are always in the same physical buffer, so we
                 * can subtract them to get the data size.
                 */
                local_add(head - old, &buf->data_size);
        } else {
                local_set(&buf->data_size, head);
        }
}

static int
bts_buffer_reset(struct bts_buffer *buf, struct perf_output_handle *handle);

/*
 * Ordering PMU callbacks wrt themselves and the PMI is done by means
 * of bts::state, which:
 *  - is set when bts::handle::event is valid, that is, between
 *    perf_aux_output_begin() and perf_aux_output_end();
 *  - is zero otherwise;
 *  - is ordered against bts::handle::event with a compiler barrier.
 */

static void __bts_event_start(struct perf_event *event)
{
        struct bts_ctx *bts = this_cpu_ptr(&bts_ctx);
        struct bts_buffer *buf = perf_get_aux(&bts->handle);
        u64 config = 0;

        if (!buf->snapshot)
                config |= ARCH_PERFMON_EVENTSEL_INT;
        if (!event->attr.exclude_kernel)
                config |= ARCH_PERFMON_EVENTSEL_OS;
        if (!event->attr.exclude_user)
                config |= ARCH_PERFMON_EVENTSEL_USR;

        bts_config_buffer(buf);

        /*
         * local barrier to make sure that ds configuration made it
         * before we enable BTS and bts::state goes ACTIVE
         */
        wmb();

        /* INACTIVE/STOPPED -> ACTIVE */
        WRITE_ONCE(bts->state, BTS_STATE_ACTIVE);

        intel_pmu_enable_bts(config);
}

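/*
 * pmu::start: open an AUX transaction, back up the DS window so it can be
 * restored on stop, and kick off tracing; any failure leaves the event in
 * PERF_HES_STOPPED state.
 */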
static void bts_event_start(struct perf_event *event, int flags)
{
        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
        struct bts_ctx *bts = this_cpu_ptr(&bts_ctx);
        struct bts_buffer *buf;

        buf = perf_aux_output_begin(&bts->handle, event);
        if (!buf)
                goto fail_stop;

        if (bts_buffer_reset(buf, &bts->handle))
                goto fail_end_stop;

        bts->ds_back.bts_buffer_base = cpuc->ds->bts_buffer_base;
        bts->ds_back.bts_absolute_maximum = cpuc->ds->bts_absolute_maximum;
        bts->ds_back.bts_interrupt_threshold = cpuc->ds->bts_interrupt_threshold;

        perf_event_itrace_started(event);
        event->hw.state = 0;

        __bts_event_start(event);

        return;

fail_end_stop:
        perf_aux_output_end(&bts->handle, 0);

fail_stop:
        event->hw.state = PERF_HES_STOPPED;
}

static void __bts_event_stop(struct perf_event *event, int state)
{
        struct bts_ctx *bts = this_cpu_ptr(&bts_ctx);

        /* ACTIVE -> INACTIVE(PMI)/STOPPED(->stop()) */
        WRITE_ONCE(bts->state, state);

        /*
         * No extra synchronization is mandated by the documentation to have
         * BTS data stores globally visible.
         */
        intel_pmu_disable_bts();
}

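/*
 * pmu::stop: disable tracing, fold in any outstanding data, close the AUX
 * transaction when PERF_EF_UPDATE is set, and restore the DS window saved
 * by bts_event_start().
 */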
static void bts_event_stop(struct perf_event *event, int flags)
{
        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
        struct bts_ctx *bts = this_cpu_ptr(&bts_ctx);
        struct bts_buffer *buf = NULL;
        int state = READ_ONCE(bts->state);

        if (state == BTS_STATE_ACTIVE)
                __bts_event_stop(event, BTS_STATE_STOPPED);

        if (state != BTS_STATE_STOPPED)
                buf = perf_get_aux(&bts->handle);

        event->hw.state |= PERF_HES_STOPPED;

        if (flags & PERF_EF_UPDATE) {
                bts_update(bts);

                if (buf) {
                        if (buf->snapshot)
                                bts->handle.head =
                                        local_xchg(&buf->data_size,
                                                   buf->nr_pages << PAGE_SHIFT);
                        perf_aux_output_end(&bts->handle,
                                            local_xchg(&buf->data_size, 0));
                }

                cpuc->ds->bts_index = bts->ds_back.bts_buffer_base;
                cpuc->ds->bts_buffer_base = bts->ds_back.bts_buffer_base;
                cpuc->ds->bts_absolute_maximum = bts->ds_back.bts_absolute_maximum;
                cpuc->ds->bts_interrupt_threshold = bts->ds_back.bts_interrupt_threshold;
        }
}

void intel_bts_enable_local(void)
{
        struct bts_ctx *bts = this_cpu_ptr(&bts_ctx);
        int state = READ_ONCE(bts->state);

        /*
         * Here we transition from INACTIVE to ACTIVE;
         * if we instead are STOPPED from the interrupt handler,
         * stay that way. Can't be ACTIVE here though.
         */
        if (WARN_ON_ONCE(state == BTS_STATE_ACTIVE))
                return;

        if (state == BTS_STATE_STOPPED)
                return;

        if (bts->handle.event)
                __bts_event_start(bts->handle.event);
}

void intel_bts_disable_local(void)
{
        struct bts_ctx *bts = this_cpu_ptr(&bts_ctx);

        /*
         * Here we transition from ACTIVE to INACTIVE;
         * do nothing for STOPPED or INACTIVE.
         */
        if (READ_ONCE(bts->state) != BTS_STATE_ACTIVE)
                return;

        if (bts->handle.event)
                __bts_event_stop(bts->handle.event, BTS_STATE_INACTIVE);
}

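/*
 * Set up the next output window: compute the record-aligned space left in
 * the current chunk, hop to the next chunk (zero-padding the tail) when
 * the current one is nearly full, and clamp the window so we never run far
 * past the consumer's wakeup watermark. Returns -ENOSPC if no usable space
 * is left.
 */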
static int
bts_buffer_reset(struct bts_buffer *buf, struct perf_output_handle *handle)
{
        unsigned long head, space, next_space, pad, gap, skip, wakeup;
        unsigned int next_buf;
        struct bts_phys *phys, *next_phys;
        int ret;

        if (buf->snapshot)
                return 0;

        head = handle->head & ((buf->nr_pages << PAGE_SHIFT) - 1);

        phys = &buf->buf[buf->cur_buf];
        space = phys->offset + phys->displacement + phys->size - head;
        pad = space;
        if (space > handle->size) {
                space = handle->size;
                space -= space % BTS_RECORD_SIZE;
        }
        if (space <= BTS_SAFETY_MARGIN) {
                /* See if next phys buffer has more space */
                next_buf = buf->cur_buf + 1;
                if (next_buf >= buf->nr_bufs)
                        next_buf = 0;
                next_phys = &buf->buf[next_buf];
                gap = buf_size(phys->page) - phys->displacement - phys->size +
                      next_phys->displacement;
                skip = pad + gap;
                if (handle->size >= skip) {
                        next_space = next_phys->size;
                        if (next_space + skip > handle->size) {
                                next_space = handle->size - skip;
                                next_space -= next_space % BTS_RECORD_SIZE;
                        }
                        if (next_space > space || !space) {
                                if (pad)
                                        bts_buffer_pad_out(phys, head);
                                ret = perf_aux_output_skip(handle, skip);
                                if (ret)
                                        return ret;
                                /* Advance to next phys buffer */
                                phys = next_phys;
                                space = next_space;
                                head = phys->offset + phys->displacement;
                                /*
                                 * After this, cur_buf and head won't match ds
                                 * anymore, so we must not be racing with
                                 * bts_update().
                                 */
                                buf->cur_buf = next_buf;
                                local_set(&buf->head, head);
                        }
                }
        }

        /* Don't go far beyond wakeup watermark */
        wakeup = BTS_SAFETY_MARGIN + BTS_RECORD_SIZE + handle->wakeup -
                 handle->head;
        if (space > wakeup) {
                space = wakeup;
                space -= space % BTS_RECORD_SIZE;
        }

        buf->end = head + space;

        /*
         * If we have no space, the lost notification would have been sent when
         * we hit absolute_maximum - see bts_update()
         */
        if (!space)
                return -ENOSPC;

        return 0;
}

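/*
 * Called from the PMI handler; the NMI is considered ours when the DS
 * write pointer has crossed the interrupt threshold. Closes the current
 * AUX transaction, publishes the data and opens a fresh transaction; if
 * that fails, the context drops to STOPPED.
 */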
int intel_bts_interrupt(void)
{
        struct debug_store *ds = this_cpu_ptr(&cpu_hw_events)->ds;
        struct bts_ctx *bts = this_cpu_ptr(&bts_ctx);
        struct perf_event *event = bts->handle.event;
        struct bts_buffer *buf;
        s64 old_head;
        int err = -ENOSPC, handled = 0;

        /*
         * The only surefire way of knowing if this NMI is ours is by checking
         * the write ptr against the PMI threshold.
         */
        if (ds && (ds->bts_index >= ds->bts_interrupt_threshold))
                handled = 1;

        /*
         * this is wrapped in intel_bts_enable_local/intel_bts_disable_local,
         * so we can only be INACTIVE or STOPPED
         */
        if (READ_ONCE(bts->state) == BTS_STATE_STOPPED)
                return handled;

        buf = perf_get_aux(&bts->handle);
        if (!buf)
                return handled;

        /*
         * Skip snapshot counters: they don't use the interrupt, but
         * there's no other way of telling, because the pointer will
         * keep moving
         */
        if (buf->snapshot)
                return 0;

        old_head = local_read(&buf->head);
        bts_update(bts);

        /* no new data */
        if (old_head == local_read(&buf->head))
                return handled;

        perf_aux_output_end(&bts->handle, local_xchg(&buf->data_size, 0));

        buf = perf_aux_output_begin(&bts->handle, event);
        if (buf)
                err = bts_buffer_reset(buf, &bts->handle);

        if (err) {
                WRITE_ONCE(bts->state, BTS_STATE_STOPPED);

                if (buf) {
                        /*
                         * BTS_STATE_STOPPED should be visible before
                         * cleared handle::event
                         */
                        barrier();
                        perf_aux_output_end(&bts->handle, 0);
                }
        }

        return 1;
}

static void bts_event_del(struct perf_event *event, int mode)
{
        bts_event_stop(event, PERF_EF_UPDATE);
}

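/*
 * pmu::add: BTS is exclusive, so refuse if the fixed BTS counter slot is
 * already active or another event owns the AUX handle; start right away
 * when PERF_EF_START is set.
 */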
static int bts_event_add(struct perf_event *event, int mode)
{
        struct bts_ctx *bts = this_cpu_ptr(&bts_ctx);
        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
        struct hw_perf_event *hwc = &event->hw;

        event->hw.state = PERF_HES_STOPPED;

        if (test_bit(INTEL_PMC_IDX_FIXED_BTS, cpuc->active_mask))
                return -EBUSY;

        if (bts->handle.event)
                return -EBUSY;

        if (mode & PERF_EF_START) {
                bts_event_start(event, 0);
                if (hwc->state & PERF_HES_STOPPED)
                        return -EINVAL;
        }

        return 0;
}

static void bts_event_destroy(struct perf_event *event)
{
        x86_release_hardware();
        x86_del_exclusive(x86_lbr_exclusive_bts);
}

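/*
 * pmu::event_init: BTS is mutually exclusive with LBR users and needs the
 * PMC hardware reserved; both are claimed here and released again through
 * event->destroy.
 */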
static int bts_event_init(struct perf_event *event)
{
        int ret;

        if (event->attr.type != bts_pmu.type)
                return -ENOENT;

        /*
         * BTS leaks kernel addresses even when CPL0 tracing is
         * disabled, so disallow intel_bts driver for unprivileged
         * users on paranoid systems since it provides trace data
         * to the user in a zero-copy fashion.
         *
         * Note that the default paranoia setting permits unprivileged
         * users to profile the kernel.
         */
        if (event->attr.exclude_kernel && perf_paranoid_kernel() &&
            !capable(CAP_SYS_ADMIN))
                return -EACCES;

        if (x86_add_exclusive(x86_lbr_exclusive_bts))
                return -EBUSY;

        ret = x86_reserve_hardware();
        if (ret) {
                x86_del_exclusive(x86_lbr_exclusive_bts);
                return ret;
        }

        event->destroy = bts_event_destroy;

        return 0;
}

static void bts_event_read(struct perf_event *event)
{
}

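/*
 * Register the "intel_bts" PMU. Once registered it appears under
 * /sys/bus/event_source/devices/intel_bts and can be driven from
 * userspace, e.g. (assuming a perf tool built with AUX area support):
 *
 *      perf record -e intel_bts// -- <workload>
 */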
static __init int bts_init(void)
{
        if (!boot_cpu_has(X86_FEATURE_DTES64) || !x86_pmu.bts)
                return -ENODEV;

        if (boot_cpu_has(X86_FEATURE_PTI)) {
                /*
                 * BTS hardware writes through a virtual memory map; we must
                 * either use the kernel physical map or the user mapping of
                 * the AUX buffer.
                 *
                 * However, since this driver supports per-CPU and per-task
                 * inherit, we cannot use the user mapping: it will not be
                 * available if we're not running the owning process.
                 *
                 * With PTI we can't use the kernel map either, because it's
                 * not there when we run userspace.
                 *
                 * For now, disable this driver when using PTI.
                 */
                return -ENODEV;
        }

        bts_pmu.capabilities    = PERF_PMU_CAP_AUX_NO_SG | PERF_PMU_CAP_ITRACE |
                                  PERF_PMU_CAP_EXCLUSIVE;
        bts_pmu.task_ctx_nr     = perf_sw_context;
        bts_pmu.event_init      = bts_event_init;
        bts_pmu.add             = bts_event_add;
        bts_pmu.del             = bts_event_del;
        bts_pmu.start           = bts_event_start;
        bts_pmu.stop            = bts_event_stop;
        bts_pmu.read            = bts_event_read;
        bts_pmu.setup_aux       = bts_buffer_setup_aux;
        bts_pmu.free_aux        = bts_buffer_free_aux;

        return perf_pmu_register(&bts_pmu, "intel_bts", -1);
}
arch_initcall(bts_init);
