arch/powerpc/perf/imc-pmu.c


DEFINITIONS

This source file includes the following definitions:
  1. imc_event_to_pmu
  2. imc_pmu_cpumask_get_attr
  3. device_str_attr_create
  4. imc_parse_event
  5. imc_free_events
  6. update_events_in_group
  7. get_nest_pmu_ref
  8. nest_change_cpu_context
  9. ppc_nest_imc_cpu_offline
  10. ppc_nest_imc_cpu_online
  11. nest_pmu_cpumask_init
  12. nest_imc_counters_release
  13. nest_imc_event_init
  14. core_imc_mem_init
  15. is_core_imc_mem_inited
  16. ppc_core_imc_cpu_online
  17. ppc_core_imc_cpu_offline
  18. core_imc_pmu_cpumask_init
  19. core_imc_counters_release
  20. core_imc_event_init
  21. thread_imc_mem_alloc
  22. ppc_thread_imc_cpu_online
  23. ppc_thread_imc_cpu_offline
  24. thread_imc_cpu_init
  25. thread_imc_event_init
  26. is_thread_imc_pmu
  27. get_event_base_addr
  28. thread_imc_pmu_start_txn
  29. thread_imc_pmu_cancel_txn
  30. thread_imc_pmu_commit_txn
  31. imc_read_counter
  32. imc_event_update
  33. imc_event_start
  34. imc_event_stop
  35. imc_event_add
  36. thread_imc_event_add
  37. thread_imc_event_del
  38. trace_imc_mem_alloc
  39. ppc_trace_imc_cpu_online
  40. ppc_trace_imc_cpu_offline
  41. trace_imc_cpu_init
  42. get_trace_imc_event_base_addr
  43. trace_imc_prepare_sample
  44. dump_trace_imc_data
  45. trace_imc_event_add
  46. trace_imc_event_read
  47. trace_imc_event_stop
  48. trace_imc_event_start
  49. trace_imc_event_del
  50. trace_imc_event_init
  51. update_pmu_ops
  52. init_nest_pmu_ref
  53. cleanup_all_core_imc_memory
  54. thread_imc_ldbar_disable
  55. thread_imc_disable
  56. cleanup_all_thread_imc_memory
  57. cleanup_all_trace_imc_memory
  58. imc_common_mem_free
  59. imc_common_cpuhp_mem_free
  60. unregister_thread_imc
  61. imc_mem_init
  62. init_imc_pmu

   1 // SPDX-License-Identifier: GPL-2.0-or-later
   2 /*
   3  * In-Memory Collection (IMC) Performance Monitor counter support.
   4  *
   5  * Copyright (C) 2017 Madhavan Srinivasan, IBM Corporation.
   6  *           (C) 2017 Anju T Sudhakar, IBM Corporation.
   7  *           (C) 2017 Hemant K Shaw, IBM Corporation.
   8  */
   9 #include <linux/perf_event.h>
  10 #include <linux/slab.h>
  11 #include <asm/opal.h>
  12 #include <asm/imc-pmu.h>
  13 #include <asm/cputhreads.h>
  14 #include <asm/smp.h>
  15 #include <linux/string.h>
  16 
  17 /* Nest IMC data structures and variables */
  18 
  19 /*
  20  * Used to avoid races in counting the nest-pmu units during hotplug
  21  * register and unregister
  22  */
  23 static DEFINE_MUTEX(nest_init_lock);
  24 static DEFINE_PER_CPU(struct imc_pmu_ref *, local_nest_imc_refc);
  25 static struct imc_pmu **per_nest_pmu_arr;
  26 static cpumask_t nest_imc_cpumask;
  27 static struct imc_pmu_ref *nest_imc_refc;
  28 static int nest_pmus;
  29 
  30 /* Core IMC data structures and variables */
  31 
  32 static cpumask_t core_imc_cpumask;
  33 static struct imc_pmu_ref *core_imc_refc;
  34 static struct imc_pmu *core_imc_pmu;
  35 
  36 /* Thread IMC data structures and variables */
  37 
  38 static DEFINE_PER_CPU(u64 *, thread_imc_mem);
  39 static struct imc_pmu *thread_imc_pmu;
  40 static int thread_imc_mem_size;
  41 
  42 /* Trace IMC data structures */
  43 static DEFINE_PER_CPU(u64 *, trace_imc_mem);
  44 static struct imc_pmu_ref *trace_imc_refc;
  45 static int trace_imc_mem_size;
  46 
  47 static struct imc_pmu *imc_event_to_pmu(struct perf_event *event)
  48 {
  49         return container_of(event->pmu, struct imc_pmu, pmu);
  50 }
  51 
  52 PMU_FORMAT_ATTR(event, "config:0-61");
  53 PMU_FORMAT_ATTR(offset, "config:0-31");
  54 PMU_FORMAT_ATTR(rvalue, "config:32");
  55 PMU_FORMAT_ATTR(mode, "config:33-40");
  56 static struct attribute *imc_format_attrs[] = {
  57         &format_attr_event.attr,
  58         &format_attr_offset.attr,
  59         &format_attr_rvalue.attr,
  60         &format_attr_mode.attr,
  61         NULL,
  62 };
  63 
  64 static struct attribute_group imc_format_group = {
  65         .name = "format",
  66         .attrs = imc_format_attrs,
  67 };
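
     /*
      * Illustrative sketch (not part of the driver): the format strings
      * above tell user space how to encode an event in
      * perf_event_attr.config.  A hypothetical nest event at counter
      * offset 0x118 could be opened with roughly:
      *
      *	struct perf_event_attr attr = {
      *		.type   = <PMU type read from the sysfs "type" file>,
      *		.config = 0x118,	// the "event"/"offset" field
      *	};
      *
      * The perf tool derives this automatically from the "format" and
      * "events" sysfs groups set up in this file.
      */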
  68 
  69 /* Format attribute for imc trace-mode */
  70 PMU_FORMAT_ATTR(cpmc_reserved, "config:0-19");
  71 PMU_FORMAT_ATTR(cpmc_event, "config:20-27");
  72 PMU_FORMAT_ATTR(cpmc_samplesel, "config:28-29");
  73 PMU_FORMAT_ATTR(cpmc_load, "config:30-61");
  74 static struct attribute *trace_imc_format_attrs[] = {
  75         &format_attr_event.attr,
  76         &format_attr_cpmc_reserved.attr,
  77         &format_attr_cpmc_event.attr,
  78         &format_attr_cpmc_samplesel.attr,
  79         &format_attr_cpmc_load.attr,
  80         NULL,
  81 };
  82 
  83 static struct attribute_group trace_imc_format_group = {
   84         .name = "format",
   85         .attrs = trace_imc_format_attrs,
  86 };
  87 
  88 /* Get the cpumask printed to a buffer "buf" */
  89 static ssize_t imc_pmu_cpumask_get_attr(struct device *dev,
  90                                         struct device_attribute *attr,
  91                                         char *buf)
  92 {
  93         struct pmu *pmu = dev_get_drvdata(dev);
  94         struct imc_pmu *imc_pmu = container_of(pmu, struct imc_pmu, pmu);
  95         cpumask_t *active_mask;
  96 
   97         switch (imc_pmu->domain) {
  98         case IMC_DOMAIN_NEST:
  99                 active_mask = &nest_imc_cpumask;
 100                 break;
 101         case IMC_DOMAIN_CORE:
 102                 active_mask = &core_imc_cpumask;
 103                 break;
 104         default:
 105                 return 0;
 106         }
 107 
 108         return cpumap_print_to_pagebuf(true, buf, active_mask);
 109 }
 110 
 111 static DEVICE_ATTR(cpumask, S_IRUGO, imc_pmu_cpumask_get_attr, NULL);
 112 
 113 static struct attribute *imc_pmu_cpumask_attrs[] = {
 114         &dev_attr_cpumask.attr,
 115         NULL,
 116 };
 117 
 118 static struct attribute_group imc_pmu_cpumask_attr_group = {
 119         .attrs = imc_pmu_cpumask_attrs,
 120 };
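
     /*
      * Note: since this attribute group has no .name, the "cpumask" file
      * appears directly under the PMU's sysfs directory, typically
      * /sys/devices/<pmu name>/cpumask (the exact PMU name depends on the
      * device tree).  Tools such as perf read it to learn which CPU is
      * currently designated for nest/core counter collection.
      */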
 121 
 122 /* device_str_attr_create : Populate event "name" and string "str" in attribute */
 123 static struct attribute *device_str_attr_create(const char *name, const char *str)
 124 {
 125         struct perf_pmu_events_attr *attr;
 126 
 127         attr = kzalloc(sizeof(*attr), GFP_KERNEL);
 128         if (!attr)
 129                 return NULL;
 130         sysfs_attr_init(&attr->attr.attr);
 131 
 132         attr->event_str = str;
 133         attr->attr.attr.name = name;
 134         attr->attr.attr.mode = 0444;
 135         attr->attr.show = perf_event_sysfs_show;
 136 
 137         return &attr->attr.attr;
 138 }
 139 
 140 static int imc_parse_event(struct device_node *np, const char *scale,
 141                                   const char *unit, const char *prefix,
 142                                   u32 base, struct imc_events *event)
 143 {
 144         const char *s;
 145         u32 reg;
 146 
 147         if (of_property_read_u32(np, "reg", &reg))
 148                 goto error;
 149         /* Add the base_reg value to the "reg" */
 150         event->value = base + reg;
 151 
 152         if (of_property_read_string(np, "event-name", &s))
 153                 goto error;
 154 
 155         event->name = kasprintf(GFP_KERNEL, "%s%s", prefix, s);
 156         if (!event->name)
 157                 goto error;
 158 
 159         if (of_property_read_string(np, "scale", &s))
 160                 s = scale;
 161 
 162         if (s) {
 163                 event->scale = kstrdup(s, GFP_KERNEL);
 164                 if (!event->scale)
 165                         goto error;
 166         }
 167 
 168         if (of_property_read_string(np, "unit", &s))
 169                 s = unit;
 170 
 171         if (s) {
 172                 event->unit = kstrdup(s, GFP_KERNEL);
 173                 if (!event->unit)
 174                         goto error;
 175         }
 176 
 177         return 0;
 178 error:
 179         kfree(event->unit);
 180         kfree(event->scale);
 181         kfree(event->name);
 182         return -EINVAL;
 183 }
 184 
 185 /*
  186  * imc_free_events: Function to clean up an events list that has
  187  *                  "nr_entries" entries.
 188  */
 189 static void imc_free_events(struct imc_events *events, int nr_entries)
 190 {
 191         int i;
 192 
 193         /* Nothing to clean, return */
 194         if (!events)
 195                 return;
 196         for (i = 0; i < nr_entries; i++) {
 197                 kfree(events[i].unit);
 198                 kfree(events[i].scale);
 199                 kfree(events[i].name);
 200         }
 201 
 202         kfree(events);
 203 }
 204 
 205 /*
 206  * update_events_in_group: Update the "events" information in an attr_group
 207  *                         and assign the attr_group to the pmu "pmu".
 208  */
 209 static int update_events_in_group(struct device_node *node, struct imc_pmu *pmu)
 210 {
 211         struct attribute_group *attr_group;
 212         struct attribute **attrs, *dev_str;
 213         struct device_node *np, *pmu_events;
 214         u32 handle, base_reg;
 215         int i = 0, j = 0, ct, ret;
 216         const char *prefix, *g_scale, *g_unit;
 217         const char *ev_val_str, *ev_scale_str, *ev_unit_str;
 218 
 219         if (!of_property_read_u32(node, "events", &handle))
 220                 pmu_events = of_find_node_by_phandle(handle);
 221         else
 222                 return 0;
 223 
 224         /* Did not find any node with a given phandle */
 225         if (!pmu_events)
 226                 return 0;
 227 
  228         /* Get a count of the number of child nodes */
 229         ct = of_get_child_count(pmu_events);
 230 
 231         /* Get the event prefix */
 232         if (of_property_read_string(node, "events-prefix", &prefix))
 233                 return 0;
 234 
 235         /* Get a global unit and scale data if available */
 236         if (of_property_read_string(node, "scale", &g_scale))
 237                 g_scale = NULL;
 238 
 239         if (of_property_read_string(node, "unit", &g_unit))
 240                 g_unit = NULL;
 241 
  242         /* The "reg" property gives the base offset of the counter data */
 243         of_property_read_u32(node, "reg", &base_reg);
 244 
 245         /* Allocate memory for the events */
 246         pmu->events = kcalloc(ct, sizeof(struct imc_events), GFP_KERNEL);
 247         if (!pmu->events)
 248                 return -ENOMEM;
 249 
 250         ct = 0;
 251         /* Parse the events and update the struct */
 252         for_each_child_of_node(pmu_events, np) {
 253                 ret = imc_parse_event(np, g_scale, g_unit, prefix, base_reg, &pmu->events[ct]);
 254                 if (!ret)
 255                         ct++;
 256         }
 257 
 258         /* Allocate memory for attribute group */
 259         attr_group = kzalloc(sizeof(*attr_group), GFP_KERNEL);
 260         if (!attr_group) {
 261                 imc_free_events(pmu->events, ct);
 262                 return -ENOMEM;
 263         }
 264 
 265         /*
  266          * Allocate memory for the attributes.
  267          * Since we have the count of events for this pmu, we also allocate
  268          * memory for the scale and unit attributes.
  269          * "ct" is the total number of event structs added from the events-parent
  270          * node, so allocate three times "ct" (for event, event.scale and
  271          * event.unit) plus one for the terminating NULL.
 272          */
 273         attrs = kcalloc(((ct * 3) + 1), sizeof(struct attribute *), GFP_KERNEL);
 274         if (!attrs) {
 275                 kfree(attr_group);
 276                 imc_free_events(pmu->events, ct);
 277                 return -ENOMEM;
 278         }
 279 
 280         attr_group->name = "events";
 281         attr_group->attrs = attrs;
 282         do {
 283                 ev_val_str = kasprintf(GFP_KERNEL, "event=0x%x", pmu->events[i].value);
 284                 dev_str = device_str_attr_create(pmu->events[i].name, ev_val_str);
 285                 if (!dev_str)
 286                         continue;
 287 
 288                 attrs[j++] = dev_str;
 289                 if (pmu->events[i].scale) {
 290                         ev_scale_str = kasprintf(GFP_KERNEL, "%s.scale", pmu->events[i].name);
 291                         dev_str = device_str_attr_create(ev_scale_str, pmu->events[i].scale);
 292                         if (!dev_str)
 293                                 continue;
 294 
 295                         attrs[j++] = dev_str;
 296                 }
 297 
 298                 if (pmu->events[i].unit) {
 299                         ev_unit_str = kasprintf(GFP_KERNEL, "%s.unit", pmu->events[i].name);
 300                         dev_str = device_str_attr_create(ev_unit_str, pmu->events[i].unit);
 301                         if (!dev_str)
 302                                 continue;
 303 
 304                         attrs[j++] = dev_str;
 305                 }
 306         } while (++i < ct);
 307 
 308         /* Save the event attribute */
 309         pmu->attr_groups[IMC_EVENT_ATTR] = attr_group;
 310 
 311         return 0;
 312 }
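
     /*
      * The resulting sysfs layout (illustrative; the actual event names come
      * from the device tree) is the standard perf "events" group:
      *
      *	/sys/devices/<pmu>/events/<event>        -> "event=0x<offset>"
      *	/sys/devices/<pmu>/events/<event>.scale  -> optional scale string
      *	/sys/devices/<pmu>/events/<event>.unit   -> optional unit string
      *
      * which lets the perf tool resolve symbolic event names to config
      * values without architecture-specific knowledge.
      */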
 313 
 314 /* get_nest_pmu_ref: Return the imc_pmu_ref struct for the given node */
 315 static struct imc_pmu_ref *get_nest_pmu_ref(int cpu)
 316 {
 317         return per_cpu(local_nest_imc_refc, cpu);
 318 }
 319 
 320 static void nest_change_cpu_context(int old_cpu, int new_cpu)
 321 {
 322         struct imc_pmu **pn = per_nest_pmu_arr;
 323 
 324         if (old_cpu < 0 || new_cpu < 0)
 325                 return;
 326 
 327         while (*pn) {
 328                 perf_pmu_migrate_context(&(*pn)->pmu, old_cpu, new_cpu);
 329                 pn++;
 330         }
 331 }
 332 
 333 static int ppc_nest_imc_cpu_offline(unsigned int cpu)
 334 {
 335         int nid, target = -1;
 336         const struct cpumask *l_cpumask;
 337         struct imc_pmu_ref *ref;
 338 
 339         /*
  340          * Check whether this cpu is in the designated list. Don't
  341          * bother if it is not one of them.
 342          */
 343         if (!cpumask_test_and_clear_cpu(cpu, &nest_imc_cpumask))
 344                 return 0;
 345 
 346         /*
 347          * Check whether nest_imc is registered. We could end up here if the
  348          * cpuhotplug callback registration fails, i.e., the callback invokes the
 349          * offline path for all successfully registered nodes. At this stage,
 350          * nest_imc pmu will not be registered and we should return here.
 351          *
 352          * We return with a zero since this is not an offline failure. And
 353          * cpuhp_setup_state() returns the actual failure reason to the caller,
 354          * which in turn will call the cleanup routine.
 355          */
 356         if (!nest_pmus)
 357                 return 0;
 358 
 359         /*
  360          * Now that this cpu is one of the designated cpus, find the
  361          * next cpu that is a) online and b) in the same chip.
 362          */
 363         nid = cpu_to_node(cpu);
 364         l_cpumask = cpumask_of_node(nid);
 365         target = cpumask_last(l_cpumask);
 366 
 367         /*
 368          * If this(target) is the last cpu in the cpumask for this chip,
 369          * check for any possible online cpu in the chip.
 370          */
 371         if (unlikely(target == cpu))
 372                 target = cpumask_any_but(l_cpumask, cpu);
 373 
 374         /*
 375          * Update the cpumask with the target cpu and
 376          * migrate the context if needed
 377          */
 378         if (target >= 0 && target < nr_cpu_ids) {
 379                 cpumask_set_cpu(target, &nest_imc_cpumask);
 380                 nest_change_cpu_context(cpu, target);
 381         } else {
 382                 opal_imc_counters_stop(OPAL_IMC_COUNTERS_NEST,
 383                                        get_hard_smp_processor_id(cpu));
 384                 /*
  385                  * If this was the last cpu in this chip, skip taking the reference
  386                  * count mutex and simply zero the reference count for this chip.
 387                  */
 388                 ref = get_nest_pmu_ref(cpu);
 389                 if (!ref)
 390                         return -EINVAL;
 391 
 392                 ref->refc = 0;
 393         }
 394         return 0;
 395 }
 396 
 397 static int ppc_nest_imc_cpu_online(unsigned int cpu)
 398 {
 399         const struct cpumask *l_cpumask;
 400         static struct cpumask tmp_mask;
 401         int res;
 402 
 403         /* Get the cpumask of this node */
 404         l_cpumask = cpumask_of_node(cpu_to_node(cpu));
 405 
 406         /*
 407          * If this is not the first online CPU on this node, then
 408          * just return.
 409          */
 410         if (cpumask_and(&tmp_mask, l_cpumask, &nest_imc_cpumask))
 411                 return 0;
 412 
 413         /*
 414          * If this is the first online cpu on this node
 415          * disable the nest counters by making an OPAL call.
 416          */
 417         res = opal_imc_counters_stop(OPAL_IMC_COUNTERS_NEST,
 418                                      get_hard_smp_processor_id(cpu));
 419         if (res)
 420                 return res;
 421 
 422         /* Make this CPU the designated target for counter collection */
 423         cpumask_set_cpu(cpu, &nest_imc_cpumask);
 424         return 0;
 425 }
 426 
 427 static int nest_pmu_cpumask_init(void)
 428 {
 429         return cpuhp_setup_state(CPUHP_AP_PERF_POWERPC_NEST_IMC_ONLINE,
 430                                  "perf/powerpc/imc:online",
 431                                  ppc_nest_imc_cpu_online,
 432                                  ppc_nest_imc_cpu_offline);
 433 }
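
     /*
      * Note: cpuhp_setup_state() immediately invokes ppc_nest_imc_cpu_online()
      * on every CPU that is already online, and on a partial failure it rolls
      * back by invoking ppc_nest_imc_cpu_offline() on the CPUs it had already
      * set up.  This is why the offline callback above tolerates being run
      * before the nest PMUs are registered (the !nest_pmus check).
      */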
 434 
 435 static void nest_imc_counters_release(struct perf_event *event)
 436 {
 437         int rc, node_id;
 438         struct imc_pmu_ref *ref;
 439 
 440         if (event->cpu < 0)
 441                 return;
 442 
 443         node_id = cpu_to_node(event->cpu);
 444 
 445         /*
 446          * See if we need to disable the nest PMU.
 447          * If no events are currently in use, then we have to take a
  448          * mutex to ensure that we don't race with another task
  449          * enabling or disabling the nest counters.
 450          */
 451         ref = get_nest_pmu_ref(event->cpu);
 452         if (!ref)
 453                 return;
 454 
 455         /* Take the mutex lock for this node and then decrement the reference count */
 456         mutex_lock(&ref->lock);
 457         if (ref->refc == 0) {
 458                 /*
  459                  * This happens when a perf session is started and all cpus
  460                  * in a given node are subsequently offlined.
  461                  *
  462                  * In the cpuhotplug offline path, ppc_nest_imc_cpu_offline()
  463                  * sets ref->refc to zero if the cpu going offline is the
  464                  * last cpu in the node, and makes an OPAL call to disable
  465                  * the engine on that node.
 466                  *
 467                  */
 468                 mutex_unlock(&ref->lock);
 469                 return;
 470         }
 471         ref->refc--;
 472         if (ref->refc == 0) {
 473                 rc = opal_imc_counters_stop(OPAL_IMC_COUNTERS_NEST,
 474                                             get_hard_smp_processor_id(event->cpu));
 475                 if (rc) {
 476                         mutex_unlock(&ref->lock);
  477                         pr_err("nest-imc: Unable to stop the counters for node %d\n", node_id);
 478                         return;
 479                 }
 480         } else if (ref->refc < 0) {
 481                 WARN(1, "nest-imc: Invalid event reference count\n");
 482                 ref->refc = 0;
 483         }
 484         mutex_unlock(&ref->lock);
 485 }
 486 
 487 static int nest_imc_event_init(struct perf_event *event)
 488 {
 489         int chip_id, rc, node_id;
 490         u32 l_config, config = event->attr.config;
 491         struct imc_mem_info *pcni;
 492         struct imc_pmu *pmu;
 493         struct imc_pmu_ref *ref;
 494         bool flag = false;
 495 
 496         if (event->attr.type != event->pmu->type)
 497                 return -ENOENT;
 498 
 499         /* Sampling not supported */
 500         if (event->hw.sample_period)
 501                 return -EINVAL;
 502 
 503         if (event->cpu < 0)
 504                 return -EINVAL;
 505 
 506         pmu = imc_event_to_pmu(event);
 507 
 508         /* Sanity check for config (event offset) */
 509         if ((config & IMC_EVENT_OFFSET_MASK) > pmu->counter_mem_size)
 510                 return -EINVAL;
 511 
 512         /*
  513          * Nest HW counter memory resides in a per-chip reserved memory region (HOMER).
  514          * Get the base memory address for this cpu.
 515          */
 516         chip_id = cpu_to_chip_id(event->cpu);
 517 
 518         /* Return, if chip_id is not valid */
 519         if (chip_id < 0)
 520                 return -ENODEV;
 521 
 522         pcni = pmu->mem_info;
 523         do {
 524                 if (pcni->id == chip_id) {
 525                         flag = true;
 526                         break;
 527                 }
 528                 pcni++;
 529         } while (pcni->vbase != 0);
 530 
 531         if (!flag)
 532                 return -ENODEV;
 533 
 534         /*
 535          * Add the event offset to the base address.
 536          */
 537         l_config = config & IMC_EVENT_OFFSET_MASK;
 538         event->hw.event_base = (u64)pcni->vbase + l_config;
 539         node_id = cpu_to_node(event->cpu);
 540 
 541         /*
 542          * Get the imc_pmu_ref struct for this node.
 543          * Take the mutex lock and then increment the count of nest pmu events
 544          * inited.
 545          */
 546         ref = get_nest_pmu_ref(event->cpu);
 547         if (!ref)
 548                 return -EINVAL;
 549 
 550         mutex_lock(&ref->lock);
 551         if (ref->refc == 0) {
 552                 rc = opal_imc_counters_start(OPAL_IMC_COUNTERS_NEST,
 553                                              get_hard_smp_processor_id(event->cpu));
 554                 if (rc) {
 555                         mutex_unlock(&ref->lock);
 556                         pr_err("nest-imc: Unable to start the counters for node %d\n",
 557                                                                         node_id);
 558                         return rc;
 559                 }
 560         }
 561         ++ref->refc;
 562         mutex_unlock(&ref->lock);
 563 
 564         event->destroy = nest_imc_counters_release;
 565         return 0;
 566 }
 567 
 568 /*
 569  * core_imc_mem_init : Initializes memory for the current core.
 570  *
  571  * Uses alloc_pages_node() and passes the returned address to an OPAL
  572  * call that configures the PDBAR. The address is converted to a physical
  573  * address before the OPAL call is made. This is the base address at
  574  * which the core imc counters are populated.
 575  */
 576 static int core_imc_mem_init(int cpu, int size)
 577 {
 578         int nid, rc = 0, core_id = (cpu / threads_per_core);
 579         struct imc_mem_info *mem_info;
 580         struct page *page;
 581 
 582         /*
  583          * alloc_pages_node() will allocate memory for the core in the
 584          * local node only.
 585          */
 586         nid = cpu_to_node(cpu);
 587         mem_info = &core_imc_pmu->mem_info[core_id];
 588         mem_info->id = core_id;
 589 
 590         /* We need only vbase for core counters */
 591         page = alloc_pages_node(nid,
 592                                 GFP_KERNEL | __GFP_ZERO | __GFP_THISNODE |
 593                                 __GFP_NOWARN, get_order(size));
 594         if (!page)
 595                 return -ENOMEM;
 596         mem_info->vbase = page_address(page);
 597 
 598         /* Init the mutex */
 599         core_imc_refc[core_id].id = core_id;
 600         mutex_init(&core_imc_refc[core_id].lock);
 601 
 602         rc = opal_imc_counters_init(OPAL_IMC_COUNTERS_CORE,
 603                                 __pa((void *)mem_info->vbase),
 604                                 get_hard_smp_processor_id(cpu));
 605         if (rc) {
 606                 free_pages((u64)mem_info->vbase, get_order(size));
 607                 mem_info->vbase = NULL;
 608         }
 609 
 610         return rc;
 611 }
 612 
 613 static bool is_core_imc_mem_inited(int cpu)
 614 {
 615         struct imc_mem_info *mem_info;
 616         int core_id = (cpu / threads_per_core);
 617 
 618         mem_info = &core_imc_pmu->mem_info[core_id];
 619         if (!mem_info->vbase)
 620                 return false;
 621 
 622         return true;
 623 }
 624 
 625 static int ppc_core_imc_cpu_online(unsigned int cpu)
 626 {
 627         const struct cpumask *l_cpumask;
 628         static struct cpumask tmp_mask;
 629         int ret = 0;
 630 
 631         /* Get the cpumask for this core */
 632         l_cpumask = cpu_sibling_mask(cpu);
 633 
  634         /* If a cpu for this core is already set, don't do anything */
 635         if (cpumask_and(&tmp_mask, l_cpumask, &core_imc_cpumask))
 636                 return 0;
 637 
 638         if (!is_core_imc_mem_inited(cpu)) {
 639                 ret = core_imc_mem_init(cpu, core_imc_pmu->counter_mem_size);
 640                 if (ret) {
 641                         pr_info("core_imc memory allocation for cpu %d failed\n", cpu);
 642                         return ret;
 643                 }
 644         }
 645 
 646         /* set the cpu in the mask */
 647         cpumask_set_cpu(cpu, &core_imc_cpumask);
 648         return 0;
 649 }
 650 
 651 static int ppc_core_imc_cpu_offline(unsigned int cpu)
 652 {
 653         unsigned int core_id;
 654         int ncpu;
 655         struct imc_pmu_ref *ref;
 656 
 657         /*
  658          * Clear this cpu from the mask. If it is not present in the mask,
  659          * don't bother doing anything.
 660          */
 661         if (!cpumask_test_and_clear_cpu(cpu, &core_imc_cpumask))
 662                 return 0;
 663 
 664         /*
 665          * Check whether core_imc is registered. We could end up here
 666          * if the cpuhotplug callback registration fails. i.e, callback
  667          * invokes the offline path for all successfully registered cpus.
 668          * At this stage, core_imc pmu will not be registered and we
 669          * should return here.
 670          *
 671          * We return with a zero since this is not an offline failure.
 672          * And cpuhp_setup_state() returns the actual failure reason
  673          * to the caller, which in turn will call the cleanup routine.
 674          */
 675         if (!core_imc_pmu->pmu.event_init)
 676                 return 0;
 677 
 678         /* Find any online cpu in that core except the current "cpu" */
 679         ncpu = cpumask_last(cpu_sibling_mask(cpu));
 680 
 681         if (unlikely(ncpu == cpu))
 682                 ncpu = cpumask_any_but(cpu_sibling_mask(cpu), cpu);
 683 
 684         if (ncpu >= 0 && ncpu < nr_cpu_ids) {
 685                 cpumask_set_cpu(ncpu, &core_imc_cpumask);
 686                 perf_pmu_migrate_context(&core_imc_pmu->pmu, cpu, ncpu);
 687         } else {
 688                 /*
  689                  * If this was the last cpu in this core, skip taking the reference
  690                  * count mutex for this core and directly zero "refc" for
 691                  * this core.
 692                  */
 693                 opal_imc_counters_stop(OPAL_IMC_COUNTERS_CORE,
 694                                        get_hard_smp_processor_id(cpu));
 695                 core_id = cpu / threads_per_core;
 696                 ref = &core_imc_refc[core_id];
 697                 if (!ref)
 698                         return -EINVAL;
 699 
 700                 ref->refc = 0;
 701         }
 702         return 0;
 703 }
 704 
 705 static int core_imc_pmu_cpumask_init(void)
 706 {
 707         return cpuhp_setup_state(CPUHP_AP_PERF_POWERPC_CORE_IMC_ONLINE,
 708                                  "perf/powerpc/imc_core:online",
 709                                  ppc_core_imc_cpu_online,
 710                                  ppc_core_imc_cpu_offline);
 711 }
 712 
 713 static void core_imc_counters_release(struct perf_event *event)
 714 {
 715         int rc, core_id;
 716         struct imc_pmu_ref *ref;
 717 
 718         if (event->cpu < 0)
 719                 return;
 720         /*
 721          * See if we need to disable the IMC PMU.
 722          * If no events are currently in use, then we have to take a
  723          * mutex to ensure that we don't race with another task
  724          * enabling or disabling the core counters.
 725          */
 726         core_id = event->cpu / threads_per_core;
 727 
  728         /* Take the mutex lock and decrement the reference count for this core */
 729         ref = &core_imc_refc[core_id];
 730         if (!ref)
 731                 return;
 732 
 733         mutex_lock(&ref->lock);
 734         if (ref->refc == 0) {
 735                 /*
  736                  * This happens when a perf session is started and all cpus
  737                  * in a given core are subsequently offlined.
  738                  *
  739                  * In the cpuhotplug offline path, ppc_core_imc_cpu_offline()
  740                  * sets ref->refc to zero if the cpu going offline is the
  741                  * last cpu in the core, and makes an OPAL call to disable
  742                  * the engine on that core.
 743                  *
 744                  */
 745                 mutex_unlock(&ref->lock);
 746                 return;
 747         }
 748         ref->refc--;
 749         if (ref->refc == 0) {
 750                 rc = opal_imc_counters_stop(OPAL_IMC_COUNTERS_CORE,
 751                                             get_hard_smp_processor_id(event->cpu));
 752                 if (rc) {
 753                         mutex_unlock(&ref->lock);
 754                         pr_err("IMC: Unable to stop the counters for core %d\n", core_id);
 755                         return;
 756                 }
 757         } else if (ref->refc < 0) {
 758                 WARN(1, "core-imc: Invalid event reference count\n");
 759                 ref->refc = 0;
 760         }
 761         mutex_unlock(&ref->lock);
 762 }
 763 
 764 static int core_imc_event_init(struct perf_event *event)
 765 {
 766         int core_id, rc;
 767         u64 config = event->attr.config;
 768         struct imc_mem_info *pcmi;
 769         struct imc_pmu *pmu;
 770         struct imc_pmu_ref *ref;
 771 
 772         if (event->attr.type != event->pmu->type)
 773                 return -ENOENT;
 774 
 775         /* Sampling not supported */
 776         if (event->hw.sample_period)
 777                 return -EINVAL;
 778 
 779         if (event->cpu < 0)
 780                 return -EINVAL;
 781 
 782         event->hw.idx = -1;
 783         pmu = imc_event_to_pmu(event);
 784 
 785         /* Sanity check for config (event offset) */
 786         if (((config & IMC_EVENT_OFFSET_MASK) > pmu->counter_mem_size))
 787                 return -EINVAL;
 788 
 789         if (!is_core_imc_mem_inited(event->cpu))
 790                 return -ENODEV;
 791 
 792         core_id = event->cpu / threads_per_core;
 793         pcmi = &core_imc_pmu->mem_info[core_id];
 794         if ((!pcmi->vbase))
 795                 return -ENODEV;
 796 
 797         /* Get the core_imc mutex for this core */
 798         ref = &core_imc_refc[core_id];
 799         if (!ref)
 800                 return -EINVAL;
 801 
 802         /*
  803          * Core pmu units are enabled only when they are used.
  804          * See if this is triggered for the first time.
  805          * If yes, take the mutex lock and enable the core counters.
  806          * If not, just increment the count in the core_imc_refc struct.
 807          */
 808         mutex_lock(&ref->lock);
 809         if (ref->refc == 0) {
 810                 rc = opal_imc_counters_start(OPAL_IMC_COUNTERS_CORE,
 811                                              get_hard_smp_processor_id(event->cpu));
 812                 if (rc) {
 813                         mutex_unlock(&ref->lock);
 814                         pr_err("core-imc: Unable to start the counters for core %d\n",
 815                                                                         core_id);
 816                         return rc;
 817                 }
 818         }
 819         ++ref->refc;
 820         mutex_unlock(&ref->lock);
 821 
 822         event->hw.event_base = (u64)pcmi->vbase + (config & IMC_EVENT_OFFSET_MASK);
 823         event->destroy = core_imc_counters_release;
 824         return 0;
 825 }
 826 
 827 /*
  828  * Allocates a page of memory for each of the online cpus, and loads
 829  * LDBAR with 0.
 830  * The physical base address of the page allocated for a cpu will be
 831  * written to the LDBAR for that cpu, when the thread-imc event
 832  * is added.
 833  *
 834  * LDBAR Register Layout:
 835  *
 836  *  0          4         8         12        16        20        24        28
 837  * | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - |
 838  *   | |       [   ]    [                   Counter Address [8:50]
 839  *   | * Mode    |
 840  *   |           * PB Scope
 841  *   * Enable/Disable
 842  *
 843  *  32        36        40        44        48        52        56        60
 844  * | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - |
 845  *           Counter Address [8:50]              ]
 846  *
 847  */
 848 static int thread_imc_mem_alloc(int cpu_id, int size)
 849 {
 850         u64 *local_mem = per_cpu(thread_imc_mem, cpu_id);
 851         int nid = cpu_to_node(cpu_id);
 852 
 853         if (!local_mem) {
 854                 struct page *page;
 855                 /*
  856                  * This case could happen only once at start, since we don't
  857                  * free the memory in the cpu offline path.
 858                  */
 859                 page = alloc_pages_node(nid,
 860                                   GFP_KERNEL | __GFP_ZERO | __GFP_THISNODE |
 861                                   __GFP_NOWARN, get_order(size));
 862                 if (!page)
 863                         return -ENOMEM;
 864                 local_mem = page_address(page);
 865 
 866                 per_cpu(thread_imc_mem, cpu_id) = local_mem;
 867         }
 868 
 869         mtspr(SPRN_LDBAR, 0);
 870         return 0;
 871 }
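
     /*
      * For reference, a minimal sketch of how the page allocated above is
      * later wired into the LDBAR register by thread_imc_event_add()
      * (the "cpu" variable is a stand-in for smp_processor_id()):
      *
      *	u64 ldbar_value = ((u64)per_cpu(thread_imc_mem, cpu) &
      *			   THREAD_IMC_LDBAR_MASK) | THREAD_IMC_ENABLE;
      *	mtspr(SPRN_LDBAR, ldbar_value);
      *
      * i.e. the enable bit is set and the counter address field points at
      * the per-cpu buffer, matching the register layout described above.
      */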
 872 
 873 static int ppc_thread_imc_cpu_online(unsigned int cpu)
 874 {
 875         return thread_imc_mem_alloc(cpu, thread_imc_mem_size);
 876 }
 877 
 878 static int ppc_thread_imc_cpu_offline(unsigned int cpu)
 879 {
 880         mtspr(SPRN_LDBAR, 0);
 881         return 0;
 882 }
 883 
 884 static int thread_imc_cpu_init(void)
 885 {
 886         return cpuhp_setup_state(CPUHP_AP_PERF_POWERPC_THREAD_IMC_ONLINE,
 887                           "perf/powerpc/imc_thread:online",
 888                           ppc_thread_imc_cpu_online,
 889                           ppc_thread_imc_cpu_offline);
 890 }
 891 
 892 static int thread_imc_event_init(struct perf_event *event)
 893 {
 894         u32 config = event->attr.config;
 895         struct task_struct *target;
 896         struct imc_pmu *pmu;
 897 
 898         if (event->attr.type != event->pmu->type)
 899                 return -ENOENT;
 900 
 901         if (!capable(CAP_SYS_ADMIN))
 902                 return -EACCES;
 903 
 904         /* Sampling not supported */
 905         if (event->hw.sample_period)
 906                 return -EINVAL;
 907 
 908         event->hw.idx = -1;
 909         pmu = imc_event_to_pmu(event);
 910 
 911         /* Sanity check for config offset */
 912         if (((config & IMC_EVENT_OFFSET_MASK) > pmu->counter_mem_size))
 913                 return -EINVAL;
 914 
 915         target = event->hw.target;
 916         if (!target)
 917                 return -EINVAL;
 918 
 919         event->pmu->task_ctx_nr = perf_sw_context;
 920         return 0;
 921 }
 922 
 923 static bool is_thread_imc_pmu(struct perf_event *event)
 924 {
 925         if (!strncmp(event->pmu->name, "thread_imc", strlen("thread_imc")))
 926                 return true;
 927 
 928         return false;
 929 }
 930 
  931 static u64 *get_event_base_addr(struct perf_event *event)
 932 {
 933         u64 addr;
 934 
 935         if (is_thread_imc_pmu(event)) {
 936                 addr = (u64)per_cpu(thread_imc_mem, smp_processor_id());
 937                 return (u64 *)(addr + (event->attr.config & IMC_EVENT_OFFSET_MASK));
 938         }
 939 
 940         return (u64 *)event->hw.event_base;
 941 }
 942 
 943 static void thread_imc_pmu_start_txn(struct pmu *pmu,
 944                                      unsigned int txn_flags)
 945 {
 946         if (txn_flags & ~PERF_PMU_TXN_ADD)
 947                 return;
 948         perf_pmu_disable(pmu);
 949 }
 950 
 951 static void thread_imc_pmu_cancel_txn(struct pmu *pmu)
 952 {
 953         perf_pmu_enable(pmu);
 954 }
 955 
 956 static int thread_imc_pmu_commit_txn(struct pmu *pmu)
 957 {
 958         perf_pmu_enable(pmu);
 959         return 0;
 960 }
 961 
 962 static u64 imc_read_counter(struct perf_event *event)
 963 {
 964         u64 *addr, data;
 965 
 966         /*
 967          * In-Memory Collection (IMC) counters are free flowing counters.
 968          * So we take a snapshot of the counter value on enable and save it
  969          * to calculate the delta at a later stage to present the event counter
 970          * value.
 971          */
 972         addr = get_event_base_addr(event);
 973         data = be64_to_cpu(READ_ONCE(*addr));
 974         local64_set(&event->hw.prev_count, data);
 975 
 976         return data;
 977 }
 978 
 979 static void imc_event_update(struct perf_event *event)
 980 {
 981         u64 counter_prev, counter_new, final_count;
 982 
 983         counter_prev = local64_read(&event->hw.prev_count);
 984         counter_new = imc_read_counter(event);
 985         final_count = counter_new - counter_prev;
 986 
 987         /* Update the delta to the event count */
 988         local64_add(final_count, &event->count);
 989 }
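
     /*
      * Worked example of the bookkeeping above: if hw.prev_count held 1000
      * from the last snapshot and the in-memory counter now reads 1250,
      * imc_event_update() adds the delta of 250 to event->count, while
      * imc_read_counter() records 1250 as the new prev_count.
      */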
 990 
 991 static void imc_event_start(struct perf_event *event, int flags)
 992 {
 993         /*
 994          * In Memory Counters are free flowing counters. HW or the microcode
  995          * keeps adding to the counter offset in memory. To get the event
  996          * counter value, we snapshot the value here and calculate the
  997          * delta at a later point.
 998          */
 999         imc_read_counter(event);
1000 }
1001 
1002 static void imc_event_stop(struct perf_event *event, int flags)
1003 {
1004         /*
1005          * Take a snapshot and calculate the delta and update
1006          * the event counter values.
1007          */
1008         imc_event_update(event);
1009 }
1010 
1011 static int imc_event_add(struct perf_event *event, int flags)
1012 {
1013         if (flags & PERF_EF_START)
1014                 imc_event_start(event, flags);
1015 
1016         return 0;
1017 }
1018 
1019 static int thread_imc_event_add(struct perf_event *event, int flags)
1020 {
1021         int core_id;
1022         struct imc_pmu_ref *ref;
1023         u64 ldbar_value, *local_mem = per_cpu(thread_imc_mem, smp_processor_id());
1024 
1025         if (flags & PERF_EF_START)
1026                 imc_event_start(event, flags);
1027 
1028         if (!is_core_imc_mem_inited(smp_processor_id()))
1029                 return -EINVAL;
1030 
1031         core_id = smp_processor_id() / threads_per_core;
1032         ldbar_value = ((u64)local_mem & THREAD_IMC_LDBAR_MASK) | THREAD_IMC_ENABLE;
1033         mtspr(SPRN_LDBAR, ldbar_value);
1034 
1035         /*
 1036          * imc pmus are enabled only when they are used.
 1037          * See if this is triggered for the first time.
 1038          * If yes, take the mutex lock and enable the counters.
 1039          * If not, just increment the count in the ref count struct.
1040          */
1041         ref = &core_imc_refc[core_id];
1042         if (!ref)
1043                 return -EINVAL;
1044 
1045         mutex_lock(&ref->lock);
1046         if (ref->refc == 0) {
1047                 if (opal_imc_counters_start(OPAL_IMC_COUNTERS_CORE,
1048                     get_hard_smp_processor_id(smp_processor_id()))) {
1049                         mutex_unlock(&ref->lock);
 1050                         pr_err("thread-imc: Unable to start the counter for core %d\n",
 1051                                core_id);
1052                         return -EINVAL;
1053                 }
1054         }
1055         ++ref->refc;
1056         mutex_unlock(&ref->lock);
1057         return 0;
1058 }
1059 
1060 static void thread_imc_event_del(struct perf_event *event, int flags)
1061 {
1062 
1063         int core_id;
1064         struct imc_pmu_ref *ref;
1065 
1066         mtspr(SPRN_LDBAR, 0);
1067 
1068         core_id = smp_processor_id() / threads_per_core;
1069         ref = &core_imc_refc[core_id];
1070 
1071         mutex_lock(&ref->lock);
1072         ref->refc--;
1073         if (ref->refc == 0) {
1074                 if (opal_imc_counters_stop(OPAL_IMC_COUNTERS_CORE,
1075                     get_hard_smp_processor_id(smp_processor_id()))) {
1076                         mutex_unlock(&ref->lock);
 1077                         pr_err("thread-imc: Unable to stop the counters for core %d\n",
 1078                                core_id);
1079                         return;
1080                 }
1081         } else if (ref->refc < 0) {
1082                 ref->refc = 0;
1083         }
1084         mutex_unlock(&ref->lock);
1085         /*
1086          * Take a snapshot and calculate the delta and update
1087          * the event counter values.
1088          */
1089         imc_event_update(event);
1090 }
1091 
1092 /*
1093  * Allocate a page of memory for each cpu, and load LDBAR with 0.
1094  */
1095 static int trace_imc_mem_alloc(int cpu_id, int size)
1096 {
1097         u64 *local_mem = per_cpu(trace_imc_mem, cpu_id);
1098         int phys_id = cpu_to_node(cpu_id), rc = 0;
1099         int core_id = (cpu_id / threads_per_core);
1100 
1101         if (!local_mem) {
1102                 struct page *page;
1103 
1104                 page = alloc_pages_node(phys_id,
1105                                 GFP_KERNEL | __GFP_ZERO | __GFP_THISNODE |
1106                                 __GFP_NOWARN, get_order(size));
1107                 if (!page)
1108                         return -ENOMEM;
1109                 local_mem = page_address(page);
1110                 per_cpu(trace_imc_mem, cpu_id) = local_mem;
1111 
1112                 /* Initialise the counters for trace mode */
1113                 rc = opal_imc_counters_init(OPAL_IMC_COUNTERS_TRACE, __pa((void *)local_mem),
1114                                             get_hard_smp_processor_id(cpu_id));
1115                 if (rc) {
 1116                         pr_info("IMC: opal init failed for trace imc\n");
1117                         return rc;
1118                 }
1119         }
1120 
1121         /* Init the mutex, if not already */
1122         trace_imc_refc[core_id].id = core_id;
1123         mutex_init(&trace_imc_refc[core_id].lock);
1124 
1125         mtspr(SPRN_LDBAR, 0);
1126         return 0;
1127 }
1128 
1129 static int ppc_trace_imc_cpu_online(unsigned int cpu)
1130 {
1131         return trace_imc_mem_alloc(cpu, trace_imc_mem_size);
1132 }
1133 
1134 static int ppc_trace_imc_cpu_offline(unsigned int cpu)
1135 {
1136         mtspr(SPRN_LDBAR, 0);
1137         return 0;
1138 }
1139 
1140 static int trace_imc_cpu_init(void)
1141 {
1142         return cpuhp_setup_state(CPUHP_AP_PERF_POWERPC_TRACE_IMC_ONLINE,
1143                           "perf/powerpc/imc_trace:online",
1144                           ppc_trace_imc_cpu_online,
1145                           ppc_trace_imc_cpu_offline);
1146 }
1147 
1148 static u64 get_trace_imc_event_base_addr(void)
1149 {
1150         return (u64)per_cpu(trace_imc_mem, smp_processor_id());
1151 }
1152 
1153 /*
1154  * Function to parse trace-imc data obtained
1155  * and to prepare the perf sample.
1156  */
1157 static int trace_imc_prepare_sample(struct trace_imc_data *mem,
1158                                     struct perf_sample_data *data,
1159                                     u64 *prev_tb,
1160                                     struct perf_event_header *header,
1161                                     struct perf_event *event)
1162 {
1163         /* Sanity checks for a valid record */
1164         if (be64_to_cpu(READ_ONCE(mem->tb1)) > *prev_tb)
1165                 *prev_tb = be64_to_cpu(READ_ONCE(mem->tb1));
1166         else
1167                 return -EINVAL;
1168 
1169         if ((be64_to_cpu(READ_ONCE(mem->tb1)) & IMC_TRACE_RECORD_TB1_MASK) !=
1170                          be64_to_cpu(READ_ONCE(mem->tb2)))
1171                 return -EINVAL;
1172 
1173         /* Prepare perf sample */
1174         data->ip =  be64_to_cpu(READ_ONCE(mem->ip));
1175         data->period = event->hw.last_period;
1176 
1177         header->type = PERF_RECORD_SAMPLE;
1178         header->size = sizeof(*header) + event->header_size;
1179         header->misc = 0;
1180 
1181         if (is_kernel_addr(data->ip))
1182                 header->misc |= PERF_RECORD_MISC_KERNEL;
1183         else
1184                 header->misc |= PERF_RECORD_MISC_USER;
1185 
1186         perf_event_header__init_id(header, data, event);
1187 
1188         return 0;
1189 }
1190 
1191 static void dump_trace_imc_data(struct perf_event *event)
1192 {
1193         struct trace_imc_data *mem;
1194         int i, ret;
1195         u64 prev_tb = 0;
1196 
1197         mem = (struct trace_imc_data *)get_trace_imc_event_base_addr();
1198         for (i = 0; i < (trace_imc_mem_size / sizeof(struct trace_imc_data));
1199                 i++, mem++) {
1200                 struct perf_sample_data data;
1201                 struct perf_event_header header;
1202 
1203                 ret = trace_imc_prepare_sample(mem, &data, &prev_tb, &header, event);
1204                 if (ret) /* Exit, if not a valid record */
1205                         break;
1206                 else {
1207                         /* If this is a valid record, create the sample */
1208                         struct perf_output_handle handle;
1209 
1210                         if (perf_output_begin(&handle, event, header.size))
1211                                 return;
1212 
1213                         perf_output_sample(&handle, &header, &data, event);
1214                         perf_output_end(&handle);
1215                 }
1216         }
1217 }
1218 
1219 static int trace_imc_event_add(struct perf_event *event, int flags)
1220 {
1221         int core_id = smp_processor_id() / threads_per_core;
1222         struct imc_pmu_ref *ref = NULL;
1223         u64 local_mem, ldbar_value;
1224 
1225         /* Set trace-imc bit in ldbar and load ldbar with per-thread memory address */
1226         local_mem = get_trace_imc_event_base_addr();
1227         ldbar_value = ((u64)local_mem & THREAD_IMC_LDBAR_MASK) | TRACE_IMC_ENABLE;
1228 
1229         if (core_imc_refc)
1230                 ref = &core_imc_refc[core_id];
1231         if (!ref) {
1232                 /* If core-imc is not enabled, use trace-imc reference count */
1233                 if (trace_imc_refc)
1234                         ref = &trace_imc_refc[core_id];
1235                 if (!ref)
1236                         return -EINVAL;
1237         }
1238         mtspr(SPRN_LDBAR, ldbar_value);
1239         mutex_lock(&ref->lock);
1240         if (ref->refc == 0) {
1241                 if (opal_imc_counters_start(OPAL_IMC_COUNTERS_TRACE,
1242                                 get_hard_smp_processor_id(smp_processor_id()))) {
1243                         mutex_unlock(&ref->lock);
1244                         pr_err("trace-imc: Unable to start the counters for core %d\n", core_id);
1245                         mtspr(SPRN_LDBAR, 0);
1246                         return -EINVAL;
1247                 }
1248         }
1249         ++ref->refc;
1250         mutex_unlock(&ref->lock);
1251 
1252         return 0;
1253 }
1254 
1255 static void trace_imc_event_read(struct perf_event *event)
1256 {
1257         return;
1258 }
1259 
1260 static void trace_imc_event_stop(struct perf_event *event, int flags)
1261 {
1262         u64 local_mem = get_trace_imc_event_base_addr();
1263         dump_trace_imc_data(event);
1264         memset((void *)local_mem, 0, sizeof(u64));
1265 }
1266 
1267 static void trace_imc_event_start(struct perf_event *event, int flags)
1268 {
1269         return;
1270 }
1271 
1272 static void trace_imc_event_del(struct perf_event *event, int flags)
1273 {
1274         int core_id = smp_processor_id() / threads_per_core;
1275         struct imc_pmu_ref *ref = NULL;
1276 
1277         if (core_imc_refc)
1278                 ref = &core_imc_refc[core_id];
1279         if (!ref) {
1280                 /* If core-imc is not enabled, use trace-imc reference count */
1281                 if (trace_imc_refc)
1282                         ref = &trace_imc_refc[core_id];
1283                 if (!ref)
1284                         return;
1285         }
1286         mtspr(SPRN_LDBAR, 0);
1287         mutex_lock(&ref->lock);
1288         ref->refc--;
1289         if (ref->refc == 0) {
1290                 if (opal_imc_counters_stop(OPAL_IMC_COUNTERS_TRACE,
1291                                 get_hard_smp_processor_id(smp_processor_id()))) {
1292                         mutex_unlock(&ref->lock);
1293                         pr_err("trace-imc: Unable to stop the counters for core %d\n", core_id);
1294                         return;
1295                 }
1296         } else if (ref->refc < 0) {
1297                 ref->refc = 0;
1298         }
1299         mutex_unlock(&ref->lock);
1300         trace_imc_event_stop(event, flags);
1301 }
1302 
1303 static int trace_imc_event_init(struct perf_event *event)
1304 {
1305         struct task_struct *target;
1306 
1307         if (event->attr.type != event->pmu->type)
1308                 return -ENOENT;
1309 
1310         if (!capable(CAP_SYS_ADMIN))
1311                 return -EACCES;
1312 
 1313         /* Return if this is a counting event */
1314         if (event->attr.sample_period == 0)
1315                 return -ENOENT;
1316 
1317         event->hw.idx = -1;
1318         target = event->hw.target;
1319 
1320         event->pmu->task_ctx_nr = perf_hw_context;
1321         return 0;
1322 }
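
     /*
      * Illustrative user-space sketch (field values hypothetical): trace
      * mode only accepts sampling events, so the opening perf_event_attr
      * must carry a non-zero sample_period, e.g.
      *
      *	struct perf_event_attr attr = {
      *		.type          = <trace_imc PMU type from sysfs>,
      *		.config        = 0,
      *		.sample_period = 10000,
      *	};
      *
      * Counting-style opens (sample_period == 0) are rejected above with
      * -ENOENT, and CAP_SYS_ADMIN is required.
      */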
1323 
1324 /* update_pmu_ops : Populate the appropriate operations for "pmu" */
1325 static int update_pmu_ops(struct imc_pmu *pmu)
1326 {
1327         pmu->pmu.task_ctx_nr = perf_invalid_context;
1328         pmu->pmu.add = imc_event_add;
1329         pmu->pmu.del = imc_event_stop;
1330         pmu->pmu.start = imc_event_start;
1331         pmu->pmu.stop = imc_event_stop;
1332         pmu->pmu.read = imc_event_update;
1333         pmu->pmu.attr_groups = pmu->attr_groups;
1334         pmu->pmu.capabilities = PERF_PMU_CAP_NO_EXCLUDE;
1335         pmu->attr_groups[IMC_FORMAT_ATTR] = &imc_format_group;
1336 
1337         switch (pmu->domain) {
1338         case IMC_DOMAIN_NEST:
1339                 pmu->pmu.event_init = nest_imc_event_init;
1340                 pmu->attr_groups[IMC_CPUMASK_ATTR] = &imc_pmu_cpumask_attr_group;
1341                 break;
1342         case IMC_DOMAIN_CORE:
1343                 pmu->pmu.event_init = core_imc_event_init;
1344                 pmu->attr_groups[IMC_CPUMASK_ATTR] = &imc_pmu_cpumask_attr_group;
1345                 break;
1346         case IMC_DOMAIN_THREAD:
1347                 pmu->pmu.event_init = thread_imc_event_init;
1348                 pmu->pmu.add = thread_imc_event_add;
1349                 pmu->pmu.del = thread_imc_event_del;
1350                 pmu->pmu.start_txn = thread_imc_pmu_start_txn;
1351                 pmu->pmu.cancel_txn = thread_imc_pmu_cancel_txn;
1352                 pmu->pmu.commit_txn = thread_imc_pmu_commit_txn;
1353                 break;
1354         case IMC_DOMAIN_TRACE:
1355                 pmu->pmu.event_init = trace_imc_event_init;
1356                 pmu->pmu.add = trace_imc_event_add;
1357                 pmu->pmu.del = trace_imc_event_del;
1358                 pmu->pmu.start = trace_imc_event_start;
1359                 pmu->pmu.stop = trace_imc_event_stop;
1360                 pmu->pmu.read = trace_imc_event_read;
1361                 pmu->attr_groups[IMC_FORMAT_ATTR] = &trace_imc_format_group;
1362         default:
1363                 break;
1364         }
1365 
1366         return 0;
1367 }
1368 
1369 /* init_nest_pmu_ref: Initialize the imc_pmu_ref struct for all the nodes */
1370 static int init_nest_pmu_ref(void)
1371 {
1372         int nid, i, cpu;
1373 
1374         nest_imc_refc = kcalloc(num_possible_nodes(), sizeof(*nest_imc_refc),
1375                                                                 GFP_KERNEL);
1376 
1377         if (!nest_imc_refc)
1378                 return -ENOMEM;
1379 
1380         i = 0;
1381         for_each_node(nid) {
1382                 /*
1383                  * Mutex lock to avoid races while tracking the number of
1384                  * sessions using the chip's nest pmu units.
1385                  */
1386                 mutex_init(&nest_imc_refc[i].lock);
1387 
1388                 /*
 1389                  * Initialize "id" with the node_id. Variable "i" starts at 0
 1390                  * and is used as the index into the array; it will not run off
 1391                  * the end of the array since for_each_node() iterates over the
 1392                  * possible nodes only.
1393                  */
1394                 nest_imc_refc[i++].id = nid;
1395         }
1396 
1397         /*
1398          * Loop to init the per_cpu "local_nest_imc_refc" with the proper
 1399          * "nest_imc_refc" index. This makes get_nest_pmu_ref() a lot simpler.
1400          */
1401         for_each_possible_cpu(cpu) {
1402                 nid = cpu_to_node(cpu);
1403                 for (i = 0; i < num_possible_nodes(); i++) {
1404                         if (nest_imc_refc[i].id == nid) {
1405                                 per_cpu(local_nest_imc_refc, cpu) = &nest_imc_refc[i];
1406                                 break;
1407                         }
1408                 }
1409         }
1410         return 0;
1411 }
1412 
1413 static void cleanup_all_core_imc_memory(void)
1414 {
1415         int i, nr_cores = DIV_ROUND_UP(num_possible_cpus(), threads_per_core);
1416         struct imc_mem_info *ptr = core_imc_pmu->mem_info;
1417         int size = core_imc_pmu->counter_mem_size;
1418 
1419         /* mem_info will never be NULL */
1420         for (i = 0; i < nr_cores; i++) {
1421                 if (ptr[i].vbase)
1422                         free_pages((u64)ptr[i].vbase, get_order(size));
1423         }
1424 
1425         kfree(ptr);
1426         kfree(core_imc_refc);
1427 }
1428 
1429 static void thread_imc_ldbar_disable(void *dummy)
1430 {
1431         /*
 1432          * By zeroing LDBAR, we disable thread-imc
1433          * updates.
1434          */
1435         mtspr(SPRN_LDBAR, 0);
1436 }
1437 
1438 void thread_imc_disable(void)
1439 {
1440         on_each_cpu(thread_imc_ldbar_disable, NULL, 1);
1441 }
1442 
1443 static void cleanup_all_thread_imc_memory(void)
1444 {
1445         int i, order = get_order(thread_imc_mem_size);
1446 
1447         for_each_online_cpu(i) {
1448                 if (per_cpu(thread_imc_mem, i))
1449                         free_pages((u64)per_cpu(thread_imc_mem, i), order);
1450 
1451         }
1452 }
1453 
1454 static void cleanup_all_trace_imc_memory(void)
1455 {
1456         int i, order = get_order(trace_imc_mem_size);
1457 
1458         for_each_online_cpu(i) {
1459                 if (per_cpu(trace_imc_mem, i))
1460                         free_pages((u64)per_cpu(trace_imc_mem, i), order);
1461 
1462         }
1463         kfree(trace_imc_refc);
1464 }
1465 
1466 /* Function to free the attr_groups which are dynamically allocated */
1467 static void imc_common_mem_free(struct imc_pmu *pmu_ptr)
1468 {
1469         if (pmu_ptr->attr_groups[IMC_EVENT_ATTR])
1470                 kfree(pmu_ptr->attr_groups[IMC_EVENT_ATTR]->attrs);
1471         kfree(pmu_ptr->attr_groups[IMC_EVENT_ATTR]);
1472 }
1473 
1474 /*
1475  * Common function to unregister the cpu hotplug callback and
1476  * free the memory.
1477  * TODO: Handle pmu unregistering; this will be done in a
1478  * follow-up series.
1479  */
1480 static void imc_common_cpuhp_mem_free(struct imc_pmu *pmu_ptr)
1481 {
1482         if (pmu_ptr->domain == IMC_DOMAIN_NEST) {
1483                 mutex_lock(&nest_init_lock);
1484                 if (nest_pmus == 1) {
1485                         cpuhp_remove_state(CPUHP_AP_PERF_POWERPC_NEST_IMC_ONLINE);
1486                         kfree(nest_imc_refc);
1487                         kfree(per_nest_pmu_arr);
1488                         per_nest_pmu_arr = NULL;
1489                 }
1490 
1491                 if (nest_pmus > 0)
1492                         nest_pmus--;
1493                 mutex_unlock(&nest_init_lock);
1494         }
1495 
1496         /* Free core_imc memory */
1497         if (pmu_ptr->domain == IMC_DOMAIN_CORE) {
1498                 cpuhp_remove_state(CPUHP_AP_PERF_POWERPC_CORE_IMC_ONLINE);
1499                 cleanup_all_core_imc_memory();
1500         }
1501 
1502         /* Free thread_imc memory */
1503         if (pmu_ptr->domain == IMC_DOMAIN_THREAD) {
1504                 cpuhp_remove_state(CPUHP_AP_PERF_POWERPC_THREAD_IMC_ONLINE);
1505                 cleanup_all_thread_imc_memory();
1506         }
1507 
1508         if (pmu_ptr->domain == IMC_DOMAIN_TRACE) {
1509                 cpuhp_remove_state(CPUHP_AP_PERF_POWERPC_TRACE_IMC_ONLINE);
1510                 cleanup_all_trace_imc_memory();
1511         }
1512 }
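
/*
 * Illustrative only: each cpuhp_remove_state() call above tears down a
 * cpuhp_setup_state() registration made when the corresponding pmu
 * domain was initialized. A minimal sketch of that pairing for the nest
 * domain, assuming the online/offline callbacks defined earlier in this
 * file and an illustrative state name string; see nest_pmu_cpumask_init()
 * for the real registration.
 */
static int example_nest_cpuhp_register(void)
{
	return cpuhp_setup_state(CPUHP_AP_PERF_POWERPC_NEST_IMC_ONLINE,
				 "perf/powerpc/imc:online",
				 ppc_nest_imc_cpu_online,
				 ppc_nest_imc_cpu_offline);
}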
1513 
1514 /*
1515  * Function to unregister thread-imc if core-imc
1516  * is not registered.
1517  */
1518 void unregister_thread_imc(void)
1519 {
1520         imc_common_cpuhp_mem_free(thread_imc_pmu);
1521         imc_common_mem_free(thread_imc_pmu);
1522         perf_pmu_unregister(&thread_imc_pmu->pmu);
1523 }
1524 
1525 /*
1526  * imc_mem_init : Function to support memory allocation for all the imc domains.
1527  */
1528 static int imc_mem_init(struct imc_pmu *pmu_ptr, struct device_node *parent,
1529                                                                 int pmu_index)
1530 {
1531         const char *s;
1532         int nr_cores, cpu, res = -ENOMEM;
1533 
1534         if (of_property_read_string(parent, "name", &s))
1535                 return -ENODEV;
1536 
1537         switch (pmu_ptr->domain) {
1538         case IMC_DOMAIN_NEST:
1539                 /* Update the pmu name */
1540                 pmu_ptr->pmu.name = kasprintf(GFP_KERNEL, "%s%s_imc", "nest_", s);
1541                 if (!pmu_ptr->pmu.name)
1542                         goto err;
1543 
1544                 /* Needed for hotplug/migration */
1545                 if (!per_nest_pmu_arr) {
1546                         per_nest_pmu_arr = kcalloc(get_max_nest_dev() + 1,
1547                                                 sizeof(struct imc_pmu *),
1548                                                 GFP_KERNEL);
1549                         if (!per_nest_pmu_arr)
1550                                 goto err;
1551                 }
1552                 per_nest_pmu_arr[pmu_index] = pmu_ptr;
1553                 break;
1554         case IMC_DOMAIN_CORE:
1555                 /* Update the pmu name */
1556                 pmu_ptr->pmu.name = kasprintf(GFP_KERNEL, "%s%s", s, "_imc");
1557                 if (!pmu_ptr->pmu.name)
1558                         goto err;
1559 
1560                 nr_cores = DIV_ROUND_UP(num_possible_cpus(), threads_per_core);
1561                 pmu_ptr->mem_info = kcalloc(nr_cores, sizeof(struct imc_mem_info),
1562                                                                 GFP_KERNEL);
1563 
1564                 if (!pmu_ptr->mem_info)
1565                         goto err;
1566 
1567                 core_imc_refc = kcalloc(nr_cores, sizeof(struct imc_pmu_ref),
1568                                                                 GFP_KERNEL);
1569 
1570                 if (!core_imc_refc) {
1571                         kfree(pmu_ptr->mem_info);
1572                         goto err;
1573                 }
1574 
1575                 core_imc_pmu = pmu_ptr;
1576                 break;
1577         case IMC_DOMAIN_THREAD:
1578                 /* Update the pmu name */
1579                 pmu_ptr->pmu.name = kasprintf(GFP_KERNEL, "%s%s", s, "_imc");
1580                 if (!pmu_ptr->pmu.name)
1581                         goto err;
1582 
1583                 thread_imc_mem_size = pmu_ptr->counter_mem_size;
1584                 for_each_online_cpu(cpu) {
1585                         res = thread_imc_mem_alloc(cpu, pmu_ptr->counter_mem_size);
1586                         if (res) {
1587                                 cleanup_all_thread_imc_memory();
1588                                 goto err;
1589                         }
1590                 }
1591 
1592                 thread_imc_pmu = pmu_ptr;
1593                 break;
1594         case IMC_DOMAIN_TRACE:
1595                 /* Update the pmu name */
1596                 pmu_ptr->pmu.name = kasprintf(GFP_KERNEL, "%s%s", s, "_imc");
1597                 if (!pmu_ptr->pmu.name)
1598                         return -ENOMEM;
1599 
1600                 nr_cores = DIV_ROUND_UP(num_possible_cpus(), threads_per_core);
1601                 trace_imc_refc = kcalloc(nr_cores, sizeof(struct imc_pmu_ref),
1602                                                                 GFP_KERNEL);
1603                 if (!trace_imc_refc)
1604                         return -ENOMEM;
1605 
1606                 trace_imc_mem_size = pmu_ptr->counter_mem_size;
1607                 for_each_online_cpu(cpu) {
1608                         res = trace_imc_mem_alloc(cpu, trace_imc_mem_size);
1609                         if (res) {
1610                                 cleanup_all_trace_imc_memory();
1611                                 goto err;
1612                         }
1613                 }
1614                 break;
1615         default:
1616                 return -EINVAL;
1617         }
1618 
1619         return 0;
1620 err:
1621         return res;
1622 }
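
/*
 * Worked example of the resulting pmu names (the node names below are
 * assumptions): a nest unit node named "mcs0" registers as
 * "nest_mcs0_imc", while a core/thread/trace node named "trace"
 * registers as "trace_imc".
 */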
1623 
1624 /*
1625  * init_imc_pmu : Set up and register the IMC pmu device.
1626  *
1627  * @parent:     Device tree unit node
1628  * @pmu_ptr:    memory allocated for this pmu
1629  * @pmu_idx:    Index of this pmu (count of nest pmus registered so far)
1630  *
1631  * init_imc_pmu() sets up the pmu cpumask and registers a cpu hotplug
1632  * callback. On failure it frees the memory allocated for the pmu.
1633  */
1634 int init_imc_pmu(struct device_node *parent, struct imc_pmu *pmu_ptr, int pmu_idx)
1635 {
1636         int ret;
1637 
1638         ret = imc_mem_init(pmu_ptr, parent, pmu_idx);
1639         if (ret)
1640                 goto err_free_mem;
1641 
1642         switch (pmu_ptr->domain) {
1643         case IMC_DOMAIN_NEST:
1644                 /*
1645                  * A nest imc pmu needs only one cpu per chip: the cpumask is
1646                  * initialized for the first nest imc pmu and reused for the
1647                  * rest. To handle cpu hotplug callback unregistration, the
1648                  * number of nest pmus is tracked in "nest_pmus".
1649                  */
1650                 mutex_lock(&nest_init_lock);
1651                 if (nest_pmus == 0) {
1652                         ret = init_nest_pmu_ref();
1653                         if (ret) {
1654                                 mutex_unlock(&nest_init_lock);
1655                                 kfree(per_nest_pmu_arr);
1656                                 per_nest_pmu_arr = NULL;
1657                                 goto err_free_mem;
1658                         }
1659                         /* Register for cpu hotplug notification. */
1660                         ret = nest_pmu_cpumask_init();
1661                         if (ret) {
1662                                 mutex_unlock(&nest_init_lock);
1663                                 kfree(nest_imc_refc);
1664                                 kfree(per_nest_pmu_arr);
1665                                 per_nest_pmu_arr = NULL;
1666                                 goto err_free_mem;
1667                         }
1668                 }
1669                 nest_pmus++;
1670                 mutex_unlock(&nest_init_lock);
1671                 break;
1672         case IMC_DOMAIN_CORE:
1673                 ret = core_imc_pmu_cpumask_init();
1674                 if (ret) {
1675                         cleanup_all_core_imc_memory();
1676                         goto err_free_mem;
1677                 }
1678 
1679                 break;
1680         case IMC_DOMAIN_THREAD:
1681                 ret = thread_imc_cpu_init();
1682                 if (ret) {
1683                         cleanup_all_thread_imc_memory();
1684                         goto err_free_mem;
1685                 }
1686 
1687                 break;
1688         case IMC_DOMAIN_TRACE:
1689                 ret = trace_imc_cpu_init();
1690                 if (ret) {
1691                         cleanup_all_trace_imc_memory();
1692                         goto err_free_mem;
1693                 }
1694 
1695                 break;
1696         default:
1697                 return -EINVAL;         /* Unknown domain */
1698         }
1699 
1700         ret = update_events_in_group(parent, pmu_ptr);
1701         if (ret)
1702                 goto err_free_cpuhp_mem;
1703 
1704         ret = update_pmu_ops(pmu_ptr);
1705         if (ret)
1706                 goto err_free_cpuhp_mem;
1707 
1708         ret = perf_pmu_register(&pmu_ptr->pmu, pmu_ptr->pmu.name, -1);
1709         if (ret)
1710                 goto err_free_cpuhp_mem;
1711 
1712         pr_debug("%s performance monitor hardware support registered\n",
1713                                                         pmu_ptr->pmu.name);
1714 
1715         return 0;
1716 
1717 err_free_cpuhp_mem:
1718         imc_common_cpuhp_mem_free(pmu_ptr);
1719 err_free_mem:
1720         imc_common_mem_free(pmu_ptr);
1721         return ret;
1722 }
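
/*
 * Illustrative only: a minimal sketch of how a platform probe path might
 * hand a device-tree counters node to init_imc_pmu(). The example_* name,
 * the kzalloc-based allocation and the counter_mem_size value are
 * assumptions; the real caller lives in the platform code, not in this
 * file.
 */
static int __init example_imc_probe(struct device_node *node, int domain,
				    int pmu_index)
{
	struct imc_pmu *pmu_ptr;
	int ret;

	pmu_ptr = kzalloc(sizeof(*pmu_ptr), GFP_KERNEL);
	if (!pmu_ptr)
		return -ENOMEM;

	pmu_ptr->domain = domain;
	pmu_ptr->counter_mem_size = 4096;	/* assumed; normally from firmware */

	ret = init_imc_pmu(node, pmu_ptr, pmu_index);
	if (ret) {
		pr_err("IMC PMU %d registration failed (%d)\n", pmu_index, ret);
		kfree(pmu_ptr);			/* error unwinding kept minimal here */
	}

	return ret;
}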

/* [<][>][^][v][top][bottom][index][help] */