root/arch/x86/kernel/cpu/cacheinfo.c

DEFINITIONS

This source file includes the following definitions.
  1. amd_cpuid4
  2. amd_calc_l3_indices
  3. amd_get_l3_disable_slot
  4. show_cache_disable
  5. SHOW_CACHE_DISABLE
  6. amd_set_l3_disable_slot
  7. store_cache_disable
  8. STORE_CACHE_DISABLE
  9. subcaches_store
  10. cache_private_attrs_is_visible
  11. init_amd_l3_attrs
  12. cache_get_priv_group
  13. amd_init_l3_cache
  14. cpuid4_cache_lookup_regs
  15. find_num_cache_leaves
  16. cacheinfo_amd_init_llc_id
  17. cacheinfo_hygon_init_llc_id
  18. init_amd_cacheinfo
  19. init_hygon_cacheinfo
  20. init_intel_cacheinfo
  21. __cache_amd_cpumap_setup
  22. __cache_cpumap_setup
  23. ci_leaf_init
  24. __init_cache_level
  25. get_cache_id
  26. __populate_cache_leaves

   1 // SPDX-License-Identifier: GPL-2.0
   2 /*
   3  *      Routines to identify caches on Intel CPU.
   4  *
   5  *      Changes:
   6  *      Venkatesh Pallipadi     : Adding cache identification through cpuid(4)
   7  *      Ashok Raj <ashok.raj@intel.com>: Work with CPU hotplug infrastructure.
   8  *      Andi Kleen / Andreas Herrmann   : CPUID4 emulation on AMD.
   9  */
  10 
  11 #include <linux/slab.h>
  12 #include <linux/cacheinfo.h>
  13 #include <linux/cpu.h>
  14 #include <linux/sched.h>
  15 #include <linux/capability.h>
  16 #include <linux/sysfs.h>
  17 #include <linux/pci.h>
  18 
  19 #include <asm/cpufeature.h>
  20 #include <asm/cacheinfo.h>
  21 #include <asm/amd_nb.h>
  22 #include <asm/smp.h>
  23 
  24 #include "cpu.h"
  25 
  26 #define LVL_1_INST      1
  27 #define LVL_1_DATA      2
  28 #define LVL_2           3
  29 #define LVL_3           4
  30 #define LVL_TRACE       5
  31 
  32 struct _cache_table {
  33         unsigned char descriptor;
  34         char cache_type;
  35         short size;
  36 };
  37 
  38 #define MB(x)   ((x) * 1024)
  39 
  40 /* All the cache descriptor types we care about (no TLB entries;
  41    trace cache descriptors are included so the P4 trace cache can be reported) */
  42 
  43 static const struct _cache_table cache_table[] =
  44 {
  45         { 0x06, LVL_1_INST, 8 },        /* 4-way set assoc, 32 byte line size */
  46         { 0x08, LVL_1_INST, 16 },       /* 4-way set assoc, 32 byte line size */
  47         { 0x09, LVL_1_INST, 32 },       /* 4-way set assoc, 64 byte line size */
  48         { 0x0a, LVL_1_DATA, 8 },        /* 2 way set assoc, 32 byte line size */
  49         { 0x0c, LVL_1_DATA, 16 },       /* 4-way set assoc, 32 byte line size */
  50         { 0x0d, LVL_1_DATA, 16 },       /* 4-way set assoc, 64 byte line size */
  51         { 0x0e, LVL_1_DATA, 24 },       /* 6-way set assoc, 64 byte line size */
  52         { 0x21, LVL_2,      256 },      /* 8-way set assoc, 64 byte line size */
  53         { 0x22, LVL_3,      512 },      /* 4-way set assoc, sectored cache, 64 byte line size */
  54         { 0x23, LVL_3,      MB(1) },    /* 8-way set assoc, sectored cache, 64 byte line size */
  55         { 0x25, LVL_3,      MB(2) },    /* 8-way set assoc, sectored cache, 64 byte line size */
  56         { 0x29, LVL_3,      MB(4) },    /* 8-way set assoc, sectored cache, 64 byte line size */
  57         { 0x2c, LVL_1_DATA, 32 },       /* 8-way set assoc, 64 byte line size */
  58         { 0x30, LVL_1_INST, 32 },       /* 8-way set assoc, 64 byte line size */
  59         { 0x39, LVL_2,      128 },      /* 4-way set assoc, sectored cache, 64 byte line size */
  60         { 0x3a, LVL_2,      192 },      /* 6-way set assoc, sectored cache, 64 byte line size */
  61         { 0x3b, LVL_2,      128 },      /* 2-way set assoc, sectored cache, 64 byte line size */
  62         { 0x3c, LVL_2,      256 },      /* 4-way set assoc, sectored cache, 64 byte line size */
  63         { 0x3d, LVL_2,      384 },      /* 6-way set assoc, sectored cache, 64 byte line size */
  64         { 0x3e, LVL_2,      512 },      /* 4-way set assoc, sectored cache, 64 byte line size */
  65         { 0x3f, LVL_2,      256 },      /* 2-way set assoc, 64 byte line size */
  66         { 0x41, LVL_2,      128 },      /* 4-way set assoc, 32 byte line size */
  67         { 0x42, LVL_2,      256 },      /* 4-way set assoc, 32 byte line size */
  68         { 0x43, LVL_2,      512 },      /* 4-way set assoc, 32 byte line size */
  69         { 0x44, LVL_2,      MB(1) },    /* 4-way set assoc, 32 byte line size */
  70         { 0x45, LVL_2,      MB(2) },    /* 4-way set assoc, 32 byte line size */
  71         { 0x46, LVL_3,      MB(4) },    /* 4-way set assoc, 64 byte line size */
  72         { 0x47, LVL_3,      MB(8) },    /* 8-way set assoc, 64 byte line size */
  73         { 0x48, LVL_2,      MB(3) },    /* 12-way set assoc, 64 byte line size */
  74         { 0x49, LVL_3,      MB(4) },    /* 16-way set assoc, 64 byte line size */
  75         { 0x4a, LVL_3,      MB(6) },    /* 12-way set assoc, 64 byte line size */
  76         { 0x4b, LVL_3,      MB(8) },    /* 16-way set assoc, 64 byte line size */
  77         { 0x4c, LVL_3,      MB(12) },   /* 12-way set assoc, 64 byte line size */
  78         { 0x4d, LVL_3,      MB(16) },   /* 16-way set assoc, 64 byte line size */
  79         { 0x4e, LVL_2,      MB(6) },    /* 24-way set assoc, 64 byte line size */
  80         { 0x60, LVL_1_DATA, 16 },       /* 8-way set assoc, sectored cache, 64 byte line size */
  81         { 0x66, LVL_1_DATA, 8 },        /* 4-way set assoc, sectored cache, 64 byte line size */
  82         { 0x67, LVL_1_DATA, 16 },       /* 4-way set assoc, sectored cache, 64 byte line size */
  83         { 0x68, LVL_1_DATA, 32 },       /* 4-way set assoc, sectored cache, 64 byte line size */
  84         { 0x70, LVL_TRACE,  12 },       /* 8-way set assoc */
  85         { 0x71, LVL_TRACE,  16 },       /* 8-way set assoc */
  86         { 0x72, LVL_TRACE,  32 },       /* 8-way set assoc */
  87         { 0x73, LVL_TRACE,  64 },       /* 8-way set assoc */
  88         { 0x78, LVL_2,      MB(1) },    /* 4-way set assoc, 64 byte line size */
  89         { 0x79, LVL_2,      128 },      /* 8-way set assoc, sectored cache, 64 byte line size */
  90         { 0x7a, LVL_2,      256 },      /* 8-way set assoc, sectored cache, 64 byte line size */
  91         { 0x7b, LVL_2,      512 },      /* 8-way set assoc, sectored cache, 64 byte line size */
  92         { 0x7c, LVL_2,      MB(1) },    /* 8-way set assoc, sectored cache, 64 byte line size */
  93         { 0x7d, LVL_2,      MB(2) },    /* 8-way set assoc, 64 byte line size */
  94         { 0x7f, LVL_2,      512 },      /* 2-way set assoc, 64 byte line size */
  95         { 0x80, LVL_2,      512 },      /* 8-way set assoc, 64 byte line size */
  96         { 0x82, LVL_2,      256 },      /* 8-way set assoc, 32 byte line size */
  97         { 0x83, LVL_2,      512 },      /* 8-way set assoc, 32 byte line size */
  98         { 0x84, LVL_2,      MB(1) },    /* 8-way set assoc, 32 byte line size */
  99         { 0x85, LVL_2,      MB(2) },    /* 8-way set assoc, 32 byte line size */
 100         { 0x86, LVL_2,      512 },      /* 4-way set assoc, 64 byte line size */
 101         { 0x87, LVL_2,      MB(1) },    /* 8-way set assoc, 64 byte line size */
 102         { 0xd0, LVL_3,      512 },      /* 4-way set assoc, 64 byte line size */
 103         { 0xd1, LVL_3,      MB(1) },    /* 4-way set assoc, 64 byte line size */
 104         { 0xd2, LVL_3,      MB(2) },    /* 4-way set assoc, 64 byte line size */
 105         { 0xd6, LVL_3,      MB(1) },    /* 8-way set assoc, 64 byte line size */
 106         { 0xd7, LVL_3,      MB(2) },    /* 8-way set assoc, 64 byte line size */
 107         { 0xd8, LVL_3,      MB(4) },    /* 12-way set assoc, 64 byte line size */
 108         { 0xdc, LVL_3,      MB(2) },    /* 12-way set assoc, 64 byte line size */
 109         { 0xdd, LVL_3,      MB(4) },    /* 12-way set assoc, 64 byte line size */
 110         { 0xde, LVL_3,      MB(8) },    /* 12-way set assoc, 64 byte line size */
 111         { 0xe2, LVL_3,      MB(2) },    /* 16-way set assoc, 64 byte line size */
 112         { 0xe3, LVL_3,      MB(4) },    /* 16-way set assoc, 64 byte line size */
 113         { 0xe4, LVL_3,      MB(8) },    /* 16-way set assoc, 64 byte line size */
 114         { 0xea, LVL_3,      MB(12) },   /* 24-way set assoc, 64 byte line size */
 115         { 0xeb, LVL_3,      MB(18) },   /* 24-way set assoc, 64 byte line size */
 116         { 0xec, LVL_3,      MB(24) },   /* 24-way set assoc, 64 byte line size */
 117         { 0x00, 0, 0}
 118 };
 119 
 120 
 121 enum _cache_type {
 122         CTYPE_NULL = 0,
 123         CTYPE_DATA = 1,
 124         CTYPE_INST = 2,
 125         CTYPE_UNIFIED = 3
 126 };
 127 
 128 union _cpuid4_leaf_eax {
 129         struct {
 130                 enum _cache_type        type:5;
 131                 unsigned int            level:3;
 132                 unsigned int            is_self_initializing:1;
 133                 unsigned int            is_fully_associative:1;
 134                 unsigned int            reserved:4;
 135                 unsigned int            num_threads_sharing:12;
 136                 unsigned int            num_cores_on_die:6;
 137         } split;
 138         u32 full;
 139 };
 140 
 141 union _cpuid4_leaf_ebx {
 142         struct {
 143                 unsigned int            coherency_line_size:12;
 144                 unsigned int            physical_line_partition:10;
 145                 unsigned int            ways_of_associativity:10;
 146         } split;
 147         u32 full;
 148 };
 149 
 150 union _cpuid4_leaf_ecx {
 151         struct {
 152                 unsigned int            number_of_sets:32;
 153         } split;
 154         u32 full;
 155 };
 156 
 157 struct _cpuid4_info_regs {
 158         union _cpuid4_leaf_eax eax;
 159         union _cpuid4_leaf_ebx ebx;
 160         union _cpuid4_leaf_ecx ecx;
 161         unsigned int id;
 162         unsigned long size;
 163         struct amd_northbridge *nb;
 164 };
 165 
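The unions above map the raw CPUID leaf 4 (or AMD leaf 0x8000001d) registers onto named bitfields; the cache size in bytes is the product of the four "+ 1" geometry fields, which is the arithmetic cpuid4_cache_lookup_regs() performs further down. A minimal sketch of that computation as a standalone helper (hypothetical name, not part of this file):

static unsigned long cpuid4_leaf_size(union _cpuid4_leaf_ebx ebx,
                                      union _cpuid4_leaf_ecx ecx)
{
        /* e.g. 16 ways * 1 partition * 64-byte lines * 8192 sets = 8 MB */
        return (unsigned long)(ecx.split.number_of_sets          + 1) *
                              (ebx.split.coherency_line_size     + 1) *
                              (ebx.split.physical_line_partition + 1) *
                              (ebx.split.ways_of_associativity   + 1);
}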
 166 static unsigned short num_cache_leaves;
 167 
 168 /* AMD doesn't have CPUID4. Emulate it here to report the same
 169    information to the user. This makes some assumptions about the machine:
 170    L2 not shared, no SMT, etc., which are currently true on AMD CPUs.
 171 
 172    In theory the TLBs could also be reported as a fake cache type (their
 173    data is in "dummy"). Maybe later. */
 174 union l1_cache {
 175         struct {
 176                 unsigned line_size:8;
 177                 unsigned lines_per_tag:8;
 178                 unsigned assoc:8;
 179                 unsigned size_in_kb:8;
 180         };
 181         unsigned val;
 182 };
 183 
 184 union l2_cache {
 185         struct {
 186                 unsigned line_size:8;
 187                 unsigned lines_per_tag:4;
 188                 unsigned assoc:4;
 189                 unsigned size_in_kb:16;
 190         };
 191         unsigned val;
 192 };
 193 
 194 union l3_cache {
 195         struct {
 196                 unsigned line_size:8;
 197                 unsigned lines_per_tag:4;
 198                 unsigned assoc:4;
 199                 unsigned res:2;
 200                 unsigned size_encoded:14;
 201         };
 202         unsigned val;
 203 };
 204 
 205 static const unsigned short assocs[] = {
 206         [1] = 1,
 207         [2] = 2,
 208         [4] = 4,
 209         [6] = 8,
 210         [8] = 16,
 211         [0xa] = 32,
 212         [0xb] = 48,
 213         [0xc] = 64,
 214         [0xd] = 96,
 215         [0xe] = 128,
 216         [0xf] = 0xffff /* fully associative - no way to show this currently */
 217 };
 218 
 219 static const unsigned char levels[] = { 1, 1, 2, 3 };
 220 static const unsigned char types[] = { 1, 2, 3, 3 };
 221 
 222 static const enum cache_type cache_type_map[] = {
 223         [CTYPE_NULL] = CACHE_TYPE_NOCACHE,
 224         [CTYPE_DATA] = CACHE_TYPE_DATA,
 225         [CTYPE_INST] = CACHE_TYPE_INST,
 226         [CTYPE_UNIFIED] = CACHE_TYPE_UNIFIED,
 227 };
 228 
 229 static void
 230 amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax,
 231                      union _cpuid4_leaf_ebx *ebx,
 232                      union _cpuid4_leaf_ecx *ecx)
 233 {
 234         unsigned dummy;
 235         unsigned line_size, lines_per_tag, assoc, size_in_kb;
 236         union l1_cache l1i, l1d;
 237         union l2_cache l2;
 238         union l3_cache l3;
 239         union l1_cache *l1 = &l1d;
 240 
 241         eax->full = 0;
 242         ebx->full = 0;
 243         ecx->full = 0;
 244 
 245         cpuid(0x80000005, &dummy, &dummy, &l1d.val, &l1i.val);
 246         cpuid(0x80000006, &dummy, &dummy, &l2.val, &l3.val);
 247 
 248         switch (leaf) {
 249         case 1:
 250                 l1 = &l1i;
 251                 /* fall through */
 252         case 0:
 253                 if (!l1->val)
 254                         return;
 255                 assoc = assocs[l1->assoc];
 256                 line_size = l1->line_size;
 257                 lines_per_tag = l1->lines_per_tag;
 258                 size_in_kb = l1->size_in_kb;
 259                 break;
 260         case 2:
 261                 if (!l2.val)
 262                         return;
 263                 assoc = assocs[l2.assoc];
 264                 line_size = l2.line_size;
 265                 lines_per_tag = l2.lines_per_tag;
 266                 /* cpu_data has errata corrections for K7 applied */
 267                 size_in_kb = __this_cpu_read(cpu_info.x86_cache_size);
 268                 break;
 269         case 3:
 270                 if (!l3.val)
 271                         return;
 272                 assoc = assocs[l3.assoc];
 273                 line_size = l3.line_size;
 274                 lines_per_tag = l3.lines_per_tag;
 275                 size_in_kb = l3.size_encoded * 512;
 276                 if (boot_cpu_has(X86_FEATURE_AMD_DCM)) {
 277                         size_in_kb = size_in_kb >> 1;
 278                         assoc = assoc >> 1;
 279                 }
 280                 break;
 281         default:
 282                 return;
 283         }
 284 
 285         eax->split.is_self_initializing = 1;
 286         eax->split.type = types[leaf];
 287         eax->split.level = levels[leaf];
 288         eax->split.num_threads_sharing = 0;
 289         eax->split.num_cores_on_die = __this_cpu_read(cpu_info.x86_max_cores) - 1;
 290 
 291 
 292         if (assoc == 0xffff)
 293                 eax->split.is_fully_associative = 1;
 294         ebx->split.coherency_line_size = line_size - 1;
 295         ebx->split.ways_of_associativity = assoc - 1;
 296         ebx->split.physical_line_partition = lines_per_tag - 1;
 297         ecx->split.number_of_sets = (size_in_kb * 1024) / line_size /
 298                 (ebx->split.ways_of_associativity + 1) - 1;
 299 }
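A worked example of the emulation above, assuming a 512 KB, 16-way L2 with 64-byte lines (an 0x80000006 ECX associativity code of 0x8, which assocs[] maps to 16). The sketch below is illustrative only and simply repeats the register arithmetic in user space:

#include <stdio.h>

int main(void)
{
        unsigned int size_in_kb = 512, assoc = 16, line_size = 64;
        unsigned int ways = assoc - 1;          /* EBX ways_of_associativity = 15 */
        unsigned int cls  = line_size - 1;      /* EBX coherency_line_size   = 63 */
        unsigned int sets = size_in_kb * 1024 / line_size / (ways + 1) - 1;

        printf("ways=%u cls=%u sets=%u\n", ways, cls, sets); /* 15 63 511 */
        return 0;
}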
 300 
 301 #if defined(CONFIG_AMD_NB) && defined(CONFIG_SYSFS)
 302 
 303 /*
 304  * L3 cache descriptors
 305  */
 306 static void amd_calc_l3_indices(struct amd_northbridge *nb)
 307 {
 308         struct amd_l3_cache *l3 = &nb->l3_cache;
 309         unsigned int sc0, sc1, sc2, sc3;
 310         u32 val = 0;
 311 
 312         pci_read_config_dword(nb->misc, 0x1C4, &val);
 313 
 314         /* calculate subcache sizes */
 315         l3->subcaches[0] = sc0 = !(val & BIT(0));
 316         l3->subcaches[1] = sc1 = !(val & BIT(4));
 317 
 318         if (boot_cpu_data.x86 == 0x15) {
 319                 l3->subcaches[0] = sc0 += !(val & BIT(1));
 320                 l3->subcaches[1] = sc1 += !(val & BIT(5));
 321         }
 322 
 323         l3->subcaches[2] = sc2 = !(val & BIT(8))  + !(val & BIT(9));
 324         l3->subcaches[3] = sc3 = !(val & BIT(12)) + !(val & BIT(13));
 325 
 326         l3->indices = (max(max3(sc0, sc1, sc2), sc3) << 10) - 1;
 327 }
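Worked example: with bits 0, 1, 4, 5, 8, 9, 12 and 13 of the register at offset 0x1C4 all clear on a family 0x15 part, sc0 = sc1 = sc2 = sc3 = 2, so l3->indices = (2 << 10) - 1 = 2047, meaning indices 0..2047 are valid arguments for the disable slots handled below.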
 328 
 329 /*
 330  * check whether a slot used for disabling an L3 index is occupied.
 331  * @nb: northbridge descriptor containing the L3 cache
 332  * @slot: slot number (0..1)
 333  *
 334  * @returns: the disabled index if the slot is in use, or a negative value if it is free.
 335  */
 336 static int amd_get_l3_disable_slot(struct amd_northbridge *nb, unsigned slot)
 337 {
 338         unsigned int reg = 0;
 339 
 340         pci_read_config_dword(nb->misc, 0x1BC + slot * 4, &reg);
 341 
 342         /* check whether this slot is activated already */
 343         if (reg & (3UL << 30))
 344                 return reg & 0xfff;
 345 
 346         return -1;
 347 }
 348 
 349 static ssize_t show_cache_disable(struct cacheinfo *this_leaf, char *buf,
 350                                   unsigned int slot)
 351 {
 352         int index;
 353         struct amd_northbridge *nb = this_leaf->priv;
 354 
 355         index = amd_get_l3_disable_slot(nb, slot);
 356         if (index >= 0)
 357                 return sprintf(buf, "%d\n", index);
 358 
 359         return sprintf(buf, "FREE\n");
 360 }
 361 
 362 #define SHOW_CACHE_DISABLE(slot)                                        \
 363 static ssize_t                                                          \
 364 cache_disable_##slot##_show(struct device *dev,                         \
 365                             struct device_attribute *attr, char *buf)   \
 366 {                                                                       \
 367         struct cacheinfo *this_leaf = dev_get_drvdata(dev);             \
 368         return show_cache_disable(this_leaf, buf, slot);                \
 369 }
 370 SHOW_CACHE_DISABLE(0)
 371 SHOW_CACHE_DISABLE(1)
 372 
 373 static void amd_l3_disable_index(struct amd_northbridge *nb, int cpu,
 374                                  unsigned slot, unsigned long idx)
 375 {
 376         int i;
 377 
 378         idx |= BIT(30);
 379 
 380         /*
 381          *  disable index in all 4 subcaches
 382          */
 383         for (i = 0; i < 4; i++) {
 384                 u32 reg = idx | (i << 20);
 385 
 386                 if (!nb->l3_cache.subcaches[i])
 387                         continue;
 388 
 389                 pci_write_config_dword(nb->misc, 0x1BC + slot * 4, reg);
 390 
 391                 /*
 392                  * We need to WBINVD on a core on the node containing the L3
 393                  * cache whose indices we are disabling, so a simple wbinvd()
 394                  * is not sufficient.
 395                  */
 396                 wbinvd_on_cpu(cpu);
 397 
 398                 reg |= BIT(31);
 399                 pci_write_config_dword(nb->misc, 0x1BC + slot * 4, reg);
 400         }
 401 }
 402 
 403 /*
 404  * disable an L3 cache index by using a disable-slot
 405  *
 406  * @nb:    northbridge descriptor containing the L3 cache
 407  * @cpu:   A CPU on the node containing the L3 cache
 408  * @slot:  slot number (0..1)
 409  * @index: index to disable
 410  *
 411  * @return: 0 on success, error status on failure
 412  */
 413 static int amd_set_l3_disable_slot(struct amd_northbridge *nb, int cpu,
 414                             unsigned slot, unsigned long index)
 415 {
 416         int ret = 0;
 417 
 418         /*  check if @slot is already used or the index is already disabled */
 419         ret = amd_get_l3_disable_slot(nb, slot);
 420         if (ret >= 0)
 421                 return -EEXIST;
 422 
 423         if (index > nb->l3_cache.indices)
 424                 return -EINVAL;
 425 
 426         /* check whether the other slot has disabled the same index already */
 427         if (index == amd_get_l3_disable_slot(nb, !slot))
 428                 return -EEXIST;
 429 
 430         amd_l3_disable_index(nb, cpu, slot, index);
 431 
 432         return 0;
 433 }
 434 
 435 static ssize_t store_cache_disable(struct cacheinfo *this_leaf,
 436                                    const char *buf, size_t count,
 437                                    unsigned int slot)
 438 {
 439         unsigned long val = 0;
 440         int cpu, err = 0;
 441         struct amd_northbridge *nb = this_leaf->priv;
 442 
 443         if (!capable(CAP_SYS_ADMIN))
 444                 return -EPERM;
 445 
 446         cpu = cpumask_first(&this_leaf->shared_cpu_map);
 447 
 448         if (kstrtoul(buf, 10, &val) < 0)
 449                 return -EINVAL;
 450 
 451         err = amd_set_l3_disable_slot(nb, cpu, slot, val);
 452         if (err) {
 453                 if (err == -EEXIST)
 454                         pr_warn("L3 slot %d in use/index already disabled!\n",
 455                                    slot);
 456                 return err;
 457         }
 458         return count;
 459 }
 460 
 461 #define STORE_CACHE_DISABLE(slot)                                       \
 462 static ssize_t                                                          \
 463 cache_disable_##slot##_store(struct device *dev,                        \
 464                              struct device_attribute *attr,             \
 465                              const char *buf, size_t count)             \
 466 {                                                                       \
 467         struct cacheinfo *this_leaf = dev_get_drvdata(dev);             \
 468         return store_cache_disable(this_leaf, buf, count, slot);        \
 469 }
 470 STORE_CACHE_DISABLE(0)
 471 STORE_CACHE_DISABLE(1)
 472 
 473 static ssize_t subcaches_show(struct device *dev,
 474                               struct device_attribute *attr, char *buf)
 475 {
 476         struct cacheinfo *this_leaf = dev_get_drvdata(dev);
 477         int cpu = cpumask_first(&this_leaf->shared_cpu_map);
 478 
 479         return sprintf(buf, "%x\n", amd_get_subcaches(cpu));
 480 }
 481 
 482 static ssize_t subcaches_store(struct device *dev,
 483                                struct device_attribute *attr,
 484                                const char *buf, size_t count)
 485 {
 486         struct cacheinfo *this_leaf = dev_get_drvdata(dev);
 487         int cpu = cpumask_first(&this_leaf->shared_cpu_map);
 488         unsigned long val;
 489 
 490         if (!capable(CAP_SYS_ADMIN))
 491                 return -EPERM;
 492 
 493         if (kstrtoul(buf, 16, &val) < 0)
 494                 return -EINVAL;
 495 
 496         if (amd_set_subcaches(cpu, val))
 497                 return -EINVAL;
 498 
 499         return count;
 500 }
 501 
 502 static DEVICE_ATTR_RW(cache_disable_0);
 503 static DEVICE_ATTR_RW(cache_disable_1);
 504 static DEVICE_ATTR_RW(subcaches);
 505 
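The three attributes above surface through the generic cacheinfo sysfs tree once cache_get_priv_group() (below) attaches them to the L3 leaf. A usage sketch for the index-disable interface, assuming the usual layout in which index3 is the L3 leaf (typical, but not guaranteed on every CPU) and CAP_SYS_ADMIN:

#include <stdio.h>

int main(void)
{
        const char *path =
                "/sys/devices/system/cpu/cpu0/cache/index3/cache_disable_0";
        FILE *f = fopen(path, "w");

        if (!f) {
                perror("fopen");
                return 1;
        }
        /* The value is parsed by store_cache_disable() above; reading the
         * same file afterwards shows the disabled index or "FREE". */
        fprintf(f, "42\n");
        return fclose(f) ? 1 : 0;
}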
 506 static umode_t
 507 cache_private_attrs_is_visible(struct kobject *kobj,
 508                                struct attribute *attr, int unused)
 509 {
 510         struct device *dev = kobj_to_dev(kobj);
 511         struct cacheinfo *this_leaf = dev_get_drvdata(dev);
 512         umode_t mode = attr->mode;
 513 
 514         if (!this_leaf->priv)
 515                 return 0;
 516 
 517         if ((attr == &dev_attr_subcaches.attr) &&
 518             amd_nb_has_feature(AMD_NB_L3_PARTITIONING))
 519                 return mode;
 520 
 521         if ((attr == &dev_attr_cache_disable_0.attr ||
 522              attr == &dev_attr_cache_disable_1.attr) &&
 523             amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE))
 524                 return mode;
 525 
 526         return 0;
 527 }
 528 
 529 static struct attribute_group cache_private_group = {
 530         .is_visible = cache_private_attrs_is_visible,
 531 };
 532 
 533 static void init_amd_l3_attrs(void)
 534 {
 535         int n = 1;
 536         static struct attribute **amd_l3_attrs;
 537 
 538         if (amd_l3_attrs) /* already initialized */
 539                 return;
 540 
 541         if (amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE))
 542                 n += 2;
 543         if (amd_nb_has_feature(AMD_NB_L3_PARTITIONING))
 544                 n += 1;
 545 
 546         amd_l3_attrs = kcalloc(n, sizeof(*amd_l3_attrs), GFP_KERNEL);
 547         if (!amd_l3_attrs)
 548                 return;
 549 
 550         n = 0;
 551         if (amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE)) {
 552                 amd_l3_attrs[n++] = &dev_attr_cache_disable_0.attr;
 553                 amd_l3_attrs[n++] = &dev_attr_cache_disable_1.attr;
 554         }
 555         if (amd_nb_has_feature(AMD_NB_L3_PARTITIONING))
 556                 amd_l3_attrs[n++] = &dev_attr_subcaches.attr;
 557 
 558         cache_private_group.attrs = amd_l3_attrs;
 559 }
 560 
 561 const struct attribute_group *
 562 cache_get_priv_group(struct cacheinfo *this_leaf)
 563 {
 564         struct amd_northbridge *nb = this_leaf->priv;
 565 
 566         if (this_leaf->level < 3 || !nb)
 567                 return NULL;
 568 
 569         if (nb && nb->l3_cache.indices)
 570                 init_amd_l3_attrs();
 571 
 572         return &cache_private_group;
 573 }
 574 
 575 static void amd_init_l3_cache(struct _cpuid4_info_regs *this_leaf, int index)
 576 {
 577         int node;
 578 
 579         /* only for L3, and not in virtualized environments */
 580         if (index < 3)
 581                 return;
 582 
 583         node = amd_get_nb_id(smp_processor_id());
 584         this_leaf->nb = node_to_amd_nb(node);
 585         if (this_leaf->nb && !this_leaf->nb->l3_cache.indices)
 586                 amd_calc_l3_indices(this_leaf->nb);
 587 }
 588 #else
 589 #define amd_init_l3_cache(x, y)
 590 #endif  /* CONFIG_AMD_NB && CONFIG_SYSFS */
 591 
 592 static int
 593 cpuid4_cache_lookup_regs(int index, struct _cpuid4_info_regs *this_leaf)
 594 {
 595         union _cpuid4_leaf_eax  eax;
 596         union _cpuid4_leaf_ebx  ebx;
 597         union _cpuid4_leaf_ecx  ecx;
 598         unsigned                edx;
 599 
 600         if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) {
 601                 if (boot_cpu_has(X86_FEATURE_TOPOEXT))
 602                         cpuid_count(0x8000001d, index, &eax.full,
 603                                     &ebx.full, &ecx.full, &edx);
 604                 else
 605                         amd_cpuid4(index, &eax, &ebx, &ecx);
 606                 amd_init_l3_cache(this_leaf, index);
 607         } else if (boot_cpu_data.x86_vendor == X86_VENDOR_HYGON) {
 608                 cpuid_count(0x8000001d, index, &eax.full,
 609                             &ebx.full, &ecx.full, &edx);
 610                 amd_init_l3_cache(this_leaf, index);
 611         } else {
 612                 cpuid_count(4, index, &eax.full, &ebx.full, &ecx.full, &edx);
 613         }
 614 
 615         if (eax.split.type == CTYPE_NULL)
 616                 return -EIO; /* better error ? */
 617 
 618         this_leaf->eax = eax;
 619         this_leaf->ebx = ebx;
 620         this_leaf->ecx = ecx;
 621         this_leaf->size = (ecx.split.number_of_sets          + 1) *
 622                           (ebx.split.coherency_line_size     + 1) *
 623                           (ebx.split.physical_line_partition + 1) *
 624                           (ebx.split.ways_of_associativity   + 1);
 625         return 0;
 626 }
 627 
 628 static int find_num_cache_leaves(struct cpuinfo_x86 *c)
 629 {
 630         unsigned int            eax, ebx, ecx, edx, op;
 631         union _cpuid4_leaf_eax  cache_eax;
 632         int                     i = -1;
 633 
 634         if (c->x86_vendor == X86_VENDOR_AMD ||
 635             c->x86_vendor == X86_VENDOR_HYGON)
 636                 op = 0x8000001d;
 637         else
 638                 op = 4;
 639 
 640         do {
 641                 ++i;
 642                 /* Do cpuid(op) loop to find out num_cache_leaves */
 643                 cpuid_count(op, i, &eax, &ebx, &ecx, &edx);
 644                 cache_eax.full = eax;
 645         } while (cache_eax.split.type != CTYPE_NULL);
 646         return i;
 647 }
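The same enumeration can be reproduced from user space; a sketch (illustrative only) using __get_cpuid_count() from the compiler's <cpuid.h>, assuming leaf 4 (AMD/Hygon parts with TOPOEXT would use 0x8000001d instead, as above):

#include <stdio.h>
#include <cpuid.h>

static int count_cache_leaves(unsigned int op)
{
        unsigned int eax, ebx, ecx, edx;
        int i = 0;

        /* Stop at the first subleaf whose type field (EAX[4:0]) is CTYPE_NULL. */
        while (__get_cpuid_count(op, i, &eax, &ebx, &ecx, &edx) &&
               (eax & 0x1f) != 0)
                i++;
        return i;
}

int main(void)
{
        printf("%d cache leaves\n", count_cache_leaves(4));
        return 0;
}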
 648 
 649 void cacheinfo_amd_init_llc_id(struct cpuinfo_x86 *c, int cpu, u8 node_id)
 650 {
 651         /*
 652          * We may have multiple LLCs if L3 caches exist, so check if we
 653          * have an L3 cache by looking at the L3 cache CPUID leaf.
 654          */
 655         if (!cpuid_edx(0x80000006))
 656                 return;
 657 
 658         if (c->x86 < 0x17) {
 659                 /* LLC is at the node level. */
 660                 per_cpu(cpu_llc_id, cpu) = node_id;
 661         } else if (c->x86 == 0x17 && c->x86_model <= 0x1F) {
 662                 /*
 663                  * LLC is at the core complex level.
 664                  * Core complex ID is ApicId[3] for these processors.
 665                  */
 666                 per_cpu(cpu_llc_id, cpu) = c->apicid >> 3;
 667         } else {
 668                 /*
 669                  * LLC ID is calculated from the number of threads sharing the
 670                  * cache.
 671                  */
 672                 u32 eax, ebx, ecx, edx, num_sharing_cache = 0;
 673                 u32 llc_index = find_num_cache_leaves(c) - 1;
 674 
 675                 cpuid_count(0x8000001d, llc_index, &eax, &ebx, &ecx, &edx);
 676                 if (eax)
 677                         num_sharing_cache = ((eax >> 14) & 0xfff) + 1;
 678 
 679                 if (num_sharing_cache) {
 680                         int bits = get_count_order(num_sharing_cache);
 681 
 682                         per_cpu(cpu_llc_id, cpu) = c->apicid >> bits;
 683                 }
 684         }
 685 }
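A worked example for the family 0x17+ branch above, assuming 16 threads share the last-level cache: EAX[25:14] of the last 0x8000001d subleaf reads 15, so num_sharing_cache = 16, get_count_order(16) = 4, and a CPU with APIC ID 0x1b ends up with cpu_llc_id 0x1. The sketch below repeats that arithmetic in user space, approximating get_count_order() with __builtin_clz():

#include <stdio.h>

int main(void)
{
        unsigned int num_sharing_cache = 16, apicid = 0x1b;
        int bits = num_sharing_cache > 1 ?
                   32 - __builtin_clz(num_sharing_cache - 1) : 0;

        printf("llc_id = %#x\n", apicid >> bits);  /* prints 0x1 */
        return 0;
}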
 686 
 687 void cacheinfo_hygon_init_llc_id(struct cpuinfo_x86 *c, int cpu, u8 node_id)
 688 {
 689         /*
 690          * We may have multiple LLCs if L3 caches exist, so check if we
 691          * have an L3 cache by looking at the L3 cache CPUID leaf.
 692          */
 693         if (!cpuid_edx(0x80000006))
 694                 return;
 695 
 696         /*
 697          * LLC is at the core complex level.
 698          * Core complex ID is ApicId[3] for these processors.
 699          */
 700         per_cpu(cpu_llc_id, cpu) = c->apicid >> 3;
 701 }
 702 
 703 void init_amd_cacheinfo(struct cpuinfo_x86 *c)
 704 {
 705 
 706         if (boot_cpu_has(X86_FEATURE_TOPOEXT)) {
 707                 num_cache_leaves = find_num_cache_leaves(c);
 708         } else if (c->extended_cpuid_level >= 0x80000006) {
 709                 if (cpuid_edx(0x80000006) & 0xf000)
 710                         num_cache_leaves = 4;
 711                 else
 712                         num_cache_leaves = 3;
 713         }
 714 }
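In the non-TOPOEXT path above, the 0xf000 mask selects EDX[15:12] of CPUID 0x80000006, the L3 associativity field; a nonzero value means an L3 cache is present, so the CPU reports four cache leaves (L1d, L1i, L2, L3) instead of three.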
 715 
 716 void init_hygon_cacheinfo(struct cpuinfo_x86 *c)
 717 {
 718         num_cache_leaves = find_num_cache_leaves(c);
 719 }
 720 
 721 void init_intel_cacheinfo(struct cpuinfo_x86 *c)
 722 {
 723         /* Cache sizes */
 724         unsigned int trace = 0, l1i = 0, l1d = 0, l2 = 0, l3 = 0;
 725         unsigned int new_l1d = 0, new_l1i = 0; /* Cache sizes from cpuid(4) */
 726         unsigned int new_l2 = 0, new_l3 = 0, i; /* Cache sizes from cpuid(4) */
 727         unsigned int l2_id = 0, l3_id = 0, num_threads_sharing, index_msb;
 728 #ifdef CONFIG_SMP
 729         unsigned int cpu = c->cpu_index;
 730 #endif
 731 
 732         if (c->cpuid_level > 3) {
 733                 static int is_initialized;
 734 
 735                 if (is_initialized == 0) {
 736                         /* Init num_cache_leaves from boot CPU */
 737                         num_cache_leaves = find_num_cache_leaves(c);
 738                         is_initialized++;
 739                 }
 740 
 741                 /*
 742                  * Whenever possible use cpuid(4), the deterministic cache
 743                  * parameters leaf, to find the cache details.
 744                  */
 745                 for (i = 0; i < num_cache_leaves; i++) {
 746                         struct _cpuid4_info_regs this_leaf = {};
 747                         int retval;
 748 
 749                         retval = cpuid4_cache_lookup_regs(i, &this_leaf);
 750                         if (retval < 0)
 751                                 continue;
 752 
 753                         switch (this_leaf.eax.split.level) {
 754                         case 1:
 755                                 if (this_leaf.eax.split.type == CTYPE_DATA)
 756                                         new_l1d = this_leaf.size/1024;
 757                                 else if (this_leaf.eax.split.type == CTYPE_INST)
 758                                         new_l1i = this_leaf.size/1024;
 759                                 break;
 760                         case 2:
 761                                 new_l2 = this_leaf.size/1024;
 762                                 num_threads_sharing = 1 + this_leaf.eax.split.num_threads_sharing;
 763                                 index_msb = get_count_order(num_threads_sharing);
 764                                 l2_id = c->apicid & ~((1 << index_msb) - 1);
 765                                 break;
 766                         case 3:
 767                                 new_l3 = this_leaf.size/1024;
 768                                 num_threads_sharing = 1 + this_leaf.eax.split.num_threads_sharing;
 769                                 index_msb = get_count_order(num_threads_sharing);
 770                                 l3_id = c->apicid & ~((1 << index_msb) - 1);
 771                                 break;
 772                         default:
 773                                 break;
 774                         }
 775                 }
 776         }
 777         /*
 778          * Don't use cpuid2 if cpuid4 is supported. For P4, we use cpuid2 only
 779          * for the trace cache.
 780          */
 781         if ((num_cache_leaves == 0 || c->x86 == 15) && c->cpuid_level > 1) {
 782                 /* supports eax=2  call */
 783                 int j, n;
 784                 unsigned int regs[4];
 785                 unsigned char *dp = (unsigned char *)regs;
 786                 int only_trace = 0;
 787 
 788                 if (num_cache_leaves != 0 && c->x86 == 15)
 789                         only_trace = 1;
 790 
 791                 /* Number of times to iterate */
 792                 n = cpuid_eax(2) & 0xFF;
 793 
 794                 for (i = 0 ; i < n ; i++) {
 795                         cpuid(2, &regs[0], &regs[1], &regs[2], &regs[3]);
 796 
 797                         /* If bit 31 is set, this is an unknown format */
 798                         for (j = 0 ; j < 3 ; j++)
 799                                 if (regs[j] & (1 << 31))
 800                                         regs[j] = 0;
 801 
 802                         /* Byte 0 is the repeat count, not a descriptor */
 803                         for (j = 1 ; j < 16 ; j++) {
 804                                 unsigned char des = dp[j];
 805                                 unsigned char k = 0;
 806 
 807                                 /* look up this descriptor in the table */
 808                                 while (cache_table[k].descriptor != 0) {
 809                                         if (cache_table[k].descriptor == des) {
 810                                                 if (only_trace && cache_table[k].cache_type != LVL_TRACE)
 811                                                         break;
 812                                                 switch (cache_table[k].cache_type) {
 813                                                 case LVL_1_INST:
 814                                                         l1i += cache_table[k].size;
 815                                                         break;
 816                                                 case LVL_1_DATA:
 817                                                         l1d += cache_table[k].size;
 818                                                         break;
 819                                                 case LVL_2:
 820                                                         l2 += cache_table[k].size;
 821                                                         break;
 822                                                 case LVL_3:
 823                                                         l3 += cache_table[k].size;
 824                                                         break;
 825                                                 case LVL_TRACE:
 826                                                         trace += cache_table[k].size;
 827                                                         break;
 828                                                 }
 829 
 830                                                 break;
 831                                         }
 832 
 833                                         k++;
 834                                 }
 835                         }
 836                 }
 837         }
 838 
 839         if (new_l1d)
 840                 l1d = new_l1d;
 841 
 842         if (new_l1i)
 843                 l1i = new_l1i;
 844 
 845         if (new_l2) {
 846                 l2 = new_l2;
 847 #ifdef CONFIG_SMP
 848                 per_cpu(cpu_llc_id, cpu) = l2_id;
 849 #endif
 850         }
 851 
 852         if (new_l3) {
 853                 l3 = new_l3;
 854 #ifdef CONFIG_SMP
 855                 per_cpu(cpu_llc_id, cpu) = l3_id;
 856 #endif
 857         }
 858 
 859 #ifdef CONFIG_SMP
 860         /*
 861          * If cpu_llc_id is not yet set, this means cpuid_level < 4, which in
 862          * turn means that the only possibility is SMT (as indicated in
 863          * cpuid1). Since cpuid2 doesn't specify shared caches, and we know
 864          * that SMT shares all caches, we can unconditionally set cpu_llc_id to
 865          * c->phys_proc_id.
 866          */
 867         if (per_cpu(cpu_llc_id, cpu) == BAD_APICID)
 868                 per_cpu(cpu_llc_id, cpu) = c->phys_proc_id;
 869 #endif
 870 
 871         c->x86_cache_size = l3 ? l3 : (l2 ? l2 : (l1i+l1d));
 872 
 873         if (!l2)
 874                 cpu_detect_cache_sizes(c);
 875 }
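The legacy fallback above walks CPUID leaf 2, where each of EAX..EDX packs four one-byte descriptors, a set bit 31 marks a register as invalid, and byte 0 of EAX is the number of times the leaf must be executed. A user-space sketch of the same walk (illustrative only; it prints the raw descriptors rather than looking them up in cache_table[]):

#include <stdio.h>
#include <cpuid.h>

int main(void)
{
        unsigned int regs[4];
        unsigned char *dp = (unsigned char *)regs;
        int i, j, n;

        if (!__get_cpuid(2, &regs[0], &regs[1], &regs[2], &regs[3]))
                return 1;

        n = regs[0] & 0xff;             /* repeat count, from byte 0 of EAX */
        for (i = 0; i < n; i++) {
                __get_cpuid(2, &regs[0], &regs[1], &regs[2], &regs[3]);

                for (j = 0; j < 4; j++) /* bit 31 set => register invalid */
                        if (regs[j] & (1u << 31))
                                regs[j] = 0;

                for (j = 1; j < 16; j++)
                        if (dp[j])
                                printf("descriptor 0x%02x\n", dp[j]);
        }
        return 0;
}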
 876 
 877 static int __cache_amd_cpumap_setup(unsigned int cpu, int index,
 878                                     struct _cpuid4_info_regs *base)
 879 {
 880         struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
 881         struct cacheinfo *this_leaf;
 882         int i, sibling;
 883 
 884         /*
 885          * For L3, always use the pre-calculated cpu_llc_shared_mask
 886          * to derive shared_cpu_map.
 887          */
 888         if (index == 3) {
 889                 for_each_cpu(i, cpu_llc_shared_mask(cpu)) {
 890                         this_cpu_ci = get_cpu_cacheinfo(i);
 891                         if (!this_cpu_ci->info_list)
 892                                 continue;
 893                         this_leaf = this_cpu_ci->info_list + index;
 894                         for_each_cpu(sibling, cpu_llc_shared_mask(cpu)) {
 895                                 if (!cpu_online(sibling))
 896                                         continue;
 897                                 cpumask_set_cpu(sibling,
 898                                                 &this_leaf->shared_cpu_map);
 899                         }
 900                 }
 901         } else if (boot_cpu_has(X86_FEATURE_TOPOEXT)) {
 902                 unsigned int apicid, nshared, first, last;
 903 
 904                 nshared = base->eax.split.num_threads_sharing + 1;
 905                 apicid = cpu_data(cpu).apicid;
 906                 first = apicid - (apicid % nshared);
 907                 last = first + nshared - 1;
 908 
 909                 for_each_online_cpu(i) {
 910                         this_cpu_ci = get_cpu_cacheinfo(i);
 911                         if (!this_cpu_ci->info_list)
 912                                 continue;
 913 
 914                         apicid = cpu_data(i).apicid;
 915                         if ((apicid < first) || (apicid > last))
 916                                 continue;
 917 
 918                         this_leaf = this_cpu_ci->info_list + index;
 919 
 920                         for_each_online_cpu(sibling) {
 921                                 apicid = cpu_data(sibling).apicid;
 922                                 if ((apicid < first) || (apicid > last))
 923                                         continue;
 924                                 cpumask_set_cpu(sibling,
 925                                                 &this_leaf->shared_cpu_map);
 926                         }
 927                 }
 928         } else
 929                 return 0;
 930 
 931         return 1;
 932 }
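Worked example for the TOPOEXT branch above: if the leaf reports num_threads_sharing = 3, then nshared = 4, and a CPU whose APIC ID is 13 computes first = 13 - (13 % 4) = 12 and last = 15, so every online CPU with an APIC ID in 12..15 is added to this leaf's shared_cpu_map.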
 933 
 934 static void __cache_cpumap_setup(unsigned int cpu, int index,
 935                                  struct _cpuid4_info_regs *base)
 936 {
 937         struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
 938         struct cacheinfo *this_leaf, *sibling_leaf;
 939         unsigned long num_threads_sharing;
 940         int index_msb, i;
 941         struct cpuinfo_x86 *c = &cpu_data(cpu);
 942 
 943         if (c->x86_vendor == X86_VENDOR_AMD ||
 944             c->x86_vendor == X86_VENDOR_HYGON) {
 945                 if (__cache_amd_cpumap_setup(cpu, index, base))
 946                         return;
 947         }
 948 
 949         this_leaf = this_cpu_ci->info_list + index;
 950         num_threads_sharing = 1 + base->eax.split.num_threads_sharing;
 951 
 952         cpumask_set_cpu(cpu, &this_leaf->shared_cpu_map);
 953         if (num_threads_sharing == 1)
 954                 return;
 955 
 956         index_msb = get_count_order(num_threads_sharing);
 957 
 958         for_each_online_cpu(i)
 959                 if (cpu_data(i).apicid >> index_msb == c->apicid >> index_msb) {
 960                         struct cpu_cacheinfo *sib_cpu_ci = get_cpu_cacheinfo(i);
 961 
 962                         if (i == cpu || !sib_cpu_ci->info_list)
 963                                 continue;/* skip if itself or no cacheinfo */
 964                         sibling_leaf = sib_cpu_ci->info_list + index;
 965                         cpumask_set_cpu(i, &this_leaf->shared_cpu_map);
 966                         cpumask_set_cpu(cpu, &sibling_leaf->shared_cpu_map);
 967                 }
 968 }
 969 
 970 static void ci_leaf_init(struct cacheinfo *this_leaf,
 971                          struct _cpuid4_info_regs *base)
 972 {
 973         this_leaf->id = base->id;
 974         this_leaf->attributes = CACHE_ID;
 975         this_leaf->level = base->eax.split.level;
 976         this_leaf->type = cache_type_map[base->eax.split.type];
 977         this_leaf->coherency_line_size =
 978                                 base->ebx.split.coherency_line_size + 1;
 979         this_leaf->ways_of_associativity =
 980                                 base->ebx.split.ways_of_associativity + 1;
 981         this_leaf->size = base->size;
 982         this_leaf->number_of_sets = base->ecx.split.number_of_sets + 1;
 983         this_leaf->physical_line_partition =
 984                                 base->ebx.split.physical_line_partition + 1;
 985         this_leaf->priv = base->nb;
 986 }
 987 
 988 static int __init_cache_level(unsigned int cpu)
 989 {
 990         struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
 991 
 992         if (!num_cache_leaves)
 993                 return -ENOENT;
 994         if (!this_cpu_ci)
 995                 return -EINVAL;
 996         this_cpu_ci->num_levels = 3;
 997         this_cpu_ci->num_leaves = num_cache_leaves;
 998         return 0;
 999 }
1000 
1001 /*
1002  * The maximum number of threads sharing this cache comes from
1003  * CPUID.4:EAX[25:14], with the cache index as input in ECX. Right-shift the
1004  * APIC ID by that number's order to get the cache id for this cache node.
1005  */
1006 static void get_cache_id(int cpu, struct _cpuid4_info_regs *id4_regs)
1007 {
1008         struct cpuinfo_x86 *c = &cpu_data(cpu);
1009         unsigned long num_threads_sharing;
1010         int index_msb;
1011 
1012         num_threads_sharing = 1 + id4_regs->eax.split.num_threads_sharing;
1013         index_msb = get_count_order(num_threads_sharing);
1014         id4_regs->id = c->apicid >> index_msb;
1015 }
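Worked example: if EAX[25:14] reports 15 (16 threads share the L3), index_msb = get_count_order(16) = 4, and a CPU with APIC ID 0x23 gets cache id 0x2; all sixteen sibling threads compute the same id, which the CACHE_ID attribute set in ci_leaf_init() above exposes through sysfs.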
1016 
1017 static int __populate_cache_leaves(unsigned int cpu)
1018 {
1019         unsigned int idx, ret;
1020         struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
1021         struct cacheinfo *this_leaf = this_cpu_ci->info_list;
1022         struct _cpuid4_info_regs id4_regs = {};
1023 
1024         for (idx = 0; idx < this_cpu_ci->num_leaves; idx++) {
1025                 ret = cpuid4_cache_lookup_regs(idx, &id4_regs);
1026                 if (ret)
1027                         return ret;
1028                 get_cache_id(cpu, &id4_regs);
1029                 ci_leaf_init(this_leaf++, &id4_regs);
1030                 __cache_cpumap_setup(cpu, idx, &id4_regs);
1031         }
1032         this_cpu_ci->cpu_map_populated = true;
1033 
1034         return 0;
1035 }
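The leaves populated here are what the generic cacheinfo core exposes under sysfs. A read-side usage sketch, assuming the standard /sys/devices/system/cpu/cpuX/cache/indexY/ layout:

#include <stdio.h>

int main(void)
{
        char path[128], buf[64];
        int idx;

        for (idx = 0; ; idx++) {
                FILE *f;

                snprintf(path, sizeof(path),
                         "/sys/devices/system/cpu/cpu0/cache/index%d/size",
                         idx);
                f = fopen(path, "r");
                if (!f)
                        break;          /* no more leaves */
                if (fgets(buf, sizeof(buf), f))
                        printf("index%d size: %s", idx, buf);
                fclose(f);
        }
        return 0;
}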
1036 
1037 DEFINE_SMP_CALL_CACHE_FUNCTION(init_cache_level)
1038 DEFINE_SMP_CALL_CACHE_FUNCTION(populate_cache_leaves)
