root/drivers/edac/bluefield_edac.c

/* [<][>][^][v][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. smc_call1
  2. bluefield_gather_report_ecc
  3. bluefield_edac_check
  4. bluefield_edac_init_dimms
  5. bluefield_edac_mc_probe
  6. bluefield_edac_mc_remove

   1 // SPDX-License-Identifier: GPL-2.0
   2 /*
   3  * Bluefield-specific EDAC driver.
   4  *
   5  * Copyright (c) 2019 Mellanox Technologies.
   6  */
   7 
   8 #include <linux/acpi.h>
   9 #include <linux/arm-smccc.h>
  10 #include <linux/bitfield.h>
  11 #include <linux/edac.h>
  12 #include <linux/io.h>
  13 #include <linux/module.h>
  14 #include <linux/platform_device.h>
  15 
  16 #include "edac_module.h"
  17 
  18 #define DRIVER_NAME             "bluefield-edac"
  19 
  20 /*
  21  * Mellanox BlueField EMI (External Memory Interface) register definitions.
  22  */
  23 
/*
 * ECC error counter register. The driver polls it and treats the low
 * field as the single-bit error count and the high field as the
 * double-bit error count.
 */
#define MLXBF_ECC_CNT 0x340
#define MLXBF_ECC_CNT__SERR_CNT GENMASK(15, 0)
#define MLXBF_ECC_CNT__DERR_CNT GENMASK(31, 16)

/* Written by the driver to clear the single/double errors it reported. */
#define MLXBF_ECC_ERR 0x348
#define MLXBF_ECC_ERR__SECC BIT(0)
#define MLXBF_ECC_ERR__DECC BIT(16)

/*
 * Writing START asks the EMI to populate the error detail registers
 * below with information about the last ECC error occurrence.
 */
#define MLXBF_ECC_LATCH_SEL 0x354
#define MLXBF_ECC_LATCH_SEL__START BIT(24)

/* Low 32 bits of the error address. */
#define MLXBF_ERR_ADDR_0 0x358

/* High 32 bits of the error address. */
#define MLXBF_ERR_ADDR_1 0x37c

/* Error type flags and syndrome value of the latched error. */
#define MLXBF_SYNDROM 0x35c
#define MLXBF_SYNDROM__DERR BIT(0)
#define MLXBF_SYNDROM__SERR BIT(1)
#define MLXBF_SYNDROM__SYN GENMASK(25, 16)

/* Additional error info; ERR_PRANK is used to select the reported DIMM. */
#define MLXBF_ADD_INFO 0x364
#define MLXBF_ADD_INFO__ERR_PRANK GENMASK(9, 8)

/* Largest "dimm_per_mc" value accepted at probe time. */
#define MLXBF_EDAC_MAX_DIMM_PER_MC      2
/* Error grain assigned to every populated DIMM. */
#define MLXBF_EDAC_ERROR_GRAIN          8
  49 
  50 /*
  51  * Request MLNX_SIP_GET_DIMM_INFO
  52  *
  53  * Retrieve information about DIMM on a certain slot.
  54  *
  55  * Call register usage:
  56  * a0: MLNX_SIP_GET_DIMM_INFO
  57  * a1: (Memory controller index) << 16 | (Dimm index in memory controller)
  58  * a2-7: not used.
  59  *
  60  * Return status:
  61  * a0: MLXBF_DIMM_INFO defined below describing the DIMM.
  62  * a1-3: not used.
  63  */
  64 #define MLNX_SIP_GET_DIMM_INFO          0x82000008
  65 
/*
 * Format for the SMC response about the memory information.
 *
 * SIZE_GB:   DIMM size in GB; a value of zero is treated as an empty slot.
 * IS_*DIMM:  flags used to choose the reported memory type.
 * RANKS:     rank count, stored per slot in the driver private data.
 * PACKAGE_X: device width; 4, 8 and 16 are recognized.
 */
#define MLXBF_DIMM_INFO__SIZE_GB GENMASK_ULL(15, 0)
#define MLXBF_DIMM_INFO__IS_RDIMM BIT(16)
#define MLXBF_DIMM_INFO__IS_LRDIMM BIT(17)
#define MLXBF_DIMM_INFO__IS_NVDIMM BIT(18)
#define MLXBF_DIMM_INFO__RANKS GENMASK_ULL(23, 21)
#define MLXBF_DIMM_INFO__PACKAGE_X GENMASK_ULL(31, 24)
  73 
/* Per-memory-controller driver state, kept in mci->pvt_info. */
struct bluefield_edac_priv {
        /* Rank count reported by the firmware for each populated slot. */
        int dimm_ranks[MLXBF_EDAC_MAX_DIMM_PER_MC];
        /* Mapped base of the External Memory Interface registers. */
        void __iomem *emi_base;
        /* Number of DIMM slots, read from the "dimm_per_mc" property. */
        int dimm_per_mc;
};
  79 
  80 static u64 smc_call1(u64 smc_op, u64 smc_arg)
  81 {
  82         struct arm_smccc_res res;
  83 
  84         arm_smccc_smc(smc_op, smc_arg, 0, 0, 0, 0, 0, 0, &res);
  85 
  86         return res.a0;
  87 }
  88 
  89 /*
  90  * Gather the ECC information from the External Memory Interface registers
  91  * and report it to the edac handler.
  92  */
  93 static void bluefield_gather_report_ecc(struct mem_ctl_info *mci,
  94                                         int error_cnt,
  95                                         int is_single_ecc)
  96 {
  97         struct bluefield_edac_priv *priv = mci->pvt_info;
  98         u32 dram_additional_info, err_prank, edea0, edea1;
  99         u32 ecc_latch_select, dram_syndrom, serr, derr, syndrom;
 100         enum hw_event_mc_err_type ecc_type;
 101         u64 ecc_dimm_addr;
 102         int ecc_dimm;
 103 
 104         ecc_type = is_single_ecc ? HW_EVENT_ERR_CORRECTED :
 105                                    HW_EVENT_ERR_UNCORRECTED;
 106 
 107         /*
 108          * Tell the External Memory Interface to populate the relevant
 109          * registers with information about the last ECC error occurrence.
 110          */
 111         ecc_latch_select = MLXBF_ECC_LATCH_SEL__START;
 112         writel(ecc_latch_select, priv->emi_base + MLXBF_ECC_LATCH_SEL);
 113 
 114         /*
 115          * Verify that the ECC reported info in the registers is of the
 116          * same type as the one asked to report. If not, just report the
 117          * error without the detailed information.
 118          */
 119         dram_syndrom = readl(priv->emi_base + MLXBF_SYNDROM);
 120         serr = FIELD_GET(MLXBF_SYNDROM__SERR, dram_syndrom);
 121         derr = FIELD_GET(MLXBF_SYNDROM__DERR, dram_syndrom);
 122         syndrom = FIELD_GET(MLXBF_SYNDROM__SYN, dram_syndrom);
 123 
 124         if ((is_single_ecc && !serr) || (!is_single_ecc && !derr)) {
 125                 edac_mc_handle_error(ecc_type, mci, error_cnt, 0, 0, 0,
 126                                      0, 0, -1, mci->ctl_name, "");
 127                 return;
 128         }
 129 
 130         dram_additional_info = readl(priv->emi_base + MLXBF_ADD_INFO);
 131         err_prank = FIELD_GET(MLXBF_ADD_INFO__ERR_PRANK, dram_additional_info);
 132 
 133         ecc_dimm = (err_prank >= 2 && priv->dimm_ranks[0] <= 2) ? 1 : 0;
 134 
 135         edea0 = readl(priv->emi_base + MLXBF_ERR_ADDR_0);
 136         edea1 = readl(priv->emi_base + MLXBF_ERR_ADDR_1);
 137 
 138         ecc_dimm_addr = ((u64)edea1 << 32) | edea0;
 139 
 140         edac_mc_handle_error(ecc_type, mci, error_cnt,
 141                              PFN_DOWN(ecc_dimm_addr),
 142                              offset_in_page(ecc_dimm_addr),
 143                              syndrom, ecc_dimm, 0, 0, mci->ctl_name, "");
 144 }
 145 
 146 static void bluefield_edac_check(struct mem_ctl_info *mci)
 147 {
 148         struct bluefield_edac_priv *priv = mci->pvt_info;
 149         u32 ecc_count, single_error_count, double_error_count, ecc_error = 0;
 150 
 151         /*
 152          * The memory controller might not be initialized by the firmware
 153          * when there isn't memory, which may lead to bad register readings.
 154          */
 155         if (mci->edac_cap == EDAC_FLAG_NONE)
 156                 return;
 157 
 158         ecc_count = readl(priv->emi_base + MLXBF_ECC_CNT);
 159         single_error_count = FIELD_GET(MLXBF_ECC_CNT__SERR_CNT, ecc_count);
 160         double_error_count = FIELD_GET(MLXBF_ECC_CNT__DERR_CNT, ecc_count);
 161 
 162         if (single_error_count) {
 163                 ecc_error |= MLXBF_ECC_ERR__SECC;
 164 
 165                 bluefield_gather_report_ecc(mci, single_error_count, 1);
 166         }
 167 
 168         if (double_error_count) {
 169                 ecc_error |= MLXBF_ECC_ERR__DECC;
 170 
 171                 bluefield_gather_report_ecc(mci, double_error_count, 0);
 172         }
 173 
 174         /* Write to clear reported errors. */
 175         if (ecc_count)
 176                 writel(ecc_error, priv->emi_base + MLXBF_ECC_ERR);
 177 }
 178 
 179 /* Initialize the DIMMs information for the given memory controller. */
 180 static void bluefield_edac_init_dimms(struct mem_ctl_info *mci)
 181 {
 182         struct bluefield_edac_priv *priv = mci->pvt_info;
 183         int mem_ctrl_idx = mci->mc_idx;
 184         struct dimm_info *dimm;
 185         u64 smc_info, smc_arg;
 186         int is_empty = 1, i;
 187 
 188         for (i = 0; i < priv->dimm_per_mc; i++) {
 189                 dimm = mci->dimms[i];
 190 
 191                 smc_arg = mem_ctrl_idx << 16 | i;
 192                 smc_info = smc_call1(MLNX_SIP_GET_DIMM_INFO, smc_arg);
 193 
 194                 if (!FIELD_GET(MLXBF_DIMM_INFO__SIZE_GB, smc_info)) {
 195                         dimm->mtype = MEM_EMPTY;
 196                         continue;
 197                 }
 198 
 199                 is_empty = 0;
 200 
 201                 dimm->edac_mode = EDAC_SECDED;
 202 
 203                 if (FIELD_GET(MLXBF_DIMM_INFO__IS_NVDIMM, smc_info))
 204                         dimm->mtype = MEM_NVDIMM;
 205                 else if (FIELD_GET(MLXBF_DIMM_INFO__IS_LRDIMM, smc_info))
 206                         dimm->mtype = MEM_LRDDR4;
 207                 else if (FIELD_GET(MLXBF_DIMM_INFO__IS_RDIMM, smc_info))
 208                         dimm->mtype = MEM_RDDR4;
 209                 else
 210                         dimm->mtype = MEM_DDR4;
 211 
 212                 dimm->nr_pages =
 213                         FIELD_GET(MLXBF_DIMM_INFO__SIZE_GB, smc_info) *
 214                         (SZ_1G / PAGE_SIZE);
 215                 dimm->grain = MLXBF_EDAC_ERROR_GRAIN;
 216 
 217                 /* Mem controller for BlueField only supports x4, x8 and x16 */
 218                 switch (FIELD_GET(MLXBF_DIMM_INFO__PACKAGE_X, smc_info)) {
 219                 case 4:
 220                         dimm->dtype = DEV_X4;
 221                         break;
 222                 case 8:
 223                         dimm->dtype = DEV_X8;
 224                         break;
 225                 case 16:
 226                         dimm->dtype = DEV_X16;
 227                         break;
 228                 default:
 229                         dimm->dtype = DEV_UNKNOWN;
 230                 }
 231 
 232                 priv->dimm_ranks[i] =
 233                         FIELD_GET(MLXBF_DIMM_INFO__RANKS, smc_info);
 234         }
 235 
 236         if (is_empty)
 237                 mci->edac_cap = EDAC_FLAG_NONE;
 238         else
 239                 mci->edac_cap = EDAC_FLAG_SECDED;
 240 }
 241 
 242 static int bluefield_edac_mc_probe(struct platform_device *pdev)
 243 {
 244         struct bluefield_edac_priv *priv;
 245         struct device *dev = &pdev->dev;
 246         struct edac_mc_layer layers[1];
 247         struct mem_ctl_info *mci;
 248         struct resource *emi_res;
 249         unsigned int mc_idx, dimm_count;
 250         int rc, ret;
 251 
 252         /* Read the MSS (Memory SubSystem) index from ACPI table. */
 253         if (device_property_read_u32(dev, "mss_number", &mc_idx)) {
 254                 dev_warn(dev, "bf_edac: MSS number unknown\n");
 255                 return -EINVAL;
 256         }
 257 
 258         /* Read the DIMMs per MC from ACPI table. */
 259         if (device_property_read_u32(dev, "dimm_per_mc", &dimm_count)) {
 260                 dev_warn(dev, "bf_edac: DIMMs per MC unknown\n");
 261                 return -EINVAL;
 262         }
 263 
 264         if (dimm_count > MLXBF_EDAC_MAX_DIMM_PER_MC) {
 265                 dev_warn(dev, "bf_edac: DIMMs per MC not valid\n");
 266                 return -EINVAL;
 267         }
 268 
 269         emi_res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 270         if (!emi_res)
 271                 return -EINVAL;
 272 
 273         layers[0].type = EDAC_MC_LAYER_SLOT;
 274         layers[0].size = dimm_count;
 275         layers[0].is_virt_csrow = true;
 276 
 277         mci = edac_mc_alloc(mc_idx, ARRAY_SIZE(layers), layers, sizeof(*priv));
 278         if (!mci)
 279                 return -ENOMEM;
 280 
 281         priv = mci->pvt_info;
 282 
 283         priv->dimm_per_mc = dimm_count;
 284         priv->emi_base = devm_ioremap_resource(dev, emi_res);
 285         if (IS_ERR(priv->emi_base)) {
 286                 dev_err(dev, "failed to map EMI IO resource\n");
 287                 ret = PTR_ERR(priv->emi_base);
 288                 goto err;
 289         }
 290 
 291         mci->pdev = dev;
 292         mci->mtype_cap = MEM_FLAG_DDR4 | MEM_FLAG_RDDR4 |
 293                          MEM_FLAG_LRDDR4 | MEM_FLAG_NVDIMM;
 294         mci->edac_ctl_cap = EDAC_FLAG_SECDED;
 295 
 296         mci->mod_name = DRIVER_NAME;
 297         mci->ctl_name = "BlueField_Memory_Controller";
 298         mci->dev_name = dev_name(dev);
 299         mci->edac_check = bluefield_edac_check;
 300 
 301         /* Initialize mci with the actual populated DIMM information. */
 302         bluefield_edac_init_dimms(mci);
 303 
 304         platform_set_drvdata(pdev, mci);
 305 
 306         /* Register with EDAC core */
 307         rc = edac_mc_add_mc(mci);
 308         if (rc) {
 309                 dev_err(dev, "failed to register with EDAC core\n");
 310                 ret = rc;
 311                 goto err;
 312         }
 313 
 314         /* Only POLL mode supported so far. */
 315         edac_op_state = EDAC_OPSTATE_POLL;
 316 
 317         return 0;
 318 
 319 err:
 320         edac_mc_free(mci);
 321 
 322         return ret;
 323 
 324 }
 325 
 326 static int bluefield_edac_mc_remove(struct platform_device *pdev)
 327 {
 328         struct mem_ctl_info *mci = platform_get_drvdata(pdev);
 329 
 330         edac_mc_del_mc(&pdev->dev);
 331         edac_mc_free(mci);
 332 
 333         return 0;
 334 }
 335 
/* ACPI IDs this driver binds to; the empty entry terminates the table. */
static const struct acpi_device_id bluefield_mc_acpi_ids[] = {
        {"MLNXBF08", 0},
        {}
};

MODULE_DEVICE_TABLE(acpi, bluefield_mc_acpi_ids);
 342 
 343 static struct platform_driver bluefield_edac_mc_driver = {
 344         .driver = {
 345                 .name = DRIVER_NAME,
 346                 .acpi_match_table = bluefield_mc_acpi_ids,
 347         },
 348         .probe = bluefield_edac_mc_probe,
 349         .remove = bluefield_edac_mc_remove,
 350 };
 351 
 352 module_platform_driver(bluefield_edac_mc_driver);
 353 
 354 MODULE_DESCRIPTION("Mellanox BlueField memory edac driver");
 355 MODULE_AUTHOR("Mellanox Technologies");
 356 MODULE_LICENSE("GPL v2");

/* [<][>][^][v][top][bottom][index][help] */