1#include "amd64_edac.h"
2#include <asm/amd_nb.h>
3
4static struct edac_pci_ctl_info *pci_ctl;
5
6static int report_gart_errors;
7module_param(report_gart_errors, int, 0644);
8
9/*
10 * Set by command line parameter. If BIOS has enabled the ECC, this override is
11 * cleared to prevent re-enabling the hardware by this driver.
12 */
13static int ecc_enable_override;
14module_param(ecc_enable_override, int, 0644);
15
16static struct msr __percpu *msrs;
17
18/*
19 * count successfully initialized driver instances for setup_pci_device()
20 */
21static atomic_t drv_instances = ATOMIC_INIT(0);
22
23/* Per-node stuff */
24static struct ecc_settings **ecc_stngs;
25
26/*
27 * Valid scrub rates for the K8 hardware memory scrubber. We map the scrubbing
 * bandwidth to a valid bit pattern. The 'set' operation finds the 'matching
 * or higher value'.
 *
 * FIXME: Produce a better mapping/linearisation.
32 */
33static const struct scrubrate {
34       u32 scrubval;           /* bit pattern for scrub rate */
35       u32 bandwidth;          /* bandwidth consumed (bytes/sec) */
36} scrubrates[] = {
37	{ 0x01, 1600000000UL},
38	{ 0x02, 800000000UL},
39	{ 0x03, 400000000UL},
40	{ 0x04, 200000000UL},
41	{ 0x05, 100000000UL},
42	{ 0x06, 50000000UL},
43	{ 0x07, 25000000UL},
44	{ 0x08, 12284069UL},
45	{ 0x09, 6274509UL},
46	{ 0x0A, 3121951UL},
47	{ 0x0B, 1560975UL},
48	{ 0x0C, 781440UL},
49	{ 0x0D, 390720UL},
50	{ 0x0E, 195300UL},
51	{ 0x0F, 97650UL},
52	{ 0x10, 48854UL},
53	{ 0x11, 24427UL},
54	{ 0x12, 12213UL},
55	{ 0x13, 6101UL},
56	{ 0x14, 3051UL},
57	{ 0x15, 1523UL},
58	{ 0x16, 761UL},
59	{ 0x00, 0UL},        /* scrubbing off */
60};
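
/*
 * Illustrative example (numbers made up, not from the BKDG): the table above
 * is sorted by descending bandwidth, so a requested rate of e.g. 60000000
 * bytes/sec is mapped by __set_scrub_rate() below to the first entry that
 * does not exceed it, i.e. scrubval 0x06 (50000000 bytes/sec). A request
 * smaller than the slowest listed rate falls through to the terminating
 * { 0x00, 0 } entry and switches scrubbing off.
 */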
61
62int __amd64_read_pci_cfg_dword(struct pci_dev *pdev, int offset,
63			       u32 *val, const char *func)
64{
65	int err = 0;
66
67	err = pci_read_config_dword(pdev, offset, val);
68	if (err)
69		amd64_warn("%s: error reading F%dx%03x.\n",
70			   func, PCI_FUNC(pdev->devfn), offset);
71
72	return err;
73}
74
75int __amd64_write_pci_cfg_dword(struct pci_dev *pdev, int offset,
76				u32 val, const char *func)
77{
78	int err = 0;
79
80	err = pci_write_config_dword(pdev, offset, val);
81	if (err)
82		amd64_warn("%s: error writing to F%dx%03x.\n",
83			   func, PCI_FUNC(pdev->devfn), offset);
84
85	return err;
86}
87
88/*
89 * Select DCT to which PCI cfg accesses are routed
90 */
91static void f15h_select_dct(struct amd64_pvt *pvt, u8 dct)
92{
93	u32 reg = 0;
94
95	amd64_read_pci_cfg(pvt->F1, DCT_CFG_SEL, &reg);
96	reg &= (pvt->model == 0x30) ? ~3 : ~1;
97	reg |= dct;
98	amd64_write_pci_cfg(pvt->F1, DCT_CFG_SEL, reg);
99}
100
101/*
102 *
103 * Depending on the family, F2 DCT reads need special handling:
104 *
105 * K8: has a single DCT only and no address offsets >= 0x100
106 *
107 * F10h: each DCT has its own set of regs
108 *	DCT0 -> F2x040..
109 *	DCT1 -> F2x140..
110 *
111 * F16h: has only 1 DCT
112 *
113 * F15h: we select which DCT we access using F1x10C[DctCfgSel]
114 */
115static inline int amd64_read_dct_pci_cfg(struct amd64_pvt *pvt, u8 dct,
116					 int offset, u32 *val)
117{
118	switch (pvt->fam) {
119	case 0xf:
120		if (dct || offset >= 0x100)
121			return -EINVAL;
122		break;
123
124	case 0x10:
125		if (dct) {
126			/*
127			 * Note: If ganging is enabled, barring the regs
			 * F2x[1,0]98 and F2x[1,0]9C; reads to F2x1xx
129			 * return 0. (cf. Section 2.8.1 F10h BKDG)
130			 */
131			if (dct_ganging_enabled(pvt))
132				return 0;
133
134			offset += 0x100;
135		}
136		break;
137
138	case 0x15:
139		/*
140		 * F15h: F2x1xx addresses do not map explicitly to DCT1.
141		 * We should select which DCT we access using F1x10C[DctCfgSel]
142		 */
143		dct = (dct && pvt->model == 0x30) ? 3 : dct;
144		f15h_select_dct(pvt, dct);
145		break;
146
147	case 0x16:
148		if (dct)
149			return -EINVAL;
150		break;
151
152	default:
153		break;
154	}
155	return amd64_read_pci_cfg(pvt->F2, offset, val);
156}
157
158/*
159 * Memory scrubber control interface. For K8, memory scrubbing is handled by
160 * hardware and can involve L2 cache, dcache as well as the main memory. With
161 * F10, this is extended to L3 cache scrubbing on CPU models sporting that
162 * functionality.
163 *
164 * This causes the "units" for the scrubbing speed to vary from 64 byte blocks
165 * (dram) over to cache lines. This is nasty, so we will use bandwidth in
166 * bytes/sec for the setting.
167 *
168 * Currently, we only do dram scrubbing. If the scrubbing is done in software on
169 * other archs, we might not have access to the caches directly.
170 */
171
172/*
173 * scan the scrub rate mapping table for a close or matching bandwidth value to
 * issue. If the requested rate is too big, use the last maximum value found.
175 */
176static int __set_scrub_rate(struct pci_dev *ctl, u32 new_bw, u32 min_rate)
177{
178	u32 scrubval;
179	int i;
180
181	/*
	 * map the configured rate (new_bw) to a value specific to the AMD64
	 * memory controller and apply to register. Scan the descending table
	 * for the first bandwidth entry that is less than or equal to the
	 * requested setting and program that.
186	 *
187	 * If no suitable bandwidth is found, turn off DRAM scrubbing entirely
188	 * by falling back to the last element in scrubrates[].
189	 */
190	for (i = 0; i < ARRAY_SIZE(scrubrates) - 1; i++) {
191		/*
192		 * skip scrub rates which aren't recommended
193		 * (see F10 BKDG, F3x58)
194		 */
195		if (scrubrates[i].scrubval < min_rate)
196			continue;
197
198		if (scrubrates[i].bandwidth <= new_bw)
199			break;
200	}
201
202	scrubval = scrubrates[i].scrubval;
203
204	pci_write_bits32(ctl, SCRCTRL, scrubval, 0x001F);
205
206	if (scrubval)
207		return scrubrates[i].bandwidth;
208
209	return 0;
210}
211
212static int set_scrub_rate(struct mem_ctl_info *mci, u32 bw)
213{
214	struct amd64_pvt *pvt = mci->pvt_info;
215	u32 min_scrubrate = 0x5;
216
217	if (pvt->fam == 0xf)
218		min_scrubrate = 0x0;
219
220	/* Erratum #505 */
221	if (pvt->fam == 0x15 && pvt->model < 0x10)
222		f15h_select_dct(pvt, 0);
223
224	return __set_scrub_rate(pvt->F3, bw, min_scrubrate);
225}
226
227static int get_scrub_rate(struct mem_ctl_info *mci)
228{
229	struct amd64_pvt *pvt = mci->pvt_info;
230	u32 scrubval = 0;
231	int i, retval = -EINVAL;
232
233	/* Erratum #505 */
234	if (pvt->fam == 0x15 && pvt->model < 0x10)
235		f15h_select_dct(pvt, 0);
236
237	amd64_read_pci_cfg(pvt->F3, SCRCTRL, &scrubval);
238
239	scrubval = scrubval & 0x001F;
240
241	for (i = 0; i < ARRAY_SIZE(scrubrates); i++) {
242		if (scrubrates[i].scrubval == scrubval) {
243			retval = scrubrates[i].bandwidth;
244			break;
245		}
246	}
247	return retval;
248}
249
250/*
251 * returns true if the SysAddr given by sys_addr matches the
252 * DRAM base/limit associated with node_id
253 */
254static bool base_limit_match(struct amd64_pvt *pvt, u64 sys_addr, u8 nid)
255{
256	u64 addr;
257
258	/* The K8 treats this as a 40-bit value.  However, bits 63-40 will be
259	 * all ones if the most significant implemented address bit is 1.
260	 * Here we discard bits 63-40.  See section 3.4.2 of AMD publication
261	 * 24592: AMD x86-64 Architecture Programmer's Manual Volume 1
262	 * Application Programming.
263	 */
264	addr = sys_addr & 0x000000ffffffffffull;
265
266	return ((addr >= get_dram_base(pvt, nid)) &&
267		(addr <= get_dram_limit(pvt, nid)));
268}
269
270/*
271 * Attempt to map a SysAddr to a node. On success, return a pointer to the
272 * mem_ctl_info structure for the node that the SysAddr maps to.
273 *
274 * On failure, return NULL.
275 */
276static struct mem_ctl_info *find_mc_by_sys_addr(struct mem_ctl_info *mci,
277						u64 sys_addr)
278{
279	struct amd64_pvt *pvt;
280	u8 node_id;
281	u32 intlv_en, bits;
282
283	/*
284	 * Here we use the DRAM Base (section 3.4.4.1) and DRAM Limit (section
285	 * 3.4.4.2) registers to map the SysAddr to a node ID.
286	 */
287	pvt = mci->pvt_info;
288
289	/*
290	 * The value of this field should be the same for all DRAM Base
291	 * registers.  Therefore we arbitrarily choose to read it from the
292	 * register for node 0.
293	 */
294	intlv_en = dram_intlv_en(pvt, 0);
295
296	if (intlv_en == 0) {
297		for (node_id = 0; node_id < DRAM_RANGES; node_id++) {
298			if (base_limit_match(pvt, sys_addr, node_id))
299				goto found;
300		}
301		goto err_no_match;
302	}
303
304	if (unlikely((intlv_en != 0x01) &&
305		     (intlv_en != 0x03) &&
306		     (intlv_en != 0x07))) {
307		amd64_warn("DRAM Base[IntlvEn] junk value: 0x%x, BIOS bug?\n", intlv_en);
308		return NULL;
309	}
310
311	bits = (((u32) sys_addr) >> 12) & intlv_en;
312
313	for (node_id = 0; ; ) {
314		if ((dram_intlv_sel(pvt, node_id) & intlv_en) == bits)
315			break;	/* intlv_sel field matches */
316
317		if (++node_id >= DRAM_RANGES)
318			goto err_no_match;
319	}
320
321	/* sanity test for sys_addr */
322	if (unlikely(!base_limit_match(pvt, sys_addr, node_id))) {
		amd64_warn("%s: sys_addr 0x%llx falls outside base/limit address "
			   "range for node %d with node interleaving enabled.\n",
325			   __func__, sys_addr, node_id);
326		return NULL;
327	}
328
329found:
330	return edac_mc_find((int)node_id);
331
332err_no_match:
333	edac_dbg(2, "sys_addr 0x%lx doesn't match any node\n",
334		 (unsigned long)sys_addr);
335
336	return NULL;
337}
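
/*
 * Worked example for the interleaved path above (illustrative only): with
 * two-node interleaving, DRAM Base[IntlvEn] = 0x01, so bit 12 of the SysAddr
 * selects the node. A SysAddr of 0x12345000 has bit 12 set, which matches the
 * node whose DRAM Limit[IntlvSel] field equals 0x1; find_mc_by_sys_addr()
 * then verifies the address against that node's base/limit pair before
 * returning its mci.
 */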
338
339/*
340 * compute the CS base address of the @csrow on the DRAM controller @dct.
341 * For details see F2x[5C:40] in the processor's BKDG
342 */
343static void get_cs_base_and_mask(struct amd64_pvt *pvt, int csrow, u8 dct,
344				 u64 *base, u64 *mask)
345{
346	u64 csbase, csmask, base_bits, mask_bits;
347	u8 addr_shift;
348
349	if (pvt->fam == 0xf && pvt->ext_model < K8_REV_F) {
350		csbase		= pvt->csels[dct].csbases[csrow];
351		csmask		= pvt->csels[dct].csmasks[csrow];
352		base_bits	= GENMASK_ULL(31, 21) | GENMASK_ULL(15, 9);
353		mask_bits	= GENMASK_ULL(29, 21) | GENMASK_ULL(15, 9);
354		addr_shift	= 4;
355
356	/*
357	 * F16h and F15h, models 30h and later need two addr_shift values:
358	 * 8 for high and 6 for low (cf. F16h BKDG).
359	 */
360	} else if (pvt->fam == 0x16 ||
361		  (pvt->fam == 0x15 && pvt->model >= 0x30)) {
362		csbase          = pvt->csels[dct].csbases[csrow];
363		csmask          = pvt->csels[dct].csmasks[csrow >> 1];
364
365		*base  = (csbase & GENMASK_ULL(15,  5)) << 6;
366		*base |= (csbase & GENMASK_ULL(30, 19)) << 8;
367
368		*mask = ~0ULL;
369		/* poke holes for the csmask */
370		*mask &= ~((GENMASK_ULL(15, 5)  << 6) |
371			   (GENMASK_ULL(30, 19) << 8));
372
373		*mask |= (csmask & GENMASK_ULL(15, 5))  << 6;
374		*mask |= (csmask & GENMASK_ULL(30, 19)) << 8;
375
376		return;
377	} else {
378		csbase		= pvt->csels[dct].csbases[csrow];
379		csmask		= pvt->csels[dct].csmasks[csrow >> 1];
380		addr_shift	= 8;
381
382		if (pvt->fam == 0x15)
383			base_bits = mask_bits =
384				GENMASK_ULL(30,19) | GENMASK_ULL(13,5);
385		else
386			base_bits = mask_bits =
387				GENMASK_ULL(28,19) | GENMASK_ULL(13,5);
388	}
389
390	*base  = (csbase & base_bits) << addr_shift;
391
392	*mask  = ~0ULL;
393	/* poke holes for the csmask */
394	*mask &= ~(mask_bits << addr_shift);
395	/* OR them in */
396	*mask |= (csmask & mask_bits) << addr_shift;
397}
398
399#define for_each_chip_select(i, dct, pvt) \
400	for (i = 0; i < pvt->csels[dct].b_cnt; i++)
401
402#define chip_select_base(i, dct, pvt) \
403	pvt->csels[dct].csbases[i]
404
405#define for_each_chip_select_mask(i, dct, pvt) \
406	for (i = 0; i < pvt->csels[dct].m_cnt; i++)
407
408/*
409 * @input_addr is an InputAddr associated with the node given by mci. Return the
410 * csrow that input_addr maps to, or -1 on failure (no csrow claims input_addr).
411 */
412static int input_addr_to_csrow(struct mem_ctl_info *mci, u64 input_addr)
413{
414	struct amd64_pvt *pvt;
415	int csrow;
416	u64 base, mask;
417
418	pvt = mci->pvt_info;
419
420	for_each_chip_select(csrow, 0, pvt) {
421		if (!csrow_enabled(csrow, 0, pvt))
422			continue;
423
424		get_cs_base_and_mask(pvt, csrow, 0, &base, &mask);
425
426		mask = ~mask;
427
428		if ((input_addr & mask) == (base & mask)) {
429			edac_dbg(2, "InputAddr 0x%lx matches csrow %d (node %d)\n",
430				 (unsigned long)input_addr, csrow,
431				 pvt->mc_node_id);
432
433			return csrow;
434		}
435	}
436	edac_dbg(2, "no matching csrow for InputAddr 0x%lx (MC node %d)\n",
437		 (unsigned long)input_addr, pvt->mc_node_id);
438
439	return -1;
440}
441
442/*
443 * Obtain info from the DRAM Hole Address Register (section 3.4.8, pub #26094)
444 * for the node represented by mci. Info is passed back in *hole_base,
445 * *hole_offset, and *hole_size.  Function returns 0 if info is valid or 1 if
446 * info is invalid. Info may be invalid for either of the following reasons:
447 *
448 * - The revision of the node is not E or greater.  In this case, the DRAM Hole
449 *   Address Register does not exist.
450 *
451 * - The DramHoleValid bit is cleared in the DRAM Hole Address Register,
452 *   indicating that its contents are not valid.
453 *
454 * The values passed back in *hole_base, *hole_offset, and *hole_size are
455 * complete 32-bit values despite the fact that the bitfields in the DHAR
456 * only represent bits 31-24 of the base and offset values.
457 */
458int amd64_get_dram_hole_info(struct mem_ctl_info *mci, u64 *hole_base,
459			     u64 *hole_offset, u64 *hole_size)
460{
461	struct amd64_pvt *pvt = mci->pvt_info;
462
463	/* only revE and later have the DRAM Hole Address Register */
464	if (pvt->fam == 0xf && pvt->ext_model < K8_REV_E) {
465		edac_dbg(1, "  revision %d for node %d does not support DHAR\n",
466			 pvt->ext_model, pvt->mc_node_id);
467		return 1;
468	}
469
470	/* valid for Fam10h and above */
471	if (pvt->fam >= 0x10 && !dhar_mem_hoist_valid(pvt)) {
472		edac_dbg(1, "  Dram Memory Hoisting is DISABLED on this system\n");
473		return 1;
474	}
475
476	if (!dhar_valid(pvt)) {
477		edac_dbg(1, "  Dram Memory Hoisting is DISABLED on this node %d\n",
478			 pvt->mc_node_id);
479		return 1;
480	}
481
482	/* This node has Memory Hoisting */
483
484	/* +------------------+--------------------+--------------------+-----
485	 * | memory           | DRAM hole          | relocated          |
486	 * | [0, (x - 1)]     | [x, 0xffffffff]    | addresses from     |
487	 * |                  |                    | DRAM hole          |
488	 * |                  |                    | [0x100000000,      |
489	 * |                  |                    |  (0x100000000+     |
490	 * |                  |                    |   (0xffffffff-x))] |
491	 * +------------------+--------------------+--------------------+-----
492	 *
493	 * Above is a diagram of physical memory showing the DRAM hole and the
494	 * relocated addresses from the DRAM hole.  As shown, the DRAM hole
495	 * starts at address x (the base address) and extends through address
496	 * 0xffffffff.  The DRAM Hole Address Register (DHAR) relocates the
497	 * addresses in the hole so that they start at 0x100000000.
498	 */
499
500	*hole_base = dhar_base(pvt);
501	*hole_size = (1ULL << 32) - *hole_base;
502
503	*hole_offset = (pvt->fam > 0xf) ? f10_dhar_offset(pvt)
504					: k8_dhar_offset(pvt);
505
506	edac_dbg(1, "  DHAR info for node %d base 0x%lx offset 0x%lx size 0x%lx\n",
507		 pvt->mc_node_id, (unsigned long)*hole_base,
508		 (unsigned long)*hole_offset, (unsigned long)*hole_size);
509
510	return 0;
511}
512EXPORT_SYMBOL_GPL(amd64_get_dram_hole_info);
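
/*
 * Example of the DHAR arithmetic above (hypothetical numbers): with a hole
 * base of 0xc0000000, amd64_get_dram_hole_info() reports
 * hole_size = (1ULL << 32) - 0xc0000000 = 0x40000000, i.e. the DRAM hidden
 * behind the MMIO hole becomes accessible again at SysAddr 0x100000000 and
 * above, shifted down by hole_offset.
 */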
513
514/*
515 * Return the DramAddr that the SysAddr given by @sys_addr maps to.  It is
516 * assumed that sys_addr maps to the node given by mci.
517 *
518 * The first part of section 3.4.4 (p. 70) shows how the DRAM Base (section
519 * 3.4.4.1) and DRAM Limit (section 3.4.4.2) registers are used to translate a
520 * SysAddr to a DramAddr. If the DRAM Hole Address Register (DHAR) is enabled,
521 * then it is also involved in translating a SysAddr to a DramAddr. Sections
522 * 3.4.8 and 3.5.8.2 describe the DHAR and how it is used for memory hoisting.
523 * These parts of the documentation are unclear. I interpret them as follows:
524 *
525 * When node n receives a SysAddr, it processes the SysAddr as follows:
526 *
527 * 1. It extracts the DRAMBase and DRAMLimit values from the DRAM Base and DRAM
528 *    Limit registers for node n. If the SysAddr is not within the range
 *    specified by the base and limit values, then node n ignores the SysAddr
530 *    (since it does not map to node n). Otherwise continue to step 2 below.
531 *
532 * 2. If the DramHoleValid bit of the DHAR for node n is clear, the DHAR is
533 *    disabled so skip to step 3 below. Otherwise see if the SysAddr is within
534 *    the range of relocated addresses (starting at 0x100000000) from the DRAM
535 *    hole. If not, skip to step 3 below. Else get the value of the
536 *    DramHoleOffset field from the DHAR. To obtain the DramAddr, subtract the
537 *    offset defined by this value from the SysAddr.
538 *
539 * 3. Obtain the base address for node n from the DRAMBase field of the DRAM
540 *    Base register for node n. To obtain the DramAddr, subtract the base
541 *    address from the SysAddr, as shown near the start of section 3.4.4 (p.70).
542 */
543static u64 sys_addr_to_dram_addr(struct mem_ctl_info *mci, u64 sys_addr)
544{
545	struct amd64_pvt *pvt = mci->pvt_info;
546	u64 dram_base, hole_base, hole_offset, hole_size, dram_addr;
547	int ret;
548
549	dram_base = get_dram_base(pvt, pvt->mc_node_id);
550
551	ret = amd64_get_dram_hole_info(mci, &hole_base, &hole_offset,
552				      &hole_size);
553	if (!ret) {
554		if ((sys_addr >= (1ULL << 32)) &&
555		    (sys_addr < ((1ULL << 32) + hole_size))) {
556			/* use DHAR to translate SysAddr to DramAddr */
557			dram_addr = sys_addr - hole_offset;
558
559			edac_dbg(2, "using DHAR to translate SysAddr 0x%lx to DramAddr 0x%lx\n",
560				 (unsigned long)sys_addr,
561				 (unsigned long)dram_addr);
562
563			return dram_addr;
564		}
565	}
566
567	/*
568	 * Translate the SysAddr to a DramAddr as shown near the start of
569	 * section 3.4.4 (p. 70).  Although sys_addr is a 64-bit value, the k8
570	 * only deals with 40-bit values.  Therefore we discard bits 63-40 of
571	 * sys_addr below.  If bit 39 of sys_addr is 1 then the bits we
572	 * discard are all 1s.  Otherwise the bits we discard are all 0s.  See
573	 * section 3.4.2 of AMD publication 24592: AMD x86-64 Architecture
574	 * Programmer's Manual Volume 1 Application Programming.
575	 */
576	dram_addr = (sys_addr & GENMASK_ULL(39, 0)) - dram_base;
577
578	edac_dbg(2, "using DRAM Base register to translate SysAddr 0x%lx to DramAddr 0x%lx\n",
579		 (unsigned long)sys_addr, (unsigned long)dram_addr);
580	return dram_addr;
581}
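
/*
 * Quick illustration of the two paths above (hypothetical values): if the
 * node's DRAM base is 0x100000000 and there is no hole, a SysAddr of
 * 0x123456000 yields DramAddr 0x23456000 (plain base subtraction). If the
 * DHAR is valid and the SysAddr lies in the relocated window starting at
 * 4GB, hole_offset is subtracted instead.
 */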
582
583/*
584 * @intlv_en is the value of the IntlvEn field from a DRAM Base register
585 * (section 3.4.4.1).  Return the number of bits from a SysAddr that are used
586 * for node interleaving.
587 */
588static int num_node_interleave_bits(unsigned intlv_en)
589{
590	static const int intlv_shift_table[] = { 0, 1, 0, 2, 0, 0, 0, 3 };
591	int n;
592
593	BUG_ON(intlv_en > 7);
594	n = intlv_shift_table[intlv_en];
595	return n;
596}
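
/*
 * For instance, IntlvEn = 0x1 selects 1 interleave bit (2 nodes), 0x3 selects
 * 2 bits (4 nodes) and 0x7 selects 3 bits (8 nodes); all other encodings map
 * to 0 in the table above.
 */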
597
598/* Translate the DramAddr given by @dram_addr to an InputAddr. */
599static u64 dram_addr_to_input_addr(struct mem_ctl_info *mci, u64 dram_addr)
600{
601	struct amd64_pvt *pvt;
602	int intlv_shift;
603	u64 input_addr;
604
605	pvt = mci->pvt_info;
606
607	/*
608	 * See the start of section 3.4.4 (p. 70, BKDG #26094, K8, revA-E)
609	 * concerning translating a DramAddr to an InputAddr.
610	 */
611	intlv_shift = num_node_interleave_bits(dram_intlv_en(pvt, 0));
612	input_addr = ((dram_addr >> intlv_shift) & GENMASK_ULL(35, 12)) +
613		      (dram_addr & 0xfff);
614
615	edac_dbg(2, "  Intlv Shift=%d DramAddr=0x%lx maps to InputAddr=0x%lx\n",
616		 intlv_shift, (unsigned long)dram_addr,
617		 (unsigned long)input_addr);
618
619	return input_addr;
620}
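
/*
 * Worked example (hypothetical address): with one interleave bit
 * (intlv_shift = 1), a DramAddr of 0x12345678 becomes
 * ((0x12345678 >> 1) & GENMASK_ULL(35, 12)) + 0x678 = 0x091a2000 + 0x678
 * = 0x091a2678, i.e. the interleave bit above the 4K page offset is squeezed
 * out while the low 12 bits are preserved.
 */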
621
622/*
623 * Translate the SysAddr represented by @sys_addr to an InputAddr.  It is
624 * assumed that @sys_addr maps to the node given by mci.
625 */
626static u64 sys_addr_to_input_addr(struct mem_ctl_info *mci, u64 sys_addr)
627{
628	u64 input_addr;
629
630	input_addr =
631	    dram_addr_to_input_addr(mci, sys_addr_to_dram_addr(mci, sys_addr));
632
	edac_dbg(2, "SysAddr 0x%lx translates to InputAddr 0x%lx\n",
634		 (unsigned long)sys_addr, (unsigned long)input_addr);
635
636	return input_addr;
637}
638
639/* Map the Error address to a PAGE and PAGE OFFSET. */
640static inline void error_address_to_page_and_offset(u64 error_address,
641						    struct err_info *err)
642{
643	err->page = (u32) (error_address >> PAGE_SHIFT);
644	err->offset = ((u32) error_address) & ~PAGE_MASK;
645}
646
647/*
648 * @sys_addr is an error address (a SysAddr) extracted from the MCA NB Address
649 * Low (section 3.6.4.5) and MCA NB Address High (section 3.6.4.6) registers
650 * of a node that detected an ECC memory error.  mci represents the node that
651 * the error address maps to (possibly different from the node that detected
652 * the error).  Return the number of the csrow that sys_addr maps to, or -1 on
653 * error.
654 */
655static int sys_addr_to_csrow(struct mem_ctl_info *mci, u64 sys_addr)
656{
657	int csrow;
658
659	csrow = input_addr_to_csrow(mci, sys_addr_to_input_addr(mci, sys_addr));
660
661	if (csrow == -1)
662		amd64_mc_err(mci, "Failed to translate InputAddr to csrow for "
663				  "address 0x%lx\n", (unsigned long)sys_addr);
664	return csrow;
665}
666
667static int get_channel_from_ecc_syndrome(struct mem_ctl_info *, u16);
668
669/*
670 * Determine if the DIMMs have ECC enabled. ECC is enabled ONLY if all the DIMMs
671 * are ECC capable.
672 */
673static unsigned long determine_edac_cap(struct amd64_pvt *pvt)
674{
675	u8 bit;
676	unsigned long edac_cap = EDAC_FLAG_NONE;
677
678	bit = (pvt->fam > 0xf || pvt->ext_model >= K8_REV_F)
679		? 19
680		: 17;
681
682	if (pvt->dclr0 & BIT(bit))
683		edac_cap = EDAC_FLAG_SECDED;
684
685	return edac_cap;
686}
687
688static void debug_display_dimm_sizes(struct amd64_pvt *, u8);
689
690static void debug_dump_dramcfg_low(struct amd64_pvt *pvt, u32 dclr, int chan)
691{
692	edac_dbg(1, "F2x%d90 (DRAM Cfg Low): 0x%08x\n", chan, dclr);
693
694	if (pvt->dram_type == MEM_LRDDR3) {
695		u32 dcsm = pvt->csels[chan].csmasks[0];
696		/*
697		 * It's assumed all LRDIMMs in a DCT are going to be of
		 * the same 'type' until proven otherwise. So, use a cs
699		 * value of '0' here to get dcsm value.
700		 */
701		edac_dbg(1, " LRDIMM %dx rank multiply\n", (dcsm & 0x3));
702	}
703
704	edac_dbg(1, "All DIMMs support ECC:%s\n",
705		    (dclr & BIT(19)) ? "yes" : "no");
706
707
708	edac_dbg(1, "  PAR/ERR parity: %s\n",
709		 (dclr & BIT(8)) ?  "enabled" : "disabled");
710
711	if (pvt->fam == 0x10)
712		edac_dbg(1, "  DCT 128bit mode width: %s\n",
713			 (dclr & BIT(11)) ?  "128b" : "64b");
714
715	edac_dbg(1, "  x4 logical DIMMs present: L0: %s L1: %s L2: %s L3: %s\n",
716		 (dclr & BIT(12)) ?  "yes" : "no",
717		 (dclr & BIT(13)) ?  "yes" : "no",
718		 (dclr & BIT(14)) ?  "yes" : "no",
719		 (dclr & BIT(15)) ?  "yes" : "no");
720}
721
722/* Display and decode various NB registers for debug purposes. */
723static void dump_misc_regs(struct amd64_pvt *pvt)
724{
725	edac_dbg(1, "F3xE8 (NB Cap): 0x%08x\n", pvt->nbcap);
726
727	edac_dbg(1, "  NB two channel DRAM capable: %s\n",
728		 (pvt->nbcap & NBCAP_DCT_DUAL) ? "yes" : "no");
729
730	edac_dbg(1, "  ECC capable: %s, ChipKill ECC capable: %s\n",
731		 (pvt->nbcap & NBCAP_SECDED) ? "yes" : "no",
732		 (pvt->nbcap & NBCAP_CHIPKILL) ? "yes" : "no");
733
734	debug_dump_dramcfg_low(pvt, pvt->dclr0, 0);
735
736	edac_dbg(1, "F3xB0 (Online Spare): 0x%08x\n", pvt->online_spare);
737
738	edac_dbg(1, "F1xF0 (DRAM Hole Address): 0x%08x, base: 0x%08x, offset: 0x%08x\n",
739		 pvt->dhar, dhar_base(pvt),
740		 (pvt->fam == 0xf) ? k8_dhar_offset(pvt)
741				   : f10_dhar_offset(pvt));
742
743	edac_dbg(1, "  DramHoleValid: %s\n", dhar_valid(pvt) ? "yes" : "no");
744
745	debug_display_dimm_sizes(pvt, 0);
746
747	/* everything below this point is Fam10h and above */
748	if (pvt->fam == 0xf)
749		return;
750
751	debug_display_dimm_sizes(pvt, 1);
752
753	amd64_info("using %s syndromes.\n", ((pvt->ecc_sym_sz == 8) ? "x8" : "x4"));
754
755	/* Only if NOT ganged does dclr1 have valid info */
756	if (!dct_ganging_enabled(pvt))
757		debug_dump_dramcfg_low(pvt, pvt->dclr1, 1);
758}
759
760/*
761 * See BKDG, F2x[1,0][5C:40], F2[1,0][6C:60]
762 */
763static void prep_chip_selects(struct amd64_pvt *pvt)
764{
765	if (pvt->fam == 0xf && pvt->ext_model < K8_REV_F) {
766		pvt->csels[0].b_cnt = pvt->csels[1].b_cnt = 8;
767		pvt->csels[0].m_cnt = pvt->csels[1].m_cnt = 8;
768	} else if (pvt->fam == 0x15 && pvt->model == 0x30) {
769		pvt->csels[0].b_cnt = pvt->csels[1].b_cnt = 4;
770		pvt->csels[0].m_cnt = pvt->csels[1].m_cnt = 2;
771	} else {
772		pvt->csels[0].b_cnt = pvt->csels[1].b_cnt = 8;
773		pvt->csels[0].m_cnt = pvt->csels[1].m_cnt = 4;
774	}
775}
776
777/*
778 * Function 2 Offset F10_DCSB0; read in the DCS Base and DCS Mask registers
779 */
780static void read_dct_base_mask(struct amd64_pvt *pvt)
781{
782	int cs;
783
784	prep_chip_selects(pvt);
785
786	for_each_chip_select(cs, 0, pvt) {
787		int reg0   = DCSB0 + (cs * 4);
788		int reg1   = DCSB1 + (cs * 4);
789		u32 *base0 = &pvt->csels[0].csbases[cs];
790		u32 *base1 = &pvt->csels[1].csbases[cs];
791
792		if (!amd64_read_dct_pci_cfg(pvt, 0, reg0, base0))
793			edac_dbg(0, "  DCSB0[%d]=0x%08x reg: F2x%x\n",
794				 cs, *base0, reg0);
795
796		if (pvt->fam == 0xf)
797			continue;
798
799		if (!amd64_read_dct_pci_cfg(pvt, 1, reg0, base1))
800			edac_dbg(0, "  DCSB1[%d]=0x%08x reg: F2x%x\n",
801				 cs, *base1, (pvt->fam == 0x10) ? reg1
802								: reg0);
803	}
804
805	for_each_chip_select_mask(cs, 0, pvt) {
806		int reg0   = DCSM0 + (cs * 4);
807		int reg1   = DCSM1 + (cs * 4);
808		u32 *mask0 = &pvt->csels[0].csmasks[cs];
809		u32 *mask1 = &pvt->csels[1].csmasks[cs];
810
811		if (!amd64_read_dct_pci_cfg(pvt, 0, reg0, mask0))
812			edac_dbg(0, "    DCSM0[%d]=0x%08x reg: F2x%x\n",
813				 cs, *mask0, reg0);
814
815		if (pvt->fam == 0xf)
816			continue;
817
818		if (!amd64_read_dct_pci_cfg(pvt, 1, reg0, mask1))
819			edac_dbg(0, "    DCSM1[%d]=0x%08x reg: F2x%x\n",
820				 cs, *mask1, (pvt->fam == 0x10) ? reg1
821								: reg0);
822	}
823}
824
825static void determine_memory_type(struct amd64_pvt *pvt)
826{
827	u32 dram_ctrl, dcsm;
828
829	switch (pvt->fam) {
830	case 0xf:
831		if (pvt->ext_model >= K8_REV_F)
832			goto ddr3;
833
834		pvt->dram_type = (pvt->dclr0 & BIT(18)) ? MEM_DDR : MEM_RDDR;
835		return;
836
837	case 0x10:
838		if (pvt->dchr0 & DDR3_MODE)
839			goto ddr3;
840
841		pvt->dram_type = (pvt->dclr0 & BIT(16)) ? MEM_DDR2 : MEM_RDDR2;
842		return;
843
844	case 0x15:
845		if (pvt->model < 0x60)
846			goto ddr3;
847
848		/*
		 * Models 60h and later need special handling:
850		 *
851		 * We use a Chip Select value of '0' to obtain dcsm.
852		 * Theoretically, it is possible to populate LRDIMMs of different
853		 * 'Rank' value on a DCT. But this is not the common case. So,
		 * it's reasonable to assume all DIMMs are going to be of the
		 * same 'type' until proven otherwise.
856		 */
857		amd64_read_dct_pci_cfg(pvt, 0, DRAM_CONTROL, &dram_ctrl);
858		dcsm = pvt->csels[0].csmasks[0];
859
860		if (((dram_ctrl >> 8) & 0x7) == 0x2)
861			pvt->dram_type = MEM_DDR4;
862		else if (pvt->dclr0 & BIT(16))
863			pvt->dram_type = MEM_DDR3;
864		else if (dcsm & 0x3)
865			pvt->dram_type = MEM_LRDDR3;
866		else
867			pvt->dram_type = MEM_RDDR3;
868
869		return;
870
871	case 0x16:
872		goto ddr3;
873
874	default:
875		WARN(1, KERN_ERR "%s: Family??? 0x%x\n", __func__, pvt->fam);
876		pvt->dram_type = MEM_EMPTY;
877	}
878	return;
879
880ddr3:
881	pvt->dram_type = (pvt->dclr0 & BIT(16)) ? MEM_DDR3 : MEM_RDDR3;
882}
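
/*
 * Example of the F15h M60h decision above (values for illustration only): if
 * DRAM_CONTROL bits [10:8] read back as 0x2, the DCT is in DDR4 mode;
 * otherwise DCLR0 bit 16 set selects MEM_DDR3, a non-zero value in the low
 * bits of DCSM0 indicates LRDIMMs (MEM_LRDDR3), and registered DDR3 is the
 * fallback.
 */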
883
884/* Get the number of DCT channels the memory controller is using. */
885static int k8_early_channel_count(struct amd64_pvt *pvt)
886{
887	int flag;
888
889	if (pvt->ext_model >= K8_REV_F)
890		/* RevF (NPT) and later */
891		flag = pvt->dclr0 & WIDTH_128;
892	else
893		/* RevE and earlier */
894		flag = pvt->dclr0 & REVE_WIDTH_128;
895
896	/* not used */
897	pvt->dclr1 = 0;
898
899	return (flag) ? 2 : 1;
900}
901
902/* On F10h and later ErrAddr is MC4_ADDR[47:1] */
903static u64 get_error_address(struct amd64_pvt *pvt, struct mce *m)
904{
905	u16 mce_nid = amd_get_nb_id(m->extcpu);
906	struct mem_ctl_info *mci;
907	u8 start_bit = 1;
908	u8 end_bit   = 47;
909	u64 addr;
910
911	mci = edac_mc_find(mce_nid);
912	if (!mci)
913		return 0;
914
915	pvt = mci->pvt_info;
916
917	if (pvt->fam == 0xf) {
918		start_bit = 3;
919		end_bit   = 39;
920	}
921
922	addr = m->addr & GENMASK_ULL(end_bit, start_bit);
923
924	/*
925	 * Erratum 637 workaround
926	 */
927	if (pvt->fam == 0x15) {
928		u64 cc6_base, tmp_addr;
929		u32 tmp;
930		u8 intlv_en;
931
932		if ((addr & GENMASK_ULL(47, 24)) >> 24 != 0x00fdf7)
933			return addr;
934
936		amd64_read_pci_cfg(pvt->F1, DRAM_LOCAL_NODE_LIM, &tmp);
937		intlv_en = tmp >> 21 & 0x7;
938
939		/* add [47:27] + 3 trailing bits */
940		cc6_base  = (tmp & GENMASK_ULL(20, 0)) << 3;
941
942		/* reverse and add DramIntlvEn */
943		cc6_base |= intlv_en ^ 0x7;
944
945		/* pin at [47:24] */
946		cc6_base <<= 24;
947
948		if (!intlv_en)
949			return cc6_base | (addr & GENMASK_ULL(23, 0));
950
951		amd64_read_pci_cfg(pvt->F1, DRAM_LOCAL_NODE_BASE, &tmp);
952
953							/* faster log2 */
954		tmp_addr  = (addr & GENMASK_ULL(23, 12)) << __fls(intlv_en + 1);
955
956		/* OR DramIntlvSel into bits [14:12] */
957		tmp_addr |= (tmp & GENMASK_ULL(23, 21)) >> 9;
958
959		/* add remaining [11:0] bits from original MC4_ADDR */
960		tmp_addr |= addr & GENMASK_ULL(11, 0);
961
962		return cc6_base | tmp_addr;
963	}
964
965	return addr;
966}
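
/*
 * Note on the erratum 637 path above: in the simplest case, with DRAM node
 * interleaving disabled (intlv_en == 0), the reported address is rebuilt as
 * cc6_base | MC4_ADDR[23:0], i.e. only the low 24 bits of the logged address
 * are kept and the CC6 save-area base is substituted for the upper bits.
 */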
967
968static struct pci_dev *pci_get_related_function(unsigned int vendor,
969						unsigned int device,
970						struct pci_dev *related)
971{
972	struct pci_dev *dev = NULL;
973
974	while ((dev = pci_get_device(vendor, device, dev))) {
975		if (pci_domain_nr(dev->bus) == pci_domain_nr(related->bus) &&
976		    (dev->bus->number == related->bus->number) &&
977		    (PCI_SLOT(dev->devfn) == PCI_SLOT(related->devfn)))
978			break;
979	}
980
981	return dev;
982}
983
984static void read_dram_base_limit_regs(struct amd64_pvt *pvt, unsigned range)
985{
986	struct amd_northbridge *nb;
987	struct pci_dev *f1 = NULL;
988	unsigned int pci_func;
989	int off = range << 3;
990	u32 llim;
991
992	amd64_read_pci_cfg(pvt->F1, DRAM_BASE_LO + off,  &pvt->ranges[range].base.lo);
993	amd64_read_pci_cfg(pvt->F1, DRAM_LIMIT_LO + off, &pvt->ranges[range].lim.lo);
994
995	if (pvt->fam == 0xf)
996		return;
997
998	if (!dram_rw(pvt, range))
999		return;
1000
1001	amd64_read_pci_cfg(pvt->F1, DRAM_BASE_HI + off,  &pvt->ranges[range].base.hi);
1002	amd64_read_pci_cfg(pvt->F1, DRAM_LIMIT_HI + off, &pvt->ranges[range].lim.hi);
1003
1004	/* F15h: factor in CC6 save area by reading dst node's limit reg */
1005	if (pvt->fam != 0x15)
1006		return;
1007
1008	nb = node_to_amd_nb(dram_dst_node(pvt, range));
1009	if (WARN_ON(!nb))
1010		return;
1011
1012	if (pvt->model == 0x60)
1013		pci_func = PCI_DEVICE_ID_AMD_15H_M60H_NB_F1;
1014	else if (pvt->model == 0x30)
1015		pci_func = PCI_DEVICE_ID_AMD_15H_M30H_NB_F1;
1016	else
1017		pci_func = PCI_DEVICE_ID_AMD_15H_NB_F1;
1018
1019	f1 = pci_get_related_function(nb->misc->vendor, pci_func, nb->misc);
1020	if (WARN_ON(!f1))
1021		return;
1022
1023	amd64_read_pci_cfg(f1, DRAM_LOCAL_NODE_LIM, &llim);
1024
1025	pvt->ranges[range].lim.lo &= GENMASK_ULL(15, 0);
1026
1027				    /* {[39:27],111b} */
1028	pvt->ranges[range].lim.lo |= ((llim & 0x1fff) << 3 | 0x7) << 16;
1029
1030	pvt->ranges[range].lim.hi &= GENMASK_ULL(7, 0);
1031
1032				    /* [47:40] */
1033	pvt->ranges[range].lim.hi |= llim >> 13;
1034
1035	pci_dev_put(f1);
1036}
1037
1038static void k8_map_sysaddr_to_csrow(struct mem_ctl_info *mci, u64 sys_addr,
1039				    struct err_info *err)
1040{
1041	struct amd64_pvt *pvt = mci->pvt_info;
1042
1043	error_address_to_page_and_offset(sys_addr, err);
1044
1045	/*
1046	 * Find out which node the error address belongs to. This may be
1047	 * different from the node that detected the error.
1048	 */
1049	err->src_mci = find_mc_by_sys_addr(mci, sys_addr);
1050	if (!err->src_mci) {
1051		amd64_mc_err(mci, "failed to map error addr 0x%lx to a node\n",
1052			     (unsigned long)sys_addr);
1053		err->err_code = ERR_NODE;
1054		return;
1055	}
1056
1057	/* Now map the sys_addr to a CSROW */
1058	err->csrow = sys_addr_to_csrow(err->src_mci, sys_addr);
1059	if (err->csrow < 0) {
1060		err->err_code = ERR_CSROW;
1061		return;
1062	}
1063
1064	/* CHIPKILL enabled */
1065	if (pvt->nbcfg & NBCFG_CHIPKILL) {
1066		err->channel = get_channel_from_ecc_syndrome(mci, err->syndrome);
1067		if (err->channel < 0) {
1068			/*
1069			 * Syndrome didn't map, so we don't know which of the
1070			 * 2 DIMMs is in error. So we need to ID 'both' of them
1071			 * as suspect.
1072			 */
1073			amd64_mc_warn(err->src_mci, "unknown syndrome 0x%04x - "
1074				      "possible error reporting race\n",
1075				      err->syndrome);
1076			err->err_code = ERR_CHANNEL;
1077			return;
1078		}
1079	} else {
1080		/*
1081		 * non-chipkill ecc mode
1082		 *
1083		 * The k8 documentation is unclear about how to determine the
1084		 * channel number when using non-chipkill memory.  This method
1085		 * was obtained from email communication with someone at AMD.
1086		 * (Wish the email was placed in this comment - norsk)
1087		 */
1088		err->channel = ((sys_addr & BIT(3)) != 0);
1089	}
1090}
1091
1092static int ddr2_cs_size(unsigned i, bool dct_width)
1093{
1094	unsigned shift = 0;
1095
1096	if (i <= 2)
1097		shift = i;
1098	else if (!(i & 0x1))
1099		shift = i >> 1;
1100	else
1101		shift = (i + 1) >> 1;
1102
1103	return 128 << (shift + !!dct_width);
1104}
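
/*
 * Example: ddr2_cs_size(5, false) computes shift = (5 + 1) >> 1 = 3 and
 * returns 128 << 3 = 1024 (MB); with a 128-bit DCT (dct_width true) the
 * result doubles to 2048.
 */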
1105
1106static int k8_dbam_to_chip_select(struct amd64_pvt *pvt, u8 dct,
1107				  unsigned cs_mode, int cs_mask_nr)
1108{
1109	u32 dclr = dct ? pvt->dclr1 : pvt->dclr0;
1110
1111	if (pvt->ext_model >= K8_REV_F) {
1112		WARN_ON(cs_mode > 11);
1113		return ddr2_cs_size(cs_mode, dclr & WIDTH_128);
1114	}
1115	else if (pvt->ext_model >= K8_REV_D) {
1116		unsigned diff;
1117		WARN_ON(cs_mode > 10);
1118
1119		/*
1120		 * the below calculation, besides trying to win an obfuscated C
1121		 * contest, maps cs_mode values to DIMM chip select sizes. The
1122		 * mappings are:
1123		 *
		 * cs_mode	CS size (MB)
1125		 * =======	============
1126		 * 0		32
1127		 * 1		64
1128		 * 2		128
1129		 * 3		128
1130		 * 4		256
1131		 * 5		512
1132		 * 6		256
1133		 * 7		512
1134		 * 8		1024
1135		 * 9		1024
1136		 * 10		2048
1137		 *
1138		 * Basically, it calculates a value with which to shift the
1139		 * smallest CS size of 32MB.
1140		 *
1141		 * ddr[23]_cs_size have a similar purpose.
1142		 */
1143		diff = cs_mode/3 + (unsigned)(cs_mode > 5);
1144
1145		return 32 << (cs_mode - diff);
1146	}
1147	else {
1148		WARN_ON(cs_mode > 6);
1149		return 32 << cs_mode;
1150	}
1151}
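
/*
 * Sanity check of the revD/E formula above: for cs_mode = 6,
 * diff = 6/3 + (6 > 5) = 3, giving 32 << (6 - 3) = 256 (MB), which matches
 * the table in the comment.
 */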
1152
1153/*
1154 * Get the number of DCT channels in use.
1155 *
1156 * Return:
1157 *	number of Memory Channels in operation
1158 * Pass back:
1159 *	contents of the DCL0_LOW register
1160 */
1161static int f1x_early_channel_count(struct amd64_pvt *pvt)
1162{
1163	int i, j, channels = 0;
1164
1165	/* On F10h, if we are in 128 bit mode, then we are using 2 channels */
1166	if (pvt->fam == 0x10 && (pvt->dclr0 & WIDTH_128))
1167		return 2;
1168
1169	/*
1170	 * Need to check if in unganged mode: In such, there are 2 channels,
1171	 * but they are not in 128 bit mode and thus the above 'dclr0' status
1172	 * bit will be OFF.
1173	 *
1174	 * Need to check DCT0[0] and DCT1[0] to see if only one of them has
1175	 * their CSEnable bit on. If so, then SINGLE DIMM case.
1176	 */
1177	edac_dbg(0, "Data width is not 128 bits - need more decoding\n");
1178
1179	/*
1180	 * Check DRAM Bank Address Mapping values for each DIMM to see if there
1181	 * is more than just one DIMM present in unganged mode. Need to check
1182	 * both controllers since DIMMs can be placed in either one.
1183	 */
1184	for (i = 0; i < 2; i++) {
1185		u32 dbam = (i ? pvt->dbam1 : pvt->dbam0);
1186
1187		for (j = 0; j < 4; j++) {
1188			if (DBAM_DIMM(j, dbam) > 0) {
1189				channels++;
1190				break;
1191			}
1192		}
1193	}
1194
1195	if (channels > 2)
1196		channels = 2;
1197
1198	amd64_info("MCT channel count: %d\n", channels);
1199
1200	return channels;
1201}
1202
1203static int ddr3_cs_size(unsigned i, bool dct_width)
1204{
1205	unsigned shift = 0;
1206	int cs_size = 0;
1207
1208	if (i == 0 || i == 3 || i == 4)
1209		cs_size = -1;
1210	else if (i <= 2)
1211		shift = i;
1212	else if (i == 12)
1213		shift = 7;
1214	else if (!(i & 0x1))
1215		shift = i >> 1;
1216	else
1217		shift = (i + 1) >> 1;
1218
1219	if (cs_size != -1)
1220		cs_size = (128 * (1 << !!dct_width)) << shift;
1221
1222	return cs_size;
1223}
1224
1225static int ddr3_lrdimm_cs_size(unsigned i, unsigned rank_multiply)
1226{
1227	unsigned shift = 0;
1228	int cs_size = 0;
1229
1230	if (i < 4 || i == 6)
1231		cs_size = -1;
1232	else if (i == 12)
1233		shift = 7;
1234	else if (!(i & 0x1))
1235		shift = i >> 1;
1236	else
1237		shift = (i + 1) >> 1;
1238
1239	if (cs_size != -1)
1240		cs_size = rank_multiply * (128 << shift);
1241
1242	return cs_size;
1243}
1244
1245static int ddr4_cs_size(unsigned i)
1246{
1247	int cs_size = 0;
1248
1249	if (i == 0)
1250		cs_size = -1;
1251	else if (i == 1)
1252		cs_size = 1024;
1253	else
1254		/* Min cs_size = 1G */
1255		cs_size = 1024 * (1 << (i >> 1));
1256
1257	return cs_size;
1258}
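
/*
 * Example: ddr4_cs_size(4) returns 1024 * (1 << (4 >> 1)) = 4096 (MB), while
 * the encodings 0 and 1 are handled explicitly as invalid and 1GB
 * respectively.
 */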
1259
1260static int f10_dbam_to_chip_select(struct amd64_pvt *pvt, u8 dct,
1261				   unsigned cs_mode, int cs_mask_nr)
1262{
1263	u32 dclr = dct ? pvt->dclr1 : pvt->dclr0;
1264
1265	WARN_ON(cs_mode > 11);
1266
1267	if (pvt->dchr0 & DDR3_MODE || pvt->dchr1 & DDR3_MODE)
1268		return ddr3_cs_size(cs_mode, dclr & WIDTH_128);
1269	else
1270		return ddr2_cs_size(cs_mode, dclr & WIDTH_128);
1271}
1272
1273/*
1274 * F15h supports only 64bit DCT interfaces
1275 */
1276static int f15_dbam_to_chip_select(struct amd64_pvt *pvt, u8 dct,
1277				   unsigned cs_mode, int cs_mask_nr)
1278{
1279	WARN_ON(cs_mode > 12);
1280
1281	return ddr3_cs_size(cs_mode, false);
1282}
1283
/* F15h M60h supports DDR4 mapping as well. */
1285static int f15_m60h_dbam_to_chip_select(struct amd64_pvt *pvt, u8 dct,
1286					unsigned cs_mode, int cs_mask_nr)
1287{
1288	int cs_size;
1289	u32 dcsm = pvt->csels[dct].csmasks[cs_mask_nr];
1290
1291	WARN_ON(cs_mode > 12);
1292
1293	if (pvt->dram_type == MEM_DDR4) {
1294		if (cs_mode > 9)
1295			return -1;
1296
1297		cs_size = ddr4_cs_size(cs_mode);
1298	} else if (pvt->dram_type == MEM_LRDDR3) {
1299		unsigned rank_multiply = dcsm & 0xf;
1300
1301		if (rank_multiply == 3)
1302			rank_multiply = 4;
1303		cs_size = ddr3_lrdimm_cs_size(cs_mode, rank_multiply);
1304	} else {
		/* Minimum cs size is 512MB for F15h M60h */
1306		if (cs_mode == 0x1)
1307			return -1;
1308
1309		cs_size = ddr3_cs_size(cs_mode, false);
1310	}
1311
1312	return cs_size;
1313}
1314
1315/*
1316 * F16h and F15h model 30h have only limited cs_modes.
1317 */
1318static int f16_dbam_to_chip_select(struct amd64_pvt *pvt, u8 dct,
1319				unsigned cs_mode, int cs_mask_nr)
1320{
1321	WARN_ON(cs_mode > 12);
1322
1323	if (cs_mode == 6 || cs_mode == 8 ||
1324	    cs_mode == 9 || cs_mode == 12)
1325		return -1;
1326	else
1327		return ddr3_cs_size(cs_mode, false);
1328}
1329
1330static void read_dram_ctl_register(struct amd64_pvt *pvt)
1331{
1332
1333	if (pvt->fam == 0xf)
1334		return;
1335
1336	if (!amd64_read_pci_cfg(pvt->F2, DCT_SEL_LO, &pvt->dct_sel_lo)) {
1337		edac_dbg(0, "F2x110 (DCTSelLow): 0x%08x, High range addrs at: 0x%x\n",
1338			 pvt->dct_sel_lo, dct_sel_baseaddr(pvt));
1339
1340		edac_dbg(0, "  DCTs operate in %s mode\n",
1341			 (dct_ganging_enabled(pvt) ? "ganged" : "unganged"));
1342
1343		if (!dct_ganging_enabled(pvt))
1344			edac_dbg(0, "  Address range split per DCT: %s\n",
1345				 (dct_high_range_enabled(pvt) ? "yes" : "no"));
1346
1347		edac_dbg(0, "  data interleave for ECC: %s, DRAM cleared since last warm reset: %s\n",
1348			 (dct_data_intlv_enabled(pvt) ? "enabled" : "disabled"),
1349			 (dct_memory_cleared(pvt) ? "yes" : "no"));
1350
1351		edac_dbg(0, "  channel interleave: %s, "
1352			 "interleave bits selector: 0x%x\n",
1353			 (dct_interleave_enabled(pvt) ? "enabled" : "disabled"),
1354			 dct_sel_interleave_addr(pvt));
1355	}
1356
1357	amd64_read_pci_cfg(pvt->F2, DCT_SEL_HI, &pvt->dct_sel_hi);
1358}
1359
1360/*
1361 * Determine channel (DCT) based on the interleaving mode (see F15h M30h BKDG,
1362 * 2.10.12 Memory Interleaving Modes).
1363 */
1364static u8 f15_m30h_determine_channel(struct amd64_pvt *pvt, u64 sys_addr,
1365				     u8 intlv_en, int num_dcts_intlv,
1366				     u32 dct_sel)
1367{
1368	u8 channel = 0;
1369	u8 select;
1370
1371	if (!(intlv_en))
1372		return (u8)(dct_sel);
1373
1374	if (num_dcts_intlv == 2) {
1375		select = (sys_addr >> 8) & 0x3;
1376		channel = select ? 0x3 : 0;
1377	} else if (num_dcts_intlv == 4) {
1378		u8 intlv_addr = dct_sel_interleave_addr(pvt);
1379		switch (intlv_addr) {
1380		case 0x4:
1381			channel = (sys_addr >> 8) & 0x3;
1382			break;
1383		case 0x5:
1384			channel = (sys_addr >> 9) & 0x3;
1385			break;
1386		}
1387	}
1388	return channel;
1389}
1390
1391/*
1392 * Determine channel (DCT) based on the interleaving mode: F10h BKDG, 2.8.9 Memory
1393 * Interleaving Modes.
1394 */
1395static u8 f1x_determine_channel(struct amd64_pvt *pvt, u64 sys_addr,
1396				bool hi_range_sel, u8 intlv_en)
1397{
1398	u8 dct_sel_high = (pvt->dct_sel_lo >> 1) & 1;
1399
1400	if (dct_ganging_enabled(pvt))
1401		return 0;
1402
1403	if (hi_range_sel)
1404		return dct_sel_high;
1405
1406	/*
1407	 * see F2x110[DctSelIntLvAddr] - channel interleave mode
1408	 */
1409	if (dct_interleave_enabled(pvt)) {
1410		u8 intlv_addr = dct_sel_interleave_addr(pvt);
1411
1412		/* return DCT select function: 0=DCT0, 1=DCT1 */
1413		if (!intlv_addr)
1414			return sys_addr >> 6 & 1;
1415
1416		if (intlv_addr & 0x2) {
1417			u8 shift = intlv_addr & 0x1 ? 9 : 6;
1418			u32 temp = hweight_long((u32) ((sys_addr >> 16) & 0x1F)) % 2;
1419
1420			return ((sys_addr >> shift) & 1) ^ temp;
1421		}
1422
1423		return (sys_addr >> (12 + hweight8(intlv_en))) & 1;
1424	}
1425
1426	if (dct_high_range_enabled(pvt))
1427		return ~dct_sel_high & 1;
1428
1429	return 0;
1430}
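
/*
 * Channel selection example for the interleaved case above (illustrative):
 * with DctSelIntLvAddr = 0x2 ("hash 6"), the DCT is chosen from sys_addr
 * bit 6 XORed with the parity of bits [20:16]; for DctSelIntLvAddr = 0x3,
 * bit 9 is used instead of bit 6.
 */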
1431
1432/* Convert the sys_addr to the normalized DCT address */
1433static u64 f1x_get_norm_dct_addr(struct amd64_pvt *pvt, u8 range,
1434				 u64 sys_addr, bool hi_rng,
1435				 u32 dct_sel_base_addr)
1436{
1437	u64 chan_off;
1438	u64 dram_base		= get_dram_base(pvt, range);
1439	u64 hole_off		= f10_dhar_offset(pvt);
1440	u64 dct_sel_base_off	= (u64)(pvt->dct_sel_hi & 0xFFFFFC00) << 16;
1441
1442	if (hi_rng) {
1443		/*
1444		 * if
1445		 * base address of high range is below 4Gb
1446		 * (bits [47:27] at [31:11])
1447		 * DRAM address space on this DCT is hoisted above 4Gb	&&
1448		 * sys_addr > 4Gb
1449		 *
1450		 *	remove hole offset from sys_addr
1451		 * else
1452		 *	remove high range offset from sys_addr
1453		 */
1454		if ((!(dct_sel_base_addr >> 16) ||
1455		     dct_sel_base_addr < dhar_base(pvt)) &&
1456		    dhar_valid(pvt) &&
1457		    (sys_addr >= BIT_64(32)))
1458			chan_off = hole_off;
1459		else
1460			chan_off = dct_sel_base_off;
1461	} else {
1462		/*
1463		 * if
1464		 * we have a valid hole		&&
1465		 * sys_addr > 4Gb
1466		 *
1467		 *	remove hole
1468		 * else
1469		 *	remove dram base to normalize to DCT address
1470		 */
1471		if (dhar_valid(pvt) && (sys_addr >= BIT_64(32)))
1472			chan_off = hole_off;
1473		else
1474			chan_off = dram_base;
1475	}
1476
1477	return (sys_addr & GENMASK_ULL(47,6)) - (chan_off & GENMASK_ULL(47,23));
1478}
1479
1480/*
 * Check if the csrow passed in is marked as SPARED; if so, return the new
 * spare row.
1483 */
1484static int f10_process_possible_spare(struct amd64_pvt *pvt, u8 dct, int csrow)
1485{
1486	int tmp_cs;
1487
1488	if (online_spare_swap_done(pvt, dct) &&
1489	    csrow == online_spare_bad_dramcs(pvt, dct)) {
1490
1491		for_each_chip_select(tmp_cs, dct, pvt) {
1492			if (chip_select_base(tmp_cs, dct, pvt) & 0x2) {
1493				csrow = tmp_cs;
1494				break;
1495			}
1496		}
1497	}
1498	return csrow;
1499}
1500
1501/*
1502 * Iterate over the DRAM DCT "base" and "mask" registers looking for a
1503 * SystemAddr match on the specified 'ChannelSelect' and 'NodeID'
1504 *
1505 * Return:
1506 *	-EINVAL:  NOT FOUND
1507 *	0..csrow = Chip-Select Row
1508 */
1509static int f1x_lookup_addr_in_dct(u64 in_addr, u8 nid, u8 dct)
1510{
1511	struct mem_ctl_info *mci;
1512	struct amd64_pvt *pvt;
1513	u64 cs_base, cs_mask;
1514	int cs_found = -EINVAL;
1515	int csrow;
1516
1517	mci = edac_mc_find(nid);
1518	if (!mci)
1519		return cs_found;
1520
1521	pvt = mci->pvt_info;
1522
1523	edac_dbg(1, "input addr: 0x%llx, DCT: %d\n", in_addr, dct);
1524
1525	for_each_chip_select(csrow, dct, pvt) {
1526		if (!csrow_enabled(csrow, dct, pvt))
1527			continue;
1528
1529		get_cs_base_and_mask(pvt, csrow, dct, &cs_base, &cs_mask);
1530
1531		edac_dbg(1, "    CSROW=%d CSBase=0x%llx CSMask=0x%llx\n",
1532			 csrow, cs_base, cs_mask);
1533
1534		cs_mask = ~cs_mask;
1535
1536		edac_dbg(1, "    (InputAddr & ~CSMask)=0x%llx (CSBase & ~CSMask)=0x%llx\n",
1537			 (in_addr & cs_mask), (cs_base & cs_mask));
1538
1539		if ((in_addr & cs_mask) == (cs_base & cs_mask)) {
1540			if (pvt->fam == 0x15 && pvt->model >= 0x30) {
1541				cs_found =  csrow;
1542				break;
1543			}
1544			cs_found = f10_process_possible_spare(pvt, dct, csrow);
1545
1546			edac_dbg(1, " MATCH csrow=%d\n", cs_found);
1547			break;
1548		}
1549	}
1550	return cs_found;
1551}
1552
1553/*
1554 * See F2x10C. Non-interleaved graphics framebuffer memory under the 16G is
1555 * swapped with a region located at the bottom of memory so that the GPU can use
1556 * the interleaved region and thus two channels.
1557 */
1558static u64 f1x_swap_interleaved_region(struct amd64_pvt *pvt, u64 sys_addr)
1559{
1560	u32 swap_reg, swap_base, swap_limit, rgn_size, tmp_addr;
1561
1562	if (pvt->fam == 0x10) {
1563		/* only revC3 and revE have that feature */
1564		if (pvt->model < 4 || (pvt->model < 0xa && pvt->stepping < 3))
1565			return sys_addr;
1566	}
1567
1568	amd64_read_pci_cfg(pvt->F2, SWAP_INTLV_REG, &swap_reg);
1569
1570	if (!(swap_reg & 0x1))
1571		return sys_addr;
1572
1573	swap_base	= (swap_reg >> 3) & 0x7f;
1574	swap_limit	= (swap_reg >> 11) & 0x7f;
1575	rgn_size	= (swap_reg >> 20) & 0x7f;
1576	tmp_addr	= sys_addr >> 27;
1577
1578	if (!(sys_addr >> 34) &&
1579	    (((tmp_addr >= swap_base) &&
1580	     (tmp_addr <= swap_limit)) ||
1581	     (tmp_addr < rgn_size)))
1582		return sys_addr ^ (u64)swap_base << 27;
1583
1584	return sys_addr;
1585}
1586
1587/* For a given @dram_range, check if @sys_addr falls within it. */
1588static int f1x_match_to_this_node(struct amd64_pvt *pvt, unsigned range,
1589				  u64 sys_addr, int *chan_sel)
1590{
1591	int cs_found = -EINVAL;
1592	u64 chan_addr;
1593	u32 dct_sel_base;
1594	u8 channel;
1595	bool high_range = false;
1596
1597	u8 node_id    = dram_dst_node(pvt, range);
1598	u8 intlv_en   = dram_intlv_en(pvt, range);
1599	u32 intlv_sel = dram_intlv_sel(pvt, range);
1600
1601	edac_dbg(1, "(range %d) SystemAddr= 0x%llx Limit=0x%llx\n",
1602		 range, sys_addr, get_dram_limit(pvt, range));
1603
1604	if (dhar_valid(pvt) &&
1605	    dhar_base(pvt) <= sys_addr &&
1606	    sys_addr < BIT_64(32)) {
1607		amd64_warn("Huh? Address is in the MMIO hole: 0x%016llx\n",
1608			    sys_addr);
1609		return -EINVAL;
1610	}
1611
1612	if (intlv_en && (intlv_sel != ((sys_addr >> 12) & intlv_en)))
1613		return -EINVAL;
1614
1615	sys_addr = f1x_swap_interleaved_region(pvt, sys_addr);
1616
1617	dct_sel_base = dct_sel_baseaddr(pvt);
1618
1619	/*
1620	 * check whether addresses >= DctSelBaseAddr[47:27] are to be used to
1621	 * select between DCT0 and DCT1.
1622	 */
1623	if (dct_high_range_enabled(pvt) &&
1624	   !dct_ganging_enabled(pvt) &&
1625	   ((sys_addr >> 27) >= (dct_sel_base >> 11)))
1626		high_range = true;
1627
1628	channel = f1x_determine_channel(pvt, sys_addr, high_range, intlv_en);
1629
1630	chan_addr = f1x_get_norm_dct_addr(pvt, range, sys_addr,
1631					  high_range, dct_sel_base);
1632
1633	/* Remove node interleaving, see F1x120 */
1634	if (intlv_en)
1635		chan_addr = ((chan_addr >> (12 + hweight8(intlv_en))) << 12) |
1636			    (chan_addr & 0xfff);
1637
1638	/* remove channel interleave */
1639	if (dct_interleave_enabled(pvt) &&
1640	   !dct_high_range_enabled(pvt) &&
1641	   !dct_ganging_enabled(pvt)) {
1642
1643		if (dct_sel_interleave_addr(pvt) != 1) {
1644			if (dct_sel_interleave_addr(pvt) == 0x3)
1645				/* hash 9 */
1646				chan_addr = ((chan_addr >> 10) << 9) |
1647					     (chan_addr & 0x1ff);
1648			else
1649				/* A[6] or hash 6 */
1650				chan_addr = ((chan_addr >> 7) << 6) |
1651					     (chan_addr & 0x3f);
1652		} else
1653			/* A[12] */
1654			chan_addr = ((chan_addr >> 13) << 12) |
1655				     (chan_addr & 0xfff);
1656	}
1657
1658	edac_dbg(1, "   Normalized DCT addr: 0x%llx\n", chan_addr);
1659
1660	cs_found = f1x_lookup_addr_in_dct(chan_addr, node_id, channel);
1661
1662	if (cs_found >= 0)
1663		*chan_sel = channel;
1664
1665	return cs_found;
1666}
1667
1668static int f15_m30h_match_to_this_node(struct amd64_pvt *pvt, unsigned range,
1669					u64 sys_addr, int *chan_sel)
1670{
1671	int cs_found = -EINVAL;
1672	int num_dcts_intlv = 0;
1673	u64 chan_addr, chan_offset;
1674	u64 dct_base, dct_limit;
1675	u32 dct_cont_base_reg, dct_cont_limit_reg, tmp;
1676	u8 channel, alias_channel, leg_mmio_hole, dct_sel, dct_offset_en;
1677
1678	u64 dhar_offset		= f10_dhar_offset(pvt);
1679	u8 intlv_addr		= dct_sel_interleave_addr(pvt);
1680	u8 node_id		= dram_dst_node(pvt, range);
1681	u8 intlv_en		= dram_intlv_en(pvt, range);
1682
1683	amd64_read_pci_cfg(pvt->F1, DRAM_CONT_BASE, &dct_cont_base_reg);
1684	amd64_read_pci_cfg(pvt->F1, DRAM_CONT_LIMIT, &dct_cont_limit_reg);
1685
1686	dct_offset_en		= (u8) ((dct_cont_base_reg >> 3) & BIT(0));
1687	dct_sel			= (u8) ((dct_cont_base_reg >> 4) & 0x7);
1688
1689	edac_dbg(1, "(range %d) SystemAddr= 0x%llx Limit=0x%llx\n",
1690		 range, sys_addr, get_dram_limit(pvt, range));
1691
1692	if (!(get_dram_base(pvt, range)  <= sys_addr) &&
1693	    !(get_dram_limit(pvt, range) >= sys_addr))
1694		return -EINVAL;
1695
1696	if (dhar_valid(pvt) &&
1697	    dhar_base(pvt) <= sys_addr &&
1698	    sys_addr < BIT_64(32)) {
1699		amd64_warn("Huh? Address is in the MMIO hole: 0x%016llx\n",
1700			    sys_addr);
1701		return -EINVAL;
1702	}
1703
1704	/* Verify sys_addr is within DCT Range. */
1705	dct_base = (u64) dct_sel_baseaddr(pvt);
1706	dct_limit = (dct_cont_limit_reg >> 11) & 0x1FFF;
1707
1708	if (!(dct_cont_base_reg & BIT(0)) &&
1709	    !(dct_base <= (sys_addr >> 27) &&
1710	      dct_limit >= (sys_addr >> 27)))
1711		return -EINVAL;
1712
	/* Verify number of DCTs that participate in channel interleaving. */
1714	num_dcts_intlv = (int) hweight8(intlv_en);
1715
1716	if (!(num_dcts_intlv % 2 == 0) || (num_dcts_intlv > 4))
1717		return -EINVAL;
1718
1719	channel = f15_m30h_determine_channel(pvt, sys_addr, intlv_en,
1720					     num_dcts_intlv, dct_sel);
1721
1722	/* Verify we stay within the MAX number of channels allowed */
1723	if (channel > 3)
1724		return -EINVAL;
1725
1726	leg_mmio_hole = (u8) (dct_cont_base_reg >> 1 & BIT(0));
1727
1728	/* Get normalized DCT addr */
1729	if (leg_mmio_hole && (sys_addr >= BIT_64(32)))
1730		chan_offset = dhar_offset;
1731	else
1732		chan_offset = dct_base << 27;
1733
1734	chan_addr = sys_addr - chan_offset;
1735
1736	/* remove channel interleave */
1737	if (num_dcts_intlv == 2) {
1738		if (intlv_addr == 0x4)
1739			chan_addr = ((chan_addr >> 9) << 8) |
1740						(chan_addr & 0xff);
1741		else if (intlv_addr == 0x5)
1742			chan_addr = ((chan_addr >> 10) << 9) |
1743						(chan_addr & 0x1ff);
1744		else
1745			return -EINVAL;
1746
1747	} else if (num_dcts_intlv == 4) {
1748		if (intlv_addr == 0x4)
1749			chan_addr = ((chan_addr >> 10) << 8) |
1750							(chan_addr & 0xff);
1751		else if (intlv_addr == 0x5)
1752			chan_addr = ((chan_addr >> 11) << 9) |
1753							(chan_addr & 0x1ff);
1754		else
1755			return -EINVAL;
1756	}
1757
1758	if (dct_offset_en) {
1759		amd64_read_pci_cfg(pvt->F1,
1760				   DRAM_CONT_HIGH_OFF + (int) channel * 4,
1761				   &tmp);
1762		chan_addr +=  (u64) ((tmp >> 11) & 0xfff) << 27;
1763	}
1764
1765	f15h_select_dct(pvt, channel);
1766
1767	edac_dbg(1, "   Normalized DCT addr: 0x%llx\n", chan_addr);
1768
1769	/*
1770	 * Find Chip select:
	 * if channel = 3, then alias it to 1. This is because, in F15h M30h,
	 * there is support for 4 DCTs, but only 2 are currently functional.
	 * They are DCT0 and DCT3. But we have read all registers of DCT3 into
	 * pvt->csels[1]. So we need to use '1' here to get correct info.
	 * Refer to the F15h M30h BKDG, Sections 2.10 and 2.10.3, for details.
1776	 */
1777	alias_channel =  (channel == 3) ? 1 : channel;
1778
1779	cs_found = f1x_lookup_addr_in_dct(chan_addr, node_id, alias_channel);
1780
1781	if (cs_found >= 0)
1782		*chan_sel = alias_channel;
1783
1784	return cs_found;
1785}
1786
1787static int f1x_translate_sysaddr_to_cs(struct amd64_pvt *pvt,
1788					u64 sys_addr,
1789					int *chan_sel)
1790{
1791	int cs_found = -EINVAL;
1792	unsigned range;
1793
1794	for (range = 0; range < DRAM_RANGES; range++) {
1795		if (!dram_rw(pvt, range))
1796			continue;
1797
1798		if (pvt->fam == 0x15 && pvt->model >= 0x30)
1799			cs_found = f15_m30h_match_to_this_node(pvt, range,
1800							       sys_addr,
1801							       chan_sel);
1802
1803		else if ((get_dram_base(pvt, range)  <= sys_addr) &&
1804			 (get_dram_limit(pvt, range) >= sys_addr)) {
1805			cs_found = f1x_match_to_this_node(pvt, range,
1806							  sys_addr, chan_sel);
1807			if (cs_found >= 0)
1808				break;
1809		}
1810	}
1811	return cs_found;
1812}
1813
1814/*
1815 * For reference see "2.8.5 Routing DRAM Requests" in F10 BKDG. This code maps
1816 * a @sys_addr to NodeID, DCT (channel) and chip select (CSROW).
1817 *
1818 * The @sys_addr is usually an error address received from the hardware
1819 * (MCX_ADDR).
1820 */
1821static void f1x_map_sysaddr_to_csrow(struct mem_ctl_info *mci, u64 sys_addr,
1822				     struct err_info *err)
1823{
1824	struct amd64_pvt *pvt = mci->pvt_info;
1825
1826	error_address_to_page_and_offset(sys_addr, err);
1827
1828	err->csrow = f1x_translate_sysaddr_to_cs(pvt, sys_addr, &err->channel);
1829	if (err->csrow < 0) {
1830		err->err_code = ERR_CSROW;
1831		return;
1832	}
1833
1834	/*
1835	 * We need the syndromes for channel detection only when we're
1836	 * ganged. Otherwise @chan should already contain the channel at
1837	 * this point.
1838	 */
1839	if (dct_ganging_enabled(pvt))
1840		err->channel = get_channel_from_ecc_syndrome(mci, err->syndrome);
1841}
1842
1843/*
 * debug routine to display the memory sizes of all logical DIMMs and their
1845 * CSROWs
1846 */
1847static void debug_display_dimm_sizes(struct amd64_pvt *pvt, u8 ctrl)
1848{
1849	int dimm, size0, size1;
1850	u32 *dcsb = ctrl ? pvt->csels[1].csbases : pvt->csels[0].csbases;
1851	u32 dbam  = ctrl ? pvt->dbam1 : pvt->dbam0;
1852
1853	if (pvt->fam == 0xf) {
1854		/* K8 families < revF not supported yet */
		if (pvt->ext_model < K8_REV_F)
			return;
		else
			WARN_ON(ctrl != 0);
1859	}
1860
1861	if (pvt->fam == 0x10) {
1862		dbam = (ctrl && !dct_ganging_enabled(pvt)) ? pvt->dbam1
1863							   : pvt->dbam0;
1864		dcsb = (ctrl && !dct_ganging_enabled(pvt)) ?
1865				 pvt->csels[1].csbases :
1866				 pvt->csels[0].csbases;
1867	} else if (ctrl) {
1868		dbam = pvt->dbam0;
1869		dcsb = pvt->csels[1].csbases;
1870	}
1871	edac_dbg(1, "F2x%d80 (DRAM Bank Address Mapping): 0x%08x\n",
1872		 ctrl, dbam);
1873
1874	edac_printk(KERN_DEBUG, EDAC_MC, "DCT%d chip selects:\n", ctrl);
1875
1876	/* Dump memory sizes for DIMM and its CSROWs */
1877	for (dimm = 0; dimm < 4; dimm++) {
1878
1879		size0 = 0;
1880		if (dcsb[dimm*2] & DCSB_CS_ENABLE)
1881			/* For f15m60h, need multiplier for LRDIMM cs_size
1882			 * calculation. We pass 'dimm' value to the dbam_to_cs
1883			 * mapper so we can find the multiplier from the
1884			 * corresponding DCSM.
1885			 */
1886			size0 = pvt->ops->dbam_to_cs(pvt, ctrl,
1887						     DBAM_DIMM(dimm, dbam),
1888						     dimm);
1889
1890		size1 = 0;
1891		if (dcsb[dimm*2 + 1] & DCSB_CS_ENABLE)
1892			size1 = pvt->ops->dbam_to_cs(pvt, ctrl,
1893						     DBAM_DIMM(dimm, dbam),
1894						     dimm);
1895
1896		amd64_info(EDAC_MC ": %d: %5dMB %d: %5dMB\n",
1897				dimm * 2,     size0,
1898				dimm * 2 + 1, size1);
1899	}
1900}
1901
1902static struct amd64_family_type family_types[] = {
1903	[K8_CPUS] = {
1904		.ctl_name = "K8",
1905		.f1_id = PCI_DEVICE_ID_AMD_K8_NB_ADDRMAP,
1906		.f3_id = PCI_DEVICE_ID_AMD_K8_NB_MISC,
1907		.ops = {
1908			.early_channel_count	= k8_early_channel_count,
1909			.map_sysaddr_to_csrow	= k8_map_sysaddr_to_csrow,
1910			.dbam_to_cs		= k8_dbam_to_chip_select,
1911		}
1912	},
1913	[F10_CPUS] = {
1914		.ctl_name = "F10h",
1915		.f1_id = PCI_DEVICE_ID_AMD_10H_NB_MAP,
1916		.f3_id = PCI_DEVICE_ID_AMD_10H_NB_MISC,
1917		.ops = {
1918			.early_channel_count	= f1x_early_channel_count,
1919			.map_sysaddr_to_csrow	= f1x_map_sysaddr_to_csrow,
1920			.dbam_to_cs		= f10_dbam_to_chip_select,
1921		}
1922	},
1923	[F15_CPUS] = {
1924		.ctl_name = "F15h",
1925		.f1_id = PCI_DEVICE_ID_AMD_15H_NB_F1,
1926		.f3_id = PCI_DEVICE_ID_AMD_15H_NB_F3,
1927		.ops = {
1928			.early_channel_count	= f1x_early_channel_count,
1929			.map_sysaddr_to_csrow	= f1x_map_sysaddr_to_csrow,
1930			.dbam_to_cs		= f15_dbam_to_chip_select,
1931		}
1932	},
1933	[F15_M30H_CPUS] = {
1934		.ctl_name = "F15h_M30h",
1935		.f1_id = PCI_DEVICE_ID_AMD_15H_M30H_NB_F1,
1936		.f3_id = PCI_DEVICE_ID_AMD_15H_M30H_NB_F3,
1937		.ops = {
1938			.early_channel_count	= f1x_early_channel_count,
1939			.map_sysaddr_to_csrow	= f1x_map_sysaddr_to_csrow,
1940			.dbam_to_cs		= f16_dbam_to_chip_select,
1941		}
1942	},
1943	[F15_M60H_CPUS] = {
1944		.ctl_name = "F15h_M60h",
1945		.f1_id = PCI_DEVICE_ID_AMD_15H_M60H_NB_F1,
1946		.f3_id = PCI_DEVICE_ID_AMD_15H_M60H_NB_F3,
1947		.ops = {
1948			.early_channel_count	= f1x_early_channel_count,
1949			.map_sysaddr_to_csrow	= f1x_map_sysaddr_to_csrow,
1950			.dbam_to_cs		= f15_m60h_dbam_to_chip_select,
1951		}
1952	},
1953	[F16_CPUS] = {
1954		.ctl_name = "F16h",
1955		.f1_id = PCI_DEVICE_ID_AMD_16H_NB_F1,
1956		.f3_id = PCI_DEVICE_ID_AMD_16H_NB_F3,
1957		.ops = {
1958			.early_channel_count	= f1x_early_channel_count,
1959			.map_sysaddr_to_csrow	= f1x_map_sysaddr_to_csrow,
1960			.dbam_to_cs		= f16_dbam_to_chip_select,
1961		}
1962	},
1963	[F16_M30H_CPUS] = {
1964		.ctl_name = "F16h_M30h",
1965		.f1_id = PCI_DEVICE_ID_AMD_16H_M30H_NB_F1,
1966		.f3_id = PCI_DEVICE_ID_AMD_16H_M30H_NB_F3,
1967		.ops = {
1968			.early_channel_count	= f1x_early_channel_count,
1969			.map_sysaddr_to_csrow	= f1x_map_sysaddr_to_csrow,
1970			.dbam_to_cs		= f16_dbam_to_chip_select,
1971		}
1972	},
1973};
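
/*
 * Editorial note (cross-reference sketch, not taken from the BKDG):
 * per_family_init() below picks one of these descriptors from boot_cpu_data
 * and caches its ops in pvt->ops.  From then on the per-family differences
 * are hidden behind indirect calls, e.g. the MCE decode path ends in
 *
 *	pvt->ops->map_sysaddr_to_csrow(mci, sys_addr, &err);
 *
 * (see decode_bus_error()) and the csrow sizing code calls
 * pvt->ops->dbam_to_cs() (see get_csrow_nr_pages()).
 */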
1974
1975/*
1976 * These are tables of eigenvectors (one per line) which can be used for the
1977 * construction of the syndrome tables. The modified syndrome search algorithm
1978 * uses those to find the symbol in error and thus the DIMM.
1979 *
1980 * Algorithm courtesy of Ross LaFetra from AMD.
1981 */
1982static const u16 x4_vectors[] = {
1983	0x2f57, 0x1afe, 0x66cc, 0xdd88,
1984	0x11eb, 0x3396, 0x7f4c, 0xeac8,
1985	0x0001, 0x0002, 0x0004, 0x0008,
1986	0x1013, 0x3032, 0x4044, 0x8088,
1987	0x106b, 0x30d6, 0x70fc, 0xe0a8,
1988	0x4857, 0xc4fe, 0x13cc, 0x3288,
1989	0x1ac5, 0x2f4a, 0x5394, 0xa1e8,
1990	0x1f39, 0x251e, 0xbd6c, 0x6bd8,
1991	0x15c1, 0x2a42, 0x89ac, 0x4758,
1992	0x2b03, 0x1602, 0x4f0c, 0xca08,
1993	0x1f07, 0x3a0e, 0x6b04, 0xbd08,
1994	0x8ba7, 0x465e, 0x244c, 0x1cc8,
1995	0x2b87, 0x164e, 0x642c, 0xdc18,
1996	0x40b9, 0x80de, 0x1094, 0x20e8,
1997	0x27db, 0x1eb6, 0x9dac, 0x7b58,
1998	0x11c1, 0x2242, 0x84ac, 0x4c58,
1999	0x1be5, 0x2d7a, 0x5e34, 0xa718,
2000	0x4b39, 0x8d1e, 0x14b4, 0x28d8,
2001	0x4c97, 0xc87e, 0x11fc, 0x33a8,
2002	0x8e97, 0x497e, 0x2ffc, 0x1aa8,
2003	0x16b3, 0x3d62, 0x4f34, 0x8518,
2004	0x1e2f, 0x391a, 0x5cac, 0xf858,
2005	0x1d9f, 0x3b7a, 0x572c, 0xfe18,
2006	0x15f5, 0x2a5a, 0x5264, 0xa3b8,
2007	0x1dbb, 0x3b66, 0x715c, 0xe3f8,
2008	0x4397, 0xc27e, 0x17fc, 0x3ea8,
2009	0x1617, 0x3d3e, 0x6464, 0xb8b8,
2010	0x23ff, 0x12aa, 0xab6c, 0x56d8,
2011	0x2dfb, 0x1ba6, 0x913c, 0x7328,
2012	0x185d, 0x2ca6, 0x7914, 0x9e28,
2013	0x171b, 0x3e36, 0x7d7c, 0xebe8,
2014	0x4199, 0x82ee, 0x19f4, 0x2e58,
2015	0x4807, 0xc40e, 0x130c, 0x3208,
2016	0x1905, 0x2e0a, 0x5804, 0xac08,
2017	0x213f, 0x132a, 0xadfc, 0x5ba8,
2018	0x19a9, 0x2efe, 0xb5cc, 0x6f88,
2019};
2020
2021static const u16 x8_vectors[] = {
2022	0x0145, 0x028a, 0x2374, 0x43c8, 0xa1f0, 0x0520, 0x0a40, 0x1480,
2023	0x0211, 0x0422, 0x0844, 0x1088, 0x01b0, 0x44e0, 0x23c0, 0xed80,
2024	0x1011, 0x0116, 0x022c, 0x0458, 0x08b0, 0x8c60, 0x2740, 0x4e80,
2025	0x0411, 0x0822, 0x1044, 0x0158, 0x02b0, 0x2360, 0x46c0, 0xab80,
2026	0x0811, 0x1022, 0x012c, 0x0258, 0x04b0, 0x4660, 0x8cc0, 0x2780,
2027	0x2071, 0x40e2, 0xa0c4, 0x0108, 0x0210, 0x0420, 0x0840, 0x1080,
2028	0x4071, 0x80e2, 0x0104, 0x0208, 0x0410, 0x0820, 0x1040, 0x2080,
2029	0x8071, 0x0102, 0x0204, 0x0408, 0x0810, 0x1020, 0x2040, 0x4080,
2030	0x019d, 0x03d6, 0x136c, 0x2198, 0x50b0, 0xb2e0, 0x0740, 0x0e80,
2031	0x0189, 0x03ea, 0x072c, 0x0e58, 0x1cb0, 0x56e0, 0x37c0, 0xf580,
2032	0x01fd, 0x0376, 0x06ec, 0x0bb8, 0x1110, 0x2220, 0x4440, 0x8880,
2033	0x0163, 0x02c6, 0x1104, 0x0758, 0x0eb0, 0x2be0, 0x6140, 0xc280,
2034	0x02fd, 0x01c6, 0x0b5c, 0x1108, 0x07b0, 0x25a0, 0x8840, 0x6180,
2035	0x0801, 0x012e, 0x025c, 0x04b8, 0x1370, 0x26e0, 0x57c0, 0xb580,
2036	0x0401, 0x0802, 0x015c, 0x02b8, 0x22b0, 0x13e0, 0x7140, 0xe280,
2037	0x0201, 0x0402, 0x0804, 0x01b8, 0x11b0, 0x31a0, 0x8040, 0x7180,
2038	0x0101, 0x0202, 0x0404, 0x0808, 0x1010, 0x2020, 0x4040, 0x8080,
2039	0x0001, 0x0002, 0x0004, 0x0008, 0x0010, 0x0020, 0x0040, 0x0080,
2040	0x0100, 0x0200, 0x0400, 0x0800, 0x1000, 0x2000, 0x4000, 0x8000,
2041};
2042
2043static int decode_syndrome(u16 syndrome, const u16 *vectors, unsigned num_vecs,
2044			   unsigned v_dim)
2045{
2046	unsigned int i, err_sym;
2047
2048	for (err_sym = 0; err_sym < num_vecs / v_dim; err_sym++) {
2049		u16 s = syndrome;
2050		unsigned v_idx =  err_sym * v_dim;
2051		unsigned v_end = (err_sym + 1) * v_dim;
2052
2053		/* walk over all 16 bits of the syndrome */
2054		for (i = 1; i < (1U << 16); i <<= 1) {
2055
2056			/* if bit is set in that eigenvector... */
2057			if (v_idx < v_end && vectors[v_idx] & i) {
2058				u16 ev_comp = vectors[v_idx++];
2059
2060				/* ... and bit set in the modified syndrome, */
2061				if (s & i) {
2062					/* remove it. */
2063					s ^= ev_comp;
2064
2065					if (!s)
2066						return err_sym;
2067				}
2068
2069			} else if (s & i)
2070				/* can't get to zero, move to next symbol */
2071				break;
2072		}
2073	}
2074
2075	edac_dbg(0, "syndrome(%x) not found\n", syndrome);
2076	return -1;
2077}
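
/*
 * Worked example (editorial sketch with an arbitrary syndrome value): with
 * x4 symbols, v_dim == 4, so each group of four vectors above stands for one
 * error symbol.  For syndrome 0x0003 the first two groups cannot cancel the
 * syndrome and the walk breaks out early, but the third group
 * { 0x0001, 0x0002, 0x0004, 0x0008 } clears it after XOR-ing its first two
 * vectors, so decode_syndrome() returns err_sym == 2.
 */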
2078
2079static int map_err_sym_to_channel(int err_sym, int sym_size)
2080{
2081	if (sym_size == 4)
2082		switch (err_sym) {
2083		case 0x20:
2084		case 0x21:
2085			return 0;
2087		case 0x22:
2088		case 0x23:
2089			return 1;
2091		default:
2092			return err_sym >> 4;
2094		}
2095	/* x8 symbols */
2096	else
2097		switch (err_sym) {
2098		/* imaginary bits not in a DIMM */
2099		case 0x10:
2100			WARN(1, "Invalid error symbol: 0x%x\n", err_sym);
2102			return -1;
2105		case 0x11:
2106			return 0;
2108		case 0x12:
2109			return 1;
2111		default:
2112			return err_sym >> 3;
2114		}
2115	return -1;
2116}
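
/*
 * Continuing the sketch above: with 4-bit symbols err_sym == 2 falls through
 * to the default case, so the channel is err_sym >> 4 == 0; only symbols
 * 0x20/0x21 and 0x22/0x23 are mapped explicitly to channels 0 and 1.
 */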
2117
2118static int get_channel_from_ecc_syndrome(struct mem_ctl_info *mci, u16 syndrome)
2119{
2120	struct amd64_pvt *pvt = mci->pvt_info;
2121	int err_sym = -1;
2122
2123	if (pvt->ecc_sym_sz == 8)
2124		err_sym = decode_syndrome(syndrome, x8_vectors,
2125					  ARRAY_SIZE(x8_vectors),
2126					  pvt->ecc_sym_sz);
2127	else if (pvt->ecc_sym_sz == 4)
2128		err_sym = decode_syndrome(syndrome, x4_vectors,
2129					  ARRAY_SIZE(x4_vectors),
2130					  pvt->ecc_sym_sz);
2131	else {
2132		amd64_warn("Illegal ECC symbol size: %u\n", pvt->ecc_sym_sz);
2133		return err_sym;
2134	}
2135
2136	return map_err_sym_to_channel(err_sym, pvt->ecc_sym_sz);
2137}
2138
2139static void __log_bus_error(struct mem_ctl_info *mci, struct err_info *err,
2140			    u8 ecc_type)
2141{
2142	enum hw_event_mc_err_type err_type;
2143	const char *string;
2144
2145	if (ecc_type == 2)
2146		err_type = HW_EVENT_ERR_CORRECTED;
2147	else if (ecc_type == 1)
2148		err_type = HW_EVENT_ERR_UNCORRECTED;
2149	else {
2150		WARN(1, "Unknown ECC error type: %u\n", ecc_type);
2151		return;
2152	}
2153
2154	switch (err->err_code) {
2155	case DECODE_OK:
2156		string = "";
2157		break;
2158	case ERR_NODE:
2159		string = "Failed to map error addr to a node";
2160		break;
2161	case ERR_CSROW:
2162		string = "Failed to map error addr to a csrow";
2163		break;
2164	case ERR_CHANNEL:
2165		string = "unknown syndrome - possible error reporting race";
2166		break;
2167	default:
2168		string = "unknown error code";
2169		break;
2170	}
2171
2172	edac_mc_handle_error(err_type, mci, 1,
2173			     err->page, err->offset, err->syndrome,
2174			     err->csrow, err->channel, -1,
2175			     string, "");
2176}
2177
2178static inline void decode_bus_error(int node_id, struct mce *m)
2179{
2180	struct mem_ctl_info *mci;
2181	struct amd64_pvt *pvt;
2182	u8 ecc_type = (m->status >> 45) & 0x3;
2183	u8 xec = XEC(m->status, 0x1f);
2184	u16 ec = EC(m->status);
2185	u64 sys_addr;
2186	struct err_info err;
2187
2188	mci = edac_mc_find(node_id);
2189	if (!mci)
2190		return;
2191
2192	pvt = mci->pvt_info;
2193
2194	/* Bail out early if this was an 'observed' error */
2195	if (PP(ec) == NBSL_PP_OBS)
2196		return;
2197
2198	/* Do only ECC errors */
2199	if (xec && xec != F10_NBSL_EXT_ERR_ECC)
2200		return;
2201
2202	memset(&err, 0, sizeof(err));
2203
2204	sys_addr = get_error_address(pvt, m);
2205
2206	if (ecc_type == 2)
2207		err.syndrome = extract_syndrome(m->status);
2208
2209	pvt->ops->map_sysaddr_to_csrow(mci, sys_addr, &err);
2210
2211	__log_bus_error(mci, &err, ecc_type);
2212}
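
/*
 * Editorial sketch of the MCA status decode above (field positions as used
 * by this file, not restated from the BKDG): the two bits at status[46:45]
 * give the ECC type (2 == correctable, 1 == uncorrectable, see
 * __log_bus_error()), EC()/XEC() pull out the error code and extended error
 * code, and extract_syndrome() recovers the ECC syndrome that is later fed
 * to get_channel_from_ecc_syndrome() when the DCTs are ganged.
 */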
2213
2214/*
2215 * Use pvt->F2 which contains the F2 CPU PCI device to get the related
2216 * F1 (AddrMap) and F3 (Misc) devices. Return negative value on error.
2217 */
2218static int reserve_mc_sibling_devs(struct amd64_pvt *pvt, u16 f1_id, u16 f3_id)
2219{
2220	/* Reserve the ADDRESS MAP Device */
2221	pvt->F1 = pci_get_related_function(pvt->F2->vendor, f1_id, pvt->F2);
2222	if (!pvt->F1) {
2223		amd64_err("F1 (address map) device not found: "
2224			  "vendor %x device 0x%x (broken BIOS?)\n",
2225			  PCI_VENDOR_ID_AMD, f1_id);
2226		return -ENODEV;
2227	}
2228
2229	/* Reserve the MISC Device */
2230	pvt->F3 = pci_get_related_function(pvt->F2->vendor, f3_id, pvt->F2);
2231	if (!pvt->F3) {
2232		pci_dev_put(pvt->F1);
2233		pvt->F1 = NULL;
2234
2235		amd64_err("F3 (misc) device not found: "
2236			  "vendor %x device 0x%x (broken BIOS?)\n",
2237			  PCI_VENDOR_ID_AMD, f3_id);
2238
2239		return -ENODEV;
2240	}
2241	edac_dbg(1, "F1: %s\n", pci_name(pvt->F1));
2242	edac_dbg(1, "F2: %s\n", pci_name(pvt->F2));
2243	edac_dbg(1, "F3: %s\n", pci_name(pvt->F3));
2244
2245	return 0;
2246}
2247
2248static void free_mc_sibling_devs(struct amd64_pvt *pvt)
2249{
2250	pci_dev_put(pvt->F1);
2251	pci_dev_put(pvt->F3);
2252}
2253
2254/*
2255 * Retrieve the hardware registers of the memory controller (this includes the
2256 * 'Address Map' and 'Misc' device regs)
2257 */
2258static void read_mc_regs(struct amd64_pvt *pvt)
2259{
2260	unsigned range;
2261	u64 msr_val;
2262	u32 tmp;
2263
2264	/*
2265	 * Retrieve TOP_MEM and TOP_MEM2; no masking off of reserved bits since
2266	 * those are Read-As-Zero
2267	 */
2268	rdmsrl(MSR_K8_TOP_MEM1, pvt->top_mem);
2269	edac_dbg(0, "  TOP_MEM:  0x%016llx\n", pvt->top_mem);
2270
2271	/* check first whether TOP_MEM2 is enabled */
2272	rdmsrl(MSR_K8_SYSCFG, msr_val);
2273	if (msr_val & (1U << 21)) {
2274		rdmsrl(MSR_K8_TOP_MEM2, pvt->top_mem2);
2275		edac_dbg(0, "  TOP_MEM2: 0x%016llx\n", pvt->top_mem2);
2276	} else
2277		edac_dbg(0, "  TOP_MEM2 disabled\n");
2278
2279	amd64_read_pci_cfg(pvt->F3, NBCAP, &pvt->nbcap);
2280
2281	read_dram_ctl_register(pvt);
2282
2283	for (range = 0; range < DRAM_RANGES; range++) {
2284		u8 rw;
2285
2286		/* read settings for this DRAM range */
2287		read_dram_base_limit_regs(pvt, range);
2288
2289		rw = dram_rw(pvt, range);
2290		if (!rw)
2291			continue;
2292
2293		edac_dbg(1, "  DRAM range[%d], base: 0x%016llx; limit: 0x%016llx\n",
2294			 range,
2295			 get_dram_base(pvt, range),
2296			 get_dram_limit(pvt, range));
2297
2298		edac_dbg(1, "   IntlvEn=%s; Range access: %s%s IntlvSel=%d DstNode=%d\n",
2299			 dram_intlv_en(pvt, range) ? "Enabled" : "Disabled",
2300			 (rw & 0x1) ? "R" : "-",
2301			 (rw & 0x2) ? "W" : "-",
2302			 dram_intlv_sel(pvt, range),
2303			 dram_dst_node(pvt, range));
2304	}
2305
2306	read_dct_base_mask(pvt);
2307
2308	amd64_read_pci_cfg(pvt->F1, DHAR, &pvt->dhar);
2309	amd64_read_dct_pci_cfg(pvt, 0, DBAM0, &pvt->dbam0);
2310
2311	amd64_read_pci_cfg(pvt->F3, F10_ONLINE_SPARE, &pvt->online_spare);
2312
2313	amd64_read_dct_pci_cfg(pvt, 0, DCLR0, &pvt->dclr0);
2314	amd64_read_dct_pci_cfg(pvt, 0, DCHR0, &pvt->dchr0);
2315
2316	if (!dct_ganging_enabled(pvt)) {
2317		amd64_read_dct_pci_cfg(pvt, 1, DCLR0, &pvt->dclr1);
2318		amd64_read_dct_pci_cfg(pvt, 1, DCHR0, &pvt->dchr1);
2319	}
2320
2321	pvt->ecc_sym_sz = 4;
2322	determine_memory_type(pvt);
2323	edac_dbg(1, "  DIMM type: %s\n", edac_mem_types[pvt->dram_type]);
2324
2325	if (pvt->fam >= 0x10) {
2326		amd64_read_pci_cfg(pvt->F3, EXT_NB_MCA_CFG, &tmp);
2327		/* F16h has only DCT0, so no need to read dbam1 */
2328		if (pvt->fam != 0x16)
2329			amd64_read_dct_pci_cfg(pvt, 1, DBAM0, &pvt->dbam1);
2330
2331		/* F10h, revD and later can do x8 ECC too */
2332		if ((pvt->fam > 0x10 || pvt->model > 7) && tmp & BIT(25))
2333			pvt->ecc_sym_sz = 8;
2334	}
2335	dump_misc_regs(pvt);
2336}
2337
2338/*
2339 * NOTE: CPU Revision Dependent code
2340 *
2341 * Input:
2342 *	@csrow_nr ChipSelect Row Number (0..NUM_CHIPSELECTS-1)
2343 *	@pvt: private pointer to -->
2344 *			DRAM Bank Address mapping register
2345 *			node_id
2346 *			DCL register from which dual_channel_active is read
2347 *
2348 * The DBAM register consists of 4 fields of 4 bits each, one field per CSROW pair:
2349 *
2350 * Bits:	CSROWs
2351 * 0-3		CSROWs 0 and 1
2352 * 4-7		CSROWs 2 and 3
2353 * 8-11		CSROWs 4 and 5
2354 * 12-15	CSROWs 6 and 7
2355 *
2356 * Values range from 0 to 15.
2357 * The meaning of a value depends on CPU revision and dual-channel state;
2358 * see the relevant BKDG for more info.
2359 *
2360 * The memory controller provides for a total of only 8 CSROWs in its current
2361 * architecture. Each "pair" of CSROWs normally represents just one DIMM in
2362 * single channel or two (2) DIMMs in dual channel mode.
2363 *
2364 * The following code logic collapses the various tables for CSROW based on CPU
2365 * revision.
2366 *
2367 * Returns:
2368 *	The number of PAGE_SIZE pages the specified CSROW number
2369 *	encompasses.
2370 *
2371 */
2372static u32 get_csrow_nr_pages(struct amd64_pvt *pvt, u8 dct, int csrow_nr)
2373{
2374	u32 cs_mode, nr_pages;
2375	u32 dbam = dct ? pvt->dbam1 : pvt->dbam0;
2376
2378	/*
2379	 * DBAM carries one 4-bit field per CSROW pair, so csrow_nr / 2 selects
2380	 * the field for this CSROW. The division is integer math (1/2 == 0);
2381	 * DBAM_DIMM() then shifts the register by four bits per field to
2382	 * extract the proper cs_mode.
2384	 */
2385	cs_mode = DBAM_DIMM(csrow_nr / 2, dbam);
2386
2387	nr_pages = pvt->ops->dbam_to_cs(pvt, dct, cs_mode, (csrow_nr / 2))
2388							   << (20 - PAGE_SHIFT);
2389
2390	edac_dbg(0, "csrow: %d, channel: %d, DBAM idx: %d\n",
2391		    csrow_nr, dct,  cs_mode);
2392	edac_dbg(0, "nr_pages/channel: %u\n", nr_pages);
2393
2394	return nr_pages;
2395}
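
/*
 * Worked example (editorial sketch, register values are made up): with
 * dbam == 0x00000040, csrow_nr == 2 selects field csrow_nr / 2 == 1, i.e.
 * the 4-bit field covering CSROWs 2 and 3, so cs_mode == 4.  If
 * ->dbam_to_cs() translates that mode to a 512 MB chip select, the shift by
 * (20 - PAGE_SHIFT) converts megabytes to pages: with 4 KiB pages,
 * 512 << 8 == 131072 pages.
 */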
2396
2397/*
2398 * Initialize the array of csrow attribute instances, based on the values
2399 * from pci config hardware registers.
2400 */
2401static int init_csrows(struct mem_ctl_info *mci)
2402{
2403	struct amd64_pvt *pvt = mci->pvt_info;
2404	struct csrow_info *csrow;
2405	struct dimm_info *dimm;
2406	enum edac_type edac_mode;
2407	int i, j, empty = 1;
2408	int nr_pages = 0;
2409	u32 val;
2410
2411	amd64_read_pci_cfg(pvt->F3, NBCFG, &val);
2412
2413	pvt->nbcfg = val;
2414
2415	edac_dbg(0, "node %d, NBCFG=0x%08x[ChipKillEccCap: %d|DramEccEn: %d]\n",
2416		 pvt->mc_node_id, val,
2417		 !!(val & NBCFG_CHIPKILL), !!(val & NBCFG_ECC_ENABLE));
2418
2419	/*
2420	 * We iterate over DCT0 here but we look at DCT1 in parallel, if needed.
2421	 */
2422	for_each_chip_select(i, 0, pvt) {
2423		bool row_dct0 = !!csrow_enabled(i, 0, pvt);
2424		bool row_dct1 = false;
2425
2426		if (pvt->fam != 0xf)
2427			row_dct1 = !!csrow_enabled(i, 1, pvt);
2428
2429		if (!row_dct0 && !row_dct1)
2430			continue;
2431
2432		csrow = mci->csrows[i];
2433		empty = 0;
2434
2435		edac_dbg(1, "MC node: %d, csrow: %d\n",
2436			    pvt->mc_node_id, i);
2437
2438		if (row_dct0) {
2439			nr_pages = get_csrow_nr_pages(pvt, 0, i);
2440			csrow->channels[0]->dimm->nr_pages = nr_pages;
2441		}
2442
2443		/* K8 has only one DCT */
2444		if (pvt->fam != 0xf && row_dct1) {
2445			int row_dct1_pages = get_csrow_nr_pages(pvt, 1, i);
2446
2447			csrow->channels[1]->dimm->nr_pages = row_dct1_pages;
2448			nr_pages += row_dct1_pages;
2449		}
2450
2451		edac_dbg(1, "Total csrow%d pages: %u\n", i, nr_pages);
2452
2453		/*
2454		 * determine whether CHIPKILL or JUST ECC or NO ECC is operating
2455		 */
2456		if (pvt->nbcfg & NBCFG_ECC_ENABLE)
2457			edac_mode = (pvt->nbcfg & NBCFG_CHIPKILL) ?
2458				    EDAC_S4ECD4ED : EDAC_SECDED;
2459		else
2460			edac_mode = EDAC_NONE;
2461
2462		for (j = 0; j < pvt->channel_count; j++) {
2463			dimm = csrow->channels[j]->dimm;
2464			dimm->mtype = pvt->dram_type;
2465			dimm->edac_mode = edac_mode;
2466		}
2467	}
2468
2469	return empty;
2470}
2471
2472/* get all cores on this DCT */
2473static void get_cpus_on_this_dct_cpumask(struct cpumask *mask, u16 nid)
2474{
2475	int cpu;
2476
2477	for_each_online_cpu(cpu)
2478		if (amd_get_nb_id(cpu) == nid)
2479			cpumask_set_cpu(cpu, mask);
2480}
2481
2482/* check MCG_CTL on all the cpus on this node */
2483static bool nb_mce_bank_enabled_on_node(u16 nid)
2484{
2485	cpumask_var_t mask;
2486	int cpu, nbe;
2487	bool ret = false;
2488
2489	if (!zalloc_cpumask_var(&mask, GFP_KERNEL)) {
2490		amd64_warn("%s: Error allocating mask\n", __func__);
2491		return false;
2492	}
2493
2494	get_cpus_on_this_dct_cpumask(mask, nid);
2495
2496	rdmsr_on_cpus(mask, MSR_IA32_MCG_CTL, msrs);
2497
2498	for_each_cpu(cpu, mask) {
2499		struct msr *reg = per_cpu_ptr(msrs, cpu);
2500		nbe = reg->l & MSR_MCGCTL_NBE;
2501
2502		edac_dbg(0, "core: %u, MCG_CTL: 0x%llx, NB MSR is %s\n",
2503			 cpu, reg->q,
2504			 (nbe ? "enabled" : "disabled"));
2505
2506		if (!nbe)
2507			goto out;
2508	}
2509	ret = true;
2510
2511out:
2512	free_cpumask_var(mask);
2513	return ret;
2514}
2515
2516static int toggle_ecc_err_reporting(struct ecc_settings *s, u16 nid, bool on)
2517{
2518	cpumask_var_t cmask;
2519	int cpu;
2520
2521	if (!zalloc_cpumask_var(&cmask, GFP_KERNEL)) {
2522		amd64_warn("%s: error allocating mask\n", __func__);
2523		return -ENOMEM;
2524	}
2525
2526	get_cpus_on_this_dct_cpumask(cmask, nid);
2527
2528	rdmsr_on_cpus(cmask, MSR_IA32_MCG_CTL, msrs);
2529
2530	for_each_cpu(cpu, cmask) {
2531
2532		struct msr *reg = per_cpu_ptr(msrs, cpu);
2533
2534		if (on) {
2535			if (reg->l & MSR_MCGCTL_NBE)
2536				s->flags.nb_mce_enable = 1;
2537
2538			reg->l |= MSR_MCGCTL_NBE;
2539		} else {
2540			/*
2541			 * Turn off NB MCE reporting only when it was off before
2542			 */
2543			if (!s->flags.nb_mce_enable)
2544				reg->l &= ~MSR_MCGCTL_NBE;
2545		}
2546	}
2547	wrmsr_on_cpus(cmask, MSR_IA32_MCG_CTL, msrs);
2548
2549	free_cpumask_var(cmask);
2550
2551	return 0;
2552}
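
/*
 * Editorial sketch of the toggle above: rdmsr_on_cpus() fills the per-cpu
 * 'msrs' array with MCG_CTL for every core on the node, the loop sets or
 * conditionally clears the NB machine check enable bit (MSR_MCGCTL_NBE) in
 * the cached copies, and wrmsr_on_cpus() writes them back.  The BIOS state
 * is remembered in s->flags.nb_mce_enable so that the OFF path only clears
 * the bit if it was not already enabled before we touched it.
 */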
2553
2554static bool enable_ecc_error_reporting(struct ecc_settings *s, u16 nid,
2555				       struct pci_dev *F3)
2556{
2557	bool ret = true;
2558	u32 value, mask = 0x3;		/* UECC/CECC enable */
2559
2560	if (toggle_ecc_err_reporting(s, nid, ON)) {
2561		amd64_warn("Error enabling ECC reporting over MCGCTL!\n");
2562		return false;
2563	}
2564
2565	amd64_read_pci_cfg(F3, NBCTL, &value);
2566
2567	s->old_nbctl   = value & mask;
2568	s->nbctl_valid = true;
2569
2570	value |= mask;
2571	amd64_write_pci_cfg(F3, NBCTL, value);
2572
2573	amd64_read_pci_cfg(F3, NBCFG, &value);
2574
2575	edac_dbg(0, "1: node %d, NBCFG=0x%08x[DramEccEn: %d]\n",
2576		 nid, value, !!(value & NBCFG_ECC_ENABLE));
2577
2578	if (!(value & NBCFG_ECC_ENABLE)) {
2579		amd64_warn("DRAM ECC disabled on this node, enabling...\n");
2580
2581		s->flags.nb_ecc_prev = 0;
2582
2583		/* Attempt to turn on DRAM ECC Enable */
2584		value |= NBCFG_ECC_ENABLE;
2585		amd64_write_pci_cfg(F3, NBCFG, value);
2586
2587		amd64_read_pci_cfg(F3, NBCFG, &value);
2588
2589		if (!(value & NBCFG_ECC_ENABLE)) {
2590			amd64_warn("Hardware rejected DRAM ECC enable, "
2591				   "check memory DIMM configuration.\n");
2592			ret = false;
2593		} else {
2594			amd64_info("Hardware accepted DRAM ECC Enable\n");
2595		}
2596	} else {
2597		s->flags.nb_ecc_prev = 1;
2598	}
2599
2600	edac_dbg(0, "2: node %d, NBCFG=0x%08x[DramEccEn: %d]\n",
2601		 nid, value, !!(value & NBCFG_ECC_ENABLE));
2602
2603	return ret;
2604}
2605
2606static void restore_ecc_error_reporting(struct ecc_settings *s, u16 nid,
2607					struct pci_dev *F3)
2608{
2609	u32 value, mask = 0x3;		/* UECC/CECC enable */
2610
2612	if (!s->nbctl_valid)
2613		return;
2614
2615	amd64_read_pci_cfg(F3, NBCTL, &value);
2616	value &= ~mask;
2617	value |= s->old_nbctl;
2618
2619	amd64_write_pci_cfg(F3, NBCTL, value);
2620
2621	/* restore previous BIOS DRAM ECC "off" setting we force-enabled */
2622	if (!s->flags.nb_ecc_prev) {
2623		amd64_read_pci_cfg(F3, NBCFG, &value);
2624		value &= ~NBCFG_ECC_ENABLE;
2625		amd64_write_pci_cfg(F3, NBCFG, value);
2626	}
2627
2628	/* restore the NB Enable MCGCTL bit */
2629	if (toggle_ecc_err_reporting(s, nid, OFF))
2630		amd64_warn("Error restoring NB MCGCTL settings!\n");
2631}
2632
2633/*
2634 * EDAC requires that the BIOS have ECC enabled before
2635 * taking over the processing of ECC errors. A command line
2636 * option allows us to force-enable hardware ECC later in
2637 * enable_ecc_error_reporting().
2638 */
2639static const char *ecc_msg =
2640	"ECC disabled in the BIOS or no ECC capability, module will not load.\n"
2641	" Either enable ECC checking or force module loading by setting "
2642	"'ecc_enable_override'.\n"
2643	" (Note that use of the override may cause unknown side effects.)\n";
2644
2645static bool ecc_enabled(struct pci_dev *F3, u16 nid)
2646{
2647	u32 value;
2648	u8 ecc_en = 0;
2649	bool nb_mce_en = false;
2650
2651	amd64_read_pci_cfg(F3, NBCFG, &value);
2652
2653	ecc_en = !!(value & NBCFG_ECC_ENABLE);
2654	amd64_info("DRAM ECC %s.\n", (ecc_en ? "enabled" : "disabled"));
2655
2656	nb_mce_en = nb_mce_bank_enabled_on_node(nid);
2657	if (!nb_mce_en)
2658		amd64_notice("NB MCE bank disabled, set MSR "
2659			     "0x%08x[4] on node %d to enable.\n",
2660			     MSR_IA32_MCG_CTL, nid);
2661
2662	if (!ecc_en || !nb_mce_en) {
2663		amd64_notice("%s", ecc_msg);
2664		return false;
2665	}
2666	return true;
2667}
2668
2669static void setup_mci_misc_attrs(struct mem_ctl_info *mci,
2670				 struct amd64_family_type *fam)
2671{
2672	struct amd64_pvt *pvt = mci->pvt_info;
2673
2674	mci->mtype_cap		= MEM_FLAG_DDR2 | MEM_FLAG_RDDR2;
2675	mci->edac_ctl_cap	= EDAC_FLAG_NONE;
2676
2677	if (pvt->nbcap & NBCAP_SECDED)
2678		mci->edac_ctl_cap |= EDAC_FLAG_SECDED;
2679
2680	if (pvt->nbcap & NBCAP_CHIPKILL)
2681		mci->edac_ctl_cap |= EDAC_FLAG_S4ECD4ED;
2682
2683	mci->edac_cap		= determine_edac_cap(pvt);
2684	mci->mod_name		= EDAC_MOD_STR;
2685	mci->mod_ver		= EDAC_AMD64_VERSION;
2686	mci->ctl_name		= fam->ctl_name;
2687	mci->dev_name		= pci_name(pvt->F2);
2688	mci->ctl_page_to_phys	= NULL;
2689
2690	/* memory scrubber interface */
2691	mci->set_sdram_scrub_rate = set_scrub_rate;
2692	mci->get_sdram_scrub_rate = get_scrub_rate;
2693}
2694
2695/*
2696 * returns a pointer to the family descriptor on success, NULL otherwise.
2697 */
2698static struct amd64_family_type *per_family_init(struct amd64_pvt *pvt)
2699{
2700	struct amd64_family_type *fam_type = NULL;
2701
2702	pvt->ext_model  = boot_cpu_data.x86_model >> 4;
2703	pvt->stepping	= boot_cpu_data.x86_mask;
2704	pvt->model	= boot_cpu_data.x86_model;
2705	pvt->fam	= boot_cpu_data.x86;
2706
2707	switch (pvt->fam) {
2708	case 0xf:
2709		fam_type	= &family_types[K8_CPUS];
2710		pvt->ops	= &family_types[K8_CPUS].ops;
2711		break;
2712
2713	case 0x10:
2714		fam_type	= &family_types[F10_CPUS];
2715		pvt->ops	= &family_types[F10_CPUS].ops;
2716		break;
2717
2718	case 0x15:
2719		if (pvt->model == 0x30) {
2720			fam_type = &family_types[F15_M30H_CPUS];
2721			pvt->ops = &family_types[F15_M30H_CPUS].ops;
2722			break;
2723		} else if (pvt->model == 0x60) {
2724			fam_type = &family_types[F15_M60H_CPUS];
2725			pvt->ops = &family_types[F15_M60H_CPUS].ops;
2726			break;
2727		}
2728
2729		fam_type	= &family_types[F15_CPUS];
2730		pvt->ops	= &family_types[F15_CPUS].ops;
2731		break;
2732
2733	case 0x16:
2734		if (pvt->model == 0x30) {
2735			fam_type = &family_types[F16_M30H_CPUS];
2736			pvt->ops = &family_types[F16_M30H_CPUS].ops;
2737			break;
2738		}
2739		fam_type	= &family_types[F16_CPUS];
2740		pvt->ops	= &family_types[F16_CPUS].ops;
2741		break;
2742
2743	default:
2744		amd64_err("Unsupported family!\n");
2745		return NULL;
2746	}
2747
2748	amd64_info("%s %sdetected (node %d).\n", fam_type->ctl_name,
2749		     (pvt->fam == 0xf ?
2750				(pvt->ext_model >= K8_REV_F  ? "revF or later "
2751							     : "revE or earlier ")
2752				 : ""), pvt->mc_node_id);
2753	return fam_type;
2754}
2755
2756static const struct attribute_group *amd64_edac_attr_groups[] = {
2757#ifdef CONFIG_EDAC_DEBUG
2758	&amd64_edac_dbg_group,
2759#endif
2760#ifdef CONFIG_EDAC_AMD64_ERROR_INJECTION
2761	&amd64_edac_inj_group,
2762#endif
2763	NULL
2764};
2765
2766static int init_one_instance(struct pci_dev *F2)
2767{
2768	struct amd64_pvt *pvt = NULL;
2769	struct amd64_family_type *fam_type = NULL;
2770	struct mem_ctl_info *mci = NULL;
2771	struct edac_mc_layer layers[2];
2772	int err = 0, ret;
2773	u16 nid = amd_get_node_id(F2);
2774
2775	ret = -ENOMEM;
2776	pvt = kzalloc(sizeof(struct amd64_pvt), GFP_KERNEL);
2777	if (!pvt)
2778		goto err_ret;
2779
2780	pvt->mc_node_id	= nid;
2781	pvt->F2 = F2;
2782
2783	ret = -EINVAL;
2784	fam_type = per_family_init(pvt);
2785	if (!fam_type)
2786		goto err_free;
2787
2788	ret = -ENODEV;
2789	err = reserve_mc_sibling_devs(pvt, fam_type->f1_id, fam_type->f3_id);
2790	if (err)
2791		goto err_free;
2792
2793	read_mc_regs(pvt);
2794
2795	/*
2796	 * We need to determine how many memory channels there are. Then use
2797	 * that information for calculating the size of the dynamic instance
2798	 * tables in the 'mci' structure.
2799	 */
2800	ret = -EINVAL;
2801	pvt->channel_count = pvt->ops->early_channel_count(pvt);
2802	if (pvt->channel_count < 0)
2803		goto err_siblings;
2804
2805	ret = -ENOMEM;
2806	layers[0].type = EDAC_MC_LAYER_CHIP_SELECT;
2807	layers[0].size = pvt->csels[0].b_cnt;
2808	layers[0].is_virt_csrow = true;
2809	layers[1].type = EDAC_MC_LAYER_CHANNEL;
2810
2811	/*
2812	 * Always allocate two channels since we can have setups with DIMMs on
2813	 * only one channel. Also, this simplifies handling later for the price
2814	 * of a couple of KBs tops.
2815	 */
2816	layers[1].size = 2;
2817	layers[1].is_virt_csrow = false;
2818
2819	mci = edac_mc_alloc(nid, ARRAY_SIZE(layers), layers, 0);
2820	if (!mci)
2821		goto err_siblings;
2822
2823	mci->pvt_info = pvt;
2824	mci->pdev = &pvt->F2->dev;
2825
2826	setup_mci_misc_attrs(mci, fam_type);
2827
2828	if (init_csrows(mci))
2829		mci->edac_cap = EDAC_FLAG_NONE;
2830
2831	ret = -ENODEV;
2832	if (edac_mc_add_mc_with_groups(mci, amd64_edac_attr_groups)) {
2833		edac_dbg(1, "failed edac_mc_add_mc()\n");
2834		goto err_add_mc;
2835	}
2836
2837	/* register GART error reporting and the bus error decoder with EDAC MCE */
2838	if (report_gart_errors)
2839		amd_report_gart_errors(true);
2840
2841	amd_register_ecc_decoder(decode_bus_error);
2842
2843	atomic_inc(&drv_instances);
2844
2845	return 0;
2846
2847err_add_mc:
2848	edac_mc_free(mci);
2849
2850err_siblings:
2851	free_mc_sibling_devs(pvt);
2852
2853err_free:
2854	kfree(pvt);
2855
2856err_ret:
2857	return ret;
2858}
2859
2860static int probe_one_instance(struct pci_dev *pdev,
2861			      const struct pci_device_id *mc_type)
2862{
2863	u16 nid = amd_get_node_id(pdev);
2864	struct pci_dev *F3 = node_to_amd_nb(nid)->misc;
2865	struct ecc_settings *s;
2866	int ret = 0;
2867
2868	ret = pci_enable_device(pdev);
2869	if (ret < 0) {
2870		edac_dbg(0, "ret=%d\n", ret);
2871		return -EIO;
2872	}
2873
2874	ret = -ENOMEM;
2875	s = kzalloc(sizeof(struct ecc_settings), GFP_KERNEL);
2876	if (!s)
2877		goto err_out;
2878
2879	ecc_stngs[nid] = s;
2880
2881	if (!ecc_enabled(F3, nid)) {
2882		ret = -ENODEV;
2883
2884		if (!ecc_enable_override)
2885			goto err_enable;
2886
2887		amd64_warn("Forcing ECC on!\n");
2888
2889		if (!enable_ecc_error_reporting(s, nid, F3))
2890			goto err_enable;
2891	}
2892
2893	ret = init_one_instance(pdev);
2894	if (ret < 0) {
2895		amd64_err("Error probing instance: %d\n", nid);
2896		restore_ecc_error_reporting(s, nid, F3);
2897		goto err_enable;
2898	}
2899	return ret;
2900
2901err_enable:
2902	kfree(s);
2903	ecc_stngs[nid] = NULL;
2904
2905err_out:
2906	return ret;
2907}
2908
2909static void remove_one_instance(struct pci_dev *pdev)
2910{
2911	struct mem_ctl_info *mci;
2912	struct amd64_pvt *pvt;
2913	u16 nid = amd_get_node_id(pdev);
2914	struct pci_dev *F3 = node_to_amd_nb(nid)->misc;
2915	struct ecc_settings *s = ecc_stngs[nid];
2916
2917	mci = find_mci_by_dev(&pdev->dev);
2918	WARN_ON(!mci);
2919
2920	/* Remove from EDAC CORE tracking list */
2921	mci = edac_mc_del_mc(&pdev->dev);
2922	if (!mci)
2923		return;
2924
2925	pvt = mci->pvt_info;
2926
2927	restore_ecc_error_reporting(s, nid, F3);
2928
2929	free_mc_sibling_devs(pvt);
2930
2931	/* unregister from EDAC MCE */
2932	amd_report_gart_errors(false);
2933	amd_unregister_ecc_decoder(decode_bus_error);
2934
2935	kfree(ecc_stngs[nid]);
2936	ecc_stngs[nid] = NULL;
2937
2938	/* Free the EDAC CORE resources */
2939	mci->pvt_info = NULL;
2940
2941	kfree(pvt);
2942	edac_mc_free(mci);
2943}
2944
2945/*
2946 * This table is part of the interface for loading drivers for PCI devices. The
2947 * PCI core identifies what devices are on a system during boot, and then
2948 * queries this table to see whether this driver supports a given device it has found.
2949 */
2950static const struct pci_device_id amd64_pci_table[] = {
2951	{ PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_K8_NB_MEMCTL) },
2952	{ PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_10H_NB_DRAM) },
2953	{ PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_15H_NB_F2) },
2954	{ PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_15H_M30H_NB_F2) },
2955	{ PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_15H_M60H_NB_F2) },
2956	{ PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_16H_NB_F2) },
2957	{ PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_16H_M30H_NB_F2) },
2958	{0, }
2959};
2960MODULE_DEVICE_TABLE(pci, amd64_pci_table);
2961
2962static struct pci_driver amd64_pci_driver = {
2963	.name		= EDAC_MOD_STR,
2964	.probe		= probe_one_instance,
2965	.remove		= remove_one_instance,
2966	.id_table	= amd64_pci_table,
2967};
2968
2969static void setup_pci_device(void)
2970{
2971	struct mem_ctl_info *mci;
2972	struct amd64_pvt *pvt;
2973
2974	if (pci_ctl)
2975		return;
2976
2977	mci = edac_mc_find(0);
2978	if (!mci)
2979		return;
2980
2981	pvt = mci->pvt_info;
2982	pci_ctl = edac_pci_create_generic_ctl(&pvt->F2->dev, EDAC_MOD_STR);
2983	if (!pci_ctl) {
2984		pr_warn("%s(): Unable to create PCI control\n", __func__);
2985		pr_warn("%s(): PCI error report via EDAC not set\n", __func__);
2986	}
2987}
2988
2989static int __init amd64_edac_init(void)
2990{
2991	int err = -ENODEV;
2992
2993	printk(KERN_INFO "AMD64 EDAC driver v%s\n", EDAC_AMD64_VERSION);
2994
2995	opstate_init();
2996
2997	if (amd_cache_northbridges() < 0)
2998		goto err_ret;
2999
3000	err = -ENOMEM;
3001	ecc_stngs = kcalloc(amd_nb_num(), sizeof(ecc_stngs[0]), GFP_KERNEL);
3002	if (!ecc_stngs)
3003		goto err_free;
3004
3005	msrs = msrs_alloc();
3006	if (!msrs)
3007		goto err_free;
3008
3009	err = pci_register_driver(&amd64_pci_driver);
3010	if (err)
3011		goto err_pci;
3012
3013	err = -ENODEV;
3014	if (!atomic_read(&drv_instances))
3015		goto err_no_instances;
3016
3017	setup_pci_device();
3018
3019#ifdef CONFIG_X86_32
3020	amd64_err("%s on 32-bit is unsupported. USE AT YOUR OWN RISK!\n", EDAC_MOD_STR);
3021#endif
3022
3023	return 0;
3024
3025err_no_instances:
3026	pci_unregister_driver(&amd64_pci_driver);
3027
3028err_pci:
3029	msrs_free(msrs);
3030	msrs = NULL;
3031
3032err_free:
3033	kfree(ecc_stngs);
3034	ecc_stngs = NULL;
3035
3036err_ret:
3037	return err;
3038}
3039
3040static void __exit amd64_edac_exit(void)
3041{
3042	if (pci_ctl)
3043		edac_pci_release_generic_ctl(pci_ctl);
3044
3045	pci_unregister_driver(&amd64_pci_driver);
3046
3047	kfree(ecc_stngs);
3048	ecc_stngs = NULL;
3049
3050	msrs_free(msrs);
3051	msrs = NULL;
3052}
3053
3054module_init(amd64_edac_init);
3055module_exit(amd64_edac_exit);
3056
3057MODULE_LICENSE("GPL");
3058MODULE_AUTHOR("SoftwareBitMaker: Doug Thompson, "
3059		"Dave Peterson, Thayne Harbaugh");
3060MODULE_DESCRIPTION("MC support for AMD64 memory controllers - "
3061		EDAC_AMD64_VERSION);
3062
3063module_param(edac_op_state, int, 0444);
3064MODULE_PARM_DESC(edac_op_state, "EDAC Error Reporting state: 0=Poll,1=NMI");
3065