1/*
2 * Copyright © 2012 NetCommWireless
3 * Iwo Mergler <Iwo.Mergler@netcommwireless.com.au>
4 *
 * Test for multi-bit error recovery on a NAND page. This mostly tests the
 * ECC controller / driver.
7 *
8 * There are two test modes:
9 *
10 *	0 - artificially inserting bit errors until the ECC fails
11 *	    This is the default method and fairly quick. It should
12 *	    be independent of the quality of the FLASH.
13 *
14 *	1 - re-writing the same pattern repeatedly until the ECC fails.
15 *	    This method relies on the physics of NAND FLASH to eventually
16 *	    generate '0' bits if '1' has been written sufficient times.
17 *	    Depending on the NAND, the first bit errors will appear after
18 *	    1000 or more writes and then will usually snowball, reaching the
19 *	    limits of the ECC quickly.
20 *
21 *	    The test stops after 10000 cycles, should your FLASH be
22 *	    exceptionally good and not generate bit errors before that. Try
23 *	    a different page in that case.
24 *
25 * Please note that neither of these tests will significantly 'use up' any
26 * FLASH endurance. Only a maximum of two erase operations will be performed.
27 *
28 *
29 * This program is free software; you can redistribute it and/or modify it
30 * under the terms of the GNU General Public License version 2 as published by
31 * the Free Software Foundation.
32 *
33 * This program is distributed in the hope that it will be useful, but WITHOUT
34 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
35 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
36 * more details.
37 *
38 * You should have received a copy of the GNU General Public License along with
39 * this program; see the file COPYING. If not, write to the Free Software
40 * Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
41 */
42
43#define pr_fmt(fmt)	KBUILD_MODNAME ": " fmt
44
45#include <linux/init.h>
46#include <linux/module.h>
47#include <linux/moduleparam.h>
48#include <linux/mtd/mtd.h>
49#include <linux/err.h>
50#include <linux/mtd/nand.h>
51#include <linux/slab.h>
52#include "mtd_test.h"
53
54static int dev;
55module_param(dev, int, S_IRUGO);
56MODULE_PARM_DESC(dev, "MTD device number to use");
57
58static unsigned page_offset;
59module_param(page_offset, uint, S_IRUGO);
60MODULE_PARM_DESC(page_offset, "Page number relative to dev start");
61
62static unsigned seed;
63module_param(seed, uint, S_IRUGO);
64MODULE_PARM_DESC(seed, "Random seed");
65
66static int mode;
67module_param(mode, int, S_IRUGO);
68MODULE_PARM_DESC(mode, "0=incremental errors, 1=overwrite test");
69
70static unsigned max_overwrite = 10000;
71
72static loff_t   offset;     /* Offset of the page we're using. */
73static unsigned eraseblock; /* Eraseblock number for our page. */
74
75/* We assume that the ECC can correct up to a certain number
76 * of biterrors per subpage. */
77static unsigned subsize;  /* Size of subpages */
78static unsigned subcount; /* Number of subpages per page */
79
80static struct mtd_info *mtd;   /* MTD device */
81
82static uint8_t *wbuffer; /* One page write / compare buffer */
83static uint8_t *rbuffer; /* One page read buffer */
84
/*
 * Deterministic pseudo-random byte for a given offset.
 *
 * The offset is XORed with a fixed constant, folded with three
 * xor-shift steps, and the low byte of the result is returned with
 * its bit order reversed. The same offset always yields the same byte.
 */
static uint8_t hash(unsigned offset)
{
	unsigned mixed = offset ^ 0x7f7edfd3;
	uint8_t low;
	uint8_t reversed = 0;
	int bit;

	mixed ^= mixed >> 3;
	mixed ^= mixed >> 5;
	mixed ^= mixed >> 13;

	/* Reverse the bit order of the low byte, one bit at a time. */
	low = mixed & 0xFF;
	for (bit = 0; bit < 8; bit++) {
		reversed = (uint8_t)((reversed << 1) | (low & 1));
		low >>= 1;
	}

	return reversed;
}
101
102/* Writes wbuffer to page */
103static int write_page(int log)
104{
105	if (log)
106		pr_info("write_page\n");
107
108	return mtdtest_write(mtd, offset, mtd->writesize, wbuffer);
109}
110
111/* Re-writes the data area while leaving the OOB alone. */
112static int rewrite_page(int log)
113{
114	int err = 0;
115	struct mtd_oob_ops ops;
116
117	if (log)
118		pr_info("rewrite page\n");
119
120	ops.mode      = MTD_OPS_RAW; /* No ECC */
121	ops.len       = mtd->writesize;
122	ops.retlen    = 0;
123	ops.ooblen    = 0;
124	ops.oobretlen = 0;
125	ops.ooboffs   = 0;
126	ops.datbuf    = wbuffer;
127	ops.oobbuf    = NULL;
128
129	err = mtd_write_oob(mtd, offset, &ops);
130	if (err || ops.retlen != mtd->writesize) {
131		pr_err("error: write_oob failed (%d)\n", err);
132		if (!err)
133			err = -EIO;
134	}
135
136	return err;
137}
138
139/* Reads page into rbuffer. Returns number of corrected bit errors (>=0)
140 * or error (<0) */
141static int read_page(int log)
142{
143	int err = 0;
144	size_t read;
145	struct mtd_ecc_stats oldstats;
146
147	if (log)
148		pr_info("read_page\n");
149
150	/* Saving last mtd stats */
151	memcpy(&oldstats, &mtd->ecc_stats, sizeof(oldstats));
152
153	err = mtd_read(mtd, offset, mtd->writesize, &read, rbuffer);
154	if (err == -EUCLEAN)
155		err = mtd->ecc_stats.corrected - oldstats.corrected;
156
157	if (err < 0 || read != mtd->writesize) {
158		pr_err("error: read failed at %#llx\n", (long long)offset);
159		if (err >= 0)
160			err = -EIO;
161	}
162
163	return err;
164}
165
166/* Verifies rbuffer against random sequence */
167static int verify_page(int log)
168{
169	unsigned i, errs = 0;
170
171	if (log)
172		pr_info("verify_page\n");
173
174	for (i = 0; i < mtd->writesize; i++) {
175		if (rbuffer[i] != hash(i+seed)) {
176			pr_err("Error: page offset %u, expected %02x, got %02x\n",
177				i, hash(i+seed), rbuffer[i]);
178			errs++;
179		}
180	}
181
182	if (errs)
183		return -EIO;
184	else
185		return 0;
186}
187
188#define CBIT(v, n) ((v) & (1 << (n)))
189#define BCLR(v, n) ((v) = (v) & ~(1 << (n)))
190
191/* Finds the first '1' bit in wbuffer starting at offset 'byte'
192 * and sets it to '0'. */
193static int insert_biterror(unsigned byte)
194{
195	int bit;
196
197	while (byte < mtd->writesize) {
198		for (bit = 7; bit >= 0; bit--) {
199			if (CBIT(wbuffer[byte], bit)) {
200				BCLR(wbuffer[byte], bit);
201				pr_info("Inserted biterror @ %u/%u\n", byte, bit);
202				return 0;
203			}
204		}
205		byte++;
206	}
207	pr_err("biterror: Failed to find a '1' bit\n");
208	return -EIO;
209}
210
211/* Writes 'random' data to page and then introduces deliberate bit
212 * errors into the page, while verifying each step. */
213static int incremental_errors_test(void)
214{
215	int err = 0;
216	unsigned i;
217	unsigned errs_per_subpage = 0;
218
219	pr_info("incremental biterrors test\n");
220
221	for (i = 0; i < mtd->writesize; i++)
222		wbuffer[i] = hash(i+seed);
223
224	err = write_page(1);
225	if (err)
226		goto exit;
227
228	while (1) {
229
230		err = rewrite_page(1);
231		if (err)
232			goto exit;
233
234		err = read_page(1);
235		if (err > 0)
236			pr_info("Read reported %d corrected bit errors\n", err);
237		if (err < 0) {
238			pr_err("After %d biterrors per subpage, read reported error %d\n",
239				errs_per_subpage, err);
240			err = 0;
241			goto exit;
242		}
243
244		err = verify_page(1);
245		if (err) {
246			pr_err("ECC failure, read data is incorrect despite read success\n");
247			goto exit;
248		}
249
250		pr_info("Successfully corrected %d bit errors per subpage\n",
251			errs_per_subpage);
252
253		for (i = 0; i < subcount; i++) {
254			err = insert_biterror(i * subsize);
255			if (err < 0)
256				goto exit;
257		}
258		errs_per_subpage++;
259	}
260
261exit:
262	return err;
263}
264
265
266/* Writes 'random' data to page and then re-writes that same data repeatedly.
267   This eventually develops bit errors (bits written as '1' will slowly become
268   '0'), which are corrected as far as the ECC is capable of. */
269static int overwrite_test(void)
270{
271	int err = 0;
272	unsigned i;
273	unsigned max_corrected = 0;
274	unsigned opno = 0;
275	/* We don't expect more than this many correctable bit errors per
276	 * page. */
277	#define MAXBITS 512
278	static unsigned bitstats[MAXBITS]; /* bit error histogram. */
279
280	memset(bitstats, 0, sizeof(bitstats));
281
282	pr_info("overwrite biterrors test\n");
283
284	for (i = 0; i < mtd->writesize; i++)
285		wbuffer[i] = hash(i+seed);
286
287	err = write_page(1);
288	if (err)
289		goto exit;
290
291	while (opno < max_overwrite) {
292
293		err = rewrite_page(0);
294		if (err)
295			break;
296
297		err = read_page(0);
298		if (err >= 0) {
299			if (err >= MAXBITS) {
300				pr_info("Implausible number of bit errors corrected\n");
301				err = -EIO;
302				break;
303			}
304			bitstats[err]++;
305			if (err > max_corrected) {
306				max_corrected = err;
307				pr_info("Read reported %d corrected bit errors\n",
308					err);
309			}
310		} else { /* err < 0 */
311			pr_info("Read reported error %d\n", err);
312			err = 0;
313			break;
314		}
315
316		err = verify_page(0);
317		if (err) {
318			bitstats[max_corrected] = opno;
319			pr_info("ECC failure, read data is incorrect despite read success\n");
320			break;
321		}
322
323		err = mtdtest_relax();
324		if (err)
325			break;
326
327		opno++;
328	}
329
330	/* At this point bitstats[0] contains the number of ops with no bit
331	 * errors, bitstats[1] the number of ops with 1 bit error, etc. */
332	pr_info("Bit error histogram (%d operations total):\n", opno);
333	for (i = 0; i < max_corrected; i++)
334		pr_info("Page reads with %3d corrected bit errors: %d\n",
335			i, bitstats[i]);
336
337exit:
338	return err;
339}
340
341static int __init mtd_nandbiterrs_init(void)
342{
343	int err = 0;
344
345	printk("\n");
346	printk(KERN_INFO "==================================================\n");
347	pr_info("MTD device: %d\n", dev);
348
349	mtd = get_mtd_device(NULL, dev);
350	if (IS_ERR(mtd)) {
351		err = PTR_ERR(mtd);
352		pr_err("error: cannot get MTD device\n");
353		goto exit_mtddev;
354	}
355
356	if (!mtd_type_is_nand(mtd)) {
357		pr_info("this test requires NAND flash\n");
358		err = -ENODEV;
359		goto exit_nand;
360	}
361
362	pr_info("MTD device size %llu, eraseblock=%u, page=%u, oob=%u\n",
363		(unsigned long long)mtd->size, mtd->erasesize,
364		mtd->writesize, mtd->oobsize);
365
366	subsize  = mtd->writesize >> mtd->subpage_sft;
367	subcount = mtd->writesize / subsize;
368
369	pr_info("Device uses %d subpages of %d bytes\n", subcount, subsize);
370
371	offset     = (loff_t)page_offset * mtd->writesize;
372	eraseblock = mtd_div_by_eb(offset, mtd);
373
374	pr_info("Using page=%u, offset=%llu, eraseblock=%u\n",
375		page_offset, offset, eraseblock);
376
377	wbuffer = kmalloc(mtd->writesize, GFP_KERNEL);
378	if (!wbuffer) {
379		err = -ENOMEM;
380		goto exit_wbuffer;
381	}
382
383	rbuffer = kmalloc(mtd->writesize, GFP_KERNEL);
384	if (!rbuffer) {
385		err = -ENOMEM;
386		goto exit_rbuffer;
387	}
388
389	err = mtdtest_erase_eraseblock(mtd, eraseblock);
390	if (err)
391		goto exit_error;
392
393	if (mode == 0)
394		err = incremental_errors_test();
395	else
396		err = overwrite_test();
397
398	if (err)
399		goto exit_error;
400
401	/* We leave the block un-erased in case of test failure. */
402	err = mtdtest_erase_eraseblock(mtd, eraseblock);
403	if (err)
404		goto exit_error;
405
406	err = -EIO;
407	pr_info("finished successfully.\n");
408	printk(KERN_INFO "==================================================\n");
409
410exit_error:
411	kfree(rbuffer);
412exit_rbuffer:
413	kfree(wbuffer);
414exit_wbuffer:
415	/* Nothing */
416exit_nand:
417	put_mtd_device(mtd);
418exit_mtddev:
419	return err;
420}
421
422static void __exit mtd_nandbiterrs_exit(void)
423{
424	return;
425}
426
427module_init(mtd_nandbiterrs_init);
428module_exit(mtd_nandbiterrs_exit);
429
430MODULE_DESCRIPTION("NAND bit error recovery test");
431MODULE_AUTHOR("Iwo Mergler");
432MODULE_LICENSE("GPL");
433