1/*
2 * Helpers for formatting and printing strings
3 *
4 * Copyright 31 August 2008 James Bottomley
5 * Copyright (C) 2013, Intel Corporation
6 */
7#include <linux/bug.h>
8#include <linux/kernel.h>
9#include <linux/math64.h>
10#include <linux/export.h>
11#include <linux/ctype.h>
12#include <linux/errno.h>
13#include <linux/string.h>
14#include <linux/string_helpers.h>
15
16/**
17 * string_get_size - get the size in the specified units
18 * @size:	The size to be converted in blocks
19 * @blk_size:	Size of the block (use 1 for size in bytes)
20 * @units:	units to use (powers of 1000 or 1024)
21 * @buf:	buffer to format to
22 * @len:	length of buffer
23 *
24 * This function returns a string formatted to 3 significant figures
25 * giving the size in the required units.  @buf should have room for
26 * at least 9 bytes and will always be zero terminated.
27 *
28 */
29void string_get_size(u64 size, u64 blk_size, const enum string_size_units units,
30		     char *buf, int len)
31{
32	static const char *const units_10[] = {
33		"B", "kB", "MB", "GB", "TB", "PB", "EB", "ZB", "YB"
34	};
35	static const char *const units_2[] = {
36		"B", "KiB", "MiB", "GiB", "TiB", "PiB", "EiB", "ZiB", "YiB"
37	};
38	static const char *const *const units_str[] = {
39		[STRING_UNITS_10] = units_10,
40		[STRING_UNITS_2] = units_2,
41	};
42	static const unsigned int divisor[] = {
43		[STRING_UNITS_10] = 1000,
44		[STRING_UNITS_2] = 1024,
45	};
46	static const unsigned int rounding[] = { 500, 50, 5 };
47	int i = 0, j;
48	u32 remainder = 0, sf_cap;
49	char tmp[8];
50	const char *unit;
51
52	tmp[0] = '\0';
53
54	if (blk_size == 0)
55		size = 0;
56	if (size == 0)
57		goto out;
58
59	/* This is Napier's algorithm.  Reduce the original block size to
60	 *
61	 * coefficient * divisor[units]^i
62	 *
63	 * we do the reduction so both coefficients are just under 32 bits so
64	 * that multiplying them together won't overflow 64 bits and we keep
65	 * as much precision as possible in the numbers.
66	 *
67	 * Note: it's safe to throw away the remainders here because all the
68	 * precision is in the coefficients.
69	 */
70	while (blk_size >> 32) {
71		do_div(blk_size, divisor[units]);
72		i++;
73	}
74
75	while (size >> 32) {
76		do_div(size, divisor[units]);
77		i++;
78	}
79
80	/* now perform the actual multiplication keeping i as the sum of the
81	 * two logarithms */
82	size *= blk_size;
83
84	/* and logarithmically reduce it until it's just under the divisor */
85	while (size >= divisor[units]) {
86		remainder = do_div(size, divisor[units]);
87		i++;
88	}
89
90	/* work out in j how many digits of precision we need from the
91	 * remainder */
92	sf_cap = size;
93	for (j = 0; sf_cap*10 < 1000; j++)
94		sf_cap *= 10;
95
96	if (units == STRING_UNITS_2) {
97		/* express the remainder as a decimal.  It's currently the
98		 * numerator of a fraction whose denominator is
99		 * divisor[units], which is 1 << 10 for STRING_UNITS_2 */
100		remainder *= 1000;
101		remainder >>= 10;
102	}
103
104	/* add a 5 to the digit below what will be printed to ensure
105	 * an arithmetical round up and carry it through to size */
106	remainder += rounding[j];
107	if (remainder >= 1000) {
108		remainder -= 1000;
109		size += 1;
110	}
111
112	if (j) {
113		snprintf(tmp, sizeof(tmp), ".%03u", remainder);
114		tmp[j+1] = '\0';
115	}
116
117 out:
118	if (i >= ARRAY_SIZE(units_2))
119		unit = "UNK";
120	else
121		unit = units_str[units][i];
122
123	snprintf(buf, len, "%u%s %s", (u32)size,
124		 tmp, unit);
125}
126EXPORT_SYMBOL(string_get_size);
127
/*
 * Decode a whitespace escape letter (n, r, t, v, f) found at *src.
 *
 * On a match the corresponding control character is stored at *dst, both
 * cursors are advanced by one and true is returned.  Otherwise nothing is
 * modified and false is returned.
 */
static bool unescape_space(char **src, char **dst)
{
	static const char letter[] = { 'n', 'r', 't', 'v', 'f' };
	static const char decoded[] = { '\n', '\r', '\t', '\v', '\f' };
	const char c = **src;
	unsigned int k;

	for (k = 0; k < sizeof(letter); k++) {
		if (letter[k] != c)
			continue;

		**dst = decoded[k];
		++*dst;
		++*src;
		return true;
	}

	return false;
}
155
156static bool unescape_octal(char **src, char **dst)
157{
158	char *p = *dst, *q = *src;
159	u8 num;
160
161	if (isodigit(*q) == 0)
162		return false;
163
164	num = (*q++) & 7;
165	while (num < 32 && isodigit(*q) && (q - *src < 3)) {
166		num <<= 3;
167		num += (*q++) & 7;
168	}
169	*p = num;
170	*dst += 1;
171	*src = q;
172	return true;
173}
174
175static bool unescape_hex(char **src, char **dst)
176{
177	char *p = *dst, *q = *src;
178	int digit;
179	u8 num;
180
181	if (*q++ != 'x')
182		return false;
183
184	num = digit = hex_to_bin(*q++);
185	if (digit < 0)
186		return false;
187
188	digit = hex_to_bin(*q);
189	if (digit >= 0) {
190		q++;
191		num = (num << 4) | digit;
192	}
193	*p = num;
194	*dst += 1;
195	*src = q;
196	return true;
197}
198
/*
 * Decode one of the "special" escapes at *src: a double quote, a
 * backslash, 'a' (alert) or 'e' (escape).
 *
 * On a match the decoded character is stored at *dst, both cursors are
 * advanced by one and true is returned.  Otherwise nothing is modified
 * and false is returned.
 */
static bool unescape_special(char **src, char **dst)
{
	const char c = **src;
	char decoded;

	if (c == '\"')
		decoded = '\"';
	else if (c == '\\')
		decoded = '\\';
	else if (c == 'a')
		decoded = '\a';
	else if (c == 'e')
		decoded = '\e';
	else
		return false;

	**dst = decoded;
	++*dst;
	++*src;
	return true;
}
223
224/**
225 * string_unescape - unquote characters in the given string
226 * @src:	source buffer (escaped)
227 * @dst:	destination buffer (unescaped)
228 * @size:	size of the destination buffer (0 to unlimit)
229 * @flags:	combination of the flags (bitwise OR):
230 *	%UNESCAPE_SPACE:
231 *		'\f' - form feed
232 *		'\n' - new line
233 *		'\r' - carriage return
234 *		'\t' - horizontal tab
235 *		'\v' - vertical tab
236 *	%UNESCAPE_OCTAL:
237 *		'\NNN' - byte with octal value NNN (1 to 3 digits)
238 *	%UNESCAPE_HEX:
239 *		'\xHH' - byte with hexadecimal value HH (1 to 2 digits)
240 *	%UNESCAPE_SPECIAL:
241 *		'\"' - double quote
242 *		'\\' - backslash
243 *		'\a' - alert (BEL)
244 *		'\e' - escape
245 *	%UNESCAPE_ANY:
246 *		all previous together
247 *
248 * Description:
249 * The function unquotes characters in the given string.
250 *
251 * Because the size of the output will be the same as or less than the size of
252 * the input, the transformation may be performed in place.
253 *
254 * Caller must provide valid source and destination pointers. Be aware that
255 * destination buffer will always be NULL-terminated. Source string must be
256 * NULL-terminated as well.
257 *
258 * Return:
259 * The amount of the characters processed to the destination buffer excluding
260 * trailing '\0' is returned.
261 */
262int string_unescape(char *src, char *dst, size_t size, unsigned int flags)
263{
264	char *out = dst;
265
266	while (*src && --size) {
267		if (src[0] == '\\' && src[1] != '\0' && size > 1) {
268			src++;
269			size--;
270
271			if (flags & UNESCAPE_SPACE &&
272					unescape_space(&src, &out))
273				continue;
274
275			if (flags & UNESCAPE_OCTAL &&
276					unescape_octal(&src, &out))
277				continue;
278
279			if (flags & UNESCAPE_HEX &&
280					unescape_hex(&src, &out))
281				continue;
282
283			if (flags & UNESCAPE_SPECIAL &&
284					unescape_special(&src, &out))
285				continue;
286
287			*out++ = '\\';
288		}
289		*out++ = *src++;
290	}
291	*out = '\0';
292
293	return out - dst;
294}
295EXPORT_SYMBOL(string_unescape);
296
/*
 * Emit @c unchanged.  The byte is stored only while *dst is below @end,
 * but *dst always advances so the caller can tell how much room the full
 * output would have needed.  Always returns true.
 */
static bool escape_passthrough(unsigned char c, char **dst, char *end)
{
	if (*dst < end)
		**dst = c;
	++*dst;

	return true;
}
306
/*
 * Escape a whitespace control character (newline, carriage return, tab,
 * vertical tab, form feed) as a backslash followed by its letter.
 *
 * Returns false and leaves *dst alone when @c is none of those.  Otherwise
 * *dst advances by two; each byte is stored only while the write position
 * is below @end.
 */
static bool escape_space(unsigned char c, char **dst, char *end)
{
	static const char raw[] = { '\n', '\r', '\t', '\v', '\f' };
	static const char letter[] = { 'n', 'r', 't', 'v', 'f' };
	char *out = *dst;
	char seq[2];
	unsigned int k;

	for (k = 0; k < sizeof(raw); k++)
		if ((unsigned char)raw[k] == c)
			break;
	if (k == sizeof(raw))
		return false;

	seq[0] = '\\';
	seq[1] = letter[k];

	for (k = 0; k < sizeof(seq); k++, out++)
		if (out < end)
			*out = seq[k];

	*dst = out;
	return true;
}
342
/*
 * Escape a backslash, alert (BEL) or escape character as a backslash
 * followed by '\\', 'a' or 'e' respectively.
 *
 * Returns false and leaves *dst alone for any other @c.  Otherwise *dst
 * advances by two; each byte is stored only while the write position is
 * below @end.
 */
static bool escape_special(unsigned char c, char **dst, char *end)
{
	char *out = *dst;
	char seq[2];
	unsigned int k;

	if (c == '\\')
		seq[1] = '\\';
	else if (c == '\a')
		seq[1] = 'a';
	else if (c == '\e')
		seq[1] = 'e';
	else
		return false;

	seq[0] = '\\';

	for (k = 0; k < sizeof(seq); k++, out++)
		if (out < end)
			*out = seq[k];

	*dst = out;
	return true;
}
372
/*
 * Escape a zero byte as the two characters backslash and '0'.
 *
 * Returns false and leaves *dst alone when @c is non-zero.  Otherwise *dst
 * advances by two; each byte is stored only while the write position is
 * below @end.
 */
static bool escape_null(unsigned char c, char **dst, char *end)
{
	static const char seq[] = { '\\', '0' };
	char *out = *dst;
	unsigned int k;

	if (c != 0)
		return false;

	for (k = 0; k < sizeof(seq); k++, out++)
		if (out < end)
			*out = seq[k];

	*dst = out;
	return true;
}
390
/*
 * Escape @c as a backslash followed by exactly three octal digits.
 *
 * Accepts every byte value, so it always returns true.  *dst advances by
 * four; each byte is stored only while the write position is below @end.
 */
static bool escape_octal(unsigned char c, char **dst, char *end)
{
	char *out = *dst;
	int shift;

	if (out < end)
		*out = '\\';
	++out;

	/* most significant octal digit first: bits 7-6, 5-3, 2-0 */
	for (shift = 6; shift >= 0; shift -= 3) {
		if (out < end)
			*out = '0' + ((c >> shift) & 0x07);
		++out;
	}

	*dst = out;
	return true;
}
411
/*
 * Escape @c as backslash, 'x' and two hexadecimal digits (high nibble
 * first, as produced by hex_asc_hi() and hex_asc_lo()).
 *
 * Accepts every byte value, so it always returns true.  *dst advances by
 * four; each byte is stored only while the write position is below @end.
 */
static bool escape_hex(unsigned char c, char **dst, char *end)
{
	char *out = *dst;
	char seq[4];
	unsigned int k;

	seq[0] = '\\';
	seq[1] = 'x';
	seq[2] = hex_asc_hi(c);
	seq[3] = hex_asc_lo(c);

	for (k = 0; k < sizeof(seq); k++, out++)
		if (out < end)
			*out = seq[k];

	*dst = out;
	return true;
}
432
433/**
434 * string_escape_mem - quote characters in the given memory buffer
435 * @src:	source buffer (unescaped)
436 * @isz:	source buffer size
437 * @dst:	destination buffer (escaped)
438 * @osz:	destination buffer size
439 * @flags:	combination of the flags (bitwise OR):
440 *	%ESCAPE_SPACE:
441 *		'\f' - form feed
442 *		'\n' - new line
443 *		'\r' - carriage return
444 *		'\t' - horizontal tab
445 *		'\v' - vertical tab
446 *	%ESCAPE_SPECIAL:
447 *		'\\' - backslash
448 *		'\a' - alert (BEL)
449 *		'\e' - escape
450 *	%ESCAPE_NULL:
451 *		'\0' - null
452 *	%ESCAPE_OCTAL:
453 *		'\NNN' - byte with octal value NNN (3 digits)
454 *	%ESCAPE_ANY:
455 *		all previous together
456 *	%ESCAPE_NP:
457 *		escape only non-printable characters (checked by isprint)
458 *	%ESCAPE_ANY_NP:
459 *		all previous together
460 *	%ESCAPE_HEX:
461 *		'\xHH' - byte with hexadecimal value HH (2 digits)
462 * @esc:	NULL-terminated string of characters any of which, if found in
463 *		the source, has to be escaped
464 *
465 * Description:
466 * The process of escaping byte buffer includes several parts. They are applied
467 * in the following sequence.
468 *	1. The character is matched to the printable class, if asked, and in
469 *	   case of match it passes through to the output.
470 *	2. The character is not matched to the one from @esc string and thus
471 *	   must go as is to the output.
472 *	3. The character is checked if it falls into the class given by @flags.
473 *	   %ESCAPE_OCTAL and %ESCAPE_HEX are going last since they cover any
474 *	   character. Note that they actually can't go together, otherwise
475 *	   %ESCAPE_HEX will be ignored.
476 *
477 * Caller must provide valid source and destination pointers. Be aware that
478 * destination buffer will not be NULL-terminated, thus caller have to append
479 * it if needs.
480 *
481 * Return:
482 * The total size of the escaped output that would be generated for
483 * the given input and flags. To check whether the output was
484 * truncated, compare the return value to osz. There is room left in
485 * dst for a '\0' terminator if and only if ret < osz.
486 */
487int string_escape_mem(const char *src, size_t isz, char *dst, size_t osz,
488		      unsigned int flags, const char *esc)
489{
490	char *p = dst;
491	char *end = p + osz;
492	bool is_dict = esc && *esc;
493
494	while (isz--) {
495		unsigned char c = *src++;
496
497		/*
498		 * Apply rules in the following sequence:
499		 *	- the character is printable, when @flags has
500		 *	  %ESCAPE_NP bit set
501		 *	- the @esc string is supplied and does not contain a
502		 *	  character under question
503		 *	- the character doesn't fall into a class of symbols
504		 *	  defined by given @flags
505		 * In these cases we just pass through a character to the
506		 * output buffer.
507		 */
508		if ((flags & ESCAPE_NP && isprint(c)) ||
509		    (is_dict && !strchr(esc, c))) {
510			/* do nothing */
511		} else {
512			if (flags & ESCAPE_SPACE && escape_space(c, &p, end))
513				continue;
514
515			if (flags & ESCAPE_SPECIAL && escape_special(c, &p, end))
516				continue;
517
518			if (flags & ESCAPE_NULL && escape_null(c, &p, end))
519				continue;
520
521			/* ESCAPE_OCTAL and ESCAPE_HEX always go last */
522			if (flags & ESCAPE_OCTAL && escape_octal(c, &p, end))
523				continue;
524
525			if (flags & ESCAPE_HEX && escape_hex(c, &p, end))
526				continue;
527		}
528
529		escape_passthrough(c, &p, end);
530	}
531
532	return p - dst;
533}
534EXPORT_SYMBOL(string_escape_mem);
535