1/*
2 * Copyright (c) 2000-2005 Silicon Graphics, Inc.
3 * Copyright (c) 2013 Red Hat, Inc.
4 * All Rights Reserved.
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License as
8 * published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope that it would be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write the Free Software Foundation,
17 * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
18 */
19#include "xfs.h"
20#include "xfs_fs.h"
21#include "xfs_shared.h"
22#include "xfs_format.h"
23#include "xfs_log_format.h"
24#include "xfs_trans_resv.h"
25#include "xfs_bit.h"
26#include "xfs_mount.h"
27#include "xfs_da_format.h"
28#include "xfs_da_btree.h"
29#include "xfs_inode.h"
30#include "xfs_alloc.h"
31#include "xfs_trans.h"
32#include "xfs_inode_item.h"
33#include "xfs_bmap.h"
34#include "xfs_bmap_util.h"
35#include "xfs_attr.h"
36#include "xfs_attr_leaf.h"
37#include "xfs_attr_remote.h"
38#include "xfs_trans_space.h"
39#include "xfs_trace.h"
40#include "xfs_cksum.h"
41#include "xfs_buf_item.h"
42#include "xfs_error.h"
43
/*
 * Number of extent map entries asked for per mapping lookup when walking
 * a remote attribute value.
 */
#define ATTR_RMTVALUE_MAPSIZE	1
45
46/*
47 * Each contiguous block has a header, so it is not just a simple attribute
48 * length to FSB conversion.
49 */
50int
51xfs_attr3_rmt_blocks(
52	struct xfs_mount *mp,
53	int		attrlen)
54{
55	if (xfs_sb_version_hascrc(&mp->m_sb)) {
56		int buflen = XFS_ATTR3_RMT_BUF_SPACE(mp, mp->m_sb.sb_blocksize);
57		return (attrlen + buflen - 1) / buflen;
58	}
59	return XFS_B_TO_FSB(mp, attrlen);
60}
61
62/*
63 * Checking of the remote attribute header is split into two parts. The verifier
64 * does CRC, location and bounds checking, the unpacking function checks the
65 * attribute parameters and owner.
66 */
67static bool
68xfs_attr3_rmt_hdr_ok(
69	void			*ptr,
70	xfs_ino_t		ino,
71	uint32_t		offset,
72	uint32_t		size,
73	xfs_daddr_t		bno)
74{
75	struct xfs_attr3_rmt_hdr *rmt = ptr;
76
77	if (bno != be64_to_cpu(rmt->rm_blkno))
78		return false;
79	if (offset != be32_to_cpu(rmt->rm_offset))
80		return false;
81	if (size != be32_to_cpu(rmt->rm_bytes))
82		return false;
83	if (ino != be64_to_cpu(rmt->rm_owner))
84		return false;
85
86	/* ok */
87	return true;
88}
89
90static bool
91xfs_attr3_rmt_verify(
92	struct xfs_mount	*mp,
93	void			*ptr,
94	int			fsbsize,
95	xfs_daddr_t		bno)
96{
97	struct xfs_attr3_rmt_hdr *rmt = ptr;
98
99	if (!xfs_sb_version_hascrc(&mp->m_sb))
100		return false;
101	if (rmt->rm_magic != cpu_to_be32(XFS_ATTR3_RMT_MAGIC))
102		return false;
103	if (!uuid_equal(&rmt->rm_uuid, &mp->m_sb.sb_uuid))
104		return false;
105	if (be64_to_cpu(rmt->rm_blkno) != bno)
106		return false;
107	if (be32_to_cpu(rmt->rm_bytes) > fsbsize - sizeof(*rmt))
108		return false;
109	if (be32_to_cpu(rmt->rm_offset) +
110				be32_to_cpu(rmt->rm_bytes) > XATTR_SIZE_MAX)
111		return false;
112	if (rmt->rm_owner == 0)
113		return false;
114
115	return true;
116}
117
118static void
119xfs_attr3_rmt_read_verify(
120	struct xfs_buf	*bp)
121{
122	struct xfs_mount *mp = bp->b_target->bt_mount;
123	char		*ptr;
124	int		len;
125	xfs_daddr_t	bno;
126	int		blksize = mp->m_attr_geo->blksize;
127
128	/* no verification of non-crc buffers */
129	if (!xfs_sb_version_hascrc(&mp->m_sb))
130		return;
131
132	ptr = bp->b_addr;
133	bno = bp->b_bn;
134	len = BBTOB(bp->b_length);
135	ASSERT(len >= blksize);
136
137	while (len > 0) {
138		if (!xfs_verify_cksum(ptr, blksize, XFS_ATTR3_RMT_CRC_OFF)) {
139			xfs_buf_ioerror(bp, -EFSBADCRC);
140			break;
141		}
142		if (!xfs_attr3_rmt_verify(mp, ptr, blksize, bno)) {
143			xfs_buf_ioerror(bp, -EFSCORRUPTED);
144			break;
145		}
146		len -= blksize;
147		ptr += blksize;
148		bno += BTOBB(blksize);
149	}
150
151	if (bp->b_error)
152		xfs_verifier_error(bp);
153	else
154		ASSERT(len == 0);
155}
156
157static void
158xfs_attr3_rmt_write_verify(
159	struct xfs_buf	*bp)
160{
161	struct xfs_mount *mp = bp->b_target->bt_mount;
162	int		blksize = mp->m_attr_geo->blksize;
163	char		*ptr;
164	int		len;
165	xfs_daddr_t	bno;
166
167	/* no verification of non-crc buffers */
168	if (!xfs_sb_version_hascrc(&mp->m_sb))
169		return;
170
171	ptr = bp->b_addr;
172	bno = bp->b_bn;
173	len = BBTOB(bp->b_length);
174	ASSERT(len >= blksize);
175
176	while (len > 0) {
177		struct xfs_attr3_rmt_hdr *rmt = (struct xfs_attr3_rmt_hdr *)ptr;
178
179		if (!xfs_attr3_rmt_verify(mp, ptr, blksize, bno)) {
180			xfs_buf_ioerror(bp, -EFSCORRUPTED);
181			xfs_verifier_error(bp);
182			return;
183		}
184
185		/*
186		 * Ensure we aren't writing bogus LSNs to disk. See
187		 * xfs_attr3_rmt_hdr_set() for the explanation.
188		 */
189		if (rmt->rm_lsn != cpu_to_be64(NULLCOMMITLSN)) {
190			xfs_buf_ioerror(bp, -EFSCORRUPTED);
191			xfs_verifier_error(bp);
192			return;
193		}
194		xfs_update_cksum(ptr, blksize, XFS_ATTR3_RMT_CRC_OFF);
195
196		len -= blksize;
197		ptr += blksize;
198		bno += BTOBB(blksize);
199	}
200	ASSERT(len == 0);
201}
202
203const struct xfs_buf_ops xfs_attr3_rmt_buf_ops = {
204	.verify_read = xfs_attr3_rmt_read_verify,
205	.verify_write = xfs_attr3_rmt_write_verify,
206};
207
208STATIC int
209xfs_attr3_rmt_hdr_set(
210	struct xfs_mount	*mp,
211	void			*ptr,
212	xfs_ino_t		ino,
213	uint32_t		offset,
214	uint32_t		size,
215	xfs_daddr_t		bno)
216{
217	struct xfs_attr3_rmt_hdr *rmt = ptr;
218
219	if (!xfs_sb_version_hascrc(&mp->m_sb))
220		return 0;
221
222	rmt->rm_magic = cpu_to_be32(XFS_ATTR3_RMT_MAGIC);
223	rmt->rm_offset = cpu_to_be32(offset);
224	rmt->rm_bytes = cpu_to_be32(size);
225	uuid_copy(&rmt->rm_uuid, &mp->m_sb.sb_uuid);
226	rmt->rm_owner = cpu_to_be64(ino);
227	rmt->rm_blkno = cpu_to_be64(bno);
228
229	/*
230	 * Remote attribute blocks are written synchronously, so we don't
231	 * have an LSN that we can stamp in them that makes any sense to log
232	 * recovery. To ensure that log recovery handles overwrites of these
233	 * blocks sanely (i.e. once they've been freed and reallocated as some
234	 * other type of metadata) we need to ensure that the LSN has a value
235	 * that tells log recovery to ignore the LSN and overwrite the buffer
236	 * with whatever is in it's log. To do this, we use the magic
237	 * NULLCOMMITLSN to indicate that the LSN is invalid.
238	 */
239	rmt->rm_lsn = cpu_to_be64(NULLCOMMITLSN);
240
241	return sizeof(struct xfs_attr3_rmt_hdr);
242}
243
/*
 * Helper functions to copy attribute data in and out of the on-disk extents.
 */
247STATIC int
248xfs_attr_rmtval_copyout(
249	struct xfs_mount *mp,
250	struct xfs_buf	*bp,
251	xfs_ino_t	ino,
252	int		*offset,
253	int		*valuelen,
254	__uint8_t	**dst)
255{
256	char		*src = bp->b_addr;
257	xfs_daddr_t	bno = bp->b_bn;
258	int		len = BBTOB(bp->b_length);
259	int		blksize = mp->m_attr_geo->blksize;
260
261	ASSERT(len >= blksize);
262
263	while (len > 0 && *valuelen > 0) {
264		int hdr_size = 0;
265		int byte_cnt = XFS_ATTR3_RMT_BUF_SPACE(mp, blksize);
266
267		byte_cnt = min(*valuelen, byte_cnt);
268
269		if (xfs_sb_version_hascrc(&mp->m_sb)) {
270			if (!xfs_attr3_rmt_hdr_ok(src, ino, *offset,
271						  byte_cnt, bno)) {
272				xfs_alert(mp,
273"remote attribute header mismatch bno/off/len/owner (0x%llx/0x%x/Ox%x/0x%llx)",
274					bno, *offset, byte_cnt, ino);
275				return -EFSCORRUPTED;
276			}
277			hdr_size = sizeof(struct xfs_attr3_rmt_hdr);
278		}
279
280		memcpy(*dst, src + hdr_size, byte_cnt);
281
282		/* roll buffer forwards */
283		len -= blksize;
284		src += blksize;
285		bno += BTOBB(blksize);
286
287		/* roll attribute data forwards */
288		*valuelen -= byte_cnt;
289		*dst += byte_cnt;
290		*offset += byte_cnt;
291	}
292	return 0;
293}
294
295STATIC void
296xfs_attr_rmtval_copyin(
297	struct xfs_mount *mp,
298	struct xfs_buf	*bp,
299	xfs_ino_t	ino,
300	int		*offset,
301	int		*valuelen,
302	__uint8_t	**src)
303{
304	char		*dst = bp->b_addr;
305	xfs_daddr_t	bno = bp->b_bn;
306	int		len = BBTOB(bp->b_length);
307	int		blksize = mp->m_attr_geo->blksize;
308
309	ASSERT(len >= blksize);
310
311	while (len > 0 && *valuelen > 0) {
312		int hdr_size;
313		int byte_cnt = XFS_ATTR3_RMT_BUF_SPACE(mp, blksize);
314
315		byte_cnt = min(*valuelen, byte_cnt);
316		hdr_size = xfs_attr3_rmt_hdr_set(mp, dst, ino, *offset,
317						 byte_cnt, bno);
318
319		memcpy(dst + hdr_size, *src, byte_cnt);
320
321		/*
322		 * If this is the last block, zero the remainder of it.
323		 * Check that we are actually the last block, too.
324		 */
325		if (byte_cnt + hdr_size < blksize) {
326			ASSERT(*valuelen - byte_cnt == 0);
327			ASSERT(len == blksize);
328			memset(dst + hdr_size + byte_cnt, 0,
329					blksize - hdr_size - byte_cnt);
330		}
331
332		/* roll buffer forwards */
333		len -= blksize;
334		dst += blksize;
335		bno += BTOBB(blksize);
336
337		/* roll attribute data forwards */
338		*valuelen -= byte_cnt;
339		*src += byte_cnt;
340		*offset += byte_cnt;
341	}
342}
343
344/*
345 * Read the value associated with an attribute from the out-of-line buffer
346 * that we stored it in.
347 */
348int
349xfs_attr_rmtval_get(
350	struct xfs_da_args	*args)
351{
352	struct xfs_bmbt_irec	map[ATTR_RMTVALUE_MAPSIZE];
353	struct xfs_mount	*mp = args->dp->i_mount;
354	struct xfs_buf		*bp;
355	xfs_dablk_t		lblkno = args->rmtblkno;
356	__uint8_t		*dst = args->value;
357	int			valuelen;
358	int			nmap;
359	int			error;
360	int			blkcnt = args->rmtblkcnt;
361	int			i;
362	int			offset = 0;
363
364	trace_xfs_attr_rmtval_get(args);
365
366	ASSERT(!(args->flags & ATTR_KERNOVAL));
367	ASSERT(args->rmtvaluelen == args->valuelen);
368
369	valuelen = args->rmtvaluelen;
370	while (valuelen > 0) {
371		nmap = ATTR_RMTVALUE_MAPSIZE;
372		error = xfs_bmapi_read(args->dp, (xfs_fileoff_t)lblkno,
373				       blkcnt, map, &nmap,
374				       XFS_BMAPI_ATTRFORK);
375		if (error)
376			return error;
377		ASSERT(nmap >= 1);
378
379		for (i = 0; (i < nmap) && (valuelen > 0); i++) {
380			xfs_daddr_t	dblkno;
381			int		dblkcnt;
382
383			ASSERT((map[i].br_startblock != DELAYSTARTBLOCK) &&
384			       (map[i].br_startblock != HOLESTARTBLOCK));
385			dblkno = XFS_FSB_TO_DADDR(mp, map[i].br_startblock);
386			dblkcnt = XFS_FSB_TO_BB(mp, map[i].br_blockcount);
387			error = xfs_trans_read_buf(mp, NULL, mp->m_ddev_targp,
388						   dblkno, dblkcnt, 0, &bp,
389						   &xfs_attr3_rmt_buf_ops);
390			if (error)
391				return error;
392
393			error = xfs_attr_rmtval_copyout(mp, bp, args->dp->i_ino,
394							&offset, &valuelen,
395							&dst);
396			xfs_buf_relse(bp);
397			if (error)
398				return error;
399
400			/* roll attribute extent map forwards */
401			lblkno += map[i].br_blockcount;
402			blkcnt -= map[i].br_blockcount;
403		}
404	}
405	ASSERT(valuelen == 0);
406	return 0;
407}
408
409/*
410 * Write the value associated with an attribute into the out-of-line buffer
411 * that we have defined for it.
412 */
413int
414xfs_attr_rmtval_set(
415	struct xfs_da_args	*args)
416{
417	struct xfs_inode	*dp = args->dp;
418	struct xfs_mount	*mp = dp->i_mount;
419	struct xfs_bmbt_irec	map;
420	xfs_dablk_t		lblkno;
421	xfs_fileoff_t		lfileoff = 0;
422	__uint8_t		*src = args->value;
423	int			blkcnt;
424	int			valuelen;
425	int			nmap;
426	int			error;
427	int			offset = 0;
428
429	trace_xfs_attr_rmtval_set(args);
430
431	/*
432	 * Find a "hole" in the attribute address space large enough for
433	 * us to drop the new attribute's value into. Because CRC enable
434	 * attributes have headers, we can't just do a straight byte to FSB
435	 * conversion and have to take the header space into account.
436	 */
437	blkcnt = xfs_attr3_rmt_blocks(mp, args->rmtvaluelen);
438	error = xfs_bmap_first_unused(args->trans, args->dp, blkcnt, &lfileoff,
439						   XFS_ATTR_FORK);
440	if (error)
441		return error;
442
443	args->rmtblkno = lblkno = (xfs_dablk_t)lfileoff;
444	args->rmtblkcnt = blkcnt;
445
446	/*
447	 * Roll through the "value", allocating blocks on disk as required.
448	 */
449	while (blkcnt > 0) {
450		int	committed;
451
452		/*
453		 * Allocate a single extent, up to the size of the value.
454		 *
455		 * Note that we have to consider this a data allocation as we
456		 * write the remote attribute without logging the contents.
457		 * Hence we must ensure that we aren't using blocks that are on
458		 * the busy list so that we don't overwrite blocks which have
459		 * recently been freed but their transactions are not yet
460		 * committed to disk. If we overwrite the contents of a busy
461		 * extent and then crash then the block may not contain the
462		 * correct metadata after log recovery occurs.
463		 */
464		xfs_bmap_init(args->flist, args->firstblock);
465		nmap = 1;
466		error = xfs_bmapi_write(args->trans, dp, (xfs_fileoff_t)lblkno,
467				  blkcnt, XFS_BMAPI_ATTRFORK, args->firstblock,
468				  args->total, &map, &nmap, args->flist);
469		if (!error) {
470			error = xfs_bmap_finish(&args->trans, args->flist,
471						&committed);
472		}
473		if (error) {
474			ASSERT(committed);
475			args->trans = NULL;
476			xfs_bmap_cancel(args->flist);
477			return error;
478		}
479
480		/*
481		 * bmap_finish() may have committed the last trans and started
482		 * a new one.  We need the inode to be in all transactions.
483		 */
484		if (committed)
485			xfs_trans_ijoin(args->trans, dp, 0);
486
487		ASSERT(nmap == 1);
488		ASSERT((map.br_startblock != DELAYSTARTBLOCK) &&
489		       (map.br_startblock != HOLESTARTBLOCK));
490		lblkno += map.br_blockcount;
491		blkcnt -= map.br_blockcount;
492
493		/*
494		 * Start the next trans in the chain.
495		 */
496		error = xfs_trans_roll(&args->trans, dp);
497		if (error)
498			return error;
499	}
500
501	/*
502	 * Roll through the "value", copying the attribute value to the
503	 * already-allocated blocks.  Blocks are written synchronously
504	 * so that we can know they are all on disk before we turn off
505	 * the INCOMPLETE flag.
506	 */
507	lblkno = args->rmtblkno;
508	blkcnt = args->rmtblkcnt;
509	valuelen = args->rmtvaluelen;
510	while (valuelen > 0) {
511		struct xfs_buf	*bp;
512		xfs_daddr_t	dblkno;
513		int		dblkcnt;
514
515		ASSERT(blkcnt > 0);
516
517		xfs_bmap_init(args->flist, args->firstblock);
518		nmap = 1;
519		error = xfs_bmapi_read(dp, (xfs_fileoff_t)lblkno,
520				       blkcnt, &map, &nmap,
521				       XFS_BMAPI_ATTRFORK);
522		if (error)
523			return error;
524		ASSERT(nmap == 1);
525		ASSERT((map.br_startblock != DELAYSTARTBLOCK) &&
526		       (map.br_startblock != HOLESTARTBLOCK));
527
528		dblkno = XFS_FSB_TO_DADDR(mp, map.br_startblock),
529		dblkcnt = XFS_FSB_TO_BB(mp, map.br_blockcount);
530
531		bp = xfs_buf_get(mp->m_ddev_targp, dblkno, dblkcnt, 0);
532		if (!bp)
533			return -ENOMEM;
534		bp->b_ops = &xfs_attr3_rmt_buf_ops;
535
536		xfs_attr_rmtval_copyin(mp, bp, args->dp->i_ino, &offset,
537				       &valuelen, &src);
538
539		error = xfs_bwrite(bp);	/* GROT: NOTE: synchronous write */
540		xfs_buf_relse(bp);
541		if (error)
542			return error;
543
544
545		/* roll attribute extent map forwards */
546		lblkno += map.br_blockcount;
547		blkcnt -= map.br_blockcount;
548	}
549	ASSERT(valuelen == 0);
550	return 0;
551}
552
553/*
554 * Remove the value associated with an attribute by deleting the
555 * out-of-line buffer that it is stored on.
556 */
557int
558xfs_attr_rmtval_remove(
559	struct xfs_da_args	*args)
560{
561	struct xfs_mount	*mp = args->dp->i_mount;
562	xfs_dablk_t		lblkno;
563	int			blkcnt;
564	int			error;
565	int			done;
566
567	trace_xfs_attr_rmtval_remove(args);
568
569	/*
570	 * Roll through the "value", invalidating the attribute value's blocks.
571	 */
572	lblkno = args->rmtblkno;
573	blkcnt = args->rmtblkcnt;
574	while (blkcnt > 0) {
575		struct xfs_bmbt_irec	map;
576		struct xfs_buf		*bp;
577		xfs_daddr_t		dblkno;
578		int			dblkcnt;
579		int			nmap;
580
581		/*
582		 * Try to remember where we decided to put the value.
583		 */
584		nmap = 1;
585		error = xfs_bmapi_read(args->dp, (xfs_fileoff_t)lblkno,
586				       blkcnt, &map, &nmap, XFS_BMAPI_ATTRFORK);
587		if (error)
588			return error;
589		ASSERT(nmap == 1);
590		ASSERT((map.br_startblock != DELAYSTARTBLOCK) &&
591		       (map.br_startblock != HOLESTARTBLOCK));
592
593		dblkno = XFS_FSB_TO_DADDR(mp, map.br_startblock),
594		dblkcnt = XFS_FSB_TO_BB(mp, map.br_blockcount);
595
596		/*
597		 * If the "remote" value is in the cache, remove it.
598		 */
599		bp = xfs_incore(mp->m_ddev_targp, dblkno, dblkcnt, XBF_TRYLOCK);
600		if (bp) {
601			xfs_buf_stale(bp);
602			xfs_buf_relse(bp);
603			bp = NULL;
604		}
605
606		lblkno += map.br_blockcount;
607		blkcnt -= map.br_blockcount;
608	}
609
610	/*
611	 * Keep de-allocating extents until the remote-value region is gone.
612	 */
613	lblkno = args->rmtblkno;
614	blkcnt = args->rmtblkcnt;
615	done = 0;
616	while (!done) {
617		int committed;
618
619		xfs_bmap_init(args->flist, args->firstblock);
620		error = xfs_bunmapi(args->trans, args->dp, lblkno, blkcnt,
621				    XFS_BMAPI_ATTRFORK | XFS_BMAPI_METADATA,
622				    1, args->firstblock, args->flist,
623				    &done);
624		if (!error) {
625			error = xfs_bmap_finish(&args->trans, args->flist,
626						&committed);
627		}
628		if (error) {
629			ASSERT(committed);
630			args->trans = NULL;
631			xfs_bmap_cancel(args->flist);
632			return error;
633		}
634
635		/*
636		 * bmap_finish() may have committed the last trans and started
637		 * a new one.  We need the inode to be in all transactions.
638		 */
639		if (committed)
640			xfs_trans_ijoin(args->trans, args->dp, 0);
641
642		/*
643		 * Close out trans and start the next one in the chain.
644		 */
645		error = xfs_trans_roll(&args->trans, args->dp);
646		if (error)
647			return error;
648	}
649	return 0;
650}
651