1/*
2 * linux/fs/ext4/xattr.c
3 *
4 * Copyright (C) 2001-2003 Andreas Gruenbacher, <agruen@suse.de>
5 *
6 * Fix by Harrison Xing <harrison@mountainviewdata.com>.
7 * Ext4 code with a lot of help from Eric Jarman <ejarman@acm.org>.
8 * Extended attributes for symlinks and special files added per
9 *  suggestion of Luka Renko <luka.renko@hermes.si>.
10 * xattr consolidation Copyright (c) 2004 James Morris <jmorris@redhat.com>,
11 *  Red Hat Inc.
12 * ea-in-inode support by Alex Tomas <alex@clusterfs.com> aka bzzz
13 *  and Andreas Gruenbacher <agruen@suse.de>.
14 */
15
16/*
17 * Extended attributes are stored directly in inodes (on file systems with
18 * inodes bigger than 128 bytes) and on additional disk blocks. The i_file_acl
19 * field contains the block number if an inode uses an additional block. All
20 * attributes must fit in the inode and one additional block. Blocks that
21 * contain the identical set of attributes may be shared among several inodes.
22 * Identical blocks are detected by keeping a cache of blocks that have
23 * recently been accessed.
24 *
25 * The attributes in inodes and on blocks have a different header; the entries
26 * are stored in the same format:
27 *
28 *   +------------------+
29 *   | header           |
30 *   | entry 1          | |
31 *   | entry 2          | | growing downwards
32 *   | entry 3          | v
33 *   | four null bytes  |
34 *   | . . .            |
35 *   | value 1          | ^
36 *   | value 3          | | growing upwards
37 *   | value 2          | |
38 *   +------------------+
39 *
40 * The header is followed by multiple entry descriptors. In disk blocks, the
41 * entry descriptors are kept sorted. In inodes, they are unsorted. The
42 * attribute values are aligned to the end of the block in no specific order.
43 *
44 * Locking strategy
45 * ----------------
46 * EXT4_I(inode)->i_file_acl is protected by EXT4_I(inode)->xattr_sem.
47 * EA blocks are only changed if they are exclusive to an inode, so
48 * holding xattr_sem also means that nothing but the EA block's reference
49 * count can change. Multiple writers to the same block are synchronized
50 * by the buffer lock.
51 */
52
53#include <linux/init.h>
54#include <linux/fs.h>
55#include <linux/slab.h>
56#include <linux/mbcache.h>
57#include <linux/quotaops.h>
58#include "ext4_jbd2.h"
59#include "ext4.h"
60#include "xattr.h"
61#include "acl.h"
62
63#ifdef EXT4_XATTR_DEBUG
64# define ea_idebug(inode, f...) do { \
65		printk(KERN_DEBUG "inode %s:%lu: ", \
66			inode->i_sb->s_id, inode->i_ino); \
67		printk(f); \
68		printk("\n"); \
69	} while (0)
70# define ea_bdebug(bh, f...) do { \
71		char b[BDEVNAME_SIZE]; \
72		printk(KERN_DEBUG "block %s:%lu: ", \
73			bdevname(bh->b_bdev, b), \
74			(unsigned long) bh->b_blocknr); \
75		printk(f); \
76		printk("\n"); \
77	} while (0)
78#else
79# define ea_idebug(inode, fmt, ...)	no_printk(fmt, ##__VA_ARGS__)
80# define ea_bdebug(bh, fmt, ...)	no_printk(fmt, ##__VA_ARGS__)
81#endif
82
83static void ext4_xattr_cache_insert(struct mb_cache *, struct buffer_head *);
84static struct buffer_head *ext4_xattr_cache_find(struct inode *,
85						 struct ext4_xattr_header *,
86						 struct mb_cache_entry **);
87static void ext4_xattr_rehash(struct ext4_xattr_header *,
88			      struct ext4_xattr_entry *);
89static int ext4_xattr_list(struct dentry *dentry, char *buffer,
90			   size_t buffer_size);
91
/*
 * Map from on-disk attribute namespace index (e_name_index, the
 * EXT4_XATTR_INDEX_* values) to the handler implementing that
 * namespace.  Slots for namespaces compiled out stay NULL, so lookups
 * through ext4_xattr_handler() fail cleanly for them.
 */
static const struct xattr_handler *ext4_xattr_handler_map[] = {
	[EXT4_XATTR_INDEX_USER]		     = &ext4_xattr_user_handler,
#ifdef CONFIG_EXT4_FS_POSIX_ACL
	[EXT4_XATTR_INDEX_POSIX_ACL_ACCESS]  = &posix_acl_access_xattr_handler,
	[EXT4_XATTR_INDEX_POSIX_ACL_DEFAULT] = &posix_acl_default_xattr_handler,
#endif
	[EXT4_XATTR_INDEX_TRUSTED]	     = &ext4_xattr_trusted_handler,
#ifdef CONFIG_EXT4_FS_SECURITY
	[EXT4_XATTR_INDEX_SECURITY]	     = &ext4_xattr_security_handler,
#endif
};
103
/*
 * NULL-terminated list of all xattr handlers ext4 registers with the
 * VFS (via sb->s_xattr).  Order here only affects listing order; the
 * per-namespace dispatch goes through ext4_xattr_handler_map instead.
 */
const struct xattr_handler *ext4_xattr_handlers[] = {
	&ext4_xattr_user_handler,
	&ext4_xattr_trusted_handler,
#ifdef CONFIG_EXT4_FS_POSIX_ACL
	&posix_acl_access_xattr_handler,
	&posix_acl_default_xattr_handler,
#endif
#ifdef CONFIG_EXT4_FS_SECURITY
	&ext4_xattr_security_handler,
#endif
	NULL
};
116
117#define EXT4_GET_MB_CACHE(inode)	(((struct ext4_sb_info *) \
118				inode->i_sb->s_fs_info)->s_mb_cache)
119
/*
 * Compute the metadata checksum of an external xattr block.  The block
 * number is folded into the checksum first so that a block copied to a
 * different location fails verification.  h_checksum is zeroed for the
 * duration of the computation and restored before returning, so the
 * caller's buffer is left unchanged.
 */
static __le32 ext4_xattr_block_csum(struct inode *inode,
				    sector_t block_nr,
				    struct ext4_xattr_header *hdr)
{
	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
	__u32 csum;
	__le32 save_csum;
	__le64 dsk_block_nr = cpu_to_le64(block_nr);

	/* The checksum must be computed with h_checksum == 0. */
	save_csum = hdr->h_checksum;
	hdr->h_checksum = 0;
	/* Seed with the little-endian block number ... */
	csum = ext4_chksum(sbi, sbi->s_csum_seed, (__u8 *)&dsk_block_nr,
			   sizeof(dsk_block_nr));
	/* ... then checksum the entire block. */
	csum = ext4_chksum(sbi, csum, (__u8 *)hdr,
			   EXT4_BLOCK_SIZE(inode->i_sb));

	hdr->h_checksum = save_csum;
	return cpu_to_le32(csum);
}
139
140static int ext4_xattr_block_csum_verify(struct inode *inode,
141					sector_t block_nr,
142					struct ext4_xattr_header *hdr)
143{
144	if (ext4_has_metadata_csum(inode->i_sb) &&
145	    (hdr->h_checksum != ext4_xattr_block_csum(inode, block_nr, hdr)))
146		return 0;
147	return 1;
148}
149
150static void ext4_xattr_block_csum_set(struct inode *inode,
151				      sector_t block_nr,
152				      struct ext4_xattr_header *hdr)
153{
154	if (!ext4_has_metadata_csum(inode->i_sb))
155		return;
156
157	hdr->h_checksum = ext4_xattr_block_csum(inode, block_nr, hdr);
158}
159
/*
 * Refresh the block checksum and hand the buffer to the journal as dirty
 * metadata.  All modifications of xattr blocks must go through here so
 * the on-disk checksum stays in sync with the contents.
 */
static inline int ext4_handle_dirty_xattr_block(handle_t *handle,
						struct inode *inode,
						struct buffer_head *bh)
{
	ext4_xattr_block_csum_set(inode, bh->b_blocknr, BHDR(bh));
	return ext4_handle_dirty_metadata(handle, inode, bh);
}
167
168static inline const struct xattr_handler *
169ext4_xattr_handler(int name_index)
170{
171	const struct xattr_handler *handler = NULL;
172
173	if (name_index > 0 && name_index < ARRAY_SIZE(ext4_xattr_handler_map))
174		handler = ext4_xattr_handler_map[name_index];
175	return handler;
176}
177
178/*
179 * Inode operation listxattr()
180 *
181 * d_inode(dentry)->i_mutex: don't care
182 */
ssize_t
ext4_listxattr(struct dentry *dentry, char *buffer, size_t size)
{
	/* Thin VFS entry point; all work happens in ext4_xattr_list(). */
	return ext4_xattr_list(dentry, buffer, size);
}
188
/*
 * Validate a list of xattr entries: every entry descriptor must lie
 * strictly before "end", and every non-empty value (offsets relative to
 * "value_start") must fall between the end of the entry array plus its
 * 4-byte terminator and "end".  Returns 0 if consistent, -EIO otherwise.
 */
static int
ext4_xattr_check_names(struct ext4_xattr_entry *entry, void *end,
		       void *value_start)
{
	struct ext4_xattr_entry *e = entry;

	/* First pass: walk to the terminating entry, bounds-checking each
	 * descriptor so the second pass cannot run off "end". */
	while (!IS_LAST_ENTRY(e)) {
		struct ext4_xattr_entry *next = EXT4_XATTR_NEXT(e);
		if ((void *)next >= end)
			return -EIO;
		e = next;
	}

	/* Second pass: each value must start after the __u32 terminator
	 * that follows the last entry (at "e") and end before "end". */
	while (!IS_LAST_ENTRY(entry)) {
		if (entry->e_value_size != 0 &&
		    (value_start + le16_to_cpu(entry->e_value_offs) <
		     (void *)e + sizeof(__u32) ||
		     value_start + le16_to_cpu(entry->e_value_offs) +
		    le32_to_cpu(entry->e_value_size) > end))
			return -EIO;
		entry = EXT4_XATTR_NEXT(entry);
	}

	return 0;
}
214
/*
 * Validate an external xattr block: magic number, block count, checksum
 * and entry consistency.  A successful check is cached in the buffer's
 * verified bit so the work is done at most once per buffer lifetime.
 * Returns 0 if valid, -EIO otherwise.
 */
static inline int
ext4_xattr_check_block(struct inode *inode, struct buffer_head *bh)
{
	int error;

	if (buffer_verified(bh))
		return 0;

	/* Shared xattr blocks always occupy exactly one block. */
	if (BHDR(bh)->h_magic != cpu_to_le32(EXT4_XATTR_MAGIC) ||
	    BHDR(bh)->h_blocks != cpu_to_le32(1))
		return -EIO;
	if (!ext4_xattr_block_csum_verify(inode, bh->b_blocknr, BHDR(bh)))
		return -EIO;
	error = ext4_xattr_check_names(BFIRST(bh), bh->b_data + bh->b_size,
				       bh->b_data);
	if (!error)
		set_buffer_verified(bh);
	return error;
}
234
235static inline int
236ext4_xattr_check_entry(struct ext4_xattr_entry *entry, size_t size)
237{
238	size_t value_size = le32_to_cpu(entry->e_value_size);
239
240	if (entry->e_value_block != 0 || value_size > size ||
241	    le16_to_cpu(entry->e_value_offs) + value_size > size)
242		return -EIO;
243	return 0;
244}
245
/*
 * Find attribute (name_index, name) in the entry list starting at
 * *pentry.  Entries compare by namespace index, then name length, then
 * name bytes; when "sorted" is set (external block entries are kept
 * sorted, in-inode entries are not) the scan stops early at the first
 * entry that compares greater.  On return *pentry points either at the
 * match or at the position where the attribute would be inserted.
 *
 * Returns 0 on a match, -ENODATA when absent, -EIO when the matching
 * entry fails validation against "size", -EINVAL for a NULL name.
 */
static int
ext4_xattr_find_entry(struct ext4_xattr_entry **pentry, int name_index,
		      const char *name, size_t size, int sorted)
{
	struct ext4_xattr_entry *entry;
	size_t name_len;
	int cmp = 1;

	if (name == NULL)
		return -EINVAL;
	name_len = strlen(name);
	entry = *pentry;
	for (; !IS_LAST_ENTRY(entry); entry = EXT4_XATTR_NEXT(entry)) {
		cmp = name_index - entry->e_name_index;
		if (!cmp)
			cmp = name_len - entry->e_name_len;
		if (!cmp)
			cmp = memcmp(name, entry->e_name, name_len);
		/* In a sorted list, passing the insertion point ends the
		 * search even without a match. */
		if (cmp <= 0 && (sorted || cmp == 0))
			break;
	}
	*pentry = entry;
	if (!cmp && ext4_xattr_check_entry(entry, size))
			return -EIO;
	return cmp ? -ENODATA : 0;
}
272
/*
 * Read attribute (name_index, name) from the inode's external xattr
 * block.  With a NULL buffer only the value size is computed; otherwise
 * the value is copied into "buffer".
 *
 * Returns the value size on success, -ENODATA when the inode has no
 * xattr block or the attribute is absent, -ERANGE when the buffer is
 * too small, -EIO on a corrupt block.
 */
static int
ext4_xattr_block_get(struct inode *inode, int name_index, const char *name,
		     void *buffer, size_t buffer_size)
{
	struct buffer_head *bh = NULL;
	struct ext4_xattr_entry *entry;
	size_t size;
	int error;
	struct mb_cache *ext4_mb_cache = EXT4_GET_MB_CACHE(inode);

	ea_idebug(inode, "name=%d.%s, buffer=%p, buffer_size=%ld",
		  name_index, name, buffer, (long)buffer_size);

	error = -ENODATA;
	if (!EXT4_I(inode)->i_file_acl)
		goto cleanup;
	ea_idebug(inode, "reading block %llu",
		  (unsigned long long)EXT4_I(inode)->i_file_acl);
	bh = sb_bread(inode->i_sb, EXT4_I(inode)->i_file_acl);
	if (!bh)
		goto cleanup;
	ea_bdebug(bh, "b_count=%d, refcount=%d",
		atomic_read(&(bh->b_count)), le32_to_cpu(BHDR(bh)->h_refcount));
	if (ext4_xattr_check_block(inode, bh)) {
bad_block:
		EXT4_ERROR_INODE(inode, "bad block %llu",
				 EXT4_I(inode)->i_file_acl);
		error = -EIO;
		goto cleanup;
	}
	/* The block is valid; make it findable for block sharing. */
	ext4_xattr_cache_insert(ext4_mb_cache, bh);
	entry = BFIRST(bh);
	error = ext4_xattr_find_entry(&entry, name_index, name, bh->b_size, 1);
	if (error == -EIO)
		goto bad_block;
	if (error)
		goto cleanup;
	size = le32_to_cpu(entry->e_value_size);
	if (buffer) {
		error = -ERANGE;
		if (size > buffer_size)
			goto cleanup;
		memcpy(buffer, bh->b_data + le16_to_cpu(entry->e_value_offs),
		       size);
	}
	error = size;

cleanup:
	brelse(bh);
	return error;
}
324
/*
 * Read attribute (name_index, name) from the xattr area inside the
 * inode body.  With a NULL buffer only the value size is computed.
 *
 * Returns the value size on success, -ENODATA when the inode carries no
 * in-inode xattrs or the attribute is absent, -ERANGE when the buffer
 * is too small, or another negative errno on failure.
 */
int
ext4_xattr_ibody_get(struct inode *inode, int name_index, const char *name,
		     void *buffer, size_t buffer_size)
{
	struct ext4_xattr_ibody_header *header;
	struct ext4_xattr_entry *entry;
	struct ext4_inode *raw_inode;
	struct ext4_iloc iloc;
	size_t size;
	void *end;
	int error;

	if (!ext4_test_inode_state(inode, EXT4_STATE_XATTR))
		return -ENODATA;
	error = ext4_get_inode_loc(inode, &iloc);
	if (error)
		return error;
	raw_inode = ext4_raw_inode(&iloc);
	header = IHDR(inode, raw_inode);
	entry = IFIRST(header);
	/* The in-inode xattr area ends at the end of the on-disk inode. */
	end = (void *)raw_inode + EXT4_SB(inode->i_sb)->s_inode_size;
	error = ext4_xattr_check_names(entry, end, entry);
	if (error)
		goto cleanup;
	/* In-inode entries are unsorted (sorted == 0). */
	error = ext4_xattr_find_entry(&entry, name_index, name,
				      end - (void *)entry, 0);
	if (error)
		goto cleanup;
	size = le32_to_cpu(entry->e_value_size);
	if (buffer) {
		error = -ERANGE;
		if (size > buffer_size)
			goto cleanup;
		memcpy(buffer, (void *)IFIRST(header) +
		       le16_to_cpu(entry->e_value_offs), size);
	}
	error = size;

cleanup:
	brelse(iloc.bh);
	return error;
}
367
368/*
369 * ext4_xattr_get()
370 *
371 * Copy an extended attribute into the buffer
372 * provided, or compute the buffer size required.
373 * Buffer is NULL to compute the size of the buffer required.
374 *
375 * Returns a negative error number on failure, or the number of bytes
376 * used / required on success.
377 */
378int
379ext4_xattr_get(struct inode *inode, int name_index, const char *name,
380	       void *buffer, size_t buffer_size)
381{
382	int error;
383
384	if (strlen(name) > 255)
385		return -ERANGE;
386
387	down_read(&EXT4_I(inode)->xattr_sem);
388	error = ext4_xattr_ibody_get(inode, name_index, name, buffer,
389				     buffer_size);
390	if (error == -ENODATA)
391		error = ext4_xattr_block_get(inode, name_index, name, buffer,
392					     buffer_size);
393	up_read(&EXT4_I(inode)->xattr_sem);
394	return error;
395}
396
/*
 * Emit the (prefixed) names of all listable attributes in "entry" into
 * "buffer", or just measure the space needed when "buffer" is NULL.
 * Entries whose namespace has no registered handler are skipped.
 *
 * Returns the number of bytes used/required, or -ERANGE when the buffer
 * is too small.
 */
static int
ext4_xattr_list_entries(struct dentry *dentry, struct ext4_xattr_entry *entry,
			char *buffer, size_t buffer_size)
{
	size_t rest = buffer_size;

	for (; !IS_LAST_ENTRY(entry); entry = EXT4_XATTR_NEXT(entry)) {
		const struct xattr_handler *handler =
			ext4_xattr_handler(entry->e_name_index);

		if (handler) {
			/* handler->list() copies (or, with a NULL buffer,
			 * just sizes) the namespace-prefixed name. */
			size_t size = handler->list(dentry, buffer, rest,
						    entry->e_name,
						    entry->e_name_len,
						    handler->flags);
			if (buffer) {
				if (size > rest)
					return -ERANGE;
				buffer += size;
			}
			rest -= size;
		}
	}
	/* Bytes consumed out of the original buffer_size. */
	return buffer_size - rest;
}
422
/*
 * List the attribute names stored in the inode's external xattr block.
 * Returns the number of bytes used (0 when there is no block), or a
 * negative error.
 */
static int
ext4_xattr_block_list(struct dentry *dentry, char *buffer, size_t buffer_size)
{
	struct inode *inode = d_inode(dentry);
	struct buffer_head *bh = NULL;
	int error;
	struct mb_cache *ext4_mb_cache = EXT4_GET_MB_CACHE(inode);

	ea_idebug(inode, "buffer=%p, buffer_size=%ld",
		  buffer, (long)buffer_size);

	error = 0;
	if (!EXT4_I(inode)->i_file_acl)
		goto cleanup;
	ea_idebug(inode, "reading block %llu",
		  (unsigned long long)EXT4_I(inode)->i_file_acl);
	bh = sb_bread(inode->i_sb, EXT4_I(inode)->i_file_acl);
	error = -EIO;
	if (!bh)
		goto cleanup;
	ea_bdebug(bh, "b_count=%d, refcount=%d",
		atomic_read(&(bh->b_count)), le32_to_cpu(BHDR(bh)->h_refcount));
	if (ext4_xattr_check_block(inode, bh)) {
		EXT4_ERROR_INODE(inode, "bad block %llu",
				 EXT4_I(inode)->i_file_acl);
		error = -EIO;
		goto cleanup;
	}
	/* A valid block may be shared; make it findable for dedup. */
	ext4_xattr_cache_insert(ext4_mb_cache, bh);
	error = ext4_xattr_list_entries(dentry, BFIRST(bh), buffer, buffer_size);

cleanup:
	brelse(bh);

	return error;
}
459
/*
 * List the attribute names stored in the inode body.  Returns the
 * number of bytes used (0 when the inode has no in-inode xattrs), or a
 * negative error.
 */
static int
ext4_xattr_ibody_list(struct dentry *dentry, char *buffer, size_t buffer_size)
{
	struct inode *inode = d_inode(dentry);
	struct ext4_xattr_ibody_header *header;
	struct ext4_inode *raw_inode;
	struct ext4_iloc iloc;
	void *end;
	int error;

	if (!ext4_test_inode_state(inode, EXT4_STATE_XATTR))
		return 0;
	error = ext4_get_inode_loc(inode, &iloc);
	if (error)
		return error;
	raw_inode = ext4_raw_inode(&iloc);
	header = IHDR(inode, raw_inode);
	/* The in-inode xattr area ends at the end of the on-disk inode. */
	end = (void *)raw_inode + EXT4_SB(inode->i_sb)->s_inode_size;
	error = ext4_xattr_check_names(IFIRST(header), end, IFIRST(header));
	if (error)
		goto cleanup;
	error = ext4_xattr_list_entries(dentry, IFIRST(header),
					buffer, buffer_size);

cleanup:
	brelse(iloc.bh);
	return error;
}
488
489/*
490 * ext4_xattr_list()
491 *
492 * Copy a list of attribute names into the buffer
493 * provided, or compute the buffer size required.
494 * Buffer is NULL to compute the size of the buffer required.
495 *
496 * Returns a negative error number on failure, or the number of bytes
497 * used / required on success.
498 */
static int
ext4_xattr_list(struct dentry *dentry, char *buffer, size_t buffer_size)
{
	int ret, ret2;

	down_read(&EXT4_I(d_inode(dentry))->xattr_sem);
	/* In-inode names first, then the external block's names. */
	ret = ret2 = ext4_xattr_ibody_list(dentry, buffer, buffer_size);
	if (ret < 0)
		goto errout;
	if (buffer) {
		buffer += ret;
		buffer_size -= ret;
	}
	ret = ext4_xattr_block_list(dentry, buffer, buffer_size);
	if (ret < 0)
		goto errout;
	/* Report the combined size of both listings. */
	ret += ret2;
errout:
	up_read(&EXT4_I(d_inode(dentry))->xattr_sem);
	return ret;
}
520
521/*
522 * If the EXT4_FEATURE_COMPAT_EXT_ATTR feature of this file system is
523 * not set, set it.
524 */
static void ext4_xattr_update_super_block(handle_t *handle,
					  struct super_block *sb)
{
	if (EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_EXT_ATTR))
		return;

	BUFFER_TRACE(EXT4_SB(sb)->s_sbh, "get_write_access");
	/* Best effort: if the superblock cannot be journalled now, the
	 * feature flag will be set on a later xattr operation. */
	if (ext4_journal_get_write_access(handle, EXT4_SB(sb)->s_sbh) == 0) {
		EXT4_SET_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_EXT_ATTR);
		ext4_handle_dirty_super(handle, sb);
	}
}
537
538/*
539 * Release the xattr block BH: If the reference count is > 1, decrement it;
540 * otherwise free the block.
541 */
static void
ext4_xattr_release_block(handle_t *handle, struct inode *inode,
			 struct buffer_head *bh)
{
	struct mb_cache_entry *ce = NULL;
	int error = 0;
	struct mb_cache *ext4_mb_cache = EXT4_GET_MB_CACHE(inode);

	ce = mb_cache_entry_get(ext4_mb_cache, bh->b_bdev, bh->b_blocknr);
	BUFFER_TRACE(bh, "get_write_access");
	error = ext4_journal_get_write_access(handle, bh);
	if (error)
		goto out;

	lock_buffer(bh);
	if (BHDR(bh)->h_refcount == cpu_to_le32(1)) {
		/* Last reference: remove the cache entry and free the
		 * block itself. */
		ea_bdebug(bh, "refcount now=0; freeing");
		if (ce)
			mb_cache_entry_free(ce);
		/* Extra reference so bh survives until ext4_free_blocks()
		 * has forgotten it from the journal. */
		get_bh(bh);
		unlock_buffer(bh);
		ext4_free_blocks(handle, inode, bh, 0, 1,
				 EXT4_FREE_BLOCKS_METADATA |
				 EXT4_FREE_BLOCKS_FORGET);
	} else {
		/* Shared block: just drop our reference count. */
		le32_add_cpu(&BHDR(bh)->h_refcount, -1);
		if (ce)
			mb_cache_entry_release(ce);
		/*
		 * Beware of this ugliness: Releasing of xattr block references
		 * from different inodes can race and so we have to protect
		 * from a race where someone else frees the block (and releases
		 * its journal_head) before we are done dirtying the buffer. In
		 * nojournal mode this race is harmless and we actually cannot
		 * call ext4_handle_dirty_xattr_block() with locked buffer as
		 * that function can call sync_dirty_buffer() so for that case
		 * we handle the dirtying after unlocking the buffer.
		 */
		if (ext4_handle_valid(handle))
			error = ext4_handle_dirty_xattr_block(handle, inode,
							      bh);
		unlock_buffer(bh);
		if (!ext4_handle_valid(handle))
			error = ext4_handle_dirty_xattr_block(handle, inode,
							      bh);
		if (IS_SYNC(inode))
			ext4_handle_sync(handle);
		/* This inode no longer accounts for one quota block. */
		dquot_free_block(inode, EXT4_C2B(EXT4_SB(inode->i_sb), 1));
		ea_bdebug(bh, "refcount now=%d; releasing",
			  le32_to_cpu(BHDR(bh)->h_refcount));
	}
out:
	ext4_std_error(inode->i_sb, error);
	return;
}
597
598/*
599 * Find the available free space for EAs. This also returns the total number of
600 * bytes used by EA entries.
601 */
602static size_t ext4_xattr_free_space(struct ext4_xattr_entry *last,
603				    size_t *min_offs, void *base, int *total)
604{
605	for (; !IS_LAST_ENTRY(last); last = EXT4_XATTR_NEXT(last)) {
606		if (!last->e_value_block && last->e_value_size) {
607			size_t offs = le16_to_cpu(last->e_value_offs);
608			if (offs < *min_offs)
609				*min_offs = offs;
610		}
611		if (total)
612			*total += EXT4_XATTR_LEN(last->e_name_len);
613	}
614	return (*min_offs - ((void *)last - base) - sizeof(__u32));
615}
616
/*
 * Add, replace or remove the attribute described by "i" in the region
 * described by "s" (works for both in-inode and block storage).  On
 * entry s->here points at the entry to modify (or the insertion point)
 * and s->not_found says whether it exists.  i->value == NULL means
 * remove.  Returns 0 on success or -ENOSPC when the new entry + value
 * do not fit.
 */
static int
ext4_xattr_set_entry(struct ext4_xattr_info *i, struct ext4_xattr_search *s)
{
	struct ext4_xattr_entry *last;
	size_t free, min_offs = s->end - s->base, name_len = strlen(i->name);

	/* Compute min_offs and last. */
	last = s->first;
	for (; !IS_LAST_ENTRY(last); last = EXT4_XATTR_NEXT(last)) {
		if (!last->e_value_block && last->e_value_size) {
			size_t offs = le16_to_cpu(last->e_value_offs);
			if (offs < min_offs)
				min_offs = offs;
		}
	}
	/* Space between the entry array terminator and the lowest value. */
	free = min_offs - ((void *)last - s->base) - sizeof(__u32);
	if (!s->not_found) {
		/* Replacing: the old entry's descriptor and value will be
		 * reclaimed, so count them as free. */
		if (!s->here->e_value_block && s->here->e_value_size) {
			size_t size = le32_to_cpu(s->here->e_value_size);
			free += EXT4_XATTR_SIZE(size);
		}
		free += EXT4_XATTR_LEN(name_len);
	}
	if (i->value) {
		if (free < EXT4_XATTR_LEN(name_len) +
			   EXT4_XATTR_SIZE(i->value_len))
			return -ENOSPC;
	}

	if (i->value && s->not_found) {
		/* Insert the new name. */
		size_t size = EXT4_XATTR_LEN(name_len);
		/* Shift everything from s->here up to and including the
		 * terminating __u32 to make room for the descriptor. */
		size_t rest = (void *)last - (void *)s->here + sizeof(__u32);
		memmove((void *)s->here + size, s->here, rest);
		memset(s->here, 0, size);
		s->here->e_name_index = i->name_index;
		s->here->e_name_len = name_len;
		memcpy(s->here->e_name, i->name, name_len);
	} else {
		if (!s->here->e_value_block && s->here->e_value_size) {
			void *first_val = s->base + min_offs;
			size_t offs = le16_to_cpu(s->here->e_value_offs);
			void *val = s->base + offs;
			size_t size = EXT4_XATTR_SIZE(
				le32_to_cpu(s->here->e_value_size));

			if (i->value && size == EXT4_XATTR_SIZE(i->value_len)) {
				/* The old and the new value have the same
				   size. Just replace. */
				s->here->e_value_size =
					cpu_to_le32(i->value_len);
				if (i->value == EXT4_ZERO_XATTR_VALUE) {
					memset(val, 0, size);
				} else {
					/* Clear pad bytes first. */
					memset(val + size - EXT4_XATTR_PAD, 0,
					       EXT4_XATTR_PAD);
					memcpy(val, i->value, i->value_len);
				}
				return 0;
			}

			/* Remove the old value. */
			memmove(first_val + size, first_val, val - first_val);
			memset(first_val, 0, size);
			s->here->e_value_size = 0;
			s->here->e_value_offs = 0;
			min_offs += size;

			/* Adjust all value offsets. */
			last = s->first;
			while (!IS_LAST_ENTRY(last)) {
				size_t o = le16_to_cpu(last->e_value_offs);
				/* Values that were below the removed one
				 * moved up by "size" bytes. */
				if (!last->e_value_block &&
				    last->e_value_size && o < offs)
					last->e_value_offs =
						cpu_to_le16(o + size);
				last = EXT4_XATTR_NEXT(last);
			}
		}
		if (!i->value) {
			/* Remove the old name. */
			size_t size = EXT4_XATTR_LEN(name_len);
			last = ENTRY((void *)last - size);
			memmove(s->here, (void *)s->here + size,
				(void *)last - (void *)s->here + sizeof(__u32));
			memset(last, 0, size);
		}
	}

	if (i->value) {
		/* Insert the new value. */
		s->here->e_value_size = cpu_to_le32(i->value_len);
		if (i->value_len) {
			size_t size = EXT4_XATTR_SIZE(i->value_len);
			/* New values go just below the current lowest one. */
			void *val = s->base + min_offs - size;
			s->here->e_value_offs = cpu_to_le16(min_offs - size);
			if (i->value == EXT4_ZERO_XATTR_VALUE) {
				memset(val, 0, size);
			} else {
				/* Clear the pad bytes first. */
				memset(val + size - EXT4_XATTR_PAD, 0,
				       EXT4_XATTR_PAD);
				memcpy(val, i->value, i->value_len);
			}
		}
	}
	return 0;
}
726
/*
 * Search state for the external xattr block: "s" describes the search
 * within the block's data, "bh" holds the block itself (NULL when the
 * inode has no xattr block).
 */
struct ext4_xattr_block_find {
	struct ext4_xattr_search s;	/* search cursor inside the block */
	struct buffer_head *bh;		/* the xattr block, if any */
};
731
/*
 * Set up "bs" for attribute "i": read and validate the inode's external
 * xattr block (if it has one) and locate the attribute in it.
 * bs->s.not_found records the lookup result (-ENODATA when absent).
 * Returns 0 or a negative error; on error bs->bh may still hold a
 * buffer reference for the caller to release.
 */
static int
ext4_xattr_block_find(struct inode *inode, struct ext4_xattr_info *i,
		      struct ext4_xattr_block_find *bs)
{
	struct super_block *sb = inode->i_sb;
	int error;

	ea_idebug(inode, "name=%d.%s, value=%p, value_len=%ld",
		  i->name_index, i->name, i->value, (long)i->value_len);

	if (EXT4_I(inode)->i_file_acl) {
		/* The inode already has an extended attribute block. */
		bs->bh = sb_bread(sb, EXT4_I(inode)->i_file_acl);
		error = -EIO;
		if (!bs->bh)
			goto cleanup;
		ea_bdebug(bs->bh, "b_count=%d, refcount=%d",
			atomic_read(&(bs->bh->b_count)),
			le32_to_cpu(BHDR(bs->bh)->h_refcount));
		if (ext4_xattr_check_block(inode, bs->bh)) {
			EXT4_ERROR_INODE(inode, "bad block %llu",
					 EXT4_I(inode)->i_file_acl);
			error = -EIO;
			goto cleanup;
		}
		/* Find the named attribute. */
		bs->s.base = BHDR(bs->bh);
		bs->s.first = BFIRST(bs->bh);
		bs->s.end = bs->bh->b_data + bs->bh->b_size;
		bs->s.here = bs->s.first;
		/* Block entries are kept sorted (sorted == 1). */
		error = ext4_xattr_find_entry(&bs->s.here, i->name_index,
					      i->name, bs->bh->b_size, 1);
		if (error && error != -ENODATA)
			goto cleanup;
		bs->s.not_found = error;
	}
	error = 0;

cleanup:
	return error;
}
773
/*
 * Apply the change described by "i" to the inode's external xattr
 * block, using the search state set up by ext4_xattr_block_find().
 * Handles all the sharing cases: modify in place when we hold the only
 * reference, clone-then-modify when the block is shared, reuse an
 * identical block found in the mbcache, or allocate a fresh block.
 * Updates EXT4_I(inode)->i_file_acl and releases the previous block
 * when it is no longer used.  Returns 0 or a negative error.
 */
static int
ext4_xattr_block_set(handle_t *handle, struct inode *inode,
		     struct ext4_xattr_info *i,
		     struct ext4_xattr_block_find *bs)
{
	struct super_block *sb = inode->i_sb;
	struct buffer_head *new_bh = NULL;
	struct ext4_xattr_search *s = &bs->s;
	struct mb_cache_entry *ce = NULL;
	int error = 0;
	struct mb_cache *ext4_mb_cache = EXT4_GET_MB_CACHE(inode);

#define header(x) ((struct ext4_xattr_header *)(x))

	/* A single value can never exceed one block. */
	if (i->value && i->value_len > sb->s_blocksize)
		return -ENOSPC;
	if (s->base) {
		ce = mb_cache_entry_get(ext4_mb_cache, bs->bh->b_bdev,
					bs->bh->b_blocknr);
		BUFFER_TRACE(bs->bh, "get_write_access");
		error = ext4_journal_get_write_access(handle, bs->bh);
		if (error)
			goto cleanup;
		lock_buffer(bs->bh);

		if (header(s->base)->h_refcount == cpu_to_le32(1)) {
			/* We are the only user: modify the block in place.
			 * Drop the cache entry first since the contents (and
			 * hence the hash) are about to change. */
			if (ce) {
				mb_cache_entry_free(ce);
				ce = NULL;
			}
			ea_bdebug(bs->bh, "modifying in-place");
			error = ext4_xattr_set_entry(i, s);
			if (!error) {
				if (!IS_LAST_ENTRY(s->first))
					ext4_xattr_rehash(header(s->base),
							  s->here);
				ext4_xattr_cache_insert(ext4_mb_cache,
					bs->bh);
			}
			unlock_buffer(bs->bh);
			if (error == -EIO)
				goto bad_block;
			if (!error)
				error = ext4_handle_dirty_xattr_block(handle,
								      inode,
								      bs->bh);
			if (error)
				goto cleanup;
			goto inserted;
		} else {
			/* The block is shared: work on a private copy and
			 * decide later whether it can replace a cached block
			 * or needs a fresh allocation. */
			int offset = (char *)s->here - bs->bh->b_data;

			unlock_buffer(bs->bh);
			if (ce) {
				mb_cache_entry_release(ce);
				ce = NULL;
			}
			ea_bdebug(bs->bh, "cloning");
			s->base = kmalloc(bs->bh->b_size, GFP_NOFS);
			error = -ENOMEM;
			if (s->base == NULL)
				goto cleanup;
			memcpy(s->base, BHDR(bs->bh), bs->bh->b_size);
			s->first = ENTRY(header(s->base)+1);
			header(s->base)->h_refcount = cpu_to_le32(1);
			/* Re-point "here" into the copy. */
			s->here = ENTRY(s->base + offset);
			s->end = s->base + bs->bh->b_size;
		}
	} else {
		/* Allocate a buffer where we construct the new block. */
		s->base = kzalloc(sb->s_blocksize, GFP_NOFS);
		/* assert(header == s->base) */
		error = -ENOMEM;
		if (s->base == NULL)
			goto cleanup;
		header(s->base)->h_magic = cpu_to_le32(EXT4_XATTR_MAGIC);
		header(s->base)->h_blocks = cpu_to_le32(1);
		header(s->base)->h_refcount = cpu_to_le32(1);
		s->first = ENTRY(header(s->base)+1);
		s->here = ENTRY(header(s->base)+1);
		s->end = s->base + sb->s_blocksize;
	}

	error = ext4_xattr_set_entry(i, s);
	if (error == -EIO)
		goto bad_block;
	if (error)
		goto cleanup;
	if (!IS_LAST_ENTRY(s->first))
		ext4_xattr_rehash(header(s->base), s->here);

inserted:
	if (!IS_LAST_ENTRY(s->first)) {
		/* Try to share: look for a cached block with identical
		 * contents before allocating a new one. */
		new_bh = ext4_xattr_cache_find(inode, header(s->base), &ce);
		if (new_bh) {
			/* We found an identical block in the cache. */
			if (new_bh == bs->bh)
				ea_bdebug(new_bh, "keeping");
			else {
				/* The old block is released after updating
				   the inode. */
				error = dquot_alloc_block(inode,
						EXT4_C2B(EXT4_SB(sb), 1));
				if (error)
					goto cleanup;
				BUFFER_TRACE(new_bh, "get_write_access");
				error = ext4_journal_get_write_access(handle,
								      new_bh);
				if (error)
					goto cleanup_dquot;
				lock_buffer(new_bh);
				le32_add_cpu(&BHDR(new_bh)->h_refcount, 1);
				ea_bdebug(new_bh, "reusing; refcount now=%d",
					le32_to_cpu(BHDR(new_bh)->h_refcount));
				unlock_buffer(new_bh);
				error = ext4_handle_dirty_xattr_block(handle,
								      inode,
								      new_bh);
				if (error)
					goto cleanup_dquot;
			}
			mb_cache_entry_release(ce);
			ce = NULL;
		} else if (bs->bh && s->base == bs->bh->b_data) {
			/* We were modifying this block in-place. */
			ea_bdebug(bs->bh, "keeping this block");
			new_bh = bs->bh;
			get_bh(new_bh);
		} else {
			/* We need to allocate a new block */
			ext4_fsblk_t goal, block;

			goal = ext4_group_first_block_no(sb,
						EXT4_I(inode)->i_block_group);

			/* non-extent files can't have physical blocks past 2^32 */
			if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)))
				goal = goal & EXT4_MAX_BLOCK_FILE_PHYS;

			block = ext4_new_meta_blocks(handle, inode, goal, 0,
						     NULL, &error);
			if (error)
				goto cleanup;

			if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)))
				BUG_ON(block > EXT4_MAX_BLOCK_FILE_PHYS);

			ea_idebug(inode, "creating block %llu",
				  (unsigned long long)block);

			new_bh = sb_getblk(sb, block);
			if (unlikely(!new_bh)) {
				error = -ENOMEM;
getblk_failed:
				/* Give the just-allocated block back. */
				ext4_free_blocks(handle, inode, NULL, block, 1,
						 EXT4_FREE_BLOCKS_METADATA);
				goto cleanup;
			}
			lock_buffer(new_bh);
			error = ext4_journal_get_create_access(handle, new_bh);
			if (error) {
				unlock_buffer(new_bh);
				error = -EIO;
				goto getblk_failed;
			}
			memcpy(new_bh->b_data, s->base, new_bh->b_size);
			set_buffer_uptodate(new_bh);
			unlock_buffer(new_bh);
			ext4_xattr_cache_insert(ext4_mb_cache, new_bh);
			error = ext4_handle_dirty_xattr_block(handle,
							      inode, new_bh);
			if (error)
				goto cleanup;
		}
	}

	/* Update the inode. */
	EXT4_I(inode)->i_file_acl = new_bh ? new_bh->b_blocknr : 0;

	/* Drop the previous xattr block. */
	if (bs->bh && bs->bh != new_bh)
		ext4_xattr_release_block(handle, inode, bs->bh);
	error = 0;

cleanup:
	if (ce)
		mb_cache_entry_release(ce);
	brelse(new_bh);
	/* s->base is a private copy unless it aliases bs->bh's data. */
	if (!(bs->bh && s->base == bs->bh->b_data))
		kfree(s->base);

	return error;

cleanup_dquot:
	dquot_free_block(inode, EXT4_C2B(EXT4_SB(sb), 1));
	goto cleanup;

bad_block:
	EXT4_ERROR_INODE(inode, "bad block %llu",
			 EXT4_I(inode)->i_file_acl);
	goto cleanup;

#undef header
}
978
/*
 * Set up the in-inode search state in "is" and locate attribute "i" in
 * the inode body.  is->s.not_found records the lookup result (-ENODATA
 * when absent).  Returns 0 (also when the inode has no in-inode xattr
 * space at all) or a negative error on corrupt entries.
 */
int ext4_xattr_ibody_find(struct inode *inode, struct ext4_xattr_info *i,
			  struct ext4_xattr_ibody_find *is)
{
	struct ext4_xattr_ibody_header *header;
	struct ext4_inode *raw_inode;
	int error;

	/* Old 128-byte inodes have no room for in-inode xattrs. */
	if (EXT4_I(inode)->i_extra_isize == 0)
		return 0;
	raw_inode = ext4_raw_inode(&is->iloc);
	header = IHDR(inode, raw_inode);
	is->s.base = is->s.first = IFIRST(header);
	is->s.here = is->s.first;
	is->s.end = (void *)raw_inode + EXT4_SB(inode->i_sb)->s_inode_size;
	if (ext4_test_inode_state(inode, EXT4_STATE_XATTR)) {
		error = ext4_xattr_check_names(IFIRST(header), is->s.end,
					       IFIRST(header));
		if (error)
			return error;
		/* Find the named attribute. */
		error = ext4_xattr_find_entry(&is->s.here, i->name_index,
					      i->name, is->s.end -
					      (void *)is->s.base, 0);
		if (error && error != -ENODATA)
			return error;
		is->s.not_found = error;
	}
	return 0;
}
1008
int ext4_xattr_ibody_inline_set(handle_t *handle, struct inode *inode,
				struct ext4_xattr_info *i,
				struct ext4_xattr_ibody_find *is)
{
	struct ext4_xattr_ibody_header *header;
	struct ext4_xattr_search *s = &is->s;
	int error;

	/* No extra space => no in-inode xattr area at all. */
	if (EXT4_I(inode)->i_extra_isize == 0)
		return -ENOSPC;
	error = ext4_xattr_set_entry(i, s);
	if (error) {
		if (error == -ENOSPC &&
		    ext4_has_inline_data(inode)) {
			/*
			 * The in-inode area is full, but part of it may be
			 * occupied by inline file data.  Try evicting that
			 * data to make room, then redo the search and set.
			 * NOTE(review): the size hint nests EXT4_XATTR_SIZE()
			 * inside EXT4_XATTR_LEN(); the conventional estimate
			 * would be EXT4_XATTR_LEN(strlen(i->name)) +
			 * EXT4_XATTR_SIZE(i->value_len) — confirm intent.
			 */
			error = ext4_try_to_evict_inline_data(handle, inode,
					EXT4_XATTR_LEN(strlen(i->name) +
					EXT4_XATTR_SIZE(i->value_len)));
			if (error)
				return error;
			/* Re-find: evicting inline data moved entries. */
			error = ext4_xattr_ibody_find(inode, i, is);
			if (error)
				return error;
			error = ext4_xattr_set_entry(i, s);
		}
		if (error)
			return error;
	}
	/* Keep the header magic and inode state bit in sync with contents. */
	header = IHDR(inode, ext4_raw_inode(&is->iloc));
	if (!IS_LAST_ENTRY(s->first)) {
		header->h_magic = cpu_to_le32(EXT4_XATTR_MAGIC);
		ext4_set_inode_state(inode, EXT4_STATE_XATTR);
	} else {
		header->h_magic = cpu_to_le32(0);
		ext4_clear_inode_state(inode, EXT4_STATE_XATTR);
	}
	return 0;
}
1046
1047static int ext4_xattr_ibody_set(handle_t *handle, struct inode *inode,
1048				struct ext4_xattr_info *i,
1049				struct ext4_xattr_ibody_find *is)
1050{
1051	struct ext4_xattr_ibody_header *header;
1052	struct ext4_xattr_search *s = &is->s;
1053	int error;
1054
1055	if (EXT4_I(inode)->i_extra_isize == 0)
1056		return -ENOSPC;
1057	error = ext4_xattr_set_entry(i, s);
1058	if (error)
1059		return error;
1060	header = IHDR(inode, ext4_raw_inode(&is->iloc));
1061	if (!IS_LAST_ENTRY(s->first)) {
1062		header->h_magic = cpu_to_le32(EXT4_XATTR_MAGIC);
1063		ext4_set_inode_state(inode, EXT4_STATE_XATTR);
1064	} else {
1065		header->h_magic = cpu_to_le32(0);
1066		ext4_clear_inode_state(inode, EXT4_STATE_XATTR);
1067	}
1068	return 0;
1069}
1070
1071/*
1072 * ext4_xattr_set_handle()
1073 *
1074 * Create, replace or remove an extended attribute for this inode.  Value
1075 * is NULL to remove an existing extended attribute, and non-NULL to
1076 * either replace an existing extended attribute, or create a new extended
1077 * attribute. The flags XATTR_REPLACE and XATTR_CREATE
1078 * specify that an extended attribute must exist and must not exist
1079 * previous to the call, respectively.
1080 *
1081 * Returns 0, or a negative error number on failure.
1082 */
int
ext4_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index,
		      const char *name, const void *value, size_t value_len,
		      int flags)
{
	struct ext4_xattr_info i = {
		.name_index = name_index,
		.name = name,
		.value = value,
		.value_len = value_len,

	};
	/* Search state for the inode body and EA block, both "not found". */
	struct ext4_xattr_ibody_find is = {
		.s = { .not_found = -ENODATA, },
	};
	struct ext4_xattr_block_find bs = {
		.s = { .not_found = -ENODATA, },
	};
	unsigned long no_expand;
	int error;

	if (!name)
		return -EINVAL;
	/* e_name_len is a single byte on disk, so names cap out at 255. */
	if (strlen(name) > 255)
		return -ERANGE;
	down_write(&EXT4_I(inode)->xattr_sem);
	/*
	 * Forbid i_extra_isize expansion while we work; remember the old
	 * state so we only clear the bit later if we were the ones to set it.
	 */
	no_expand = ext4_test_inode_state(inode, EXT4_STATE_NO_EXPAND);
	ext4_set_inode_state(inode, EXT4_STATE_NO_EXPAND);

	error = ext4_reserve_inode_write(handle, inode, &is.iloc);
	if (error)
		goto cleanup;

	if (ext4_test_inode_state(inode, EXT4_STATE_NEW)) {
		struct ext4_inode *raw_inode = ext4_raw_inode(&is.iloc);
		/* New inode: the on-disk buffer may still hold stale data. */
		memset(raw_inode, 0, EXT4_SB(inode->i_sb)->s_inode_size);
		ext4_clear_inode_state(inode, EXT4_STATE_NEW);
	}

	/* Search the inode body first; fall back to the EA block. */
	error = ext4_xattr_ibody_find(inode, &i, &is);
	if (error)
		goto cleanup;
	if (is.s.not_found)
		error = ext4_xattr_block_find(inode, &i, &bs);
	if (error)
		goto cleanup;
	/* Enforce XATTR_REPLACE / XATTR_CREATE semantics. */
	if (is.s.not_found && bs.s.not_found) {
		error = -ENODATA;
		if (flags & XATTR_REPLACE)
			goto cleanup;
		error = 0;
		if (!value)
			goto cleanup;
	} else {
		error = -EEXIST;
		if (flags & XATTR_CREATE)
			goto cleanup;
	}
	if (!value) {
		/* Removal: delete from wherever the attribute lives. */
		if (!is.s.not_found)
			error = ext4_xattr_ibody_set(handle, inode, &i, &is);
		else if (!bs.s.not_found)
			error = ext4_xattr_block_set(handle, inode, &i, &bs);
	} else {
		/*
		 * Set/replace: prefer the inode body.  If it fits there,
		 * remove any stale copy from the EA block; on -ENOSPC, store
		 * it in the EA block and drop the in-inode copy instead.
		 */
		error = ext4_xattr_ibody_set(handle, inode, &i, &is);
		if (!error && !bs.s.not_found) {
			i.value = NULL;
			error = ext4_xattr_block_set(handle, inode, &i, &bs);
		} else if (error == -ENOSPC) {
			/* The block was never searched above; do it now. */
			if (EXT4_I(inode)->i_file_acl && !bs.s.base) {
				error = ext4_xattr_block_find(inode, &i, &bs);
				if (error)
					goto cleanup;
			}
			error = ext4_xattr_block_set(handle, inode, &i, &bs);
			if (error)
				goto cleanup;
			if (!is.s.not_found) {
				i.value = NULL;
				error = ext4_xattr_ibody_set(handle, inode, &i,
							     &is);
			}
		}
	}
	if (!error) {
		ext4_xattr_update_super_block(handle, inode->i_sb);
		inode->i_ctime = ext4_current_time(inode);
		if (!value)
			ext4_clear_inode_state(inode, EXT4_STATE_NO_EXPAND);
		error = ext4_mark_iloc_dirty(handle, inode, &is.iloc);
		/*
		 * The bh is consumed by ext4_mark_iloc_dirty, even with
		 * error != 0.
		 */
		is.iloc.bh = NULL;
		if (IS_SYNC(inode))
			ext4_handle_sync(handle);
	}

cleanup:
	brelse(is.iloc.bh);
	brelse(bs.bh);
	/* Re-allow expansion only if it was allowed when we entered. */
	if (no_expand == 0)
		ext4_clear_inode_state(inode, EXT4_STATE_NO_EXPAND);
	up_write(&EXT4_I(inode)->xattr_sem);
	return error;
}
1190
1191/*
1192 * ext4_xattr_set()
1193 *
1194 * Like ext4_xattr_set_handle, but start from an inode. This extended
1195 * attribute modification is a filesystem transaction by itself.
1196 *
1197 * Returns 0, or a negative error number on failure.
1198 */
1199int
1200ext4_xattr_set(struct inode *inode, int name_index, const char *name,
1201	       const void *value, size_t value_len, int flags)
1202{
1203	handle_t *handle;
1204	int error, retries = 0;
1205	int credits = ext4_jbd2_credits_xattr(inode);
1206
1207retry:
1208	handle = ext4_journal_start(inode, EXT4_HT_XATTR, credits);
1209	if (IS_ERR(handle)) {
1210		error = PTR_ERR(handle);
1211	} else {
1212		int error2;
1213
1214		error = ext4_xattr_set_handle(handle, inode, name_index, name,
1215					      value, value_len, flags);
1216		error2 = ext4_journal_stop(handle);
1217		if (error == -ENOSPC &&
1218		    ext4_should_retry_alloc(inode->i_sb, &retries))
1219			goto retry;
1220		if (error == 0)
1221			error = error2;
1222	}
1223
1224	return error;
1225}
1226
1227/*
1228 * Shift the EA entries in the inode to create space for the increased
1229 * i_extra_isize.
1230 */
1231static void ext4_xattr_shift_entries(struct ext4_xattr_entry *entry,
1232				     int value_offs_shift, void *to,
1233				     void *from, size_t n, int blocksize)
1234{
1235	struct ext4_xattr_entry *last = entry;
1236	int new_offs;
1237
1238	/* Adjust the value offsets of the entries */
1239	for (; !IS_LAST_ENTRY(last); last = EXT4_XATTR_NEXT(last)) {
1240		if (!last->e_value_block && last->e_value_size) {
1241			new_offs = le16_to_cpu(last->e_value_offs) +
1242							value_offs_shift;
1243			BUG_ON(new_offs + le32_to_cpu(last->e_value_size)
1244				 > blocksize);
1245			last->e_value_offs = cpu_to_le16(new_offs);
1246		}
1247	}
1248	/* Shift the entries by n bytes */
1249	memmove(to, from, n);
1250}
1251
1252/*
1253 * Expand an inode by new_extra_isize bytes when EAs are present.
1254 * Returns 0 on success or negative error number on failure.
1255 */
1256int ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize,
1257			       struct ext4_inode *raw_inode, handle_t *handle)
1258{
1259	struct ext4_xattr_ibody_header *header;
1260	struct ext4_xattr_entry *entry, *last, *first;
1261	struct buffer_head *bh = NULL;
1262	struct ext4_xattr_ibody_find *is = NULL;
1263	struct ext4_xattr_block_find *bs = NULL;
1264	char *buffer = NULL, *b_entry_name = NULL;
1265	size_t min_offs, free;
1266	int total_ino;
1267	void *base, *start, *end;
1268	int extra_isize = 0, error = 0, tried_min_extra_isize = 0;
1269	int s_min_extra_isize = le16_to_cpu(EXT4_SB(inode->i_sb)->s_es->s_min_extra_isize);
1270
1271	down_write(&EXT4_I(inode)->xattr_sem);
1272retry:
1273	if (EXT4_I(inode)->i_extra_isize >= new_extra_isize) {
1274		up_write(&EXT4_I(inode)->xattr_sem);
1275		return 0;
1276	}
1277
1278	header = IHDR(inode, raw_inode);
1279	entry = IFIRST(header);
1280
1281	/*
1282	 * Check if enough free space is available in the inode to shift the
1283	 * entries ahead by new_extra_isize.
1284	 */
1285
1286	base = start = entry;
1287	end = (void *)raw_inode + EXT4_SB(inode->i_sb)->s_inode_size;
1288	min_offs = end - base;
1289	last = entry;
1290	total_ino = sizeof(struct ext4_xattr_ibody_header);
1291
1292	free = ext4_xattr_free_space(last, &min_offs, base, &total_ino);
1293	if (free >= new_extra_isize) {
1294		entry = IFIRST(header);
1295		ext4_xattr_shift_entries(entry,	EXT4_I(inode)->i_extra_isize
1296				- new_extra_isize, (void *)raw_inode +
1297				EXT4_GOOD_OLD_INODE_SIZE + new_extra_isize,
1298				(void *)header, total_ino,
1299				inode->i_sb->s_blocksize);
1300		EXT4_I(inode)->i_extra_isize = new_extra_isize;
1301		error = 0;
1302		goto cleanup;
1303	}
1304
1305	/*
1306	 * Enough free space isn't available in the inode, check if
1307	 * EA block can hold new_extra_isize bytes.
1308	 */
1309	if (EXT4_I(inode)->i_file_acl) {
1310		bh = sb_bread(inode->i_sb, EXT4_I(inode)->i_file_acl);
1311		error = -EIO;
1312		if (!bh)
1313			goto cleanup;
1314		if (ext4_xattr_check_block(inode, bh)) {
1315			EXT4_ERROR_INODE(inode, "bad block %llu",
1316					 EXT4_I(inode)->i_file_acl);
1317			error = -EIO;
1318			goto cleanup;
1319		}
1320		base = BHDR(bh);
1321		first = BFIRST(bh);
1322		end = bh->b_data + bh->b_size;
1323		min_offs = end - base;
1324		free = ext4_xattr_free_space(first, &min_offs, base, NULL);
1325		if (free < new_extra_isize) {
1326			if (!tried_min_extra_isize && s_min_extra_isize) {
1327				tried_min_extra_isize++;
1328				new_extra_isize = s_min_extra_isize;
1329				brelse(bh);
1330				goto retry;
1331			}
1332			error = -1;
1333			goto cleanup;
1334		}
1335	} else {
1336		free = inode->i_sb->s_blocksize;
1337	}
1338
1339	while (new_extra_isize > 0) {
1340		size_t offs, size, entry_size;
1341		struct ext4_xattr_entry *small_entry = NULL;
1342		struct ext4_xattr_info i = {
1343			.value = NULL,
1344			.value_len = 0,
1345		};
1346		unsigned int total_size;  /* EA entry size + value size */
1347		unsigned int shift_bytes; /* No. of bytes to shift EAs by? */
1348		unsigned int min_total_size = ~0U;
1349
1350		is = kzalloc(sizeof(struct ext4_xattr_ibody_find), GFP_NOFS);
1351		bs = kzalloc(sizeof(struct ext4_xattr_block_find), GFP_NOFS);
1352		if (!is || !bs) {
1353			error = -ENOMEM;
1354			goto cleanup;
1355		}
1356
1357		is->s.not_found = -ENODATA;
1358		bs->s.not_found = -ENODATA;
1359		is->iloc.bh = NULL;
1360		bs->bh = NULL;
1361
1362		last = IFIRST(header);
1363		/* Find the entry best suited to be pushed into EA block */
1364		entry = NULL;
1365		for (; !IS_LAST_ENTRY(last); last = EXT4_XATTR_NEXT(last)) {
1366			total_size =
1367			EXT4_XATTR_SIZE(le32_to_cpu(last->e_value_size)) +
1368					EXT4_XATTR_LEN(last->e_name_len);
1369			if (total_size <= free && total_size < min_total_size) {
1370				if (total_size < new_extra_isize) {
1371					small_entry = last;
1372				} else {
1373					entry = last;
1374					min_total_size = total_size;
1375				}
1376			}
1377		}
1378
1379		if (entry == NULL) {
1380			if (small_entry) {
1381				entry = small_entry;
1382			} else {
1383				if (!tried_min_extra_isize &&
1384				    s_min_extra_isize) {
1385					tried_min_extra_isize++;
1386					new_extra_isize = s_min_extra_isize;
1387					kfree(is); is = NULL;
1388					kfree(bs); bs = NULL;
1389					brelse(bh);
1390					goto retry;
1391				}
1392				error = -1;
1393				goto cleanup;
1394			}
1395		}
1396		offs = le16_to_cpu(entry->e_value_offs);
1397		size = le32_to_cpu(entry->e_value_size);
1398		entry_size = EXT4_XATTR_LEN(entry->e_name_len);
1399		i.name_index = entry->e_name_index,
1400		buffer = kmalloc(EXT4_XATTR_SIZE(size), GFP_NOFS);
1401		b_entry_name = kmalloc(entry->e_name_len + 1, GFP_NOFS);
1402		if (!buffer || !b_entry_name) {
1403			error = -ENOMEM;
1404			goto cleanup;
1405		}
1406		/* Save the entry name and the entry value */
1407		memcpy(buffer, (void *)IFIRST(header) + offs,
1408		       EXT4_XATTR_SIZE(size));
1409		memcpy(b_entry_name, entry->e_name, entry->e_name_len);
1410		b_entry_name[entry->e_name_len] = '\0';
1411		i.name = b_entry_name;
1412
1413		error = ext4_get_inode_loc(inode, &is->iloc);
1414		if (error)
1415			goto cleanup;
1416
1417		error = ext4_xattr_ibody_find(inode, &i, is);
1418		if (error)
1419			goto cleanup;
1420
1421		/* Remove the chosen entry from the inode */
1422		error = ext4_xattr_ibody_set(handle, inode, &i, is);
1423		if (error)
1424			goto cleanup;
1425
1426		entry = IFIRST(header);
1427		if (entry_size + EXT4_XATTR_SIZE(size) >= new_extra_isize)
1428			shift_bytes = new_extra_isize;
1429		else
1430			shift_bytes = entry_size + size;
1431		/* Adjust the offsets and shift the remaining entries ahead */
1432		ext4_xattr_shift_entries(entry, EXT4_I(inode)->i_extra_isize -
1433			shift_bytes, (void *)raw_inode +
1434			EXT4_GOOD_OLD_INODE_SIZE + extra_isize + shift_bytes,
1435			(void *)header, total_ino - entry_size,
1436			inode->i_sb->s_blocksize);
1437
1438		extra_isize += shift_bytes;
1439		new_extra_isize -= shift_bytes;
1440		EXT4_I(inode)->i_extra_isize = extra_isize;
1441
1442		i.name = b_entry_name;
1443		i.value = buffer;
1444		i.value_len = size;
1445		error = ext4_xattr_block_find(inode, &i, bs);
1446		if (error)
1447			goto cleanup;
1448
1449		/* Add entry which was removed from the inode into the block */
1450		error = ext4_xattr_block_set(handle, inode, &i, bs);
1451		if (error)
1452			goto cleanup;
1453		kfree(b_entry_name);
1454		kfree(buffer);
1455		b_entry_name = NULL;
1456		buffer = NULL;
1457		brelse(is->iloc.bh);
1458		kfree(is);
1459		kfree(bs);
1460	}
1461	brelse(bh);
1462	up_write(&EXT4_I(inode)->xattr_sem);
1463	return 0;
1464
1465cleanup:
1466	kfree(b_entry_name);
1467	kfree(buffer);
1468	if (is)
1469		brelse(is->iloc.bh);
1470	kfree(is);
1471	kfree(bs);
1472	brelse(bh);
1473	up_write(&EXT4_I(inode)->xattr_sem);
1474	return error;
1475}
1476
1477
1478
1479/*
1480 * ext4_xattr_delete_inode()
1481 *
1482 * Free extended attribute resources associated with this inode. This
1483 * is called immediately before an inode is freed. We have exclusive
1484 * access to the inode.
1485 */
1486void
1487ext4_xattr_delete_inode(handle_t *handle, struct inode *inode)
1488{
1489	struct buffer_head *bh = NULL;
1490
1491	if (!EXT4_I(inode)->i_file_acl)
1492		goto cleanup;
1493	bh = sb_bread(inode->i_sb, EXT4_I(inode)->i_file_acl);
1494	if (!bh) {
1495		EXT4_ERROR_INODE(inode, "block %llu read error",
1496				 EXT4_I(inode)->i_file_acl);
1497		goto cleanup;
1498	}
1499	if (BHDR(bh)->h_magic != cpu_to_le32(EXT4_XATTR_MAGIC) ||
1500	    BHDR(bh)->h_blocks != cpu_to_le32(1)) {
1501		EXT4_ERROR_INODE(inode, "bad block %llu",
1502				 EXT4_I(inode)->i_file_acl);
1503		goto cleanup;
1504	}
1505	ext4_xattr_release_block(handle, inode, bh);
1506	EXT4_I(inode)->i_file_acl = 0;
1507
1508cleanup:
1509	brelse(bh);
1510}
1511
1512/*
1513 * ext4_xattr_put_super()
1514 *
1515 * This is called when a file system is unmounted.
1516 */
void
ext4_xattr_put_super(struct super_block *sb)
{
	/* Drop cached xattr-block entries for this device at unmount. */
	mb_cache_shrink(sb->s_bdev);
}
1522
1523/*
1524 * ext4_xattr_cache_insert()
1525 *
1526 * Create a new entry in the extended attribute cache, and insert
1527 * it unless such an entry is already in the cache.
1528 *
 * Returns nothing; failures to insert are only reported via ea_bdebug().
1530 */
1531static void
1532ext4_xattr_cache_insert(struct mb_cache *ext4_mb_cache, struct buffer_head *bh)
1533{
1534	__u32 hash = le32_to_cpu(BHDR(bh)->h_hash);
1535	struct mb_cache_entry *ce;
1536	int error;
1537
1538	ce = mb_cache_entry_alloc(ext4_mb_cache, GFP_NOFS);
1539	if (!ce) {
1540		ea_bdebug(bh, "out of memory");
1541		return;
1542	}
1543	error = mb_cache_entry_insert(ce, bh->b_bdev, bh->b_blocknr, hash);
1544	if (error) {
1545		mb_cache_entry_free(ce);
1546		if (error == -EBUSY) {
1547			ea_bdebug(bh, "already in cache");
1548			error = 0;
1549		}
1550	} else {
1551		ea_bdebug(bh, "inserting [%x]", (int)hash);
1552		mb_cache_entry_release(ce);
1553	}
1554}
1555
1556/*
1557 * ext4_xattr_cmp()
1558 *
1559 * Compare two extended attribute blocks for equality.
1560 *
1561 * Returns 0 if the blocks are equal, 1 if they differ, and
1562 * a negative error number on errors.
1563 */
1564static int
1565ext4_xattr_cmp(struct ext4_xattr_header *header1,
1566	       struct ext4_xattr_header *header2)
1567{
1568	struct ext4_xattr_entry *entry1, *entry2;
1569
1570	entry1 = ENTRY(header1+1);
1571	entry2 = ENTRY(header2+1);
1572	while (!IS_LAST_ENTRY(entry1)) {
1573		if (IS_LAST_ENTRY(entry2))
1574			return 1;
1575		if (entry1->e_hash != entry2->e_hash ||
1576		    entry1->e_name_index != entry2->e_name_index ||
1577		    entry1->e_name_len != entry2->e_name_len ||
1578		    entry1->e_value_size != entry2->e_value_size ||
1579		    memcmp(entry1->e_name, entry2->e_name, entry1->e_name_len))
1580			return 1;
1581		if (entry1->e_value_block != 0 || entry2->e_value_block != 0)
1582			return -EIO;
1583		if (memcmp((char *)header1 + le16_to_cpu(entry1->e_value_offs),
1584			   (char *)header2 + le16_to_cpu(entry2->e_value_offs),
1585			   le32_to_cpu(entry1->e_value_size)))
1586			return 1;
1587
1588		entry1 = EXT4_XATTR_NEXT(entry1);
1589		entry2 = EXT4_XATTR_NEXT(entry2);
1590	}
1591	if (!IS_LAST_ENTRY(entry2))
1592		return 1;
1593	return 0;
1594}
1595
1596/*
1597 * ext4_xattr_cache_find()
1598 *
1599 * Find an identical extended attribute block.
1600 *
1601 * Returns a pointer to the block found, or NULL if such a block was
1602 * not found or an error occurred.
1603 */
static struct buffer_head *
ext4_xattr_cache_find(struct inode *inode, struct ext4_xattr_header *header,
		      struct mb_cache_entry **pce)
{
	__u32 hash = le32_to_cpu(header->h_hash);
	struct mb_cache_entry *ce;
	struct mb_cache *ext4_mb_cache = EXT4_GET_MB_CACHE(inode);

	/* A zero hash marks the block as non-shareable. */
	if (!header->h_hash)
		return NULL;  /* never share */
	ea_idebug(inode, "looking for cached blocks [%x]", (int)hash);
again:
	ce = mb_cache_entry_find_first(ext4_mb_cache, inode->i_sb->s_bdev,
				       hash);
	while (ce) {
		struct buffer_head *bh;

		if (IS_ERR(ce)) {
			/* -EAGAIN: the cache changed under us; restart. */
			if (PTR_ERR(ce) == -EAGAIN)
				goto again;
			break;
		}
		bh = sb_bread(inode->i_sb, ce->e_block);
		if (!bh) {
			EXT4_ERROR_INODE(inode, "block %lu read error",
					 (unsigned long) ce->e_block);
		} else if (le32_to_cpu(BHDR(bh)->h_refcount) >=
				EXT4_XATTR_REFCOUNT_MAX) {
			/* Refcount saturated: do not share this block more. */
			ea_idebug(inode, "block %lu refcount %d>=%d",
				  (unsigned long) ce->e_block,
				  le32_to_cpu(BHDR(bh)->h_refcount),
					  EXT4_XATTR_REFCOUNT_MAX);
		} else if (ext4_xattr_cmp(header, BHDR(bh)) == 0) {
			/*
			 * Full content match: hand both the bh and the cache
			 * entry to the caller.  NOTE(review): the caller
			 * appears responsible for releasing both — confirm
			 * against the users of *pce.
			 */
			*pce = ce;
			return bh;
		}
		brelse(bh);
		ce = mb_cache_entry_find_next(ce, inode->i_sb->s_bdev, hash);
	}
	return NULL;
}
1645
1646#define NAME_HASH_SHIFT 5
1647#define VALUE_HASH_SHIFT 16
1648
1649/*
1650 * ext4_xattr_hash_entry()
1651 *
1652 * Compute the hash of an extended attribute.
1653 */
1654static inline void ext4_xattr_hash_entry(struct ext4_xattr_header *header,
1655					 struct ext4_xattr_entry *entry)
1656{
1657	__u32 hash = 0;
1658	char *name = entry->e_name;
1659	int n;
1660
1661	for (n = 0; n < entry->e_name_len; n++) {
1662		hash = (hash << NAME_HASH_SHIFT) ^
1663		       (hash >> (8*sizeof(hash) - NAME_HASH_SHIFT)) ^
1664		       *name++;
1665	}
1666
1667	if (entry->e_value_block == 0 && entry->e_value_size != 0) {
1668		__le32 *value = (__le32 *)((char *)header +
1669			le16_to_cpu(entry->e_value_offs));
1670		for (n = (le32_to_cpu(entry->e_value_size) +
1671		     EXT4_XATTR_ROUND) >> EXT4_XATTR_PAD_BITS; n; n--) {
1672			hash = (hash << VALUE_HASH_SHIFT) ^
1673			       (hash >> (8*sizeof(hash) - VALUE_HASH_SHIFT)) ^
1674			       le32_to_cpu(*value++);
1675		}
1676	}
1677	entry->e_hash = cpu_to_le32(hash);
1678}
1679
1680#undef NAME_HASH_SHIFT
1681#undef VALUE_HASH_SHIFT
1682
1683#define BLOCK_HASH_SHIFT 16
1684
1685/*
1686 * ext4_xattr_rehash()
1687 *
1688 * Re-compute the extended attribute hash value after an entry has changed.
1689 */
1690static void ext4_xattr_rehash(struct ext4_xattr_header *header,
1691			      struct ext4_xattr_entry *entry)
1692{
1693	struct ext4_xattr_entry *here;
1694	__u32 hash = 0;
1695
1696	ext4_xattr_hash_entry(header, entry);
1697	here = ENTRY(header+1);
1698	while (!IS_LAST_ENTRY(here)) {
1699		if (!here->e_hash) {
1700			/* Block is not shared if an entry's hash value == 0 */
1701			hash = 0;
1702			break;
1703		}
1704		hash = (hash << BLOCK_HASH_SHIFT) ^
1705		       (hash >> (8*sizeof(hash) - BLOCK_HASH_SHIFT)) ^
1706		       le32_to_cpu(here->e_hash);
1707		here = EXT4_XATTR_NEXT(here);
1708	}
1709	header->h_hash = cpu_to_le32(hash);
1710}
1711
1712#undef BLOCK_HASH_SHIFT
1713
1714#define	HASH_BUCKET_BITS	10
1715
/* Allocate the mbcache used to deduplicate identical xattr blocks. */
struct mb_cache *
ext4_xattr_create_cache(char *name)
{
	return mb_cache_create(name, HASH_BUCKET_BITS);
}
1721
/* Tear down the xattr block cache; tolerates a NULL cache pointer. */
void ext4_xattr_destroy_cache(struct mb_cache *cache)
{
	if (!cache)
		return;
	mb_cache_destroy(cache);
}
1727
1728