1/*
2 * Copyright (c) 2012 Taobao.
3 * Written by Tao Ma <boyu.mt@taobao.com>
4 *
5 * This program is free software; you can redistribute it and/or modify it
6 * under the terms of version 2.1 of the GNU Lesser General Public License
7 * as published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12 * GNU General Public License for more details.
13 */
14
15#include <linux/fiemap.h>
16
17#include "ext4_jbd2.h"
18#include "ext4.h"
19#include "xattr.h"
20#include "truncate.h"
21
/* Name of the system-index xattr whose value holds the inline data. */
#define EXT4_XATTR_SYSTEM_DATA	"data"
/* Bytes of inline data stored directly in the raw inode's i_block array. */
#define EXT4_MIN_INLINE_DATA_SIZE	((sizeof(__le32) * EXT4_N_BLOCKS))
/*
 * An inline directory starts with EXT4_INLINE_DOTDOT_SIZE bytes holding the
 * parent inode number; the real directory entries follow it.
 * NOTE(review): EXT4_INLINE_DOTDOT_OFFSET is not used in the visible code;
 * presumably the logical offset reported for ".." -- confirm with users.
 */
#define EXT4_INLINE_DOTDOT_OFFSET	2
#define EXT4_INLINE_DOTDOT_SIZE		4
26
27static int ext4_get_inline_size(struct inode *inode)
28{
29	if (EXT4_I(inode)->i_inline_off)
30		return EXT4_I(inode)->i_inline_size;
31
32	return 0;
33}
34
35static int get_max_inline_xattr_value_size(struct inode *inode,
36					   struct ext4_iloc *iloc)
37{
38	struct ext4_xattr_ibody_header *header;
39	struct ext4_xattr_entry *entry;
40	struct ext4_inode *raw_inode;
41	int free, min_offs;
42
43	min_offs = EXT4_SB(inode->i_sb)->s_inode_size -
44			EXT4_GOOD_OLD_INODE_SIZE -
45			EXT4_I(inode)->i_extra_isize -
46			sizeof(struct ext4_xattr_ibody_header);
47
48	/*
49	 * We need to subtract another sizeof(__u32) since an in-inode xattr
50	 * needs an empty 4 bytes to indicate the gap between the xattr entry
51	 * and the name/value pair.
52	 */
53	if (!ext4_test_inode_state(inode, EXT4_STATE_XATTR))
54		return EXT4_XATTR_SIZE(min_offs -
55			EXT4_XATTR_LEN(strlen(EXT4_XATTR_SYSTEM_DATA)) -
56			EXT4_XATTR_ROUND - sizeof(__u32));
57
58	raw_inode = ext4_raw_inode(iloc);
59	header = IHDR(inode, raw_inode);
60	entry = IFIRST(header);
61
62	/* Compute min_offs. */
63	for (; !IS_LAST_ENTRY(entry); entry = EXT4_XATTR_NEXT(entry)) {
64		if (!entry->e_value_block && entry->e_value_size) {
65			size_t offs = le16_to_cpu(entry->e_value_offs);
66			if (offs < min_offs)
67				min_offs = offs;
68		}
69	}
70	free = min_offs -
71		((void *)entry - (void *)IFIRST(header)) - sizeof(__u32);
72
73	if (EXT4_I(inode)->i_inline_off) {
74		entry = (struct ext4_xattr_entry *)
75			((void *)raw_inode + EXT4_I(inode)->i_inline_off);
76
77		free += EXT4_XATTR_SIZE(le32_to_cpu(entry->e_value_size));
78		goto out;
79	}
80
81	free -= EXT4_XATTR_LEN(strlen(EXT4_XATTR_SYSTEM_DATA));
82
83	if (free > EXT4_XATTR_ROUND)
84		free = EXT4_XATTR_SIZE(free - EXT4_XATTR_ROUND);
85	else
86		free = 0;
87
88out:
89	return free;
90}
91
92/*
93 * Get the maximum size we now can store in an inode.
94 * If we can't find the space for a xattr entry, don't use the space
95 * of the extents since we have no space to indicate the inline data.
96 */
97int ext4_get_max_inline_size(struct inode *inode)
98{
99	int error, max_inline_size;
100	struct ext4_iloc iloc;
101
102	if (EXT4_I(inode)->i_extra_isize == 0)
103		return 0;
104
105	error = ext4_get_inode_loc(inode, &iloc);
106	if (error) {
107		ext4_error_inode(inode, __func__, __LINE__, 0,
108				 "can't get inode location %lu",
109				 inode->i_ino);
110		return 0;
111	}
112
113	down_read(&EXT4_I(inode)->xattr_sem);
114	max_inline_size = get_max_inline_xattr_value_size(inode, &iloc);
115	up_read(&EXT4_I(inode)->xattr_sem);
116
117	brelse(iloc.bh);
118
119	if (!max_inline_size)
120		return 0;
121
122	return max_inline_size + EXT4_MIN_INLINE_DATA_SIZE;
123}
124
125/*
126 * this function does not take xattr_sem, which is OK because it is
127 * currently only used in a code path coming form ext4_iget, before
128 * the new inode has been unlocked
129 */
130int ext4_find_inline_data_nolock(struct inode *inode)
131{
132	struct ext4_xattr_ibody_find is = {
133		.s = { .not_found = -ENODATA, },
134	};
135	struct ext4_xattr_info i = {
136		.name_index = EXT4_XATTR_INDEX_SYSTEM,
137		.name = EXT4_XATTR_SYSTEM_DATA,
138	};
139	int error;
140
141	if (EXT4_I(inode)->i_extra_isize == 0)
142		return 0;
143
144	error = ext4_get_inode_loc(inode, &is.iloc);
145	if (error)
146		return error;
147
148	error = ext4_xattr_ibody_find(inode, &i, &is);
149	if (error)
150		goto out;
151
152	if (!is.s.not_found) {
153		EXT4_I(inode)->i_inline_off = (u16)((void *)is.s.here -
154					(void *)ext4_raw_inode(&is.iloc));
155		EXT4_I(inode)->i_inline_size = EXT4_MIN_INLINE_DATA_SIZE +
156				le32_to_cpu(is.s.here->e_value_size);
157		ext4_set_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA);
158	}
159out:
160	brelse(is.iloc.bh);
161	return error;
162}
163
164static int ext4_read_inline_data(struct inode *inode, void *buffer,
165				 unsigned int len,
166				 struct ext4_iloc *iloc)
167{
168	struct ext4_xattr_entry *entry;
169	struct ext4_xattr_ibody_header *header;
170	int cp_len = 0;
171	struct ext4_inode *raw_inode;
172
173	if (!len)
174		return 0;
175
176	BUG_ON(len > EXT4_I(inode)->i_inline_size);
177
178	cp_len = len < EXT4_MIN_INLINE_DATA_SIZE ?
179			len : EXT4_MIN_INLINE_DATA_SIZE;
180
181	raw_inode = ext4_raw_inode(iloc);
182	memcpy(buffer, (void *)(raw_inode->i_block), cp_len);
183
184	len -= cp_len;
185	buffer += cp_len;
186
187	if (!len)
188		goto out;
189
190	header = IHDR(inode, raw_inode);
191	entry = (struct ext4_xattr_entry *)((void *)raw_inode +
192					    EXT4_I(inode)->i_inline_off);
193	len = min_t(unsigned int, len,
194		    (unsigned int)le32_to_cpu(entry->e_value_size));
195
196	memcpy(buffer,
197	       (void *)IFIRST(header) + le16_to_cpu(entry->e_value_offs), len);
198	cp_len += len;
199
200out:
201	return cp_len;
202}
203
/*
 * Copy @len bytes from @buffer + @pos into the inline data area of the
 * inode located by @iloc, starting at inline offset @pos.
 *
 * Inline data is split in two: the first EXT4_MIN_INLINE_DATA_SIZE bytes
 * live in raw_inode->i_block, everything after that lives in the value of
 * the inline-data xattr entry found at i_inline_off.
 *
 * The inode must already have inline data (i_inline_off != 0) and
 * @pos + @len must not exceed i_inline_size; both are enforced by BUG_ON.
 */
static void ext4_write_inline_data(struct inode *inode, struct ext4_iloc *iloc,
				   void *buffer, loff_t pos, unsigned int len)
{
	struct ext4_xattr_entry *entry;
	struct ext4_xattr_ibody_header *header;
	struct ext4_inode *raw_inode;
	int cp_len = 0;

	BUG_ON(!EXT4_I(inode)->i_inline_off);
	BUG_ON(pos + len > EXT4_I(inode)->i_inline_size);

	raw_inode = ext4_raw_inode(iloc);
	/* @buffer is indexed by the same offset as the inline data. */
	buffer += pos;

	/* Portion of the write that lands in i_block. */
	if (pos < EXT4_MIN_INLINE_DATA_SIZE) {
		cp_len = pos + len > EXT4_MIN_INLINE_DATA_SIZE ?
			 EXT4_MIN_INLINE_DATA_SIZE - pos : len;
		memcpy((void *)raw_inode->i_block + pos, buffer, cp_len);

		len -= cp_len;
		buffer += cp_len;
		pos += cp_len;
	}

	if (!len)
		return;

	/* Remainder goes into the xattr value; rebase pos to its start. */
	pos -= EXT4_MIN_INLINE_DATA_SIZE;
	header = IHDR(inode, raw_inode);
	entry = (struct ext4_xattr_entry *)((void *)raw_inode +
					    EXT4_I(inode)->i_inline_off);

	memcpy((void *)IFIRST(header) + le16_to_cpu(entry->e_value_offs) + pos,
	       buffer, len);
}
245
246static int ext4_create_inline_data(handle_t *handle,
247				   struct inode *inode, unsigned len)
248{
249	int error;
250	void *value = NULL;
251	struct ext4_xattr_ibody_find is = {
252		.s = { .not_found = -ENODATA, },
253	};
254	struct ext4_xattr_info i = {
255		.name_index = EXT4_XATTR_INDEX_SYSTEM,
256		.name = EXT4_XATTR_SYSTEM_DATA,
257	};
258
259	error = ext4_get_inode_loc(inode, &is.iloc);
260	if (error)
261		return error;
262
263	BUFFER_TRACE(is.iloc.bh, "get_write_access");
264	error = ext4_journal_get_write_access(handle, is.iloc.bh);
265	if (error)
266		goto out;
267
268	if (len > EXT4_MIN_INLINE_DATA_SIZE) {
269		value = EXT4_ZERO_XATTR_VALUE;
270		len -= EXT4_MIN_INLINE_DATA_SIZE;
271	} else {
272		value = "";
273		len = 0;
274	}
275
276	/* Insert the the xttr entry. */
277	i.value = value;
278	i.value_len = len;
279
280	error = ext4_xattr_ibody_find(inode, &i, &is);
281	if (error)
282		goto out;
283
284	BUG_ON(!is.s.not_found);
285
286	error = ext4_xattr_ibody_inline_set(handle, inode, &i, &is);
287	if (error) {
288		if (error == -ENOSPC)
289			ext4_clear_inode_state(inode,
290					       EXT4_STATE_MAY_INLINE_DATA);
291		goto out;
292	}
293
294	memset((void *)ext4_raw_inode(&is.iloc)->i_block,
295		0, EXT4_MIN_INLINE_DATA_SIZE);
296
297	EXT4_I(inode)->i_inline_off = (u16)((void *)is.s.here -
298				      (void *)ext4_raw_inode(&is.iloc));
299	EXT4_I(inode)->i_inline_size = len + EXT4_MIN_INLINE_DATA_SIZE;
300	ext4_clear_inode_flag(inode, EXT4_INODE_EXTENTS);
301	ext4_set_inode_flag(inode, EXT4_INODE_INLINE_DATA);
302	get_bh(is.iloc.bh);
303	error = ext4_mark_iloc_dirty(handle, inode, &is.iloc);
304
305out:
306	brelse(is.iloc.bh);
307	return error;
308}
309
310static int ext4_update_inline_data(handle_t *handle, struct inode *inode,
311				   unsigned int len)
312{
313	int error;
314	void *value = NULL;
315	struct ext4_xattr_ibody_find is = {
316		.s = { .not_found = -ENODATA, },
317	};
318	struct ext4_xattr_info i = {
319		.name_index = EXT4_XATTR_INDEX_SYSTEM,
320		.name = EXT4_XATTR_SYSTEM_DATA,
321	};
322
323	/* If the old space is ok, write the data directly. */
324	if (len <= EXT4_I(inode)->i_inline_size)
325		return 0;
326
327	error = ext4_get_inode_loc(inode, &is.iloc);
328	if (error)
329		return error;
330
331	error = ext4_xattr_ibody_find(inode, &i, &is);
332	if (error)
333		goto out;
334
335	BUG_ON(is.s.not_found);
336
337	len -= EXT4_MIN_INLINE_DATA_SIZE;
338	value = kzalloc(len, GFP_NOFS);
339	if (!value)
340		goto out;
341
342	error = ext4_xattr_ibody_get(inode, i.name_index, i.name,
343				     value, len);
344	if (error == -ENODATA)
345		goto out;
346
347	BUFFER_TRACE(is.iloc.bh, "get_write_access");
348	error = ext4_journal_get_write_access(handle, is.iloc.bh);
349	if (error)
350		goto out;
351
352	/* Update the xttr entry. */
353	i.value = value;
354	i.value_len = len;
355
356	error = ext4_xattr_ibody_inline_set(handle, inode, &i, &is);
357	if (error)
358		goto out;
359
360	EXT4_I(inode)->i_inline_off = (u16)((void *)is.s.here -
361				      (void *)ext4_raw_inode(&is.iloc));
362	EXT4_I(inode)->i_inline_size = EXT4_MIN_INLINE_DATA_SIZE +
363				le32_to_cpu(is.s.here->e_value_size);
364	ext4_set_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA);
365	get_bh(is.iloc.bh);
366	error = ext4_mark_iloc_dirty(handle, inode, &is.iloc);
367
368out:
369	kfree(value);
370	brelse(is.iloc.bh);
371	return error;
372}
373
374static int ext4_prepare_inline_data(handle_t *handle, struct inode *inode,
375				    unsigned int len)
376{
377	int ret, size;
378	struct ext4_inode_info *ei = EXT4_I(inode);
379
380	if (!ext4_test_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA))
381		return -ENOSPC;
382
383	size = ext4_get_max_inline_size(inode);
384	if (size < len)
385		return -ENOSPC;
386
387	down_write(&EXT4_I(inode)->xattr_sem);
388
389	if (ei->i_inline_off)
390		ret = ext4_update_inline_data(handle, inode, len);
391	else
392		ret = ext4_create_inline_data(handle, inode, len);
393
394	up_write(&EXT4_I(inode)->xattr_sem);
395
396	return ret;
397}
398
/*
 * Remove the inline data of @inode: delete the inline-data xattr entry,
 * zero i_block, clear EXT4_INODE_INLINE_DATA and reset the cached inline
 * offset/size. On filesystems with the extents feature, directories,
 * regular files and symlinks get EXT4_INODE_EXTENTS set and an empty
 * extent tree initialised.
 *
 * The "_nolock" suffix reflects that xattr_sem is not taken here.
 *
 * Returns 0 on success, if the inode has no inline data, or if the lookup
 * reports -ENODATA; otherwise a negative errno.
 */
static int ext4_destroy_inline_data_nolock(handle_t *handle,
					   struct inode *inode)
{
	struct ext4_inode_info *ei = EXT4_I(inode);
	struct ext4_xattr_ibody_find is = {
		.s = { .not_found = 0, },
	};
	/* A NULL value with zero length asks the xattr code to drop the entry. */
	struct ext4_xattr_info i = {
		.name_index = EXT4_XATTR_INDEX_SYSTEM,
		.name = EXT4_XATTR_SYSTEM_DATA,
		.value = NULL,
		.value_len = 0,
	};
	int error;

	if (!ei->i_inline_off)
		return 0;

	error = ext4_get_inode_loc(inode, &is.iloc);
	if (error)
		return error;

	error = ext4_xattr_ibody_find(inode, &i, &is);
	if (error)
		goto out;

	BUFFER_TRACE(is.iloc.bh, "get_write_access");
	error = ext4_journal_get_write_access(handle, is.iloc.bh);
	if (error)
		goto out;

	error = ext4_xattr_ibody_inline_set(handle, inode, &i, &is);
	if (error)
		goto out;

	/* i_block no longer holds data; clear it before it is reused. */
	memset((void *)ext4_raw_inode(&is.iloc)->i_block,
		0, EXT4_MIN_INLINE_DATA_SIZE);

	if (EXT4_HAS_INCOMPAT_FEATURE(inode->i_sb,
				      EXT4_FEATURE_INCOMPAT_EXTENTS)) {
		if (S_ISDIR(inode->i_mode) ||
		    S_ISREG(inode->i_mode) || S_ISLNK(inode->i_mode)) {
			ext4_set_inode_flag(inode, EXT4_INODE_EXTENTS);
			ext4_ext_tree_init(handle, inode);
		}
	}
	ext4_clear_inode_flag(inode, EXT4_INODE_INLINE_DATA);

	/* Extra reference: the buffer is still released at "out" below. */
	get_bh(is.iloc.bh);
	error = ext4_mark_iloc_dirty(handle, inode, &is.iloc);

	/* The in-memory state is reset even if marking dirty failed. */
	EXT4_I(inode)->i_inline_off = 0;
	EXT4_I(inode)->i_inline_size = 0;
	ext4_clear_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA);
out:
	brelse(is.iloc.bh);
	/* A missing entry means there is nothing left to destroy. */
	if (error == -ENODATA)
		error = 0;
	return error;
}
459
460static int ext4_read_inline_page(struct inode *inode, struct page *page)
461{
462	void *kaddr;
463	int ret = 0;
464	size_t len;
465	struct ext4_iloc iloc;
466
467	BUG_ON(!PageLocked(page));
468	BUG_ON(!ext4_has_inline_data(inode));
469	BUG_ON(page->index);
470
471	if (!EXT4_I(inode)->i_inline_off) {
472		ext4_warning(inode->i_sb, "inode %lu doesn't have inline data.",
473			     inode->i_ino);
474		goto out;
475	}
476
477	ret = ext4_get_inode_loc(inode, &iloc);
478	if (ret)
479		goto out;
480
481	len = min_t(size_t, ext4_get_inline_size(inode), i_size_read(inode));
482	kaddr = kmap_atomic(page);
483	ret = ext4_read_inline_data(inode, kaddr, len, &iloc);
484	flush_dcache_page(page);
485	kunmap_atomic(kaddr);
486	zero_user_segment(page, len, PAGE_CACHE_SIZE);
487	SetPageUptodate(page);
488	brelse(iloc.bh);
489
490out:
491	return ret;
492}
493
494int ext4_readpage_inline(struct inode *inode, struct page *page)
495{
496	int ret = 0;
497
498	down_read(&EXT4_I(inode)->xattr_sem);
499	if (!ext4_has_inline_data(inode)) {
500		up_read(&EXT4_I(inode)->xattr_sem);
501		return -EAGAIN;
502	}
503
504	/*
505	 * Current inline data can only exist in the 1st page,
506	 * So for all the other pages, just set them uptodate.
507	 */
508	if (!page->index)
509		ret = ext4_read_inline_page(inode, page);
510	else if (!PageUptodate(page)) {
511		zero_user_segment(page, 0, PAGE_CACHE_SIZE);
512		SetPageUptodate(page);
513	}
514
515	up_read(&EXT4_I(inode)->xattr_sem);
516
517	unlock_page(page);
518	return ret >= 0 ? 0 : ret;
519}
520
/*
 * Move the inline data of @inode into page 0 of @mapping and tear down the
 * inline storage, so that the data is backed by mapped blocks afterwards.
 *
 * A journal handle is started here and stopped before returning. If block
 * preparation fails, the failed write is truncated and, on -ENOSPC, the
 * whole sequence may be retried via ext4_should_retry_alloc().
 *
 * Returns 0 on success (also when the inode has no inline data, or someone
 * else converted it first) or a negative errno.
 */
static int ext4_convert_inline_data_to_extent(struct address_space *mapping,
					      struct inode *inode,
					      unsigned flags)
{
	int ret, needed_blocks;
	handle_t *handle = NULL;
	int retries = 0, sem_held = 0;
	struct page *page = NULL;
	unsigned from, to;
	struct ext4_iloc iloc;

	if (!ext4_has_inline_data(inode)) {
		/*
		 * clear the flag so that no new write
		 * will trap here again.
		 */
		ext4_clear_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA);
		return 0;
	}

	needed_blocks = ext4_writepage_trans_blocks(inode);

	/* iloc is only held across the conversion and released at "out". */
	ret = ext4_get_inode_loc(inode, &iloc);
	if (ret)
		return ret;

retry:
	handle = ext4_journal_start(inode, EXT4_HT_WRITE_PAGE, needed_blocks);
	if (IS_ERR(handle)) {
		ret = PTR_ERR(handle);
		handle = NULL;
		goto out;
	}

	/* We cannot recurse into the filesystem as the transaction is already
	 * started */
	flags |= AOP_FLAG_NOFS;

	page = grab_cache_page_write_begin(mapping, 0, flags);
	if (!page) {
		ret = -ENOMEM;
		goto out;
	}

	down_write(&EXT4_I(inode)->xattr_sem);
	/* Remember that "out" has to drop the semaphore. */
	sem_held = 1;
	/* If some one has already done this for us, just exit. */
	if (!ext4_has_inline_data(inode)) {
		ret = 0;
		goto out;
	}

	/* The range to map covers exactly the current inline data. */
	from = 0;
	to = ext4_get_inline_size(inode);
	if (!PageUptodate(page)) {
		ret = ext4_read_inline_page(inode, page);
		if (ret < 0)
			goto out;
	}

	/* The data now lives in the page; drop the inline copy. */
	ret = ext4_destroy_inline_data_nolock(handle, inode);
	if (ret)
		goto out;

	if (ext4_should_dioread_nolock(inode))
		ret = __block_write_begin(page, from, to, ext4_get_block_write);
	else
		ret = __block_write_begin(page, from, to, ext4_get_block);

	if (!ret && ext4_should_journal_data(inode)) {
		ret = ext4_walk_page_buffers(handle, page_buffers(page),
					     from, to, NULL,
					     do_journal_get_write_access);
	}

	if (ret) {
		/*
		 * Undo everything acquired in this attempt; page, sem_held
		 * and handle are reset so neither a retry nor "out" releases
		 * them a second time.
		 */
		unlock_page(page);
		page_cache_release(page);
		page = NULL;
		ext4_orphan_add(handle, inode);
		up_write(&EXT4_I(inode)->xattr_sem);
		sem_held = 0;
		ext4_journal_stop(handle);
		handle = NULL;
		ext4_truncate_failed_write(inode);
		/*
		 * If truncate failed early the inode might
		 * still be on the orphan list; we need to
		 * make sure the inode is removed from the
		 * orphan list in that case.
		 */
		if (inode->i_nlink)
			ext4_orphan_del(NULL, inode);
	}

	if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries))
		goto retry;

	/* page is only still set here when block preparation succeeded. */
	if (page)
		block_commit_write(page, from, to);
out:
	if (page) {
		unlock_page(page);
		page_cache_release(page);
	}
	if (sem_held)
		up_write(&EXT4_I(inode)->xattr_sem);
	if (handle)
		ext4_journal_stop(handle);
	brelse(iloc.bh);
	return ret;
}
633
634/*
635 * Try to write data in the inode.
636 * If the inode has inline data, check whether the new write can be
637 * in the inode also. If not, create the page the handle, move the data
638 * to the page make it update and let the later codes create extent for it.
639 */
640int ext4_try_to_write_inline_data(struct address_space *mapping,
641				  struct inode *inode,
642				  loff_t pos, unsigned len,
643				  unsigned flags,
644				  struct page **pagep)
645{
646	int ret;
647	handle_t *handle;
648	struct page *page;
649	struct ext4_iloc iloc;
650
651	if (pos + len > ext4_get_max_inline_size(inode))
652		goto convert;
653
654	ret = ext4_get_inode_loc(inode, &iloc);
655	if (ret)
656		return ret;
657
658	/*
659	 * The possible write could happen in the inode,
660	 * so try to reserve the space in inode first.
661	 */
662	handle = ext4_journal_start(inode, EXT4_HT_INODE, 1);
663	if (IS_ERR(handle)) {
664		ret = PTR_ERR(handle);
665		handle = NULL;
666		goto out;
667	}
668
669	ret = ext4_prepare_inline_data(handle, inode, pos + len);
670	if (ret && ret != -ENOSPC)
671		goto out;
672
673	/* We don't have space in inline inode, so convert it to extent. */
674	if (ret == -ENOSPC) {
675		ext4_journal_stop(handle);
676		brelse(iloc.bh);
677		goto convert;
678	}
679
680	flags |= AOP_FLAG_NOFS;
681
682	page = grab_cache_page_write_begin(mapping, 0, flags);
683	if (!page) {
684		ret = -ENOMEM;
685		goto out;
686	}
687
688	*pagep = page;
689	down_read(&EXT4_I(inode)->xattr_sem);
690	if (!ext4_has_inline_data(inode)) {
691		ret = 0;
692		unlock_page(page);
693		page_cache_release(page);
694		goto out_up_read;
695	}
696
697	if (!PageUptodate(page)) {
698		ret = ext4_read_inline_page(inode, page);
699		if (ret < 0)
700			goto out_up_read;
701	}
702
703	ret = 1;
704	handle = NULL;
705out_up_read:
706	up_read(&EXT4_I(inode)->xattr_sem);
707out:
708	if (handle)
709		ext4_journal_stop(handle);
710	brelse(iloc.bh);
711	return ret;
712convert:
713	return ext4_convert_inline_data_to_extent(mapping,
714						  inode, flags);
715}
716
717int ext4_write_inline_data_end(struct inode *inode, loff_t pos, unsigned len,
718			       unsigned copied, struct page *page)
719{
720	int ret;
721	void *kaddr;
722	struct ext4_iloc iloc;
723
724	if (unlikely(copied < len)) {
725		if (!PageUptodate(page)) {
726			copied = 0;
727			goto out;
728		}
729	}
730
731	ret = ext4_get_inode_loc(inode, &iloc);
732	if (ret) {
733		ext4_std_error(inode->i_sb, ret);
734		copied = 0;
735		goto out;
736	}
737
738	down_write(&EXT4_I(inode)->xattr_sem);
739	BUG_ON(!ext4_has_inline_data(inode));
740
741	kaddr = kmap_atomic(page);
742	ext4_write_inline_data(inode, &iloc, kaddr, pos, len);
743	kunmap_atomic(kaddr);
744	SetPageUptodate(page);
745	/* clear page dirty so that writepages wouldn't work for us. */
746	ClearPageDirty(page);
747
748	up_write(&EXT4_I(inode)->xattr_sem);
749	brelse(iloc.bh);
750out:
751	return copied;
752}
753
754struct buffer_head *
755ext4_journalled_write_inline_data(struct inode *inode,
756				  unsigned len,
757				  struct page *page)
758{
759	int ret;
760	void *kaddr;
761	struct ext4_iloc iloc;
762
763	ret = ext4_get_inode_loc(inode, &iloc);
764	if (ret) {
765		ext4_std_error(inode->i_sb, ret);
766		return NULL;
767	}
768
769	down_write(&EXT4_I(inode)->xattr_sem);
770	kaddr = kmap_atomic(page);
771	ext4_write_inline_data(inode, &iloc, kaddr, 0, len);
772	kunmap_atomic(kaddr);
773	up_write(&EXT4_I(inode)->xattr_sem);
774
775	return iloc.bh;
776}
777
778/*
779 * Try to make the page cache and handle ready for the inline data case.
780 * We can call this function in 2 cases:
781 * 1. The inode is created and the first write exceeds inline size. We can
782 *    clear the inode state safely.
783 * 2. The inode has inline data, then we need to read the data, make it
784 *    update and dirty so that ext4_da_writepages can handle it. We don't
785 *    need to start the journal since the file's metatdata isn't changed now.
786 */
787static int ext4_da_convert_inline_data_to_extent(struct address_space *mapping,
788						 struct inode *inode,
789						 unsigned flags,
790						 void **fsdata)
791{
792	int ret = 0, inline_size;
793	struct page *page;
794
795	page = grab_cache_page_write_begin(mapping, 0, flags);
796	if (!page)
797		return -ENOMEM;
798
799	down_read(&EXT4_I(inode)->xattr_sem);
800	if (!ext4_has_inline_data(inode)) {
801		ext4_clear_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA);
802		goto out;
803	}
804
805	inline_size = ext4_get_inline_size(inode);
806
807	if (!PageUptodate(page)) {
808		ret = ext4_read_inline_page(inode, page);
809		if (ret < 0)
810			goto out;
811	}
812
813	ret = __block_write_begin(page, 0, inline_size,
814				  ext4_da_get_block_prep);
815	if (ret) {
816		up_read(&EXT4_I(inode)->xattr_sem);
817		unlock_page(page);
818		page_cache_release(page);
819		ext4_truncate_failed_write(inode);
820		return ret;
821	}
822
823	SetPageDirty(page);
824	SetPageUptodate(page);
825	ext4_clear_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA);
826	*fsdata = (void *)CONVERT_INLINE_DATA;
827
828out:
829	up_read(&EXT4_I(inode)->xattr_sem);
830	if (page) {
831		unlock_page(page);
832		page_cache_release(page);
833	}
834	return ret;
835}
836
837/*
838 * Prepare the write for the inline data.
839 * If the the data can be written into the inode, we just read
840 * the page and make it uptodate, and start the journal.
841 * Otherwise read the page, makes it dirty so that it can be
842 * handle in writepages(the i_disksize update is left to the
843 * normal ext4_da_write_end).
844 */
845int ext4_da_write_inline_data_begin(struct address_space *mapping,
846				    struct inode *inode,
847				    loff_t pos, unsigned len,
848				    unsigned flags,
849				    struct page **pagep,
850				    void **fsdata)
851{
852	int ret, inline_size;
853	handle_t *handle;
854	struct page *page;
855	struct ext4_iloc iloc;
856	int retries;
857
858	ret = ext4_get_inode_loc(inode, &iloc);
859	if (ret)
860		return ret;
861
862retry_journal:
863	handle = ext4_journal_start(inode, EXT4_HT_INODE, 1);
864	if (IS_ERR(handle)) {
865		ret = PTR_ERR(handle);
866		goto out;
867	}
868
869	inline_size = ext4_get_max_inline_size(inode);
870
871	ret = -ENOSPC;
872	if (inline_size >= pos + len) {
873		ret = ext4_prepare_inline_data(handle, inode, pos + len);
874		if (ret && ret != -ENOSPC)
875			goto out_journal;
876	}
877
878	/*
879	 * We cannot recurse into the filesystem as the transaction
880	 * is already started.
881	 */
882	flags |= AOP_FLAG_NOFS;
883
884	if (ret == -ENOSPC) {
885		ret = ext4_da_convert_inline_data_to_extent(mapping,
886							    inode,
887							    flags,
888							    fsdata);
889		ext4_journal_stop(handle);
890		if (ret == -ENOSPC &&
891		    ext4_should_retry_alloc(inode->i_sb, &retries))
892			goto retry_journal;
893		goto out;
894	}
895
896
897	page = grab_cache_page_write_begin(mapping, 0, flags);
898	if (!page) {
899		ret = -ENOMEM;
900		goto out_journal;
901	}
902
903	down_read(&EXT4_I(inode)->xattr_sem);
904	if (!ext4_has_inline_data(inode)) {
905		ret = 0;
906		goto out_release_page;
907	}
908
909	if (!PageUptodate(page)) {
910		ret = ext4_read_inline_page(inode, page);
911		if (ret < 0)
912			goto out_release_page;
913	}
914
915	up_read(&EXT4_I(inode)->xattr_sem);
916	*pagep = page;
917	brelse(iloc.bh);
918	return 1;
919out_release_page:
920	up_read(&EXT4_I(inode)->xattr_sem);
921	unlock_page(page);
922	page_cache_release(page);
923out_journal:
924	ext4_journal_stop(handle);
925out:
926	brelse(iloc.bh);
927	return ret;
928}
929
930int ext4_da_write_inline_data_end(struct inode *inode, loff_t pos,
931				  unsigned len, unsigned copied,
932				  struct page *page)
933{
934	int i_size_changed = 0;
935
936	copied = ext4_write_inline_data_end(inode, pos, len, copied, page);
937
938	/*
939	 * No need to use i_size_read() here, the i_size
940	 * cannot change under us because we hold i_mutex.
941	 *
942	 * But it's important to update i_size while still holding page lock:
943	 * page writeout could otherwise come in and zero beyond i_size.
944	 */
945	if (pos+copied > inode->i_size) {
946		i_size_write(inode, pos+copied);
947		i_size_changed = 1;
948	}
949	unlock_page(page);
950	page_cache_release(page);
951
952	/*
953	 * Don't mark the inode dirty under page lock. First, it unnecessarily
954	 * makes the holding time of page lock longer. Second, it forces lock
955	 * ordering of page lock and transaction start for journaling
956	 * filesystems.
957	 */
958	if (i_size_changed)
959		mark_inode_dirty(inode);
960
961	return copied;
962}
963
/*
 * Debug helper, compiled only when INLINE_DIR_DEBUG is defined: dump every
 * directory entry found in [inline_start, inline_start + inline_size) via
 * trace_printk() and BUG() on the first entry that fails
 * ext4_check_dir_entry(). Without INLINE_DIR_DEBUG the name expands to
 * nothing.
 */
#ifdef INLINE_DIR_DEBUG
void ext4_show_inline_dir(struct inode *dir, struct buffer_head *bh,
			  void *inline_start, int inline_size)
{
	int offset;
	unsigned short de_len;
	struct ext4_dir_entry_2 *de = inline_start;
	void *dlimit = inline_start + inline_size;

	trace_printk("inode %lu\n", dir->i_ino);
	offset = 0;
	/* Walk the entries by their record length until the area ends. */
	while ((void *)de < dlimit) {
		de_len = ext4_rec_len_from_disk(de->rec_len, inline_size);
		trace_printk("de: off %u rlen %u name %.*s nlen %u ino %u\n",
			     offset, de_len, de->name_len, de->name,
			     de->name_len, le32_to_cpu(de->inode));
		if (ext4_check_dir_entry(dir, NULL, de, bh,
					 inline_start, inline_size, offset))
			BUG();

		offset += de_len;
		de = (struct ext4_dir_entry_2 *) ((char *) de + de_len);
	}
}
#else
#define ext4_show_inline_dir(dir, bh, inline_start, inline_size)
#endif
991
992/*
993 * Add a new entry into a inline dir.
994 * It will return -ENOSPC if no space is available, and -EIO
995 * and -EEXIST if directory entry already exists.
996 */
997static int ext4_add_dirent_to_inline(handle_t *handle,
998				     struct dentry *dentry,
999				     struct inode *inode,
1000				     struct ext4_iloc *iloc,
1001				     void *inline_start, int inline_size)
1002{
1003	struct inode	*dir = d_inode(dentry->d_parent);
1004	const char	*name = dentry->d_name.name;
1005	int		namelen = dentry->d_name.len;
1006	int		err;
1007	struct ext4_dir_entry_2 *de;
1008
1009	err = ext4_find_dest_de(dir, inode, iloc->bh,
1010				inline_start, inline_size,
1011				name, namelen, &de);
1012	if (err)
1013		return err;
1014
1015	BUFFER_TRACE(iloc->bh, "get_write_access");
1016	err = ext4_journal_get_write_access(handle, iloc->bh);
1017	if (err)
1018		return err;
1019	ext4_insert_dentry(dir, inode, de, inline_size, &dentry->d_name,
1020			   name, namelen);
1021
1022	ext4_show_inline_dir(dir, iloc->bh, inline_start, inline_size);
1023
1024	/*
1025	 * XXX shouldn't update any times until successful
1026	 * completion of syscall, but too many callers depend
1027	 * on this.
1028	 *
1029	 * XXX similarly, too many callers depend on
1030	 * ext4_new_inode() setting the times, but error
1031	 * recovery deletes the inode, so the worst that can
1032	 * happen is that the times are slightly out of date
1033	 * and/or different from the directory change time.
1034	 */
1035	dir->i_mtime = dir->i_ctime = ext4_current_time(dir);
1036	ext4_update_dx_flag(dir);
1037	dir->i_version++;
1038	ext4_mark_inode_dirty(handle, dir);
1039	return 1;
1040}
1041
1042static void *ext4_get_inline_xattr_pos(struct inode *inode,
1043				       struct ext4_iloc *iloc)
1044{
1045	struct ext4_xattr_entry *entry;
1046	struct ext4_xattr_ibody_header *header;
1047
1048	BUG_ON(!EXT4_I(inode)->i_inline_off);
1049
1050	header = IHDR(inode, ext4_raw_inode(iloc));
1051	entry = (struct ext4_xattr_entry *)((void *)ext4_raw_inode(iloc) +
1052					    EXT4_I(inode)->i_inline_off);
1053
1054	return (void *)IFIRST(header) + le16_to_cpu(entry->e_value_offs);
1055}
1056
1057/* Set the final de to cover the whole block. */
1058static void ext4_update_final_de(void *de_buf, int old_size, int new_size)
1059{
1060	struct ext4_dir_entry_2 *de, *prev_de;
1061	void *limit;
1062	int de_len;
1063
1064	de = (struct ext4_dir_entry_2 *)de_buf;
1065	if (old_size) {
1066		limit = de_buf + old_size;
1067		do {
1068			prev_de = de;
1069			de_len = ext4_rec_len_from_disk(de->rec_len, old_size);
1070			de_buf += de_len;
1071			de = (struct ext4_dir_entry_2 *)de_buf;
1072		} while (de_buf < limit);
1073
1074		prev_de->rec_len = ext4_rec_len_to_disk(de_len + new_size -
1075							old_size, new_size);
1076	} else {
1077		/* this is just created, so create an empty entry. */
1078		de->inode = 0;
1079		de->rec_len = ext4_rec_len_to_disk(new_size, new_size);
1080	}
1081}
1082
1083static int ext4_update_inline_dir(handle_t *handle, struct inode *dir,
1084				  struct ext4_iloc *iloc)
1085{
1086	int ret;
1087	int old_size = EXT4_I(dir)->i_inline_size - EXT4_MIN_INLINE_DATA_SIZE;
1088	int new_size = get_max_inline_xattr_value_size(dir, iloc);
1089
1090	if (new_size - old_size <= EXT4_DIR_REC_LEN(1))
1091		return -ENOSPC;
1092
1093	ret = ext4_update_inline_data(handle, dir,
1094				      new_size + EXT4_MIN_INLINE_DATA_SIZE);
1095	if (ret)
1096		return ret;
1097
1098	ext4_update_final_de(ext4_get_inline_xattr_pos(dir, iloc), old_size,
1099			     EXT4_I(dir)->i_inline_size -
1100						EXT4_MIN_INLINE_DATA_SIZE);
1101	dir->i_size = EXT4_I(dir)->i_disksize = EXT4_I(dir)->i_inline_size;
1102	return 0;
1103}
1104
1105static void ext4_restore_inline_data(handle_t *handle, struct inode *inode,
1106				     struct ext4_iloc *iloc,
1107				     void *buf, int inline_size)
1108{
1109	ext4_create_inline_data(handle, inode, inline_size);
1110	ext4_write_inline_data(inode, iloc, buf, 0, inline_size);
1111	ext4_set_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA);
1112}
1113
1114static int ext4_finish_convert_inline_dir(handle_t *handle,
1115					  struct inode *inode,
1116					  struct buffer_head *dir_block,
1117					  void *buf,
1118					  int inline_size)
1119{
1120	int err, csum_size = 0, header_size = 0;
1121	struct ext4_dir_entry_2 *de;
1122	struct ext4_dir_entry_tail *t;
1123	void *target = dir_block->b_data;
1124
1125	/*
1126	 * First create "." and ".." and then copy the dir information
1127	 * back to the block.
1128	 */
1129	de = (struct ext4_dir_entry_2 *)target;
1130	de = ext4_init_dot_dotdot(inode, de,
1131		inode->i_sb->s_blocksize, csum_size,
1132		le32_to_cpu(((struct ext4_dir_entry_2 *)buf)->inode), 1);
1133	header_size = (void *)de - target;
1134
1135	memcpy((void *)de, buf + EXT4_INLINE_DOTDOT_SIZE,
1136		inline_size - EXT4_INLINE_DOTDOT_SIZE);
1137
1138	if (ext4_has_metadata_csum(inode->i_sb))
1139		csum_size = sizeof(struct ext4_dir_entry_tail);
1140
1141	inode->i_size = inode->i_sb->s_blocksize;
1142	i_size_write(inode, inode->i_sb->s_blocksize);
1143	EXT4_I(inode)->i_disksize = inode->i_sb->s_blocksize;
1144	ext4_update_final_de(dir_block->b_data,
1145			inline_size - EXT4_INLINE_DOTDOT_SIZE + header_size,
1146			inode->i_sb->s_blocksize - csum_size);
1147
1148	if (csum_size) {
1149		t = EXT4_DIRENT_TAIL(dir_block->b_data,
1150				     inode->i_sb->s_blocksize);
1151		initialize_dirent_tail(t, inode->i_sb->s_blocksize);
1152	}
1153	set_buffer_uptodate(dir_block);
1154	err = ext4_handle_dirty_dirent_node(handle, inode, dir_block);
1155	if (err)
1156		goto out;
1157	set_buffer_verified(dir_block);
1158out:
1159	return err;
1160}
1161
1162static int ext4_convert_inline_data_nolock(handle_t *handle,
1163					   struct inode *inode,
1164					   struct ext4_iloc *iloc)
1165{
1166	int error;
1167	void *buf = NULL;
1168	struct buffer_head *data_bh = NULL;
1169	struct ext4_map_blocks map;
1170	int inline_size;
1171
1172	inline_size = ext4_get_inline_size(inode);
1173	buf = kmalloc(inline_size, GFP_NOFS);
1174	if (!buf) {
1175		error = -ENOMEM;
1176		goto out;
1177	}
1178
1179	error = ext4_read_inline_data(inode, buf, inline_size, iloc);
1180	if (error < 0)
1181		goto out;
1182
1183	/*
1184	 * Make sure the inline directory entries pass checks before we try to
1185	 * convert them, so that we avoid touching stuff that needs fsck.
1186	 */
1187	if (S_ISDIR(inode->i_mode)) {
1188		error = ext4_check_all_de(inode, iloc->bh,
1189					buf + EXT4_INLINE_DOTDOT_SIZE,
1190					inline_size - EXT4_INLINE_DOTDOT_SIZE);
1191		if (error)
1192			goto out;
1193	}
1194
1195	error = ext4_destroy_inline_data_nolock(handle, inode);
1196	if (error)
1197		goto out;
1198
1199	map.m_lblk = 0;
1200	map.m_len = 1;
1201	map.m_flags = 0;
1202	error = ext4_map_blocks(handle, inode, &map, EXT4_GET_BLOCKS_CREATE);
1203	if (error < 0)
1204		goto out_restore;
1205	if (!(map.m_flags & EXT4_MAP_MAPPED)) {
1206		error = -EIO;
1207		goto out_restore;
1208	}
1209
1210	data_bh = sb_getblk(inode->i_sb, map.m_pblk);
1211	if (!data_bh) {
1212		error = -ENOMEM;
1213		goto out_restore;
1214	}
1215
1216	lock_buffer(data_bh);
1217	error = ext4_journal_get_create_access(handle, data_bh);
1218	if (error) {
1219		unlock_buffer(data_bh);
1220		error = -EIO;
1221		goto out_restore;
1222	}
1223	memset(data_bh->b_data, 0, inode->i_sb->s_blocksize);
1224
1225	if (!S_ISDIR(inode->i_mode)) {
1226		memcpy(data_bh->b_data, buf, inline_size);
1227		set_buffer_uptodate(data_bh);
1228		error = ext4_handle_dirty_metadata(handle,
1229						   inode, data_bh);
1230	} else {
1231		error = ext4_finish_convert_inline_dir(handle, inode, data_bh,
1232						       buf, inline_size);
1233	}
1234
1235	unlock_buffer(data_bh);
1236out_restore:
1237	if (error)
1238		ext4_restore_inline_data(handle, inode, iloc, buf, inline_size);
1239
1240out:
1241	brelse(data_bh);
1242	kfree(buf);
1243	return error;
1244}
1245
1246/*
1247 * Try to add the new entry to the inline data.
1248 * If succeeds, return 0. If not, extended the inline dir and copied data to
1249 * the new created block.
1250 */
1251int ext4_try_add_inline_entry(handle_t *handle, struct dentry *dentry,
1252			      struct inode *inode)
1253{
1254	int ret, inline_size;
1255	void *inline_start;
1256	struct ext4_iloc iloc;
1257	struct inode *dir = d_inode(dentry->d_parent);
1258
1259	ret = ext4_get_inode_loc(dir, &iloc);
1260	if (ret)
1261		return ret;
1262
1263	down_write(&EXT4_I(dir)->xattr_sem);
1264	if (!ext4_has_inline_data(dir))
1265		goto out;
1266
1267	inline_start = (void *)ext4_raw_inode(&iloc)->i_block +
1268						 EXT4_INLINE_DOTDOT_SIZE;
1269	inline_size = EXT4_MIN_INLINE_DATA_SIZE - EXT4_INLINE_DOTDOT_SIZE;
1270
1271	ret = ext4_add_dirent_to_inline(handle, dentry, inode, &iloc,
1272					inline_start, inline_size);
1273	if (ret != -ENOSPC)
1274		goto out;
1275
1276	/* check whether it can be inserted to inline xattr space. */
1277	inline_size = EXT4_I(dir)->i_inline_size -
1278			EXT4_MIN_INLINE_DATA_SIZE;
1279	if (!inline_size) {
1280		/* Try to use the xattr space.*/
1281		ret = ext4_update_inline_dir(handle, dir, &iloc);
1282		if (ret && ret != -ENOSPC)
1283			goto out;
1284
1285		inline_size = EXT4_I(dir)->i_inline_size -
1286				EXT4_MIN_INLINE_DATA_SIZE;
1287	}
1288
1289	if (inline_size) {
1290		inline_start = ext4_get_inline_xattr_pos(dir, &iloc);
1291
1292		ret = ext4_add_dirent_to_inline(handle, dentry, inode, &iloc,
1293						inline_start, inline_size);
1294
1295		if (ret != -ENOSPC)
1296			goto out;
1297	}
1298
1299	/*
1300	 * The inline space is filled up, so create a new block for it.
1301	 * As the extent tree will be created, we have to save the inline
1302	 * dir first.
1303	 */
1304	ret = ext4_convert_inline_data_nolock(handle, dir, &iloc);
1305
1306out:
1307	ext4_mark_inode_dirty(handle, dir);
1308	up_write(&EXT4_I(dir)->xattr_sem);
1309	brelse(iloc.bh);
1310	return ret;
1311}
1312
1313/*
1314 * This function fills a red-black tree with information from an
1315 * inlined dir.  It returns the number directory entries loaded
1316 * into the tree.  If there is an error it is returned in err.
1317 */
1318int htree_inlinedir_to_tree(struct file *dir_file,
1319			    struct inode *dir, ext4_lblk_t block,
1320			    struct dx_hash_info *hinfo,
1321			    __u32 start_hash, __u32 start_minor_hash,
1322			    int *has_inline_data)
1323{
1324	int err = 0, count = 0;
1325	unsigned int parent_ino;
1326	int pos;
1327	struct ext4_dir_entry_2 *de;
1328	struct inode *inode = file_inode(dir_file);
1329	int ret, inline_size = 0;
1330	struct ext4_iloc iloc;
1331	void *dir_buf = NULL;
1332	struct ext4_dir_entry_2 fake;
1333	struct ext4_str tmp_str;
1334
1335	ret = ext4_get_inode_loc(inode, &iloc);
1336	if (ret)
1337		return ret;
1338
1339	down_read(&EXT4_I(inode)->xattr_sem);
1340	if (!ext4_has_inline_data(inode)) {
1341		up_read(&EXT4_I(inode)->xattr_sem);
1342		*has_inline_data = 0;
1343		goto out;
1344	}
1345
1346	inline_size = ext4_get_inline_size(inode);
1347	dir_buf = kmalloc(inline_size, GFP_NOFS);
1348	if (!dir_buf) {
1349		ret = -ENOMEM;
1350		up_read(&EXT4_I(inode)->xattr_sem);
1351		goto out;
1352	}
1353
1354	ret = ext4_read_inline_data(inode, dir_buf, inline_size, &iloc);
1355	up_read(&EXT4_I(inode)->xattr_sem);
1356	if (ret < 0)
1357		goto out;
1358
1359	pos = 0;
1360	parent_ino = le32_to_cpu(((struct ext4_dir_entry_2 *)dir_buf)->inode);
1361	while (pos < inline_size) {
1362		/*
1363		 * As inlined dir doesn't store any information about '.' and
1364		 * only the inode number of '..' is stored, we have to handle
1365		 * them differently.
1366		 */
1367		if (pos == 0) {
1368			fake.inode = cpu_to_le32(inode->i_ino);
1369			fake.name_len = 1;
1370			strcpy(fake.name, ".");
1371			fake.rec_len = ext4_rec_len_to_disk(
1372						EXT4_DIR_REC_LEN(fake.name_len),
1373						inline_size);
1374			ext4_set_de_type(inode->i_sb, &fake, S_IFDIR);
1375			de = &fake;
1376			pos = EXT4_INLINE_DOTDOT_OFFSET;
1377		} else if (pos == EXT4_INLINE_DOTDOT_OFFSET) {
1378			fake.inode = cpu_to_le32(parent_ino);
1379			fake.name_len = 2;
1380			strcpy(fake.name, "..");
1381			fake.rec_len = ext4_rec_len_to_disk(
1382						EXT4_DIR_REC_LEN(fake.name_len),
1383						inline_size);
1384			ext4_set_de_type(inode->i_sb, &fake, S_IFDIR);
1385			de = &fake;
1386			pos = EXT4_INLINE_DOTDOT_SIZE;
1387		} else {
1388			de = (struct ext4_dir_entry_2 *)(dir_buf + pos);
1389			pos += ext4_rec_len_from_disk(de->rec_len, inline_size);
1390			if (ext4_check_dir_entry(inode, dir_file, de,
1391					 iloc.bh, dir_buf,
1392					 inline_size, pos)) {
1393				ret = count;
1394				goto out;
1395			}
1396		}
1397
1398		ext4fs_dirhash(de->name, de->name_len, hinfo);
1399		if ((hinfo->hash < start_hash) ||
1400		    ((hinfo->hash == start_hash) &&
1401		     (hinfo->minor_hash < start_minor_hash)))
1402			continue;
1403		if (de->inode == 0)
1404			continue;
1405		tmp_str.name = de->name;
1406		tmp_str.len = de->name_len;
1407		err = ext4_htree_store_dirent(dir_file, hinfo->hash,
1408					      hinfo->minor_hash, de, &tmp_str);
1409		if (err) {
1410			count = err;
1411			goto out;
1412		}
1413		count++;
1414	}
1415	ret = count;
1416out:
1417	kfree(dir_buf);
1418	brelse(iloc.bh);
1419	return ret;
1420}
1421
/*
 * So this function is called when the volume is mkfsed with
 * dir_index disabled. In order to keep f_pos persistent
 * after we convert from an inlined dir to a block based one,
 * we just pretend that we are a normal dir and return the
 * offset as if '.' and '..' really took up space.
 *
 * Emits the entries of an inline directory through @ctx, starting at
 * ctx->pos.  If the inode has no inline data, *has_inline_data is
 * cleared and nothing is emitted.
 *
 * Returns 0, or a negative error code if the inode location cannot be
 * obtained, the temporary buffer cannot be allocated, or the inline
 * data cannot be read.  A bad directory entry or a full dir_emit()
 * simply stops the walk; 0 is still returned.
 */
int ext4_read_inline_dir(struct file *file,
			 struct dir_context *ctx,
			 int *has_inline_data)
{
	unsigned int offset, parent_ino;
	int i;
	struct ext4_dir_entry_2 *de;
	struct super_block *sb;
	struct inode *inode = file_inode(file);
	int ret, inline_size = 0;
	struct ext4_iloc iloc;
	void *dir_buf = NULL;
	int dotdot_offset, dotdot_size, extra_offset, extra_size;

	ret = ext4_get_inode_loc(inode, &iloc);
	if (ret)
		return ret;

	down_read(&EXT4_I(inode)->xattr_sem);
	if (!ext4_has_inline_data(inode)) {
		up_read(&EXT4_I(inode)->xattr_sem);
		*has_inline_data = 0;
		goto out;
	}

	/* Copy the inline data so the walk below runs without the lock. */
	inline_size = ext4_get_inline_size(inode);
	dir_buf = kmalloc(inline_size, GFP_NOFS);
	if (!dir_buf) {
		ret = -ENOMEM;
		up_read(&EXT4_I(inode)->xattr_sem);
		goto out;
	}

	ret = ext4_read_inline_data(inode, dir_buf, inline_size, &iloc);
	up_read(&EXT4_I(inode)->xattr_sem);
	if (ret < 0)
		goto out;

	ret = 0;
	sb = inode->i_sb;
	/* The first 4 bytes of the inline data hold the inode of "..". */
	parent_ino = le32_to_cpu(((struct ext4_dir_entry_2 *)dir_buf)->inode);
	offset = ctx->pos;

	/*
	 * dotdot_offset is the offset ".." would have and dotdot_size is
	 * the total size "." and ".." would take if the dir were block
	 * based, while in the inline data they only take
	 * EXT4_INLINE_DOTDOT_SIZE bytes.
	 * So extra_offset is the difference between a reported position
	 * and the real position in dir_buf, and extra_size is the end of
	 * the directory in reported positions.
	 */
	dotdot_offset = EXT4_DIR_REC_LEN(1);
	dotdot_size = dotdot_offset + EXT4_DIR_REC_LEN(2);
	extra_offset = dotdot_size - EXT4_INLINE_DOTDOT_SIZE;
	extra_size = extra_offset + inline_size;

	/*
	 * If the version has changed since the last call to
	 * readdir(2), then we might be pointing to an invalid
	 * dirent right now.  Scan from the start of the inline
	 * dir to make sure.
	 */
	if (file->f_version != inode->i_version) {
		for (i = 0; i < extra_size && i < offset;) {
			/*
			 * "." is with offset 0 and
			 * ".." is dotdot_offset.
			 */
			if (!i) {
				i = dotdot_offset;
				continue;
			} else if (i == dotdot_offset) {
				i = dotdot_size;
				continue;
			}
			/* for other entry, the real offset in
			 * the buf has to be tuned accordingly.
			 */
			de = (struct ext4_dir_entry_2 *)
				(dir_buf + i - extra_offset);
			/* It's too expensive to do a full
			 * dirent test each time round this
			 * loop, but we do have to test at
			 * least that it is non-zero.  A
			 * failure will be detected in the
			 * dirent test below. */
			if (ext4_rec_len_from_disk(de->rec_len, extra_size)
				< EXT4_DIR_REC_LEN(1))
				break;
			i += ext4_rec_len_from_disk(de->rec_len,
						    extra_size);
		}
		/* Resume from the entry boundary found by the scan. */
		offset = i;
		ctx->pos = offset;
		file->f_version = inode->i_version;
	}

	while (ctx->pos < extra_size) {
		/* "." is synthesized; it is not stored in the inline data. */
		if (ctx->pos == 0) {
			if (!dir_emit(ctx, ".", 1, inode->i_ino, DT_DIR))
				goto out;
			ctx->pos = dotdot_offset;
			continue;
		}

		/* ".." is synthesized from the stored parent inode number. */
		if (ctx->pos == dotdot_offset) {
			if (!dir_emit(ctx, "..", 2, parent_ino, DT_DIR))
				goto out;
			ctx->pos = dotdot_size;
			continue;
		}

		/* Translate the reported position into a dir_buf offset. */
		de = (struct ext4_dir_entry_2 *)
			(dir_buf + ctx->pos - extra_offset);
		if (ext4_check_dir_entry(inode, file, de, iloc.bh, dir_buf,
					 extra_size, ctx->pos))
			goto out;
		/* Entries with a zero inode number are not emitted. */
		if (le32_to_cpu(de->inode)) {
			if (!dir_emit(ctx, de->name, de->name_len,
				      le32_to_cpu(de->inode),
				      get_dtype(sb, de->file_type)))
				goto out;
		}
		ctx->pos += ext4_rec_len_from_disk(de->rec_len, extra_size);
	}
out:
	kfree(dir_buf);
	brelse(iloc.bh);
	return ret;
}
1559
1560struct buffer_head *ext4_get_first_inline_block(struct inode *inode,
1561					struct ext4_dir_entry_2 **parent_de,
1562					int *retval)
1563{
1564	struct ext4_iloc iloc;
1565
1566	*retval = ext4_get_inode_loc(inode, &iloc);
1567	if (*retval)
1568		return NULL;
1569
1570	*parent_de = (struct ext4_dir_entry_2 *)ext4_raw_inode(&iloc)->i_block;
1571
1572	return iloc.bh;
1573}
1574
1575/*
1576 * Try to create the inline data for the new dir.
1577 * If it succeeds, return 0, otherwise return the error.
1578 * In case of ENOSPC, the caller should create the normal disk layout dir.
1579 */
1580int ext4_try_create_inline_dir(handle_t *handle, struct inode *parent,
1581			       struct inode *inode)
1582{
1583	int ret, inline_size = EXT4_MIN_INLINE_DATA_SIZE;
1584	struct ext4_iloc iloc;
1585	struct ext4_dir_entry_2 *de;
1586
1587	ret = ext4_get_inode_loc(inode, &iloc);
1588	if (ret)
1589		return ret;
1590
1591	ret = ext4_prepare_inline_data(handle, inode, inline_size);
1592	if (ret)
1593		goto out;
1594
1595	/*
1596	 * For inline dir, we only save the inode information for the ".."
1597	 * and create a fake dentry to cover the left space.
1598	 */
1599	de = (struct ext4_dir_entry_2 *)ext4_raw_inode(&iloc)->i_block;
1600	de->inode = cpu_to_le32(parent->i_ino);
1601	de = (struct ext4_dir_entry_2 *)((void *)de + EXT4_INLINE_DOTDOT_SIZE);
1602	de->inode = 0;
1603	de->rec_len = ext4_rec_len_to_disk(
1604				inline_size - EXT4_INLINE_DOTDOT_SIZE,
1605				inline_size);
1606	set_nlink(inode, 2);
1607	inode->i_size = EXT4_I(inode)->i_disksize = inline_size;
1608out:
1609	brelse(iloc.bh);
1610	return ret;
1611}
1612
1613struct buffer_head *ext4_find_inline_entry(struct inode *dir,
1614					const struct qstr *d_name,
1615					struct ext4_dir_entry_2 **res_dir,
1616					int *has_inline_data)
1617{
1618	int ret;
1619	struct ext4_iloc iloc;
1620	void *inline_start;
1621	int inline_size;
1622
1623	if (ext4_get_inode_loc(dir, &iloc))
1624		return NULL;
1625
1626	down_read(&EXT4_I(dir)->xattr_sem);
1627	if (!ext4_has_inline_data(dir)) {
1628		*has_inline_data = 0;
1629		goto out;
1630	}
1631
1632	inline_start = (void *)ext4_raw_inode(&iloc)->i_block +
1633						EXT4_INLINE_DOTDOT_SIZE;
1634	inline_size = EXT4_MIN_INLINE_DATA_SIZE - EXT4_INLINE_DOTDOT_SIZE;
1635	ret = search_dir(iloc.bh, inline_start, inline_size,
1636			 dir, d_name, 0, res_dir);
1637	if (ret == 1)
1638		goto out_find;
1639	if (ret < 0)
1640		goto out;
1641
1642	if (ext4_get_inline_size(dir) == EXT4_MIN_INLINE_DATA_SIZE)
1643		goto out;
1644
1645	inline_start = ext4_get_inline_xattr_pos(dir, &iloc);
1646	inline_size = ext4_get_inline_size(dir) - EXT4_MIN_INLINE_DATA_SIZE;
1647
1648	ret = search_dir(iloc.bh, inline_start, inline_size,
1649			 dir, d_name, 0, res_dir);
1650	if (ret == 1)
1651		goto out_find;
1652
1653out:
1654	brelse(iloc.bh);
1655	iloc.bh = NULL;
1656out_find:
1657	up_read(&EXT4_I(dir)->xattr_sem);
1658	return iloc.bh;
1659}
1660
1661int ext4_delete_inline_entry(handle_t *handle,
1662			     struct inode *dir,
1663			     struct ext4_dir_entry_2 *de_del,
1664			     struct buffer_head *bh,
1665			     int *has_inline_data)
1666{
1667	int err, inline_size;
1668	struct ext4_iloc iloc;
1669	void *inline_start;
1670
1671	err = ext4_get_inode_loc(dir, &iloc);
1672	if (err)
1673		return err;
1674
1675	down_write(&EXT4_I(dir)->xattr_sem);
1676	if (!ext4_has_inline_data(dir)) {
1677		*has_inline_data = 0;
1678		goto out;
1679	}
1680
1681	if ((void *)de_del - ((void *)ext4_raw_inode(&iloc)->i_block) <
1682		EXT4_MIN_INLINE_DATA_SIZE) {
1683		inline_start = (void *)ext4_raw_inode(&iloc)->i_block +
1684					EXT4_INLINE_DOTDOT_SIZE;
1685		inline_size = EXT4_MIN_INLINE_DATA_SIZE -
1686				EXT4_INLINE_DOTDOT_SIZE;
1687	} else {
1688		inline_start = ext4_get_inline_xattr_pos(dir, &iloc);
1689		inline_size = ext4_get_inline_size(dir) -
1690				EXT4_MIN_INLINE_DATA_SIZE;
1691	}
1692
1693	BUFFER_TRACE(bh, "get_write_access");
1694	err = ext4_journal_get_write_access(handle, bh);
1695	if (err)
1696		goto out;
1697
1698	err = ext4_generic_delete_entry(handle, dir, de_del, bh,
1699					inline_start, inline_size, 0);
1700	if (err)
1701		goto out;
1702
1703	BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata");
1704	err = ext4_mark_inode_dirty(handle, dir);
1705	if (unlikely(err))
1706		goto out;
1707
1708	ext4_show_inline_dir(dir, iloc.bh, inline_start, inline_size);
1709out:
1710	up_write(&EXT4_I(dir)->xattr_sem);
1711	brelse(iloc.bh);
1712	if (err != -ENOENT)
1713		ext4_std_error(dir->i_sb, err);
1714	return err;
1715}
1716
1717/*
1718 * Get the inline dentry at offset.
1719 */
1720static inline struct ext4_dir_entry_2 *
1721ext4_get_inline_entry(struct inode *inode,
1722		      struct ext4_iloc *iloc,
1723		      unsigned int offset,
1724		      void **inline_start,
1725		      int *inline_size)
1726{
1727	void *inline_pos;
1728
1729	BUG_ON(offset > ext4_get_inline_size(inode));
1730
1731	if (offset < EXT4_MIN_INLINE_DATA_SIZE) {
1732		inline_pos = (void *)ext4_raw_inode(iloc)->i_block;
1733		*inline_size = EXT4_MIN_INLINE_DATA_SIZE;
1734	} else {
1735		inline_pos = ext4_get_inline_xattr_pos(inode, iloc);
1736		offset -= EXT4_MIN_INLINE_DATA_SIZE;
1737		*inline_size = ext4_get_inline_size(inode) -
1738				EXT4_MIN_INLINE_DATA_SIZE;
1739	}
1740
1741	if (inline_start)
1742		*inline_start = inline_pos;
1743	return (struct ext4_dir_entry_2 *)(inline_pos + offset);
1744}
1745
1746int empty_inline_dir(struct inode *dir, int *has_inline_data)
1747{
1748	int err, inline_size;
1749	struct ext4_iloc iloc;
1750	void *inline_pos;
1751	unsigned int offset;
1752	struct ext4_dir_entry_2 *de;
1753	int ret = 1;
1754
1755	err = ext4_get_inode_loc(dir, &iloc);
1756	if (err) {
1757		EXT4_ERROR_INODE(dir, "error %d getting inode %lu block",
1758				 err, dir->i_ino);
1759		return 1;
1760	}
1761
1762	down_read(&EXT4_I(dir)->xattr_sem);
1763	if (!ext4_has_inline_data(dir)) {
1764		*has_inline_data = 0;
1765		goto out;
1766	}
1767
1768	de = (struct ext4_dir_entry_2 *)ext4_raw_inode(&iloc)->i_block;
1769	if (!le32_to_cpu(de->inode)) {
1770		ext4_warning(dir->i_sb,
1771			     "bad inline directory (dir #%lu) - no `..'",
1772			     dir->i_ino);
1773		ret = 1;
1774		goto out;
1775	}
1776
1777	offset = EXT4_INLINE_DOTDOT_SIZE;
1778	while (offset < dir->i_size) {
1779		de = ext4_get_inline_entry(dir, &iloc, offset,
1780					   &inline_pos, &inline_size);
1781		if (ext4_check_dir_entry(dir, NULL, de,
1782					 iloc.bh, inline_pos,
1783					 inline_size, offset)) {
1784			ext4_warning(dir->i_sb,
1785				     "bad inline directory (dir #%lu) - "
1786				     "inode %u, rec_len %u, name_len %d"
1787				     "inline size %d\n",
1788				     dir->i_ino, le32_to_cpu(de->inode),
1789				     le16_to_cpu(de->rec_len), de->name_len,
1790				     inline_size);
1791			ret = 1;
1792			goto out;
1793		}
1794		if (le32_to_cpu(de->inode)) {
1795			ret = 0;
1796			goto out;
1797		}
1798		offset += ext4_rec_len_from_disk(de->rec_len, inline_size);
1799	}
1800
1801out:
1802	up_read(&EXT4_I(dir)->xattr_sem);
1803	brelse(iloc.bh);
1804	return ret;
1805}
1806
1807int ext4_destroy_inline_data(handle_t *handle, struct inode *inode)
1808{
1809	int ret;
1810
1811	down_write(&EXT4_I(inode)->xattr_sem);
1812	ret = ext4_destroy_inline_data_nolock(handle, inode);
1813	up_write(&EXT4_I(inode)->xattr_sem);
1814
1815	return ret;
1816}
1817
1818int ext4_inline_data_fiemap(struct inode *inode,
1819			    struct fiemap_extent_info *fieinfo,
1820			    int *has_inline, __u64 start, __u64 len)
1821{
1822	__u64 physical = 0;
1823	__u64 inline_len;
1824	__u32 flags = FIEMAP_EXTENT_DATA_INLINE | FIEMAP_EXTENT_NOT_ALIGNED |
1825		FIEMAP_EXTENT_LAST;
1826	int error = 0;
1827	struct ext4_iloc iloc;
1828
1829	down_read(&EXT4_I(inode)->xattr_sem);
1830	if (!ext4_has_inline_data(inode)) {
1831		*has_inline = 0;
1832		goto out;
1833	}
1834	inline_len = min_t(size_t, ext4_get_inline_size(inode),
1835			   i_size_read(inode));
1836	if (start >= inline_len)
1837		goto out;
1838	if (start + len < inline_len)
1839		inline_len = start + len;
1840	inline_len -= start;
1841
1842	error = ext4_get_inode_loc(inode, &iloc);
1843	if (error)
1844		goto out;
1845
1846	physical = (__u64)iloc.bh->b_blocknr << inode->i_sb->s_blocksize_bits;
1847	physical += (char *)ext4_raw_inode(&iloc) - iloc.bh->b_data;
1848	physical += offsetof(struct ext4_inode, i_block);
1849
1850	if (physical)
1851		error = fiemap_fill_next_extent(fieinfo, start, physical,
1852						inline_len, flags);
1853	brelse(iloc.bh);
1854out:
1855	up_read(&EXT4_I(inode)->xattr_sem);
1856	return (error < 0 ? error : 0);
1857}
1858
1859/*
1860 * Called during xattr set, and if we can sparse space 'needed',
1861 * just create the extent tree evict the data to the outer block.
1862 *
1863 * We use jbd2 instead of page cache to move data to the 1st block
1864 * so that the whole transaction can be committed as a whole and
1865 * the data isn't lost because of the delayed page cache write.
1866 */
1867int ext4_try_to_evict_inline_data(handle_t *handle,
1868				  struct inode *inode,
1869				  int needed)
1870{
1871	int error;
1872	struct ext4_xattr_entry *entry;
1873	struct ext4_inode *raw_inode;
1874	struct ext4_iloc iloc;
1875
1876	error = ext4_get_inode_loc(inode, &iloc);
1877	if (error)
1878		return error;
1879
1880	raw_inode = ext4_raw_inode(&iloc);
1881	entry = (struct ext4_xattr_entry *)((void *)raw_inode +
1882					    EXT4_I(inode)->i_inline_off);
1883	if (EXT4_XATTR_LEN(entry->e_name_len) +
1884	    EXT4_XATTR_SIZE(le32_to_cpu(entry->e_value_size)) < needed) {
1885		error = -ENOSPC;
1886		goto out;
1887	}
1888
1889	error = ext4_convert_inline_data_nolock(handle, inode, &iloc);
1890out:
1891	brelse(iloc.bh);
1892	return error;
1893}
1894
1895void ext4_inline_data_truncate(struct inode *inode, int *has_inline)
1896{
1897	handle_t *handle;
1898	int inline_size, value_len, needed_blocks;
1899	size_t i_size;
1900	void *value = NULL;
1901	struct ext4_xattr_ibody_find is = {
1902		.s = { .not_found = -ENODATA, },
1903	};
1904	struct ext4_xattr_info i = {
1905		.name_index = EXT4_XATTR_INDEX_SYSTEM,
1906		.name = EXT4_XATTR_SYSTEM_DATA,
1907	};
1908
1909
1910	needed_blocks = ext4_writepage_trans_blocks(inode);
1911	handle = ext4_journal_start(inode, EXT4_HT_INODE, needed_blocks);
1912	if (IS_ERR(handle))
1913		return;
1914
1915	down_write(&EXT4_I(inode)->xattr_sem);
1916	if (!ext4_has_inline_data(inode)) {
1917		*has_inline = 0;
1918		ext4_journal_stop(handle);
1919		return;
1920	}
1921
1922	if (ext4_orphan_add(handle, inode))
1923		goto out;
1924
1925	if (ext4_get_inode_loc(inode, &is.iloc))
1926		goto out;
1927
1928	down_write(&EXT4_I(inode)->i_data_sem);
1929	i_size = inode->i_size;
1930	inline_size = ext4_get_inline_size(inode);
1931	EXT4_I(inode)->i_disksize = i_size;
1932
1933	if (i_size < inline_size) {
1934		/* Clear the content in the xattr space. */
1935		if (inline_size > EXT4_MIN_INLINE_DATA_SIZE) {
1936			if (ext4_xattr_ibody_find(inode, &i, &is))
1937				goto out_error;
1938
1939			BUG_ON(is.s.not_found);
1940
1941			value_len = le32_to_cpu(is.s.here->e_value_size);
1942			value = kmalloc(value_len, GFP_NOFS);
1943			if (!value)
1944				goto out_error;
1945
1946			if (ext4_xattr_ibody_get(inode, i.name_index, i.name,
1947						value, value_len))
1948				goto out_error;
1949
1950			i.value = value;
1951			i.value_len = i_size > EXT4_MIN_INLINE_DATA_SIZE ?
1952					i_size - EXT4_MIN_INLINE_DATA_SIZE : 0;
1953			if (ext4_xattr_ibody_inline_set(handle, inode, &i, &is))
1954				goto out_error;
1955		}
1956
1957		/* Clear the content within i_blocks. */
1958		if (i_size < EXT4_MIN_INLINE_DATA_SIZE) {
1959			void *p = (void *) ext4_raw_inode(&is.iloc)->i_block;
1960			memset(p + i_size, 0,
1961			       EXT4_MIN_INLINE_DATA_SIZE - i_size);
1962		}
1963
1964		EXT4_I(inode)->i_inline_size = i_size <
1965					EXT4_MIN_INLINE_DATA_SIZE ?
1966					EXT4_MIN_INLINE_DATA_SIZE : i_size;
1967	}
1968
1969out_error:
1970	up_write(&EXT4_I(inode)->i_data_sem);
1971out:
1972	brelse(is.iloc.bh);
1973	up_write(&EXT4_I(inode)->xattr_sem);
1974	kfree(value);
1975	if (inode->i_nlink)
1976		ext4_orphan_del(handle, inode);
1977
1978	inode->i_mtime = inode->i_ctime = ext4_current_time(inode);
1979	ext4_mark_inode_dirty(handle, inode);
1980	if (IS_SYNC(inode))
1981		ext4_handle_sync(handle);
1982
1983	ext4_journal_stop(handle);
1984	return;
1985}
1986
1987int ext4_convert_inline_data(struct inode *inode)
1988{
1989	int error, needed_blocks;
1990	handle_t *handle;
1991	struct ext4_iloc iloc;
1992
1993	if (!ext4_has_inline_data(inode)) {
1994		ext4_clear_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA);
1995		return 0;
1996	}
1997
1998	needed_blocks = ext4_writepage_trans_blocks(inode);
1999
2000	iloc.bh = NULL;
2001	error = ext4_get_inode_loc(inode, &iloc);
2002	if (error)
2003		return error;
2004
2005	handle = ext4_journal_start(inode, EXT4_HT_WRITE_PAGE, needed_blocks);
2006	if (IS_ERR(handle)) {
2007		error = PTR_ERR(handle);
2008		goto out_free;
2009	}
2010
2011	down_write(&EXT4_I(inode)->xattr_sem);
2012	if (!ext4_has_inline_data(inode)) {
2013		up_write(&EXT4_I(inode)->xattr_sem);
2014		goto out;
2015	}
2016
2017	error = ext4_convert_inline_data_nolock(handle, inode, &iloc);
2018	up_write(&EXT4_I(inode)->xattr_sem);
2019out:
2020	ext4_journal_stop(handle);
2021out_free:
2022	brelse(iloc.bh);
2023	return error;
2024}
2025