1/*
2 *  linux/fs/hfsplus/btree.c
3 *
4 * Copyright (C) 2001
5 * Brad Boyer (flar@allandria.com)
6 * (C) 2003 Ardis Technologies <roman@ardistech.com>
7 *
8 * Handle opening/closing btree
9 */
10
11#include <linux/slab.h>
12#include <linux/pagemap.h>
13#include <linux/log2.h>
14
15#include "hfsplus_fs.h"
16#include "hfsplus_raw.h"
17
18/*
19 * Initial source code of clump size calculation is gotten
20 * from http://opensource.apple.com/tarballs/diskdev_cmds/
21 */
22#define CLUMP_ENTRIES	15
23
24static short clumptbl[CLUMP_ENTRIES * 3] = {
25/*
26 *	    Volume	Attributes	 Catalog	 Extents
27 *	     Size	Clump (MB)	Clump (MB)	Clump (MB)
28 */
29	/*   1GB */	  4,		  4,		 4,
30	/*   2GB */	  6,		  6,		 4,
31	/*   4GB */	  8,		  8,		 4,
32	/*   8GB */	 11,		 11,		 5,
33	/*
34	 * For volumes 16GB and larger, we want to make sure that a full OS
35	 * install won't require fragmentation of the Catalog or Attributes
36	 * B-trees.  We do this by making the clump sizes sufficiently large,
37	 * and by leaving a gap after the B-trees for them to grow into.
38	 *
39	 * For SnowLeopard 10A298, a FullNetInstall with all packages selected
40	 * results in:
41	 * Catalog B-tree Header
42	 *	nodeSize:          8192
43	 *	totalNodes:       31616
44	 *	freeNodes:         1978
45	 * (used = 231.55 MB)
46	 * Attributes B-tree Header
47	 *	nodeSize:          8192
48	 *	totalNodes:       63232
49	 *	freeNodes:          958
50	 * (used = 486.52 MB)
51	 *
52	 * We also want Time Machine backup volumes to have a sufficiently
53	 * large clump size to reduce fragmentation.
54	 *
55	 * The series of numbers for Catalog and Attribute form a geometric
56	 * series. For Catalog (16GB to 512GB), each term is 8**(1/5) times
57	 * the previous term.  For Attributes (16GB to 512GB), each term is
58	 * 4**(1/5) times the previous term.  For 1TB to 16TB, each term is
59	 * 2**(1/5) times the previous term.
60	 */
61	/*  16GB */	 64,		 32,		 5,
62	/*  32GB */	 84,		 49,		 6,
63	/*  64GB */	111,		 74,		 7,
64	/* 128GB */	147,		111,		 8,
65	/* 256GB */	194,		169,		 9,
66	/* 512GB */	256,		256,		11,
67	/*   1TB */	294,		294,		14,
68	/*   2TB */	338,		338,		16,
69	/*   4TB */	388,		388,		20,
70	/*   8TB */	446,		446,		25,
71	/*  16TB */	512,		512,		32
72};
73
74u32 hfsplus_calc_btree_clump_size(u32 block_size, u32 node_size,
75					u64 sectors, int file_id)
76{
77	u32 mod = max(node_size, block_size);
78	u32 clump_size;
79	int column;
80	int i;
81
82	/* Figure out which column of the above table to use for this file. */
83	switch (file_id) {
84	case HFSPLUS_ATTR_CNID:
85		column = 0;
86		break;
87	case HFSPLUS_CAT_CNID:
88		column = 1;
89		break;
90	default:
91		column = 2;
92		break;
93	}
94
95	/*
96	 * The default clump size is 0.8% of the volume size. And
97	 * it must also be a multiple of the node and block size.
98	 */
99	if (sectors < 0x200000) {
100		clump_size = sectors << 2;	/*  0.8 %  */
101		if (clump_size < (8 * node_size))
102			clump_size = 8 * node_size;
103	} else {
104		/* turn exponent into table index... */
105		for (i = 0, sectors = sectors >> 22;
106		     sectors && (i < CLUMP_ENTRIES - 1);
107		     ++i, sectors = sectors >> 1) {
108			/* empty body */
109		}
110
111		clump_size = clumptbl[column + (i) * 3] * 1024 * 1024;
112	}
113
114	/*
115	 * Round the clump size to a multiple of node and block size.
116	 * NOTE: This rounds down.
117	 */
118	clump_size /= mod;
119	clump_size *= mod;
120
121	/*
122	 * Rounding down could have rounded down to 0 if the block size was
123	 * greater than the clump size.  If so, just use one block or node.
124	 */
125	if (clump_size == 0)
126		clump_size = mod;
127
128	return clump_size;
129}
130
131/* Get a reference to a B*Tree and do some initial checks */
132struct hfs_btree *hfs_btree_open(struct super_block *sb, u32 id)
133{
134	struct hfs_btree *tree;
135	struct hfs_btree_header_rec *head;
136	struct address_space *mapping;
137	struct inode *inode;
138	struct page *page;
139	unsigned int size;
140
141	tree = kzalloc(sizeof(*tree), GFP_KERNEL);
142	if (!tree)
143		return NULL;
144
145	mutex_init(&tree->tree_lock);
146	spin_lock_init(&tree->hash_lock);
147	tree->sb = sb;
148	tree->cnid = id;
149	inode = hfsplus_iget(sb, id);
150	if (IS_ERR(inode))
151		goto free_tree;
152	tree->inode = inode;
153
154	if (!HFSPLUS_I(tree->inode)->first_blocks) {
155		pr_err("invalid btree extent records (0 size)\n");
156		goto free_inode;
157	}
158
159	mapping = tree->inode->i_mapping;
160	page = read_mapping_page(mapping, 0, NULL);
161	if (IS_ERR(page))
162		goto free_inode;
163
164	/* Load the header */
165	head = (struct hfs_btree_header_rec *)(kmap(page) +
166		sizeof(struct hfs_bnode_desc));
167	tree->root = be32_to_cpu(head->root);
168	tree->leaf_count = be32_to_cpu(head->leaf_count);
169	tree->leaf_head = be32_to_cpu(head->leaf_head);
170	tree->leaf_tail = be32_to_cpu(head->leaf_tail);
171	tree->node_count = be32_to_cpu(head->node_count);
172	tree->free_nodes = be32_to_cpu(head->free_nodes);
173	tree->attributes = be32_to_cpu(head->attributes);
174	tree->node_size = be16_to_cpu(head->node_size);
175	tree->max_key_len = be16_to_cpu(head->max_key_len);
176	tree->depth = be16_to_cpu(head->depth);
177
178	/* Verify the tree and set the correct compare function */
179	switch (id) {
180	case HFSPLUS_EXT_CNID:
181		if (tree->max_key_len != HFSPLUS_EXT_KEYLEN - sizeof(u16)) {
182			pr_err("invalid extent max_key_len %d\n",
183				tree->max_key_len);
184			goto fail_page;
185		}
186		if (tree->attributes & HFS_TREE_VARIDXKEYS) {
187			pr_err("invalid extent btree flag\n");
188			goto fail_page;
189		}
190
191		tree->keycmp = hfsplus_ext_cmp_key;
192		break;
193	case HFSPLUS_CAT_CNID:
194		if (tree->max_key_len != HFSPLUS_CAT_KEYLEN - sizeof(u16)) {
195			pr_err("invalid catalog max_key_len %d\n",
196				tree->max_key_len);
197			goto fail_page;
198		}
199		if (!(tree->attributes & HFS_TREE_VARIDXKEYS)) {
200			pr_err("invalid catalog btree flag\n");
201			goto fail_page;
202		}
203
204		if (test_bit(HFSPLUS_SB_HFSX, &HFSPLUS_SB(sb)->flags) &&
205		    (head->key_type == HFSPLUS_KEY_BINARY))
206			tree->keycmp = hfsplus_cat_bin_cmp_key;
207		else {
208			tree->keycmp = hfsplus_cat_case_cmp_key;
209			set_bit(HFSPLUS_SB_CASEFOLD, &HFSPLUS_SB(sb)->flags);
210		}
211		break;
212	case HFSPLUS_ATTR_CNID:
213		if (tree->max_key_len != HFSPLUS_ATTR_KEYLEN - sizeof(u16)) {
214			pr_err("invalid attributes max_key_len %d\n",
215				tree->max_key_len);
216			goto fail_page;
217		}
218		tree->keycmp = hfsplus_attr_bin_cmp_key;
219		break;
220	default:
221		pr_err("unknown B*Tree requested\n");
222		goto fail_page;
223	}
224
225	if (!(tree->attributes & HFS_TREE_BIGKEYS)) {
226		pr_err("invalid btree flag\n");
227		goto fail_page;
228	}
229
230	size = tree->node_size;
231	if (!is_power_of_2(size))
232		goto fail_page;
233	if (!tree->node_count)
234		goto fail_page;
235
236	tree->node_size_shift = ffs(size) - 1;
237
238	tree->pages_per_bnode =
239		(tree->node_size + PAGE_CACHE_SIZE - 1) >>
240		PAGE_CACHE_SHIFT;
241
242	kunmap(page);
243	page_cache_release(page);
244	return tree;
245
246 fail_page:
247	page_cache_release(page);
248 free_inode:
249	tree->inode->i_mapping->a_ops = &hfsplus_aops;
250	iput(tree->inode);
251 free_tree:
252	kfree(tree);
253	return NULL;
254}
255
256/* Release resources used by a btree */
257void hfs_btree_close(struct hfs_btree *tree)
258{
259	struct hfs_bnode *node;
260	int i;
261
262	if (!tree)
263		return;
264
265	for (i = 0; i < NODE_HASH_SIZE; i++) {
266		while ((node = tree->node_hash[i])) {
267			tree->node_hash[i] = node->next_hash;
268			if (atomic_read(&node->refcnt))
269				pr_crit("node %d:%d "
270						"still has %d user(s)!\n",
271					node->tree->cnid, node->this,
272					atomic_read(&node->refcnt));
273			hfs_bnode_free(node);
274			tree->node_hash_cnt--;
275		}
276	}
277	iput(tree->inode);
278	kfree(tree);
279}
280
281int hfs_btree_write(struct hfs_btree *tree)
282{
283	struct hfs_btree_header_rec *head;
284	struct hfs_bnode *node;
285	struct page *page;
286
287	node = hfs_bnode_find(tree, 0);
288	if (IS_ERR(node))
289		/* panic? */
290		return -EIO;
291	/* Load the header */
292	page = node->page[0];
293	head = (struct hfs_btree_header_rec *)(kmap(page) +
294		sizeof(struct hfs_bnode_desc));
295
296	head->root = cpu_to_be32(tree->root);
297	head->leaf_count = cpu_to_be32(tree->leaf_count);
298	head->leaf_head = cpu_to_be32(tree->leaf_head);
299	head->leaf_tail = cpu_to_be32(tree->leaf_tail);
300	head->node_count = cpu_to_be32(tree->node_count);
301	head->free_nodes = cpu_to_be32(tree->free_nodes);
302	head->attributes = cpu_to_be32(tree->attributes);
303	head->depth = cpu_to_be16(tree->depth);
304
305	kunmap(page);
306	set_page_dirty(page);
307	hfs_bnode_put(node);
308	return 0;
309}
310
311static struct hfs_bnode *hfs_bmap_new_bmap(struct hfs_bnode *prev, u32 idx)
312{
313	struct hfs_btree *tree = prev->tree;
314	struct hfs_bnode *node;
315	struct hfs_bnode_desc desc;
316	__be32 cnid;
317
318	node = hfs_bnode_create(tree, idx);
319	if (IS_ERR(node))
320		return node;
321
322	tree->free_nodes--;
323	prev->next = idx;
324	cnid = cpu_to_be32(idx);
325	hfs_bnode_write(prev, &cnid, offsetof(struct hfs_bnode_desc, next), 4);
326
327	node->type = HFS_NODE_MAP;
328	node->num_recs = 1;
329	hfs_bnode_clear(node, 0, tree->node_size);
330	desc.next = 0;
331	desc.prev = 0;
332	desc.type = HFS_NODE_MAP;
333	desc.height = 0;
334	desc.num_recs = cpu_to_be16(1);
335	desc.reserved = 0;
336	hfs_bnode_write(node, &desc, 0, sizeof(desc));
337	hfs_bnode_write_u16(node, 14, 0x8000);
338	hfs_bnode_write_u16(node, tree->node_size - 2, 14);
339	hfs_bnode_write_u16(node, tree->node_size - 4, tree->node_size - 6);
340
341	return node;
342}
343
344struct hfs_bnode *hfs_bmap_alloc(struct hfs_btree *tree)
345{
346	struct hfs_bnode *node, *next_node;
347	struct page **pagep;
348	u32 nidx, idx;
349	unsigned off;
350	u16 off16;
351	u16 len;
352	u8 *data, byte, m;
353	int i;
354
355	while (!tree->free_nodes) {
356		struct inode *inode = tree->inode;
357		struct hfsplus_inode_info *hip = HFSPLUS_I(inode);
358		u32 count;
359		int res;
360
361		res = hfsplus_file_extend(inode, hfs_bnode_need_zeroout(tree));
362		if (res)
363			return ERR_PTR(res);
364		hip->phys_size = inode->i_size =
365			(loff_t)hip->alloc_blocks <<
366				HFSPLUS_SB(tree->sb)->alloc_blksz_shift;
367		hip->fs_blocks =
368			hip->alloc_blocks << HFSPLUS_SB(tree->sb)->fs_shift;
369		inode_set_bytes(inode, inode->i_size);
370		count = inode->i_size >> tree->node_size_shift;
371		tree->free_nodes = count - tree->node_count;
372		tree->node_count = count;
373	}
374
375	nidx = 0;
376	node = hfs_bnode_find(tree, nidx);
377	if (IS_ERR(node))
378		return node;
379	len = hfs_brec_lenoff(node, 2, &off16);
380	off = off16;
381
382	off += node->page_offset;
383	pagep = node->page + (off >> PAGE_CACHE_SHIFT);
384	data = kmap(*pagep);
385	off &= ~PAGE_CACHE_MASK;
386	idx = 0;
387
388	for (;;) {
389		while (len) {
390			byte = data[off];
391			if (byte != 0xff) {
392				for (m = 0x80, i = 0; i < 8; m >>= 1, i++) {
393					if (!(byte & m)) {
394						idx += i;
395						data[off] |= m;
396						set_page_dirty(*pagep);
397						kunmap(*pagep);
398						tree->free_nodes--;
399						mark_inode_dirty(tree->inode);
400						hfs_bnode_put(node);
401						return hfs_bnode_create(tree,
402							idx);
403					}
404				}
405			}
406			if (++off >= PAGE_CACHE_SIZE) {
407				kunmap(*pagep);
408				data = kmap(*++pagep);
409				off = 0;
410			}
411			idx += 8;
412			len--;
413		}
414		kunmap(*pagep);
415		nidx = node->next;
416		if (!nidx) {
417			hfs_dbg(BNODE_MOD, "create new bmap node\n");
418			next_node = hfs_bmap_new_bmap(node, idx);
419		} else
420			next_node = hfs_bnode_find(tree, nidx);
421		hfs_bnode_put(node);
422		if (IS_ERR(next_node))
423			return next_node;
424		node = next_node;
425
426		len = hfs_brec_lenoff(node, 0, &off16);
427		off = off16;
428		off += node->page_offset;
429		pagep = node->page + (off >> PAGE_CACHE_SHIFT);
430		data = kmap(*pagep);
431		off &= ~PAGE_CACHE_MASK;
432	}
433}
434
435void hfs_bmap_free(struct hfs_bnode *node)
436{
437	struct hfs_btree *tree;
438	struct page *page;
439	u16 off, len;
440	u32 nidx;
441	u8 *data, byte, m;
442
443	hfs_dbg(BNODE_MOD, "btree_free_node: %u\n", node->this);
444	BUG_ON(!node->this);
445	tree = node->tree;
446	nidx = node->this;
447	node = hfs_bnode_find(tree, 0);
448	if (IS_ERR(node))
449		return;
450	len = hfs_brec_lenoff(node, 2, &off);
451	while (nidx >= len * 8) {
452		u32 i;
453
454		nidx -= len * 8;
455		i = node->next;
456		hfs_bnode_put(node);
457		if (!i) {
458			/* panic */;
459			pr_crit("unable to free bnode %u. "
460					"bmap not found!\n",
461				node->this);
462			return;
463		}
464		node = hfs_bnode_find(tree, i);
465		if (IS_ERR(node))
466			return;
467		if (node->type != HFS_NODE_MAP) {
468			/* panic */;
469			pr_crit("invalid bmap found! "
470					"(%u,%d)\n",
471				node->this, node->type);
472			hfs_bnode_put(node);
473			return;
474		}
475		len = hfs_brec_lenoff(node, 0, &off);
476	}
477	off += node->page_offset + nidx / 8;
478	page = node->page[off >> PAGE_CACHE_SHIFT];
479	data = kmap(page);
480	off &= ~PAGE_CACHE_MASK;
481	m = 1 << (~nidx & 7);
482	byte = data[off];
483	if (!(byte & m)) {
484		pr_crit("trying to free free bnode "
485				"%u(%d)\n",
486			node->this, node->type);
487		kunmap(page);
488		hfs_bnode_put(node);
489		return;
490	}
491	data[off] = byte & ~m;
492	set_page_dirty(page);
493	kunmap(page);
494	hfs_bnode_put(node);
495	tree->free_nodes++;
496	mark_inode_dirty(tree->inode);
497}
498