1/*
2 * Copyright (C) Sistina Software, Inc.  1997-2003 All rights reserved.
3 * Copyright (C) 2004-2008 Red Hat, Inc.  All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License version 2.
8 */
9
10#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
11
12#include <linux/slab.h>
13#include <linux/spinlock.h>
14#include <linux/completion.h>
15#include <linux/buffer_head.h>
16#include <linux/fs.h>
17#include <linux/gfs2_ondisk.h>
18#include <linux/prefetch.h>
19#include <linux/blkdev.h>
20#include <linux/rbtree.h>
21#include <linux/random.h>
22
23#include "gfs2.h"
24#include "incore.h"
25#include "glock.h"
26#include "glops.h"
27#include "lops.h"
28#include "meta_io.h"
29#include "quota.h"
30#include "rgrp.h"
31#include "super.h"
32#include "trans.h"
33#include "util.h"
34#include "log.h"
35#include "inode.h"
36#include "trace_gfs2.h"
37
38#define BFITNOENT ((u32)~0)
39#define NO_BLOCK ((u64)~0)
40
41#if BITS_PER_LONG == 32
42#define LBITMASK   (0x55555555UL)
43#define LBITSKIP55 (0x55555555UL)
44#define LBITSKIP00 (0x00000000UL)
45#else
46#define LBITMASK   (0x5555555555555555UL)
47#define LBITSKIP55 (0x5555555555555555UL)
48#define LBITSKIP00 (0x0000000000000000UL)
49#endif
50
51/*
52 * These routines are used by the resource group routines (rgrp.c)
53 * to keep track of block allocation.  Each block is represented by two
54 * bits.  So, each byte represents GFS2_NBBY (i.e. 4) blocks.
55 *
56 * 0 = Free
57 * 1 = Used (not metadata)
58 * 2 = Unlinked (still in use) inode
59 * 3 = Used (metadata)
60 */
61
62struct gfs2_extent {
63	struct gfs2_rbm rbm;
64	u32 len;
65};
66
67static const char valid_change[16] = {
68	        /* current */
69	/* n */ 0, 1, 1, 1,
70	/* e */ 1, 0, 0, 0,
71	/* w */ 0, 0, 0, 1,
72	        1, 0, 0, 0
73};
74
75static int gfs2_rbm_find(struct gfs2_rbm *rbm, u8 state, u32 *minext,
76			 const struct gfs2_inode *ip, bool nowrap,
77			 const struct gfs2_alloc_parms *ap);
78
79
80/**
81 * gfs2_setbit - Set a bit in the bitmaps
82 * @rbm: The position of the bit to set
83 * @do_clone: Also set the clone bitmap, if it exists
84 * @new_state: the new state of the block
85 *
86 */
87
88static inline void gfs2_setbit(const struct gfs2_rbm *rbm, bool do_clone,
89			       unsigned char new_state)
90{
91	unsigned char *byte1, *byte2, *end, cur_state;
92	struct gfs2_bitmap *bi = rbm_bi(rbm);
93	unsigned int buflen = bi->bi_len;
94	const unsigned int bit = (rbm->offset % GFS2_NBBY) * GFS2_BIT_SIZE;
95
96	byte1 = bi->bi_bh->b_data + bi->bi_offset + (rbm->offset / GFS2_NBBY);
97	end = bi->bi_bh->b_data + bi->bi_offset + buflen;
98
99	BUG_ON(byte1 >= end);
100
101	cur_state = (*byte1 >> bit) & GFS2_BIT_MASK;
102
103	if (unlikely(!valid_change[new_state * 4 + cur_state])) {
104		pr_warn("buf_blk = 0x%x old_state=%d, new_state=%d\n",
105			rbm->offset, cur_state, new_state);
106		pr_warn("rgrp=0x%llx bi_start=0x%x\n",
107			(unsigned long long)rbm->rgd->rd_addr, bi->bi_start);
108		pr_warn("bi_offset=0x%x bi_len=0x%x\n",
109			bi->bi_offset, bi->bi_len);
110		dump_stack();
111		gfs2_consist_rgrpd(rbm->rgd);
112		return;
113	}
114	*byte1 ^= (cur_state ^ new_state) << bit;
115
116	if (do_clone && bi->bi_clone) {
117		byte2 = bi->bi_clone + bi->bi_offset + (rbm->offset / GFS2_NBBY);
118		cur_state = (*byte2 >> bit) & GFS2_BIT_MASK;
119		*byte2 ^= (cur_state ^ new_state) << bit;
120	}
121}
122
123/**
124 * gfs2_testbit - test a bit in the bitmaps
125 * @rbm: The bit to test
126 *
127 * Returns: The two bit block state of the requested bit
128 */
129
130static inline u8 gfs2_testbit(const struct gfs2_rbm *rbm)
131{
132	struct gfs2_bitmap *bi = rbm_bi(rbm);
133	const u8 *buffer = bi->bi_bh->b_data + bi->bi_offset;
134	const u8 *byte;
135	unsigned int bit;
136
137	byte = buffer + (rbm->offset / GFS2_NBBY);
138	bit = (rbm->offset % GFS2_NBBY) * GFS2_BIT_SIZE;
139
140	return (*byte >> bit) & GFS2_BIT_MASK;
141}
142
143/**
144 * gfs2_bit_search
145 * @ptr: Pointer to bitmap data
146 * @mask: Mask to use (normally 0x55555.... but adjusted for search start)
147 * @state: The state we are searching for
148 *
149 * We xor the bitmap data with a patter which is the bitwise opposite
150 * of what we are looking for, this gives rise to a pattern of ones
151 * wherever there is a match. Since we have two bits per entry, we
152 * take this pattern, shift it down by one place and then and it with
153 * the original. All the even bit positions (0,2,4, etc) then represent
154 * successful matches, so we mask with 0x55555..... to remove the unwanted
155 * odd bit positions.
156 *
157 * This allows searching of a whole u64 at once (32 blocks) with a
158 * single test (on 64 bit arches).
159 */
160
161static inline u64 gfs2_bit_search(const __le64 *ptr, u64 mask, u8 state)
162{
163	u64 tmp;
164	static const u64 search[] = {
165		[0] = 0xffffffffffffffffULL,
166		[1] = 0xaaaaaaaaaaaaaaaaULL,
167		[2] = 0x5555555555555555ULL,
168		[3] = 0x0000000000000000ULL,
169	};
170	tmp = le64_to_cpu(*ptr) ^ search[state];
171	tmp &= (tmp >> 1);
172	tmp &= mask;
173	return tmp;
174}
175
176/**
177 * rs_cmp - multi-block reservation range compare
178 * @blk: absolute file system block number of the new reservation
179 * @len: number of blocks in the new reservation
180 * @rs: existing reservation to compare against
181 *
182 * returns: 1 if the block range is beyond the reach of the reservation
183 *         -1 if the block range is before the start of the reservation
184 *          0 if the block range overlaps with the reservation
185 */
186static inline int rs_cmp(u64 blk, u32 len, struct gfs2_blkreserv *rs)
187{
188	u64 startblk = gfs2_rbm_to_block(&rs->rs_rbm);
189
190	if (blk >= startblk + rs->rs_free)
191		return 1;
192	if (blk + len - 1 < startblk)
193		return -1;
194	return 0;
195}
196
197/**
198 * gfs2_bitfit - Search an rgrp's bitmap buffer to find a bit-pair representing
199 *       a block in a given allocation state.
200 * @buf: the buffer that holds the bitmaps
201 * @len: the length (in bytes) of the buffer
202 * @goal: start search at this block's bit-pair (within @buffer)
203 * @state: GFS2_BLKST_XXX the state of the block we're looking for.
204 *
205 * Scope of @goal and returned block number is only within this bitmap buffer,
206 * not entire rgrp or filesystem.  @buffer will be offset from the actual
207 * beginning of a bitmap block buffer, skipping any header structures, but
208 * headers are always a multiple of 64 bits long so that the buffer is
209 * always aligned to a 64 bit boundary.
210 *
211 * The size of the buffer is in bytes, but is it assumed that it is
212 * always ok to read a complete multiple of 64 bits at the end
213 * of the block in case the end is no aligned to a natural boundary.
214 *
215 * Return: the block number (bitmap buffer scope) that was found
216 */
217
218static u32 gfs2_bitfit(const u8 *buf, const unsigned int len,
219		       u32 goal, u8 state)
220{
221	u32 spoint = (goal << 1) & ((8*sizeof(u64)) - 1);
222	const __le64 *ptr = ((__le64 *)buf) + (goal >> 5);
223	const __le64 *end = (__le64 *)(buf + ALIGN(len, sizeof(u64)));
224	u64 tmp;
225	u64 mask = 0x5555555555555555ULL;
226	u32 bit;
227
228	/* Mask off bits we don't care about at the start of the search */
229	mask <<= spoint;
230	tmp = gfs2_bit_search(ptr, mask, state);
231	ptr++;
232	while(tmp == 0 && ptr < end) {
233		tmp = gfs2_bit_search(ptr, 0x5555555555555555ULL, state);
234		ptr++;
235	}
236	/* Mask off any bits which are more than len bytes from the start */
237	if (ptr == end && (len & (sizeof(u64) - 1)))
238		tmp &= (((u64)~0) >> (64 - 8*(len & (sizeof(u64) - 1))));
239	/* Didn't find anything, so return */
240	if (tmp == 0)
241		return BFITNOENT;
242	ptr--;
243	bit = __ffs64(tmp);
244	bit /= 2;	/* two bits per entry in the bitmap */
245	return (((const unsigned char *)ptr - buf) * GFS2_NBBY) + bit;
246}
247
248/**
249 * gfs2_rbm_from_block - Set the rbm based upon rgd and block number
250 * @rbm: The rbm with rgd already set correctly
251 * @block: The block number (filesystem relative)
252 *
253 * This sets the bi and offset members of an rbm based on a
254 * resource group and a filesystem relative block number. The
255 * resource group must be set in the rbm on entry, the bi and
256 * offset members will be set by this function.
257 *
258 * Returns: 0 on success, or an error code
259 */
260
261static int gfs2_rbm_from_block(struct gfs2_rbm *rbm, u64 block)
262{
263	u64 rblock = block - rbm->rgd->rd_data0;
264
265	if (WARN_ON_ONCE(rblock > UINT_MAX))
266		return -EINVAL;
267	if (block >= rbm->rgd->rd_data0 + rbm->rgd->rd_data)
268		return -E2BIG;
269
270	rbm->bii = 0;
271	rbm->offset = (u32)(rblock);
272	/* Check if the block is within the first block */
273	if (rbm->offset < rbm_bi(rbm)->bi_blocks)
274		return 0;
275
276	/* Adjust for the size diff between gfs2_meta_header and gfs2_rgrp */
277	rbm->offset += (sizeof(struct gfs2_rgrp) -
278			sizeof(struct gfs2_meta_header)) * GFS2_NBBY;
279	rbm->bii = rbm->offset / rbm->rgd->rd_sbd->sd_blocks_per_bitmap;
280	rbm->offset -= rbm->bii * rbm->rgd->rd_sbd->sd_blocks_per_bitmap;
281	return 0;
282}
283
284/**
285 * gfs2_rbm_incr - increment an rbm structure
286 * @rbm: The rbm with rgd already set correctly
287 *
288 * This function takes an existing rbm structure and increments it to the next
289 * viable block offset.
290 *
291 * Returns: If incrementing the offset would cause the rbm to go past the
292 *          end of the rgrp, true is returned, otherwise false.
293 *
294 */
295
296static bool gfs2_rbm_incr(struct gfs2_rbm *rbm)
297{
298	if (rbm->offset + 1 < rbm_bi(rbm)->bi_blocks) { /* in the same bitmap */
299		rbm->offset++;
300		return false;
301	}
302	if (rbm->bii == rbm->rgd->rd_length - 1) /* at the last bitmap */
303		return true;
304
305	rbm->offset = 0;
306	rbm->bii++;
307	return false;
308}
309
310/**
311 * gfs2_unaligned_extlen - Look for free blocks which are not byte aligned
312 * @rbm: Position to search (value/result)
313 * @n_unaligned: Number of unaligned blocks to check
314 * @len: Decremented for each block found (terminate on zero)
315 *
316 * Returns: true if a non-free block is encountered
317 */
318
319static bool gfs2_unaligned_extlen(struct gfs2_rbm *rbm, u32 n_unaligned, u32 *len)
320{
321	u32 n;
322	u8 res;
323
324	for (n = 0; n < n_unaligned; n++) {
325		res = gfs2_testbit(rbm);
326		if (res != GFS2_BLKST_FREE)
327			return true;
328		(*len)--;
329		if (*len == 0)
330			return true;
331		if (gfs2_rbm_incr(rbm))
332			return true;
333	}
334
335	return false;
336}
337
338/**
339 * gfs2_free_extlen - Return extent length of free blocks
340 * @rrbm: Starting position
341 * @len: Max length to check
342 *
343 * Starting at the block specified by the rbm, see how many free blocks
344 * there are, not reading more than len blocks ahead. This can be done
345 * using memchr_inv when the blocks are byte aligned, but has to be done
346 * on a block by block basis in case of unaligned blocks. Also this
347 * function can cope with bitmap boundaries (although it must stop on
348 * a resource group boundary)
349 *
350 * Returns: Number of free blocks in the extent
351 */
352
353static u32 gfs2_free_extlen(const struct gfs2_rbm *rrbm, u32 len)
354{
355	struct gfs2_rbm rbm = *rrbm;
356	u32 n_unaligned = rbm.offset & 3;
357	u32 size = len;
358	u32 bytes;
359	u32 chunk_size;
360	u8 *ptr, *start, *end;
361	u64 block;
362	struct gfs2_bitmap *bi;
363
364	if (n_unaligned &&
365	    gfs2_unaligned_extlen(&rbm, 4 - n_unaligned, &len))
366		goto out;
367
368	n_unaligned = len & 3;
369	/* Start is now byte aligned */
370	while (len > 3) {
371		bi = rbm_bi(&rbm);
372		start = bi->bi_bh->b_data;
373		if (bi->bi_clone)
374			start = bi->bi_clone;
375		end = start + bi->bi_bh->b_size;
376		start += bi->bi_offset;
377		BUG_ON(rbm.offset & 3);
378		start += (rbm.offset / GFS2_NBBY);
379		bytes = min_t(u32, len / GFS2_NBBY, (end - start));
380		ptr = memchr_inv(start, 0, bytes);
381		chunk_size = ((ptr == NULL) ? bytes : (ptr - start));
382		chunk_size *= GFS2_NBBY;
383		BUG_ON(len < chunk_size);
384		len -= chunk_size;
385		block = gfs2_rbm_to_block(&rbm);
386		if (gfs2_rbm_from_block(&rbm, block + chunk_size)) {
387			n_unaligned = 0;
388			break;
389		}
390		if (ptr) {
391			n_unaligned = 3;
392			break;
393		}
394		n_unaligned = len & 3;
395	}
396
397	/* Deal with any bits left over at the end */
398	if (n_unaligned)
399		gfs2_unaligned_extlen(&rbm, n_unaligned, &len);
400out:
401	return size - len;
402}
403
404/**
405 * gfs2_bitcount - count the number of bits in a certain state
406 * @rgd: the resource group descriptor
407 * @buffer: the buffer that holds the bitmaps
408 * @buflen: the length (in bytes) of the buffer
409 * @state: the state of the block we're looking for
410 *
411 * Returns: The number of bits
412 */
413
414static u32 gfs2_bitcount(struct gfs2_rgrpd *rgd, const u8 *buffer,
415			 unsigned int buflen, u8 state)
416{
417	const u8 *byte = buffer;
418	const u8 *end = buffer + buflen;
419	const u8 state1 = state << 2;
420	const u8 state2 = state << 4;
421	const u8 state3 = state << 6;
422	u32 count = 0;
423
424	for (; byte < end; byte++) {
425		if (((*byte) & 0x03) == state)
426			count++;
427		if (((*byte) & 0x0C) == state1)
428			count++;
429		if (((*byte) & 0x30) == state2)
430			count++;
431		if (((*byte) & 0xC0) == state3)
432			count++;
433	}
434
435	return count;
436}
437
438/**
439 * gfs2_rgrp_verify - Verify that a resource group is consistent
440 * @rgd: the rgrp
441 *
442 */
443
444void gfs2_rgrp_verify(struct gfs2_rgrpd *rgd)
445{
446	struct gfs2_sbd *sdp = rgd->rd_sbd;
447	struct gfs2_bitmap *bi = NULL;
448	u32 length = rgd->rd_length;
449	u32 count[4], tmp;
450	int buf, x;
451
452	memset(count, 0, 4 * sizeof(u32));
453
454	/* Count # blocks in each of 4 possible allocation states */
455	for (buf = 0; buf < length; buf++) {
456		bi = rgd->rd_bits + buf;
457		for (x = 0; x < 4; x++)
458			count[x] += gfs2_bitcount(rgd,
459						  bi->bi_bh->b_data +
460						  bi->bi_offset,
461						  bi->bi_len, x);
462	}
463
464	if (count[0] != rgd->rd_free) {
465		if (gfs2_consist_rgrpd(rgd))
466			fs_err(sdp, "free data mismatch:  %u != %u\n",
467			       count[0], rgd->rd_free);
468		return;
469	}
470
471	tmp = rgd->rd_data - rgd->rd_free - rgd->rd_dinodes;
472	if (count[1] != tmp) {
473		if (gfs2_consist_rgrpd(rgd))
474			fs_err(sdp, "used data mismatch:  %u != %u\n",
475			       count[1], tmp);
476		return;
477	}
478
479	if (count[2] + count[3] != rgd->rd_dinodes) {
480		if (gfs2_consist_rgrpd(rgd))
481			fs_err(sdp, "used metadata mismatch:  %u != %u\n",
482			       count[2] + count[3], rgd->rd_dinodes);
483		return;
484	}
485}
486
487static inline int rgrp_contains_block(struct gfs2_rgrpd *rgd, u64 block)
488{
489	u64 first = rgd->rd_data0;
490	u64 last = first + rgd->rd_data;
491	return first <= block && block < last;
492}
493
494/**
495 * gfs2_blk2rgrpd - Find resource group for a given data/meta block number
496 * @sdp: The GFS2 superblock
497 * @blk: The data block number
498 * @exact: True if this needs to be an exact match
499 *
500 * Returns: The resource group, or NULL if not found
501 */
502
503struct gfs2_rgrpd *gfs2_blk2rgrpd(struct gfs2_sbd *sdp, u64 blk, bool exact)
504{
505	struct rb_node *n, *next;
506	struct gfs2_rgrpd *cur;
507
508	spin_lock(&sdp->sd_rindex_spin);
509	n = sdp->sd_rindex_tree.rb_node;
510	while (n) {
511		cur = rb_entry(n, struct gfs2_rgrpd, rd_node);
512		next = NULL;
513		if (blk < cur->rd_addr)
514			next = n->rb_left;
515		else if (blk >= cur->rd_data0 + cur->rd_data)
516			next = n->rb_right;
517		if (next == NULL) {
518			spin_unlock(&sdp->sd_rindex_spin);
519			if (exact) {
520				if (blk < cur->rd_addr)
521					return NULL;
522				if (blk >= cur->rd_data0 + cur->rd_data)
523					return NULL;
524			}
525			return cur;
526		}
527		n = next;
528	}
529	spin_unlock(&sdp->sd_rindex_spin);
530
531	return NULL;
532}
533
534/**
535 * gfs2_rgrpd_get_first - get the first Resource Group in the filesystem
536 * @sdp: The GFS2 superblock
537 *
538 * Returns: The first rgrp in the filesystem
539 */
540
541struct gfs2_rgrpd *gfs2_rgrpd_get_first(struct gfs2_sbd *sdp)
542{
543	const struct rb_node *n;
544	struct gfs2_rgrpd *rgd;
545
546	spin_lock(&sdp->sd_rindex_spin);
547	n = rb_first(&sdp->sd_rindex_tree);
548	rgd = rb_entry(n, struct gfs2_rgrpd, rd_node);
549	spin_unlock(&sdp->sd_rindex_spin);
550
551	return rgd;
552}
553
554/**
555 * gfs2_rgrpd_get_next - get the next RG
556 * @rgd: the resource group descriptor
557 *
558 * Returns: The next rgrp
559 */
560
561struct gfs2_rgrpd *gfs2_rgrpd_get_next(struct gfs2_rgrpd *rgd)
562{
563	struct gfs2_sbd *sdp = rgd->rd_sbd;
564	const struct rb_node *n;
565
566	spin_lock(&sdp->sd_rindex_spin);
567	n = rb_next(&rgd->rd_node);
568	if (n == NULL)
569		n = rb_first(&sdp->sd_rindex_tree);
570
571	if (unlikely(&rgd->rd_node == n)) {
572		spin_unlock(&sdp->sd_rindex_spin);
573		return NULL;
574	}
575	rgd = rb_entry(n, struct gfs2_rgrpd, rd_node);
576	spin_unlock(&sdp->sd_rindex_spin);
577	return rgd;
578}
579
580void check_and_update_goal(struct gfs2_inode *ip)
581{
582	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
583	if (!ip->i_goal || gfs2_blk2rgrpd(sdp, ip->i_goal, 1) == NULL)
584		ip->i_goal = ip->i_no_addr;
585}
586
587void gfs2_free_clones(struct gfs2_rgrpd *rgd)
588{
589	int x;
590
591	for (x = 0; x < rgd->rd_length; x++) {
592		struct gfs2_bitmap *bi = rgd->rd_bits + x;
593		kfree(bi->bi_clone);
594		bi->bi_clone = NULL;
595	}
596}
597
598/**
599 * gfs2_rs_alloc - make sure we have a reservation assigned to the inode
600 * @ip: the inode for this reservation
601 */
602int gfs2_rs_alloc(struct gfs2_inode *ip)
603{
604	int error = 0;
605
606	down_write(&ip->i_rw_mutex);
607	if (ip->i_res)
608		goto out;
609
610	ip->i_res = kmem_cache_zalloc(gfs2_rsrv_cachep, GFP_NOFS);
611	if (!ip->i_res) {
612		error = -ENOMEM;
613		goto out;
614	}
615
616	RB_CLEAR_NODE(&ip->i_res->rs_node);
617out:
618	up_write(&ip->i_rw_mutex);
619	return error;
620}
621
622static void dump_rs(struct seq_file *seq, const struct gfs2_blkreserv *rs)
623{
624	gfs2_print_dbg(seq, "  B: n:%llu s:%llu b:%u f:%u\n",
625		       (unsigned long long)rs->rs_inum,
626		       (unsigned long long)gfs2_rbm_to_block(&rs->rs_rbm),
627		       rs->rs_rbm.offset, rs->rs_free);
628}
629
630/**
631 * __rs_deltree - remove a multi-block reservation from the rgd tree
632 * @rs: The reservation to remove
633 *
634 */
635static void __rs_deltree(struct gfs2_blkreserv *rs)
636{
637	struct gfs2_rgrpd *rgd;
638
639	if (!gfs2_rs_active(rs))
640		return;
641
642	rgd = rs->rs_rbm.rgd;
643	trace_gfs2_rs(rs, TRACE_RS_TREEDEL);
644	rb_erase(&rs->rs_node, &rgd->rd_rstree);
645	RB_CLEAR_NODE(&rs->rs_node);
646
647	if (rs->rs_free) {
648		struct gfs2_bitmap *bi = rbm_bi(&rs->rs_rbm);
649
650		/* return reserved blocks to the rgrp */
651		BUG_ON(rs->rs_rbm.rgd->rd_reserved < rs->rs_free);
652		rs->rs_rbm.rgd->rd_reserved -= rs->rs_free;
653		/* The rgrp extent failure point is likely not to increase;
654		   it will only do so if the freed blocks are somehow
655		   contiguous with a span of free blocks that follows. Still,
656		   it will force the number to be recalculated later. */
657		rgd->rd_extfail_pt += rs->rs_free;
658		rs->rs_free = 0;
659		clear_bit(GBF_FULL, &bi->bi_flags);
660	}
661}
662
663/**
664 * gfs2_rs_deltree - remove a multi-block reservation from the rgd tree
665 * @rs: The reservation to remove
666 *
667 */
668void gfs2_rs_deltree(struct gfs2_blkreserv *rs)
669{
670	struct gfs2_rgrpd *rgd;
671
672	rgd = rs->rs_rbm.rgd;
673	if (rgd) {
674		spin_lock(&rgd->rd_rsspin);
675		__rs_deltree(rs);
676		spin_unlock(&rgd->rd_rsspin);
677	}
678}
679
680/**
681 * gfs2_rs_delete - delete a multi-block reservation
682 * @ip: The inode for this reservation
683 * @wcount: The inode's write count, or NULL
684 *
685 */
686void gfs2_rs_delete(struct gfs2_inode *ip, atomic_t *wcount)
687{
688	down_write(&ip->i_rw_mutex);
689	if (ip->i_res && ((wcount == NULL) || (atomic_read(wcount) <= 1))) {
690		gfs2_rs_deltree(ip->i_res);
691		BUG_ON(ip->i_res->rs_free);
692		kmem_cache_free(gfs2_rsrv_cachep, ip->i_res);
693		ip->i_res = NULL;
694	}
695	up_write(&ip->i_rw_mutex);
696}
697
698/**
699 * return_all_reservations - return all reserved blocks back to the rgrp.
700 * @rgd: the rgrp that needs its space back
701 *
702 * We previously reserved a bunch of blocks for allocation. Now we need to
703 * give them back. This leave the reservation structures in tact, but removes
704 * all of their corresponding "no-fly zones".
705 */
706static void return_all_reservations(struct gfs2_rgrpd *rgd)
707{
708	struct rb_node *n;
709	struct gfs2_blkreserv *rs;
710
711	spin_lock(&rgd->rd_rsspin);
712	while ((n = rb_first(&rgd->rd_rstree))) {
713		rs = rb_entry(n, struct gfs2_blkreserv, rs_node);
714		__rs_deltree(rs);
715	}
716	spin_unlock(&rgd->rd_rsspin);
717}
718
719void gfs2_clear_rgrpd(struct gfs2_sbd *sdp)
720{
721	struct rb_node *n;
722	struct gfs2_rgrpd *rgd;
723	struct gfs2_glock *gl;
724
725	while ((n = rb_first(&sdp->sd_rindex_tree))) {
726		rgd = rb_entry(n, struct gfs2_rgrpd, rd_node);
727		gl = rgd->rd_gl;
728
729		rb_erase(n, &sdp->sd_rindex_tree);
730
731		if (gl) {
732			spin_lock(&gl->gl_spin);
733			gl->gl_object = NULL;
734			spin_unlock(&gl->gl_spin);
735			gfs2_glock_add_to_lru(gl);
736			gfs2_glock_put(gl);
737		}
738
739		gfs2_free_clones(rgd);
740		kfree(rgd->rd_bits);
741		return_all_reservations(rgd);
742		kmem_cache_free(gfs2_rgrpd_cachep, rgd);
743	}
744}
745
746static void gfs2_rindex_print(const struct gfs2_rgrpd *rgd)
747{
748	pr_info("ri_addr = %llu\n", (unsigned long long)rgd->rd_addr);
749	pr_info("ri_length = %u\n", rgd->rd_length);
750	pr_info("ri_data0 = %llu\n", (unsigned long long)rgd->rd_data0);
751	pr_info("ri_data = %u\n", rgd->rd_data);
752	pr_info("ri_bitbytes = %u\n", rgd->rd_bitbytes);
753}
754
755/**
756 * gfs2_compute_bitstructs - Compute the bitmap sizes
757 * @rgd: The resource group descriptor
758 *
759 * Calculates bitmap descriptors, one for each block that contains bitmap data
760 *
761 * Returns: errno
762 */
763
764static int compute_bitstructs(struct gfs2_rgrpd *rgd)
765{
766	struct gfs2_sbd *sdp = rgd->rd_sbd;
767	struct gfs2_bitmap *bi;
768	u32 length = rgd->rd_length; /* # blocks in hdr & bitmap */
769	u32 bytes_left, bytes;
770	int x;
771
772	if (!length)
773		return -EINVAL;
774
775	rgd->rd_bits = kcalloc(length, sizeof(struct gfs2_bitmap), GFP_NOFS);
776	if (!rgd->rd_bits)
777		return -ENOMEM;
778
779	bytes_left = rgd->rd_bitbytes;
780
781	for (x = 0; x < length; x++) {
782		bi = rgd->rd_bits + x;
783
784		bi->bi_flags = 0;
785		/* small rgrp; bitmap stored completely in header block */
786		if (length == 1) {
787			bytes = bytes_left;
788			bi->bi_offset = sizeof(struct gfs2_rgrp);
789			bi->bi_start = 0;
790			bi->bi_len = bytes;
791			bi->bi_blocks = bytes * GFS2_NBBY;
792		/* header block */
793		} else if (x == 0) {
794			bytes = sdp->sd_sb.sb_bsize - sizeof(struct gfs2_rgrp);
795			bi->bi_offset = sizeof(struct gfs2_rgrp);
796			bi->bi_start = 0;
797			bi->bi_len = bytes;
798			bi->bi_blocks = bytes * GFS2_NBBY;
799		/* last block */
800		} else if (x + 1 == length) {
801			bytes = bytes_left;
802			bi->bi_offset = sizeof(struct gfs2_meta_header);
803			bi->bi_start = rgd->rd_bitbytes - bytes_left;
804			bi->bi_len = bytes;
805			bi->bi_blocks = bytes * GFS2_NBBY;
806		/* other blocks */
807		} else {
808			bytes = sdp->sd_sb.sb_bsize -
809				sizeof(struct gfs2_meta_header);
810			bi->bi_offset = sizeof(struct gfs2_meta_header);
811			bi->bi_start = rgd->rd_bitbytes - bytes_left;
812			bi->bi_len = bytes;
813			bi->bi_blocks = bytes * GFS2_NBBY;
814		}
815
816		bytes_left -= bytes;
817	}
818
819	if (bytes_left) {
820		gfs2_consist_rgrpd(rgd);
821		return -EIO;
822	}
823	bi = rgd->rd_bits + (length - 1);
824	if ((bi->bi_start + bi->bi_len) * GFS2_NBBY != rgd->rd_data) {
825		if (gfs2_consist_rgrpd(rgd)) {
826			gfs2_rindex_print(rgd);
827			fs_err(sdp, "start=%u len=%u offset=%u\n",
828			       bi->bi_start, bi->bi_len, bi->bi_offset);
829		}
830		return -EIO;
831	}
832
833	return 0;
834}
835
836/**
837 * gfs2_ri_total - Total up the file system space, according to the rindex.
838 * @sdp: the filesystem
839 *
840 */
841u64 gfs2_ri_total(struct gfs2_sbd *sdp)
842{
843	u64 total_data = 0;
844	struct inode *inode = sdp->sd_rindex;
845	struct gfs2_inode *ip = GFS2_I(inode);
846	char buf[sizeof(struct gfs2_rindex)];
847	int error, rgrps;
848
849	for (rgrps = 0;; rgrps++) {
850		loff_t pos = rgrps * sizeof(struct gfs2_rindex);
851
852		if (pos + sizeof(struct gfs2_rindex) > i_size_read(inode))
853			break;
854		error = gfs2_internal_read(ip, buf, &pos,
855					   sizeof(struct gfs2_rindex));
856		if (error != sizeof(struct gfs2_rindex))
857			break;
858		total_data += be32_to_cpu(((struct gfs2_rindex *)buf)->ri_data);
859	}
860	return total_data;
861}
862
863static int rgd_insert(struct gfs2_rgrpd *rgd)
864{
865	struct gfs2_sbd *sdp = rgd->rd_sbd;
866	struct rb_node **newn = &sdp->sd_rindex_tree.rb_node, *parent = NULL;
867
868	/* Figure out where to put new node */
869	while (*newn) {
870		struct gfs2_rgrpd *cur = rb_entry(*newn, struct gfs2_rgrpd,
871						  rd_node);
872
873		parent = *newn;
874		if (rgd->rd_addr < cur->rd_addr)
875			newn = &((*newn)->rb_left);
876		else if (rgd->rd_addr > cur->rd_addr)
877			newn = &((*newn)->rb_right);
878		else
879			return -EEXIST;
880	}
881
882	rb_link_node(&rgd->rd_node, parent, newn);
883	rb_insert_color(&rgd->rd_node, &sdp->sd_rindex_tree);
884	sdp->sd_rgrps++;
885	return 0;
886}
887
888/**
889 * read_rindex_entry - Pull in a new resource index entry from the disk
890 * @ip: Pointer to the rindex inode
891 *
892 * Returns: 0 on success, > 0 on EOF, error code otherwise
893 */
894
895static int read_rindex_entry(struct gfs2_inode *ip)
896{
897	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
898	const unsigned bsize = sdp->sd_sb.sb_bsize;
899	loff_t pos = sdp->sd_rgrps * sizeof(struct gfs2_rindex);
900	struct gfs2_rindex buf;
901	int error;
902	struct gfs2_rgrpd *rgd;
903
904	if (pos >= i_size_read(&ip->i_inode))
905		return 1;
906
907	error = gfs2_internal_read(ip, (char *)&buf, &pos,
908				   sizeof(struct gfs2_rindex));
909
910	if (error != sizeof(struct gfs2_rindex))
911		return (error == 0) ? 1 : error;
912
913	rgd = kmem_cache_zalloc(gfs2_rgrpd_cachep, GFP_NOFS);
914	error = -ENOMEM;
915	if (!rgd)
916		return error;
917
918	rgd->rd_sbd = sdp;
919	rgd->rd_addr = be64_to_cpu(buf.ri_addr);
920	rgd->rd_length = be32_to_cpu(buf.ri_length);
921	rgd->rd_data0 = be64_to_cpu(buf.ri_data0);
922	rgd->rd_data = be32_to_cpu(buf.ri_data);
923	rgd->rd_bitbytes = be32_to_cpu(buf.ri_bitbytes);
924	spin_lock_init(&rgd->rd_rsspin);
925
926	error = compute_bitstructs(rgd);
927	if (error)
928		goto fail;
929
930	error = gfs2_glock_get(sdp, rgd->rd_addr,
931			       &gfs2_rgrp_glops, CREATE, &rgd->rd_gl);
932	if (error)
933		goto fail;
934
935	rgd->rd_gl->gl_object = rgd;
936	rgd->rd_gl->gl_vm.start = rgd->rd_addr * bsize;
937	rgd->rd_gl->gl_vm.end = rgd->rd_gl->gl_vm.start + (rgd->rd_length * bsize) - 1;
938	rgd->rd_rgl = (struct gfs2_rgrp_lvb *)rgd->rd_gl->gl_lksb.sb_lvbptr;
939	rgd->rd_flags &= ~(GFS2_RDF_UPTODATE | GFS2_RDF_PREFERRED);
940	if (rgd->rd_data > sdp->sd_max_rg_data)
941		sdp->sd_max_rg_data = rgd->rd_data;
942	spin_lock(&sdp->sd_rindex_spin);
943	error = rgd_insert(rgd);
944	spin_unlock(&sdp->sd_rindex_spin);
945	if (!error)
946		return 0;
947
948	error = 0; /* someone else read in the rgrp; free it and ignore it */
949	gfs2_glock_put(rgd->rd_gl);
950
951fail:
952	kfree(rgd->rd_bits);
953	kmem_cache_free(gfs2_rgrpd_cachep, rgd);
954	return error;
955}
956
957/**
958 * set_rgrp_preferences - Run all the rgrps, selecting some we prefer to use
959 * @sdp: the GFS2 superblock
960 *
961 * The purpose of this function is to select a subset of the resource groups
962 * and mark them as PREFERRED. We do it in such a way that each node prefers
963 * to use a unique set of rgrps to minimize glock contention.
964 */
965static void set_rgrp_preferences(struct gfs2_sbd *sdp)
966{
967	struct gfs2_rgrpd *rgd, *first;
968	int i;
969
970	/* Skip an initial number of rgrps, based on this node's journal ID.
971	   That should start each node out on its own set. */
972	rgd = gfs2_rgrpd_get_first(sdp);
973	for (i = 0; i < sdp->sd_lockstruct.ls_jid; i++)
974		rgd = gfs2_rgrpd_get_next(rgd);
975	first = rgd;
976
977	do {
978		rgd->rd_flags |= GFS2_RDF_PREFERRED;
979		for (i = 0; i < sdp->sd_journals; i++) {
980			rgd = gfs2_rgrpd_get_next(rgd);
981			if (rgd == first)
982				break;
983		}
984	} while (rgd != first);
985}
986
987/**
988 * gfs2_ri_update - Pull in a new resource index from the disk
989 * @ip: pointer to the rindex inode
990 *
991 * Returns: 0 on successful update, error code otherwise
992 */
993
994static int gfs2_ri_update(struct gfs2_inode *ip)
995{
996	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
997	int error;
998
999	do {
1000		error = read_rindex_entry(ip);
1001	} while (error == 0);
1002
1003	if (error < 0)
1004		return error;
1005
1006	set_rgrp_preferences(sdp);
1007
1008	sdp->sd_rindex_uptodate = 1;
1009	return 0;
1010}
1011
1012/**
1013 * gfs2_rindex_update - Update the rindex if required
1014 * @sdp: The GFS2 superblock
1015 *
1016 * We grab a lock on the rindex inode to make sure that it doesn't
1017 * change whilst we are performing an operation. We keep this lock
1018 * for quite long periods of time compared to other locks. This
1019 * doesn't matter, since it is shared and it is very, very rarely
1020 * accessed in the exclusive mode (i.e. only when expanding the filesystem).
1021 *
1022 * This makes sure that we're using the latest copy of the resource index
1023 * special file, which might have been updated if someone expanded the
1024 * filesystem (via gfs2_grow utility), which adds new resource groups.
1025 *
1026 * Returns: 0 on succeess, error code otherwise
1027 */
1028
1029int gfs2_rindex_update(struct gfs2_sbd *sdp)
1030{
1031	struct gfs2_inode *ip = GFS2_I(sdp->sd_rindex);
1032	struct gfs2_glock *gl = ip->i_gl;
1033	struct gfs2_holder ri_gh;
1034	int error = 0;
1035	int unlock_required = 0;
1036
1037	/* Read new copy from disk if we don't have the latest */
1038	if (!sdp->sd_rindex_uptodate) {
1039		if (!gfs2_glock_is_locked_by_me(gl)) {
1040			error = gfs2_glock_nq_init(gl, LM_ST_SHARED, 0, &ri_gh);
1041			if (error)
1042				return error;
1043			unlock_required = 1;
1044		}
1045		if (!sdp->sd_rindex_uptodate)
1046			error = gfs2_ri_update(ip);
1047		if (unlock_required)
1048			gfs2_glock_dq_uninit(&ri_gh);
1049	}
1050
1051	return error;
1052}
1053
1054static void gfs2_rgrp_in(struct gfs2_rgrpd *rgd, const void *buf)
1055{
1056	const struct gfs2_rgrp *str = buf;
1057	u32 rg_flags;
1058
1059	rg_flags = be32_to_cpu(str->rg_flags);
1060	rg_flags &= ~GFS2_RDF_MASK;
1061	rgd->rd_flags &= GFS2_RDF_MASK;
1062	rgd->rd_flags |= rg_flags;
1063	rgd->rd_free = be32_to_cpu(str->rg_free);
1064	rgd->rd_dinodes = be32_to_cpu(str->rg_dinodes);
1065	rgd->rd_igeneration = be64_to_cpu(str->rg_igeneration);
1066}
1067
1068static void gfs2_rgrp_out(struct gfs2_rgrpd *rgd, void *buf)
1069{
1070	struct gfs2_rgrp *str = buf;
1071
1072	str->rg_flags = cpu_to_be32(rgd->rd_flags & ~GFS2_RDF_MASK);
1073	str->rg_free = cpu_to_be32(rgd->rd_free);
1074	str->rg_dinodes = cpu_to_be32(rgd->rd_dinodes);
1075	str->__pad = cpu_to_be32(0);
1076	str->rg_igeneration = cpu_to_be64(rgd->rd_igeneration);
1077	memset(&str->rg_reserved, 0, sizeof(str->rg_reserved));
1078}
1079
1080static int gfs2_rgrp_lvb_valid(struct gfs2_rgrpd *rgd)
1081{
1082	struct gfs2_rgrp_lvb *rgl = rgd->rd_rgl;
1083	struct gfs2_rgrp *str = (struct gfs2_rgrp *)rgd->rd_bits[0].bi_bh->b_data;
1084
1085	if (rgl->rl_flags != str->rg_flags || rgl->rl_free != str->rg_free ||
1086	    rgl->rl_dinodes != str->rg_dinodes ||
1087	    rgl->rl_igeneration != str->rg_igeneration)
1088		return 0;
1089	return 1;
1090}
1091
1092static void gfs2_rgrp_ondisk2lvb(struct gfs2_rgrp_lvb *rgl, const void *buf)
1093{
1094	const struct gfs2_rgrp *str = buf;
1095
1096	rgl->rl_magic = cpu_to_be32(GFS2_MAGIC);
1097	rgl->rl_flags = str->rg_flags;
1098	rgl->rl_free = str->rg_free;
1099	rgl->rl_dinodes = str->rg_dinodes;
1100	rgl->rl_igeneration = str->rg_igeneration;
1101	rgl->__pad = 0UL;
1102}
1103
1104static void update_rgrp_lvb_unlinked(struct gfs2_rgrpd *rgd, u32 change)
1105{
1106	struct gfs2_rgrp_lvb *rgl = rgd->rd_rgl;
1107	u32 unlinked = be32_to_cpu(rgl->rl_unlinked) + change;
1108	rgl->rl_unlinked = cpu_to_be32(unlinked);
1109}
1110
1111static u32 count_unlinked(struct gfs2_rgrpd *rgd)
1112{
1113	struct gfs2_bitmap *bi;
1114	const u32 length = rgd->rd_length;
1115	const u8 *buffer = NULL;
1116	u32 i, goal, count = 0;
1117
1118	for (i = 0, bi = rgd->rd_bits; i < length; i++, bi++) {
1119		goal = 0;
1120		buffer = bi->bi_bh->b_data + bi->bi_offset;
1121		WARN_ON(!buffer_uptodate(bi->bi_bh));
1122		while (goal < bi->bi_len * GFS2_NBBY) {
1123			goal = gfs2_bitfit(buffer, bi->bi_len, goal,
1124					   GFS2_BLKST_UNLINKED);
1125			if (goal == BFITNOENT)
1126				break;
1127			count++;
1128			goal++;
1129		}
1130	}
1131
1132	return count;
1133}
1134
1135
1136/**
1137 * gfs2_rgrp_bh_get - Read in a RG's header and bitmaps
1138 * @rgd: the struct gfs2_rgrpd describing the RG to read in
1139 *
1140 * Read in all of a Resource Group's header and bitmap blocks.
1141 * Caller must eventually call gfs2_rgrp_relse() to free the bitmaps.
1142 *
1143 * Returns: errno
1144 */
1145
1146static int gfs2_rgrp_bh_get(struct gfs2_rgrpd *rgd)
1147{
1148	struct gfs2_sbd *sdp = rgd->rd_sbd;
1149	struct gfs2_glock *gl = rgd->rd_gl;
1150	unsigned int length = rgd->rd_length;
1151	struct gfs2_bitmap *bi;
1152	unsigned int x, y;
1153	int error;
1154
1155	if (rgd->rd_bits[0].bi_bh != NULL)
1156		return 0;
1157
1158	for (x = 0; x < length; x++) {
1159		bi = rgd->rd_bits + x;
1160		error = gfs2_meta_read(gl, rgd->rd_addr + x, 0, &bi->bi_bh);
1161		if (error)
1162			goto fail;
1163	}
1164
1165	for (y = length; y--;) {
1166		bi = rgd->rd_bits + y;
1167		error = gfs2_meta_wait(sdp, bi->bi_bh);
1168		if (error)
1169			goto fail;
1170		if (gfs2_metatype_check(sdp, bi->bi_bh, y ? GFS2_METATYPE_RB :
1171					      GFS2_METATYPE_RG)) {
1172			error = -EIO;
1173			goto fail;
1174		}
1175	}
1176
1177	if (!(rgd->rd_flags & GFS2_RDF_UPTODATE)) {
1178		for (x = 0; x < length; x++)
1179			clear_bit(GBF_FULL, &rgd->rd_bits[x].bi_flags);
1180		gfs2_rgrp_in(rgd, (rgd->rd_bits[0].bi_bh)->b_data);
1181		rgd->rd_flags |= (GFS2_RDF_UPTODATE | GFS2_RDF_CHECK);
1182		rgd->rd_free_clone = rgd->rd_free;
1183		/* max out the rgrp allocation failure point */
1184		rgd->rd_extfail_pt = rgd->rd_free;
1185	}
1186	if (cpu_to_be32(GFS2_MAGIC) != rgd->rd_rgl->rl_magic) {
1187		rgd->rd_rgl->rl_unlinked = cpu_to_be32(count_unlinked(rgd));
1188		gfs2_rgrp_ondisk2lvb(rgd->rd_rgl,
1189				     rgd->rd_bits[0].bi_bh->b_data);
1190	}
1191	else if (sdp->sd_args.ar_rgrplvb) {
1192		if (!gfs2_rgrp_lvb_valid(rgd)){
1193			gfs2_consist_rgrpd(rgd);
1194			error = -EIO;
1195			goto fail;
1196		}
1197		if (rgd->rd_rgl->rl_unlinked == 0)
1198			rgd->rd_flags &= ~GFS2_RDF_CHECK;
1199	}
1200	return 0;
1201
1202fail:
1203	while (x--) {
1204		bi = rgd->rd_bits + x;
1205		brelse(bi->bi_bh);
1206		bi->bi_bh = NULL;
1207		gfs2_assert_warn(sdp, !bi->bi_clone);
1208	}
1209
1210	return error;
1211}
1212
1213static int update_rgrp_lvb(struct gfs2_rgrpd *rgd)
1214{
1215	u32 rl_flags;
1216
1217	if (rgd->rd_flags & GFS2_RDF_UPTODATE)
1218		return 0;
1219
1220	if (cpu_to_be32(GFS2_MAGIC) != rgd->rd_rgl->rl_magic)
1221		return gfs2_rgrp_bh_get(rgd);
1222
1223	rl_flags = be32_to_cpu(rgd->rd_rgl->rl_flags);
1224	rl_flags &= ~GFS2_RDF_MASK;
1225	rgd->rd_flags &= GFS2_RDF_MASK;
1226	rgd->rd_flags |= (rl_flags | GFS2_RDF_UPTODATE | GFS2_RDF_CHECK);
1227	if (rgd->rd_rgl->rl_unlinked == 0)
1228		rgd->rd_flags &= ~GFS2_RDF_CHECK;
1229	rgd->rd_free = be32_to_cpu(rgd->rd_rgl->rl_free);
1230	rgd->rd_free_clone = rgd->rd_free;
1231	rgd->rd_dinodes = be32_to_cpu(rgd->rd_rgl->rl_dinodes);
1232	rgd->rd_igeneration = be64_to_cpu(rgd->rd_rgl->rl_igeneration);
1233	return 0;
1234}
1235
1236int gfs2_rgrp_go_lock(struct gfs2_holder *gh)
1237{
1238	struct gfs2_rgrpd *rgd = gh->gh_gl->gl_object;
1239	struct gfs2_sbd *sdp = rgd->rd_sbd;
1240
1241	if (gh->gh_flags & GL_SKIP && sdp->sd_args.ar_rgrplvb)
1242		return 0;
1243	return gfs2_rgrp_bh_get(rgd);
1244}
1245
1246/**
1247 * gfs2_rgrp_go_unlock - Release RG bitmaps read in with gfs2_rgrp_bh_get()
1248 * @gh: The glock holder for the resource group
1249 *
1250 */
1251
1252void gfs2_rgrp_go_unlock(struct gfs2_holder *gh)
1253{
1254	struct gfs2_rgrpd *rgd = gh->gh_gl->gl_object;
1255	int x, length = rgd->rd_length;
1256
1257	for (x = 0; x < length; x++) {
1258		struct gfs2_bitmap *bi = rgd->rd_bits + x;
1259		if (bi->bi_bh) {
1260			brelse(bi->bi_bh);
1261			bi->bi_bh = NULL;
1262		}
1263	}
1264
1265}
1266
1267int gfs2_rgrp_send_discards(struct gfs2_sbd *sdp, u64 offset,
1268			     struct buffer_head *bh,
1269			     const struct gfs2_bitmap *bi, unsigned minlen, u64 *ptrimmed)
1270{
1271	struct super_block *sb = sdp->sd_vfs;
1272	u64 blk;
1273	sector_t start = 0;
1274	sector_t nr_blks = 0;
1275	int rv;
1276	unsigned int x;
1277	u32 trimmed = 0;
1278	u8 diff;
1279
1280	for (x = 0; x < bi->bi_len; x++) {
1281		const u8 *clone = bi->bi_clone ? bi->bi_clone : bi->bi_bh->b_data;
1282		clone += bi->bi_offset;
1283		clone += x;
1284		if (bh) {
1285			const u8 *orig = bh->b_data + bi->bi_offset + x;
1286			diff = ~(*orig | (*orig >> 1)) & (*clone | (*clone >> 1));
1287		} else {
1288			diff = ~(*clone | (*clone >> 1));
1289		}
1290		diff &= 0x55;
1291		if (diff == 0)
1292			continue;
1293		blk = offset + ((bi->bi_start + x) * GFS2_NBBY);
1294		while(diff) {
1295			if (diff & 1) {
1296				if (nr_blks == 0)
1297					goto start_new_extent;
1298				if ((start + nr_blks) != blk) {
1299					if (nr_blks >= minlen) {
1300						rv = sb_issue_discard(sb,
1301							start, nr_blks,
1302							GFP_NOFS, 0);
1303						if (rv)
1304							goto fail;
1305						trimmed += nr_blks;
1306					}
1307					nr_blks = 0;
1308start_new_extent:
1309					start = blk;
1310				}
1311				nr_blks++;
1312			}
1313			diff >>= 2;
1314			blk++;
1315		}
1316	}
1317	if (nr_blks >= minlen) {
1318		rv = sb_issue_discard(sb, start, nr_blks, GFP_NOFS, 0);
1319		if (rv)
1320			goto fail;
1321		trimmed += nr_blks;
1322	}
1323	if (ptrimmed)
1324		*ptrimmed = trimmed;
1325	return 0;
1326
1327fail:
1328	if (sdp->sd_args.ar_discard)
1329		fs_warn(sdp, "error %d on discard request, turning discards off for this filesystem", rv);
1330	sdp->sd_args.ar_discard = 0;
1331	return -EIO;
1332}
1333
1334/**
1335 * gfs2_fitrim - Generate discard requests for unused bits of the filesystem
1336 * @filp: Any file on the filesystem
1337 * @argp: Pointer to the arguments (also used to pass result)
1338 *
1339 * Returns: 0 on success, otherwise error code
1340 */
1341
1342int gfs2_fitrim(struct file *filp, void __user *argp)
1343{
1344	struct inode *inode = file_inode(filp);
1345	struct gfs2_sbd *sdp = GFS2_SB(inode);
1346	struct request_queue *q = bdev_get_queue(sdp->sd_vfs->s_bdev);
1347	struct buffer_head *bh;
1348	struct gfs2_rgrpd *rgd;
1349	struct gfs2_rgrpd *rgd_end;
1350	struct gfs2_holder gh;
1351	struct fstrim_range r;
1352	int ret = 0;
1353	u64 amt;
1354	u64 trimmed = 0;
1355	u64 start, end, minlen;
1356	unsigned int x;
1357	unsigned bs_shift = sdp->sd_sb.sb_bsize_shift;
1358
1359	if (!capable(CAP_SYS_ADMIN))
1360		return -EPERM;
1361
1362	if (!blk_queue_discard(q))
1363		return -EOPNOTSUPP;
1364
1365	if (copy_from_user(&r, argp, sizeof(r)))
1366		return -EFAULT;
1367
1368	ret = gfs2_rindex_update(sdp);
1369	if (ret)
1370		return ret;
1371
1372	start = r.start >> bs_shift;
1373	end = start + (r.len >> bs_shift);
1374	minlen = max_t(u64, r.minlen,
1375		       q->limits.discard_granularity) >> bs_shift;
1376
1377	if (end <= start || minlen > sdp->sd_max_rg_data)
1378		return -EINVAL;
1379
1380	rgd = gfs2_blk2rgrpd(sdp, start, 0);
1381	rgd_end = gfs2_blk2rgrpd(sdp, end, 0);
1382
1383	if ((gfs2_rgrpd_get_first(sdp) == gfs2_rgrpd_get_next(rgd_end))
1384	    && (start > rgd_end->rd_data0 + rgd_end->rd_data))
1385		return -EINVAL; /* start is beyond the end of the fs */
1386
1387	while (1) {
1388
1389		ret = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0, &gh);
1390		if (ret)
1391			goto out;
1392
1393		if (!(rgd->rd_flags & GFS2_RGF_TRIMMED)) {
1394			/* Trim each bitmap in the rgrp */
1395			for (x = 0; x < rgd->rd_length; x++) {
1396				struct gfs2_bitmap *bi = rgd->rd_bits + x;
1397				ret = gfs2_rgrp_send_discards(sdp,
1398						rgd->rd_data0, NULL, bi, minlen,
1399						&amt);
1400				if (ret) {
1401					gfs2_glock_dq_uninit(&gh);
1402					goto out;
1403				}
1404				trimmed += amt;
1405			}
1406
1407			/* Mark rgrp as having been trimmed */
1408			ret = gfs2_trans_begin(sdp, RES_RG_HDR, 0);
1409			if (ret == 0) {
1410				bh = rgd->rd_bits[0].bi_bh;
1411				rgd->rd_flags |= GFS2_RGF_TRIMMED;
1412				gfs2_trans_add_meta(rgd->rd_gl, bh);
1413				gfs2_rgrp_out(rgd, bh->b_data);
1414				gfs2_rgrp_ondisk2lvb(rgd->rd_rgl, bh->b_data);
1415				gfs2_trans_end(sdp);
1416			}
1417		}
1418		gfs2_glock_dq_uninit(&gh);
1419
1420		if (rgd == rgd_end)
1421			break;
1422
1423		rgd = gfs2_rgrpd_get_next(rgd);
1424	}
1425
1426out:
1427	r.len = trimmed << bs_shift;
1428	if (copy_to_user(argp, &r, sizeof(r)))
1429		return -EFAULT;
1430
1431	return ret;
1432}
1433
1434/**
1435 * rs_insert - insert a new multi-block reservation into the rgrp's rb_tree
1436 * @ip: the inode structure
1437 *
1438 */
1439static void rs_insert(struct gfs2_inode *ip)
1440{
1441	struct rb_node **newn, *parent = NULL;
1442	int rc;
1443	struct gfs2_blkreserv *rs = ip->i_res;
1444	struct gfs2_rgrpd *rgd = rs->rs_rbm.rgd;
1445	u64 fsblock = gfs2_rbm_to_block(&rs->rs_rbm);
1446
1447	BUG_ON(gfs2_rs_active(rs));
1448
1449	spin_lock(&rgd->rd_rsspin);
1450	newn = &rgd->rd_rstree.rb_node;
1451	while (*newn) {
1452		struct gfs2_blkreserv *cur =
1453			rb_entry(*newn, struct gfs2_blkreserv, rs_node);
1454
1455		parent = *newn;
1456		rc = rs_cmp(fsblock, rs->rs_free, cur);
1457		if (rc > 0)
1458			newn = &((*newn)->rb_right);
1459		else if (rc < 0)
1460			newn = &((*newn)->rb_left);
1461		else {
1462			spin_unlock(&rgd->rd_rsspin);
1463			WARN_ON(1);
1464			return;
1465		}
1466	}
1467
1468	rb_link_node(&rs->rs_node, parent, newn);
1469	rb_insert_color(&rs->rs_node, &rgd->rd_rstree);
1470
1471	/* Do our rgrp accounting for the reservation */
1472	rgd->rd_reserved += rs->rs_free; /* blocks reserved */
1473	spin_unlock(&rgd->rd_rsspin);
1474	trace_gfs2_rs(rs, TRACE_RS_INSERT);
1475}
1476
1477/**
1478 * rg_mblk_search - find a group of multiple free blocks to form a reservation
1479 * @rgd: the resource group descriptor
1480 * @ip: pointer to the inode for which we're reserving blocks
1481 * @ap: the allocation parameters
1482 *
1483 */
1484
1485static void rg_mblk_search(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip,
1486			   const struct gfs2_alloc_parms *ap)
1487{
1488	struct gfs2_rbm rbm = { .rgd = rgd, };
1489	u64 goal;
1490	struct gfs2_blkreserv *rs = ip->i_res;
1491	u32 extlen;
1492	u32 free_blocks = rgd->rd_free_clone - rgd->rd_reserved;
1493	int ret;
1494	struct inode *inode = &ip->i_inode;
1495
1496	if (S_ISDIR(inode->i_mode))
1497		extlen = 1;
1498	else {
1499		extlen = max_t(u32, atomic_read(&rs->rs_sizehint), ap->target);
1500		extlen = clamp(extlen, RGRP_RSRV_MINBLKS, free_blocks);
1501	}
1502	if ((rgd->rd_free_clone < rgd->rd_reserved) || (free_blocks < extlen))
1503		return;
1504
1505	/* Find bitmap block that contains bits for goal block */
1506	if (rgrp_contains_block(rgd, ip->i_goal))
1507		goal = ip->i_goal;
1508	else
1509		goal = rgd->rd_last_alloc + rgd->rd_data0;
1510
1511	if (WARN_ON(gfs2_rbm_from_block(&rbm, goal)))
1512		return;
1513
1514	ret = gfs2_rbm_find(&rbm, GFS2_BLKST_FREE, &extlen, ip, true, ap);
1515	if (ret == 0) {
1516		rs->rs_rbm = rbm;
1517		rs->rs_free = extlen;
1518		rs->rs_inum = ip->i_no_addr;
1519		rs_insert(ip);
1520	} else {
1521		if (goal == rgd->rd_last_alloc + rgd->rd_data0)
1522			rgd->rd_last_alloc = 0;
1523	}
1524}
1525
1526/**
1527 * gfs2_next_unreserved_block - Return next block that is not reserved
1528 * @rgd: The resource group
1529 * @block: The starting block
1530 * @length: The required length
1531 * @ip: Ignore any reservations for this inode
1532 *
1533 * If the block does not appear in any reservation, then return the
1534 * block number unchanged. If it does appear in the reservation, then
1535 * keep looking through the tree of reservations in order to find the
1536 * first block number which is not reserved.
1537 */
1538
1539static u64 gfs2_next_unreserved_block(struct gfs2_rgrpd *rgd, u64 block,
1540				      u32 length,
1541				      const struct gfs2_inode *ip)
1542{
1543	struct gfs2_blkreserv *rs;
1544	struct rb_node *n;
1545	int rc;
1546
1547	spin_lock(&rgd->rd_rsspin);
1548	n = rgd->rd_rstree.rb_node;
1549	while (n) {
1550		rs = rb_entry(n, struct gfs2_blkreserv, rs_node);
1551		rc = rs_cmp(block, length, rs);
1552		if (rc < 0)
1553			n = n->rb_left;
1554		else if (rc > 0)
1555			n = n->rb_right;
1556		else
1557			break;
1558	}
1559
1560	if (n) {
1561		while ((rs_cmp(block, length, rs) == 0) && (ip->i_res != rs)) {
1562			block = gfs2_rbm_to_block(&rs->rs_rbm) + rs->rs_free;
1563			n = n->rb_right;
1564			if (n == NULL)
1565				break;
1566			rs = rb_entry(n, struct gfs2_blkreserv, rs_node);
1567		}
1568	}
1569
1570	spin_unlock(&rgd->rd_rsspin);
1571	return block;
1572}
1573
1574/**
1575 * gfs2_reservation_check_and_update - Check for reservations during block alloc
1576 * @rbm: The current position in the resource group
1577 * @ip: The inode for which we are searching for blocks
1578 * @minext: The minimum extent length
1579 * @maxext: A pointer to the maximum extent structure
1580 *
1581 * This checks the current position in the rgrp to see whether there is
1582 * a reservation covering this block. If not then this function is a
1583 * no-op. If there is, then the position is moved to the end of the
1584 * contiguous reservation(s) so that we are pointing at the first
1585 * non-reserved block.
1586 *
1587 * Returns: 0 if no reservation, 1 if @rbm has changed, otherwise an error
1588 */
1589
1590static int gfs2_reservation_check_and_update(struct gfs2_rbm *rbm,
1591					     const struct gfs2_inode *ip,
1592					     u32 minext,
1593					     struct gfs2_extent *maxext)
1594{
1595	u64 block = gfs2_rbm_to_block(rbm);
1596	u32 extlen = 1;
1597	u64 nblock;
1598	int ret;
1599
1600	/*
1601	 * If we have a minimum extent length, then skip over any extent
1602	 * which is less than the min extent length in size.
1603	 */
1604	if (minext) {
1605		extlen = gfs2_free_extlen(rbm, minext);
1606		if (extlen <= maxext->len)
1607			goto fail;
1608	}
1609
1610	/*
1611	 * Check the extent which has been found against the reservations
1612	 * and skip if parts of it are already reserved
1613	 */
1614	nblock = gfs2_next_unreserved_block(rbm->rgd, block, extlen, ip);
1615	if (nblock == block) {
1616		if (!minext || extlen >= minext)
1617			return 0;
1618
1619		if (extlen > maxext->len) {
1620			maxext->len = extlen;
1621			maxext->rbm = *rbm;
1622		}
1623fail:
1624		nblock = block + extlen;
1625	}
1626	ret = gfs2_rbm_from_block(rbm, nblock);
1627	if (ret < 0)
1628		return ret;
1629	return 1;
1630}
1631
1632/**
1633 * gfs2_rbm_find - Look for blocks of a particular state
1634 * @rbm: Value/result starting position and final position
1635 * @state: The state which we want to find
1636 * @minext: Pointer to the requested extent length (NULL for a single block)
1637 *          This is updated to be the actual reservation size.
1638 * @ip: If set, check for reservations
1639 * @nowrap: Stop looking at the end of the rgrp, rather than wrapping
1640 *          around until we've reached the starting point.
1641 * @ap: the allocation parameters
1642 *
1643 * Side effects:
1644 * - If looking for free blocks, we set GBF_FULL on each bitmap which
1645 *   has no free blocks in it.
1646 * - If looking for free blocks, we set rd_extfail_pt on each rgrp which
1647 *   has come up short on a free block search.
1648 *
1649 * Returns: 0 on success, -ENOSPC if there is no block of the requested state
1650 */
1651
1652static int gfs2_rbm_find(struct gfs2_rbm *rbm, u8 state, u32 *minext,
1653			 const struct gfs2_inode *ip, bool nowrap,
1654			 const struct gfs2_alloc_parms *ap)
1655{
1656	struct buffer_head *bh;
1657	int initial_bii;
1658	u32 initial_offset;
1659	int first_bii = rbm->bii;
1660	u32 first_offset = rbm->offset;
1661	u32 offset;
1662	u8 *buffer;
1663	int n = 0;
1664	int iters = rbm->rgd->rd_length;
1665	int ret;
1666	struct gfs2_bitmap *bi;
1667	struct gfs2_extent maxext = { .rbm.rgd = rbm->rgd, };
1668
1669	/* If we are not starting at the beginning of a bitmap, then we
1670	 * need to add one to the bitmap count to ensure that we search
1671	 * the starting bitmap twice.
1672	 */
1673	if (rbm->offset != 0)
1674		iters++;
1675
1676	while(1) {
1677		bi = rbm_bi(rbm);
1678		if (test_bit(GBF_FULL, &bi->bi_flags) &&
1679		    (state == GFS2_BLKST_FREE))
1680			goto next_bitmap;
1681
1682		bh = bi->bi_bh;
1683		buffer = bh->b_data + bi->bi_offset;
1684		WARN_ON(!buffer_uptodate(bh));
1685		if (state != GFS2_BLKST_UNLINKED && bi->bi_clone)
1686			buffer = bi->bi_clone + bi->bi_offset;
1687		initial_offset = rbm->offset;
1688		offset = gfs2_bitfit(buffer, bi->bi_len, rbm->offset, state);
1689		if (offset == BFITNOENT)
1690			goto bitmap_full;
1691		rbm->offset = offset;
1692		if (ip == NULL)
1693			return 0;
1694
1695		initial_bii = rbm->bii;
1696		ret = gfs2_reservation_check_and_update(rbm, ip,
1697							minext ? *minext : 0,
1698							&maxext);
1699		if (ret == 0)
1700			return 0;
1701		if (ret > 0) {
1702			n += (rbm->bii - initial_bii);
1703			goto next_iter;
1704		}
1705		if (ret == -E2BIG) {
1706			rbm->bii = 0;
1707			rbm->offset = 0;
1708			n += (rbm->bii - initial_bii);
1709			goto res_covered_end_of_rgrp;
1710		}
1711		return ret;
1712
1713bitmap_full:	/* Mark bitmap as full and fall through */
1714		if ((state == GFS2_BLKST_FREE) && initial_offset == 0) {
1715			struct gfs2_bitmap *bi = rbm_bi(rbm);
1716			set_bit(GBF_FULL, &bi->bi_flags);
1717		}
1718
1719next_bitmap:	/* Find next bitmap in the rgrp */
1720		rbm->offset = 0;
1721		rbm->bii++;
1722		if (rbm->bii == rbm->rgd->rd_length)
1723			rbm->bii = 0;
1724res_covered_end_of_rgrp:
1725		if ((rbm->bii == 0) && nowrap)
1726			break;
1727		n++;
1728next_iter:
1729		if (n >= iters)
1730			break;
1731	}
1732
1733	if (minext == NULL || state != GFS2_BLKST_FREE)
1734		return -ENOSPC;
1735
1736	/* If the extent was too small, and it's smaller than the smallest
1737	   to have failed before, remember for future reference that it's
1738	   useless to search this rgrp again for this amount or more. */
1739	if ((first_offset == 0) && (first_bii == 0) &&
1740	    (*minext < rbm->rgd->rd_extfail_pt))
1741		rbm->rgd->rd_extfail_pt = *minext;
1742
1743	/* If the maximum extent we found is big enough to fulfill the
1744	   minimum requirements, use it anyway. */
1745	if (maxext.len) {
1746		*rbm = maxext.rbm;
1747		*minext = maxext.len;
1748		return 0;
1749	}
1750
1751	return -ENOSPC;
1752}
1753
1754/**
1755 * try_rgrp_unlink - Look for any unlinked, allocated, but unused inodes
1756 * @rgd: The rgrp
1757 * @last_unlinked: block address of the last dinode we unlinked
1758 * @skip: block address we should explicitly not unlink
1759 *
1760 * Returns: 0 if no error
1761 *          The inode, if one has been found, in inode.
1762 */
1763
1764static void try_rgrp_unlink(struct gfs2_rgrpd *rgd, u64 *last_unlinked, u64 skip)
1765{
1766	u64 block;
1767	struct gfs2_sbd *sdp = rgd->rd_sbd;
1768	struct gfs2_glock *gl;
1769	struct gfs2_inode *ip;
1770	int error;
1771	int found = 0;
1772	struct gfs2_rbm rbm = { .rgd = rgd, .bii = 0, .offset = 0 };
1773
1774	while (1) {
1775		down_write(&sdp->sd_log_flush_lock);
1776		error = gfs2_rbm_find(&rbm, GFS2_BLKST_UNLINKED, NULL, NULL,
1777				      true, NULL);
1778		up_write(&sdp->sd_log_flush_lock);
1779		if (error == -ENOSPC)
1780			break;
1781		if (WARN_ON_ONCE(error))
1782			break;
1783
1784		block = gfs2_rbm_to_block(&rbm);
1785		if (gfs2_rbm_from_block(&rbm, block + 1))
1786			break;
1787		if (*last_unlinked != NO_BLOCK && block <= *last_unlinked)
1788			continue;
1789		if (block == skip)
1790			continue;
1791		*last_unlinked = block;
1792
1793		error = gfs2_glock_get(sdp, block, &gfs2_inode_glops, CREATE, &gl);
1794		if (error)
1795			continue;
1796
1797		/* If the inode is already in cache, we can ignore it here
1798		 * because the existing inode disposal code will deal with
1799		 * it when all refs have gone away. Accessing gl_object like
1800		 * this is not safe in general. Here it is ok because we do
1801		 * not dereference the pointer, and we only need an approx
1802		 * answer to whether it is NULL or not.
1803		 */
1804		ip = gl->gl_object;
1805
1806		if (ip || queue_work(gfs2_delete_workqueue, &gl->gl_delete) == 0)
1807			gfs2_glock_put(gl);
1808		else
1809			found++;
1810
1811		/* Limit reclaim to sensible number of tasks */
1812		if (found > NR_CPUS)
1813			return;
1814	}
1815
1816	rgd->rd_flags &= ~GFS2_RDF_CHECK;
1817	return;
1818}
1819
1820/**
1821 * gfs2_rgrp_congested - Use stats to figure out whether an rgrp is congested
1822 * @rgd: The rgrp in question
1823 * @loops: An indication of how picky we can be (0=very, 1=less so)
1824 *
1825 * This function uses the recently added glock statistics in order to
1826 * figure out whether a parciular resource group is suffering from
1827 * contention from multiple nodes. This is done purely on the basis
1828 * of timings, since this is the only data we have to work with and
1829 * our aim here is to reject a resource group which is highly contended
1830 * but (very important) not to do this too often in order to ensure that
1831 * we do not land up introducing fragmentation by changing resource
1832 * groups when not actually required.
1833 *
1834 * The calculation is fairly simple, we want to know whether the SRTTB
1835 * (i.e. smoothed round trip time for blocking operations) to acquire
1836 * the lock for this rgrp's glock is significantly greater than the
1837 * time taken for resource groups on average. We introduce a margin in
1838 * the form of the variable @var which is computed as the sum of the two
1839 * respective variences, and multiplied by a factor depending on @loops
1840 * and whether we have a lot of data to base the decision on. This is
1841 * then tested against the square difference of the means in order to
1842 * decide whether the result is statistically significant or not.
1843 *
1844 * Returns: A boolean verdict on the congestion status
1845 */
1846
1847static bool gfs2_rgrp_congested(const struct gfs2_rgrpd *rgd, int loops)
1848{
1849	const struct gfs2_glock *gl = rgd->rd_gl;
1850	const struct gfs2_sbd *sdp = gl->gl_sbd;
1851	struct gfs2_lkstats *st;
1852	s64 r_dcount, l_dcount;
1853	s64 r_srttb, l_srttb;
1854	s64 srttb_diff;
1855	s64 sqr_diff;
1856	s64 var;
1857
1858	preempt_disable();
1859	st = &this_cpu_ptr(sdp->sd_lkstats)->lkstats[LM_TYPE_RGRP];
1860	r_srttb = st->stats[GFS2_LKS_SRTTB];
1861	r_dcount = st->stats[GFS2_LKS_DCOUNT];
1862	var = st->stats[GFS2_LKS_SRTTVARB] +
1863	      gl->gl_stats.stats[GFS2_LKS_SRTTVARB];
1864	preempt_enable();
1865
1866	l_srttb = gl->gl_stats.stats[GFS2_LKS_SRTTB];
1867	l_dcount = gl->gl_stats.stats[GFS2_LKS_DCOUNT];
1868
1869	if ((l_dcount < 1) || (r_dcount < 1) || (r_srttb == 0))
1870		return false;
1871
1872	srttb_diff = r_srttb - l_srttb;
1873	sqr_diff = srttb_diff * srttb_diff;
1874
1875	var *= 2;
1876	if (l_dcount < 8 || r_dcount < 8)
1877		var *= 2;
1878	if (loops == 1)
1879		var *= 2;
1880
1881	return ((srttb_diff < 0) && (sqr_diff > var));
1882}
1883
1884/**
1885 * gfs2_rgrp_used_recently
1886 * @rs: The block reservation with the rgrp to test
1887 * @msecs: The time limit in milliseconds
1888 *
1889 * Returns: True if the rgrp glock has been used within the time limit
1890 */
1891static bool gfs2_rgrp_used_recently(const struct gfs2_blkreserv *rs,
1892				    u64 msecs)
1893{
1894	u64 tdiff;
1895
1896	tdiff = ktime_to_ns(ktime_sub(ktime_get_real(),
1897                            rs->rs_rbm.rgd->rd_gl->gl_dstamp));
1898
1899	return tdiff > (msecs * 1000 * 1000);
1900}
1901
1902static u32 gfs2_orlov_skip(const struct gfs2_inode *ip)
1903{
1904	const struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
1905	u32 skip;
1906
1907	get_random_bytes(&skip, sizeof(skip));
1908	return skip % sdp->sd_rgrps;
1909}
1910
1911static bool gfs2_select_rgrp(struct gfs2_rgrpd **pos, const struct gfs2_rgrpd *begin)
1912{
1913	struct gfs2_rgrpd *rgd = *pos;
1914	struct gfs2_sbd *sdp = rgd->rd_sbd;
1915
1916	rgd = gfs2_rgrpd_get_next(rgd);
1917	if (rgd == NULL)
1918		rgd = gfs2_rgrpd_get_first(sdp);
1919	*pos = rgd;
1920	if (rgd != begin) /* If we didn't wrap */
1921		return true;
1922	return false;
1923}
1924
1925/**
1926 * fast_to_acquire - determine if a resource group will be fast to acquire
1927 *
1928 * If this is one of our preferred rgrps, it should be quicker to acquire,
1929 * because we tried to set ourselves up as dlm lock master.
1930 */
1931static inline int fast_to_acquire(struct gfs2_rgrpd *rgd)
1932{
1933	struct gfs2_glock *gl = rgd->rd_gl;
1934
1935	if (gl->gl_state != LM_ST_UNLOCKED && list_empty(&gl->gl_holders) &&
1936	    !test_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags) &&
1937	    !test_bit(GLF_DEMOTE, &gl->gl_flags))
1938		return 1;
1939	if (rgd->rd_flags & GFS2_RDF_PREFERRED)
1940		return 1;
1941	return 0;
1942}
1943
1944/**
1945 * gfs2_inplace_reserve - Reserve space in the filesystem
1946 * @ip: the inode to reserve space for
1947 * @ap: the allocation parameters
1948 *
1949 * We try our best to find an rgrp that has at least ap->target blocks
1950 * available. After a couple of passes (loops == 2), the prospects of finding
1951 * such an rgrp diminish. At this stage, we return the first rgrp that has
1952 * atleast ap->min_target blocks available. Either way, we set ap->allowed to
1953 * the number of blocks available in the chosen rgrp.
1954 *
1955 * Returns: 0 on success,
1956 *          -ENOMEM if a suitable rgrp can't be found
1957 *          errno otherwise
1958 */
1959
1960int gfs2_inplace_reserve(struct gfs2_inode *ip, struct gfs2_alloc_parms *ap)
1961{
1962	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
1963	struct gfs2_rgrpd *begin = NULL;
1964	struct gfs2_blkreserv *rs = ip->i_res;
1965	int error = 0, rg_locked, flags = 0;
1966	u64 last_unlinked = NO_BLOCK;
1967	int loops = 0;
1968	u32 skip = 0;
1969
1970	if (sdp->sd_args.ar_rgrplvb)
1971		flags |= GL_SKIP;
1972	if (gfs2_assert_warn(sdp, ap->target))
1973		return -EINVAL;
1974	if (gfs2_rs_active(rs)) {
1975		begin = rs->rs_rbm.rgd;
1976	} else if (ip->i_rgd && rgrp_contains_block(ip->i_rgd, ip->i_goal)) {
1977		rs->rs_rbm.rgd = begin = ip->i_rgd;
1978	} else {
1979		check_and_update_goal(ip);
1980		rs->rs_rbm.rgd = begin = gfs2_blk2rgrpd(sdp, ip->i_goal, 1);
1981	}
1982	if (S_ISDIR(ip->i_inode.i_mode) && (ap->aflags & GFS2_AF_ORLOV))
1983		skip = gfs2_orlov_skip(ip);
1984	if (rs->rs_rbm.rgd == NULL)
1985		return -EBADSLT;
1986
1987	while (loops < 3) {
1988		rg_locked = 1;
1989
1990		if (!gfs2_glock_is_locked_by_me(rs->rs_rbm.rgd->rd_gl)) {
1991			rg_locked = 0;
1992			if (skip && skip--)
1993				goto next_rgrp;
1994			if (!gfs2_rs_active(rs)) {
1995				if (loops == 0 &&
1996				    !fast_to_acquire(rs->rs_rbm.rgd))
1997					goto next_rgrp;
1998				if ((loops < 2) &&
1999				    gfs2_rgrp_used_recently(rs, 1000) &&
2000				    gfs2_rgrp_congested(rs->rs_rbm.rgd, loops))
2001					goto next_rgrp;
2002			}
2003			error = gfs2_glock_nq_init(rs->rs_rbm.rgd->rd_gl,
2004						   LM_ST_EXCLUSIVE, flags,
2005						   &rs->rs_rgd_gh);
2006			if (unlikely(error))
2007				return error;
2008			if (!gfs2_rs_active(rs) && (loops < 2) &&
2009			    gfs2_rgrp_congested(rs->rs_rbm.rgd, loops))
2010				goto skip_rgrp;
2011			if (sdp->sd_args.ar_rgrplvb) {
2012				error = update_rgrp_lvb(rs->rs_rbm.rgd);
2013				if (unlikely(error)) {
2014					gfs2_glock_dq_uninit(&rs->rs_rgd_gh);
2015					return error;
2016				}
2017			}
2018		}
2019
2020		/* Skip unuseable resource groups */
2021		if ((rs->rs_rbm.rgd->rd_flags & (GFS2_RGF_NOALLOC |
2022						 GFS2_RDF_ERROR)) ||
2023		    (loops == 0 && ap->target > rs->rs_rbm.rgd->rd_extfail_pt))
2024			goto skip_rgrp;
2025
2026		if (sdp->sd_args.ar_rgrplvb)
2027			gfs2_rgrp_bh_get(rs->rs_rbm.rgd);
2028
2029		/* Get a reservation if we don't already have one */
2030		if (!gfs2_rs_active(rs))
2031			rg_mblk_search(rs->rs_rbm.rgd, ip, ap);
2032
2033		/* Skip rgrps when we can't get a reservation on first pass */
2034		if (!gfs2_rs_active(rs) && (loops < 1))
2035			goto check_rgrp;
2036
2037		/* If rgrp has enough free space, use it */
2038		if (rs->rs_rbm.rgd->rd_free_clone >= ap->target ||
2039		    (loops == 2 && ap->min_target &&
2040		     rs->rs_rbm.rgd->rd_free_clone >= ap->min_target)) {
2041			ip->i_rgd = rs->rs_rbm.rgd;
2042			ap->allowed = ip->i_rgd->rd_free_clone;
2043			return 0;
2044		}
2045check_rgrp:
2046		/* Check for unlinked inodes which can be reclaimed */
2047		if (rs->rs_rbm.rgd->rd_flags & GFS2_RDF_CHECK)
2048			try_rgrp_unlink(rs->rs_rbm.rgd, &last_unlinked,
2049					ip->i_no_addr);
2050skip_rgrp:
2051		/* Drop reservation, if we couldn't use reserved rgrp */
2052		if (gfs2_rs_active(rs))
2053			gfs2_rs_deltree(rs);
2054
2055		/* Unlock rgrp if required */
2056		if (!rg_locked)
2057			gfs2_glock_dq_uninit(&rs->rs_rgd_gh);
2058next_rgrp:
2059		/* Find the next rgrp, and continue looking */
2060		if (gfs2_select_rgrp(&rs->rs_rbm.rgd, begin))
2061			continue;
2062		if (skip)
2063			continue;
2064
2065		/* If we've scanned all the rgrps, but found no free blocks
2066		 * then this checks for some less likely conditions before
2067		 * trying again.
2068		 */
2069		loops++;
2070		/* Check that fs hasn't grown if writing to rindex */
2071		if (ip == GFS2_I(sdp->sd_rindex) && !sdp->sd_rindex_uptodate) {
2072			error = gfs2_ri_update(ip);
2073			if (error)
2074				return error;
2075		}
2076		/* Flushing the log may release space */
2077		if (loops == 2)
2078			gfs2_log_flush(sdp, NULL, NORMAL_FLUSH);
2079	}
2080
2081	return -ENOSPC;
2082}
2083
2084/**
2085 * gfs2_inplace_release - release an inplace reservation
2086 * @ip: the inode the reservation was taken out on
2087 *
2088 * Release a reservation made by gfs2_inplace_reserve().
2089 */
2090
2091void gfs2_inplace_release(struct gfs2_inode *ip)
2092{
2093	struct gfs2_blkreserv *rs = ip->i_res;
2094
2095	if (rs->rs_rgd_gh.gh_gl)
2096		gfs2_glock_dq_uninit(&rs->rs_rgd_gh);
2097}
2098
2099/**
2100 * gfs2_get_block_type - Check a block in a RG is of given type
2101 * @rgd: the resource group holding the block
2102 * @block: the block number
2103 *
2104 * Returns: The block type (GFS2_BLKST_*)
2105 */
2106
2107static unsigned char gfs2_get_block_type(struct gfs2_rgrpd *rgd, u64 block)
2108{
2109	struct gfs2_rbm rbm = { .rgd = rgd, };
2110	int ret;
2111
2112	ret = gfs2_rbm_from_block(&rbm, block);
2113	WARN_ON_ONCE(ret != 0);
2114
2115	return gfs2_testbit(&rbm);
2116}
2117
2118
2119/**
2120 * gfs2_alloc_extent - allocate an extent from a given bitmap
2121 * @rbm: the resource group information
2122 * @dinode: TRUE if the first block we allocate is for a dinode
2123 * @n: The extent length (value/result)
2124 *
2125 * Add the bitmap buffer to the transaction.
2126 * Set the found bits to @new_state to change block's allocation state.
2127 */
2128static void gfs2_alloc_extent(const struct gfs2_rbm *rbm, bool dinode,
2129			     unsigned int *n)
2130{
2131	struct gfs2_rbm pos = { .rgd = rbm->rgd, };
2132	const unsigned int elen = *n;
2133	u64 block;
2134	int ret;
2135
2136	*n = 1;
2137	block = gfs2_rbm_to_block(rbm);
2138	gfs2_trans_add_meta(rbm->rgd->rd_gl, rbm_bi(rbm)->bi_bh);
2139	gfs2_setbit(rbm, true, dinode ? GFS2_BLKST_DINODE : GFS2_BLKST_USED);
2140	block++;
2141	while (*n < elen) {
2142		ret = gfs2_rbm_from_block(&pos, block);
2143		if (ret || gfs2_testbit(&pos) != GFS2_BLKST_FREE)
2144			break;
2145		gfs2_trans_add_meta(pos.rgd->rd_gl, rbm_bi(&pos)->bi_bh);
2146		gfs2_setbit(&pos, true, GFS2_BLKST_USED);
2147		(*n)++;
2148		block++;
2149	}
2150}
2151
2152/**
2153 * rgblk_free - Change alloc state of given block(s)
2154 * @sdp: the filesystem
2155 * @bstart: the start of a run of blocks to free
2156 * @blen: the length of the block run (all must lie within ONE RG!)
2157 * @new_state: GFS2_BLKST_XXX the after-allocation block state
2158 *
2159 * Returns:  Resource group containing the block(s)
2160 */
2161
2162static struct gfs2_rgrpd *rgblk_free(struct gfs2_sbd *sdp, u64 bstart,
2163				     u32 blen, unsigned char new_state)
2164{
2165	struct gfs2_rbm rbm;
2166	struct gfs2_bitmap *bi, *bi_prev = NULL;
2167
2168	rbm.rgd = gfs2_blk2rgrpd(sdp, bstart, 1);
2169	if (!rbm.rgd) {
2170		if (gfs2_consist(sdp))
2171			fs_err(sdp, "block = %llu\n", (unsigned long long)bstart);
2172		return NULL;
2173	}
2174
2175	gfs2_rbm_from_block(&rbm, bstart);
2176	while (blen--) {
2177		bi = rbm_bi(&rbm);
2178		if (bi != bi_prev) {
2179			if (!bi->bi_clone) {
2180				bi->bi_clone = kmalloc(bi->bi_bh->b_size,
2181						      GFP_NOFS | __GFP_NOFAIL);
2182				memcpy(bi->bi_clone + bi->bi_offset,
2183				       bi->bi_bh->b_data + bi->bi_offset,
2184				       bi->bi_len);
2185			}
2186			gfs2_trans_add_meta(rbm.rgd->rd_gl, bi->bi_bh);
2187			bi_prev = bi;
2188		}
2189		gfs2_setbit(&rbm, false, new_state);
2190		gfs2_rbm_incr(&rbm);
2191	}
2192
2193	return rbm.rgd;
2194}
2195
2196/**
2197 * gfs2_rgrp_dump - print out an rgrp
2198 * @seq: The iterator
2199 * @gl: The glock in question
2200 *
2201 */
2202
2203void gfs2_rgrp_dump(struct seq_file *seq, const struct gfs2_glock *gl)
2204{
2205	struct gfs2_rgrpd *rgd = gl->gl_object;
2206	struct gfs2_blkreserv *trs;
2207	const struct rb_node *n;
2208
2209	if (rgd == NULL)
2210		return;
2211	gfs2_print_dbg(seq, " R: n:%llu f:%02x b:%u/%u i:%u r:%u e:%u\n",
2212		       (unsigned long long)rgd->rd_addr, rgd->rd_flags,
2213		       rgd->rd_free, rgd->rd_free_clone, rgd->rd_dinodes,
2214		       rgd->rd_reserved, rgd->rd_extfail_pt);
2215	spin_lock(&rgd->rd_rsspin);
2216	for (n = rb_first(&rgd->rd_rstree); n; n = rb_next(&trs->rs_node)) {
2217		trs = rb_entry(n, struct gfs2_blkreserv, rs_node);
2218		dump_rs(seq, trs);
2219	}
2220	spin_unlock(&rgd->rd_rsspin);
2221}
2222
2223static void gfs2_rgrp_error(struct gfs2_rgrpd *rgd)
2224{
2225	struct gfs2_sbd *sdp = rgd->rd_sbd;
2226	fs_warn(sdp, "rgrp %llu has an error, marking it readonly until umount\n",
2227		(unsigned long long)rgd->rd_addr);
2228	fs_warn(sdp, "umount on all nodes and run fsck.gfs2 to fix the error\n");
2229	gfs2_rgrp_dump(NULL, rgd->rd_gl);
2230	rgd->rd_flags |= GFS2_RDF_ERROR;
2231}
2232
2233/**
2234 * gfs2_adjust_reservation - Adjust (or remove) a reservation after allocation
2235 * @ip: The inode we have just allocated blocks for
2236 * @rbm: The start of the allocated blocks
2237 * @len: The extent length
2238 *
2239 * Adjusts a reservation after an allocation has taken place. If the
2240 * reservation does not match the allocation, or if it is now empty
2241 * then it is removed.
2242 */
2243
2244static void gfs2_adjust_reservation(struct gfs2_inode *ip,
2245				    const struct gfs2_rbm *rbm, unsigned len)
2246{
2247	struct gfs2_blkreserv *rs = ip->i_res;
2248	struct gfs2_rgrpd *rgd = rbm->rgd;
2249	unsigned rlen;
2250	u64 block;
2251	int ret;
2252
2253	spin_lock(&rgd->rd_rsspin);
2254	if (gfs2_rs_active(rs)) {
2255		if (gfs2_rbm_eq(&rs->rs_rbm, rbm)) {
2256			block = gfs2_rbm_to_block(rbm);
2257			ret = gfs2_rbm_from_block(&rs->rs_rbm, block + len);
2258			rlen = min(rs->rs_free, len);
2259			rs->rs_free -= rlen;
2260			rgd->rd_reserved -= rlen;
2261			trace_gfs2_rs(rs, TRACE_RS_CLAIM);
2262			if (rs->rs_free && !ret)
2263				goto out;
2264			/* We used up our block reservation, so we should
2265			   reserve more blocks next time. */
2266			atomic_add(RGRP_RSRV_ADDBLKS, &rs->rs_sizehint);
2267		}
2268		__rs_deltree(rs);
2269	}
2270out:
2271	spin_unlock(&rgd->rd_rsspin);
2272}
2273
2274/**
2275 * gfs2_set_alloc_start - Set starting point for block allocation
2276 * @rbm: The rbm which will be set to the required location
2277 * @ip: The gfs2 inode
2278 * @dinode: Flag to say if allocation includes a new inode
2279 *
2280 * This sets the starting point from the reservation if one is active
2281 * otherwise it falls back to guessing a start point based on the
2282 * inode's goal block or the last allocation point in the rgrp.
2283 */
2284
2285static void gfs2_set_alloc_start(struct gfs2_rbm *rbm,
2286				 const struct gfs2_inode *ip, bool dinode)
2287{
2288	u64 goal;
2289
2290	if (gfs2_rs_active(ip->i_res)) {
2291		*rbm = ip->i_res->rs_rbm;
2292		return;
2293	}
2294
2295	if (!dinode && rgrp_contains_block(rbm->rgd, ip->i_goal))
2296		goal = ip->i_goal;
2297	else
2298		goal = rbm->rgd->rd_last_alloc + rbm->rgd->rd_data0;
2299
2300	gfs2_rbm_from_block(rbm, goal);
2301}
2302
2303/**
2304 * gfs2_alloc_blocks - Allocate one or more blocks of data and/or a dinode
2305 * @ip: the inode to allocate the block for
2306 * @bn: Used to return the starting block number
2307 * @nblocks: requested number of blocks/extent length (value/result)
2308 * @dinode: 1 if we're allocating a dinode block, else 0
2309 * @generation: the generation number of the inode
2310 *
2311 * Returns: 0 or error
2312 */
2313
2314int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks,
2315		      bool dinode, u64 *generation)
2316{
2317	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
2318	struct buffer_head *dibh;
2319	struct gfs2_rbm rbm = { .rgd = ip->i_rgd, };
2320	unsigned int ndata;
2321	u64 block; /* block, within the file system scope */
2322	int error;
2323
2324	gfs2_set_alloc_start(&rbm, ip, dinode);
2325	error = gfs2_rbm_find(&rbm, GFS2_BLKST_FREE, NULL, ip, false, NULL);
2326
2327	if (error == -ENOSPC) {
2328		gfs2_set_alloc_start(&rbm, ip, dinode);
2329		error = gfs2_rbm_find(&rbm, GFS2_BLKST_FREE, NULL, NULL, false,
2330				      NULL);
2331	}
2332
2333	/* Since all blocks are reserved in advance, this shouldn't happen */
2334	if (error) {
2335		fs_warn(sdp, "inum=%llu error=%d, nblocks=%u, full=%d fail_pt=%d\n",
2336			(unsigned long long)ip->i_no_addr, error, *nblocks,
2337			test_bit(GBF_FULL, &rbm.rgd->rd_bits->bi_flags),
2338			rbm.rgd->rd_extfail_pt);
2339		goto rgrp_error;
2340	}
2341
2342	gfs2_alloc_extent(&rbm, dinode, nblocks);
2343	block = gfs2_rbm_to_block(&rbm);
2344	rbm.rgd->rd_last_alloc = block - rbm.rgd->rd_data0;
2345	if (gfs2_rs_active(ip->i_res))
2346		gfs2_adjust_reservation(ip, &rbm, *nblocks);
2347	ndata = *nblocks;
2348	if (dinode)
2349		ndata--;
2350
2351	if (!dinode) {
2352		ip->i_goal = block + ndata - 1;
2353		error = gfs2_meta_inode_buffer(ip, &dibh);
2354		if (error == 0) {
2355			struct gfs2_dinode *di =
2356				(struct gfs2_dinode *)dibh->b_data;
2357			gfs2_trans_add_meta(ip->i_gl, dibh);
2358			di->di_goal_meta = di->di_goal_data =
2359				cpu_to_be64(ip->i_goal);
2360			brelse(dibh);
2361		}
2362	}
2363	if (rbm.rgd->rd_free < *nblocks) {
2364		pr_warn("nblocks=%u\n", *nblocks);
2365		goto rgrp_error;
2366	}
2367
2368	rbm.rgd->rd_free -= *nblocks;
2369	if (dinode) {
2370		rbm.rgd->rd_dinodes++;
2371		*generation = rbm.rgd->rd_igeneration++;
2372		if (*generation == 0)
2373			*generation = rbm.rgd->rd_igeneration++;
2374	}
2375
2376	gfs2_trans_add_meta(rbm.rgd->rd_gl, rbm.rgd->rd_bits[0].bi_bh);
2377	gfs2_rgrp_out(rbm.rgd, rbm.rgd->rd_bits[0].bi_bh->b_data);
2378	gfs2_rgrp_ondisk2lvb(rbm.rgd->rd_rgl, rbm.rgd->rd_bits[0].bi_bh->b_data);
2379
2380	gfs2_statfs_change(sdp, 0, -(s64)*nblocks, dinode ? 1 : 0);
2381	if (dinode)
2382		gfs2_trans_add_unrevoke(sdp, block, *nblocks);
2383
2384	gfs2_quota_change(ip, *nblocks, ip->i_inode.i_uid, ip->i_inode.i_gid);
2385
2386	rbm.rgd->rd_free_clone -= *nblocks;
2387	trace_gfs2_block_alloc(ip, rbm.rgd, block, *nblocks,
2388			       dinode ? GFS2_BLKST_DINODE : GFS2_BLKST_USED);
2389	*bn = block;
2390	return 0;
2391
2392rgrp_error:
2393	gfs2_rgrp_error(rbm.rgd);
2394	return -EIO;
2395}
2396
2397/**
2398 * __gfs2_free_blocks - free a contiguous run of block(s)
2399 * @ip: the inode these blocks are being freed from
2400 * @bstart: first block of a run of contiguous blocks
2401 * @blen: the length of the block run
2402 * @meta: 1 if the blocks represent metadata
2403 *
2404 */
2405
2406void __gfs2_free_blocks(struct gfs2_inode *ip, u64 bstart, u32 blen, int meta)
2407{
2408	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
2409	struct gfs2_rgrpd *rgd;
2410
2411	rgd = rgblk_free(sdp, bstart, blen, GFS2_BLKST_FREE);
2412	if (!rgd)
2413		return;
2414	trace_gfs2_block_alloc(ip, rgd, bstart, blen, GFS2_BLKST_FREE);
2415	rgd->rd_free += blen;
2416	rgd->rd_flags &= ~GFS2_RGF_TRIMMED;
2417	gfs2_trans_add_meta(rgd->rd_gl, rgd->rd_bits[0].bi_bh);
2418	gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data);
2419	gfs2_rgrp_ondisk2lvb(rgd->rd_rgl, rgd->rd_bits[0].bi_bh->b_data);
2420
2421	/* Directories keep their data in the metadata address space */
2422	if (meta || ip->i_depth)
2423		gfs2_meta_wipe(ip, bstart, blen);
2424}
2425
2426/**
2427 * gfs2_free_meta - free a contiguous run of data block(s)
2428 * @ip: the inode these blocks are being freed from
2429 * @bstart: first block of a run of contiguous blocks
2430 * @blen: the length of the block run
2431 *
2432 */
2433
2434void gfs2_free_meta(struct gfs2_inode *ip, u64 bstart, u32 blen)
2435{
2436	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
2437
2438	__gfs2_free_blocks(ip, bstart, blen, 1);
2439	gfs2_statfs_change(sdp, 0, +blen, 0);
2440	gfs2_quota_change(ip, -(s64)blen, ip->i_inode.i_uid, ip->i_inode.i_gid);
2441}
2442
2443void gfs2_unlink_di(struct inode *inode)
2444{
2445	struct gfs2_inode *ip = GFS2_I(inode);
2446	struct gfs2_sbd *sdp = GFS2_SB(inode);
2447	struct gfs2_rgrpd *rgd;
2448	u64 blkno = ip->i_no_addr;
2449
2450	rgd = rgblk_free(sdp, blkno, 1, GFS2_BLKST_UNLINKED);
2451	if (!rgd)
2452		return;
2453	trace_gfs2_block_alloc(ip, rgd, blkno, 1, GFS2_BLKST_UNLINKED);
2454	gfs2_trans_add_meta(rgd->rd_gl, rgd->rd_bits[0].bi_bh);
2455	gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data);
2456	gfs2_rgrp_ondisk2lvb(rgd->rd_rgl, rgd->rd_bits[0].bi_bh->b_data);
2457	update_rgrp_lvb_unlinked(rgd, 1);
2458}
2459
2460static void gfs2_free_uninit_di(struct gfs2_rgrpd *rgd, u64 blkno)
2461{
2462	struct gfs2_sbd *sdp = rgd->rd_sbd;
2463	struct gfs2_rgrpd *tmp_rgd;
2464
2465	tmp_rgd = rgblk_free(sdp, blkno, 1, GFS2_BLKST_FREE);
2466	if (!tmp_rgd)
2467		return;
2468	gfs2_assert_withdraw(sdp, rgd == tmp_rgd);
2469
2470	if (!rgd->rd_dinodes)
2471		gfs2_consist_rgrpd(rgd);
2472	rgd->rd_dinodes--;
2473	rgd->rd_free++;
2474
2475	gfs2_trans_add_meta(rgd->rd_gl, rgd->rd_bits[0].bi_bh);
2476	gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data);
2477	gfs2_rgrp_ondisk2lvb(rgd->rd_rgl, rgd->rd_bits[0].bi_bh->b_data);
2478	update_rgrp_lvb_unlinked(rgd, -1);
2479
2480	gfs2_statfs_change(sdp, 0, +1, -1);
2481}
2482
2483
2484void gfs2_free_di(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip)
2485{
2486	gfs2_free_uninit_di(rgd, ip->i_no_addr);
2487	trace_gfs2_block_alloc(ip, rgd, ip->i_no_addr, 1, GFS2_BLKST_FREE);
2488	gfs2_quota_change(ip, -1, ip->i_inode.i_uid, ip->i_inode.i_gid);
2489	gfs2_meta_wipe(ip, ip->i_no_addr, 1);
2490}
2491
2492/**
2493 * gfs2_check_blk_type - Check the type of a block
2494 * @sdp: The superblock
2495 * @no_addr: The block number to check
2496 * @type: The block type we are looking for
2497 *
2498 * Returns: 0 if the block type matches the expected type
2499 *          -ESTALE if it doesn't match
2500 *          or -ve errno if something went wrong while checking
2501 */
2502
2503int gfs2_check_blk_type(struct gfs2_sbd *sdp, u64 no_addr, unsigned int type)
2504{
2505	struct gfs2_rgrpd *rgd;
2506	struct gfs2_holder rgd_gh;
2507	int error = -EINVAL;
2508
2509	rgd = gfs2_blk2rgrpd(sdp, no_addr, 1);
2510	if (!rgd)
2511		goto fail;
2512
2513	error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_SHARED, 0, &rgd_gh);
2514	if (error)
2515		goto fail;
2516
2517	if (gfs2_get_block_type(rgd, no_addr) != type)
2518		error = -ESTALE;
2519
2520	gfs2_glock_dq_uninit(&rgd_gh);
2521fail:
2522	return error;
2523}
2524
2525/**
2526 * gfs2_rlist_add - add a RG to a list of RGs
2527 * @ip: the inode
2528 * @rlist: the list of resource groups
2529 * @block: the block
2530 *
2531 * Figure out what RG a block belongs to and add that RG to the list
2532 *
2533 * FIXME: Don't use NOFAIL
2534 *
2535 */
2536
2537void gfs2_rlist_add(struct gfs2_inode *ip, struct gfs2_rgrp_list *rlist,
2538		    u64 block)
2539{
2540	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
2541	struct gfs2_rgrpd *rgd;
2542	struct gfs2_rgrpd **tmp;
2543	unsigned int new_space;
2544	unsigned int x;
2545
2546	if (gfs2_assert_warn(sdp, !rlist->rl_ghs))
2547		return;
2548
2549	if (ip->i_rgd && rgrp_contains_block(ip->i_rgd, block))
2550		rgd = ip->i_rgd;
2551	else
2552		rgd = gfs2_blk2rgrpd(sdp, block, 1);
2553	if (!rgd) {
2554		fs_err(sdp, "rlist_add: no rgrp for block %llu\n", (unsigned long long)block);
2555		return;
2556	}
2557	ip->i_rgd = rgd;
2558
2559	for (x = 0; x < rlist->rl_rgrps; x++)
2560		if (rlist->rl_rgd[x] == rgd)
2561			return;
2562
2563	if (rlist->rl_rgrps == rlist->rl_space) {
2564		new_space = rlist->rl_space + 10;
2565
2566		tmp = kcalloc(new_space, sizeof(struct gfs2_rgrpd *),
2567			      GFP_NOFS | __GFP_NOFAIL);
2568
2569		if (rlist->rl_rgd) {
2570			memcpy(tmp, rlist->rl_rgd,
2571			       rlist->rl_space * sizeof(struct gfs2_rgrpd *));
2572			kfree(rlist->rl_rgd);
2573		}
2574
2575		rlist->rl_space = new_space;
2576		rlist->rl_rgd = tmp;
2577	}
2578
2579	rlist->rl_rgd[rlist->rl_rgrps++] = rgd;
2580}
2581
2582/**
2583 * gfs2_rlist_alloc - all RGs have been added to the rlist, now allocate
2584 *      and initialize an array of glock holders for them
2585 * @rlist: the list of resource groups
2586 * @state: the lock state to acquire the RG lock in
2587 *
2588 * FIXME: Don't use NOFAIL
2589 *
2590 */
2591
2592void gfs2_rlist_alloc(struct gfs2_rgrp_list *rlist, unsigned int state)
2593{
2594	unsigned int x;
2595
2596	rlist->rl_ghs = kcalloc(rlist->rl_rgrps, sizeof(struct gfs2_holder),
2597				GFP_NOFS | __GFP_NOFAIL);
2598	for (x = 0; x < rlist->rl_rgrps; x++)
2599		gfs2_holder_init(rlist->rl_rgd[x]->rd_gl,
2600				state, 0,
2601				&rlist->rl_ghs[x]);
2602}
2603
2604/**
2605 * gfs2_rlist_free - free a resource group list
2606 * @rlist: the list of resource groups
2607 *
2608 */
2609
2610void gfs2_rlist_free(struct gfs2_rgrp_list *rlist)
2611{
2612	unsigned int x;
2613
2614	kfree(rlist->rl_rgd);
2615
2616	if (rlist->rl_ghs) {
2617		for (x = 0; x < rlist->rl_rgrps; x++)
2618			gfs2_holder_uninit(&rlist->rl_ghs[x]);
2619		kfree(rlist->rl_ghs);
2620		rlist->rl_ghs = NULL;
2621	}
2622}
2623
2624