1#include <linux/fs.h>
2#include <linux/random.h>
3#include <linux/buffer_head.h>
4#include <linux/utsname.h>
5#include <linux/kthread.h>
6
7#include "ext4.h"
8
9/* Checksumming functions */
10static __le32 ext4_mmp_csum(struct super_block *sb, struct mmp_struct *mmp)
11{
12	struct ext4_sb_info *sbi = EXT4_SB(sb);
13	int offset = offsetof(struct mmp_struct, mmp_checksum);
14	__u32 csum;
15
16	csum = ext4_chksum(sbi, sbi->s_csum_seed, (char *)mmp, offset);
17
18	return cpu_to_le32(csum);
19}
20
21static int ext4_mmp_csum_verify(struct super_block *sb, struct mmp_struct *mmp)
22{
23	if (!ext4_has_metadata_csum(sb))
24		return 1;
25
26	return mmp->mmp_checksum == ext4_mmp_csum(sb, mmp);
27}
28
29static void ext4_mmp_csum_set(struct super_block *sb, struct mmp_struct *mmp)
30{
31	if (!ext4_has_metadata_csum(sb))
32		return;
33
34	mmp->mmp_checksum = ext4_mmp_csum(sb, mmp);
35}
36
37/*
38 * Write the MMP block using WRITE_SYNC to try to get the block on-disk
39 * faster.
40 */
41static int write_mmp_block(struct super_block *sb, struct buffer_head *bh)
42{
43	struct mmp_struct *mmp = (struct mmp_struct *)(bh->b_data);
44
45	/*
46	 * We protect against freezing so that we don't create dirty buffers
47	 * on frozen filesystem.
48	 */
49	sb_start_write(sb);
50	ext4_mmp_csum_set(sb, mmp);
51	mark_buffer_dirty(bh);
52	lock_buffer(bh);
53	bh->b_end_io = end_buffer_write_sync;
54	get_bh(bh);
55	submit_bh(WRITE_SYNC | REQ_META | REQ_PRIO, bh);
56	wait_on_buffer(bh);
57	sb_end_write(sb);
58	if (unlikely(!buffer_uptodate(bh)))
59		return 1;
60
61	return 0;
62}
63
64/*
65 * Read the MMP block. It _must_ be read from disk and hence we clear the
66 * uptodate flag on the buffer.
67 */
68static int read_mmp_block(struct super_block *sb, struct buffer_head **bh,
69			  ext4_fsblk_t mmp_block)
70{
71	struct mmp_struct *mmp;
72
73	if (*bh)
74		clear_buffer_uptodate(*bh);
75
76	/* This would be sb_bread(sb, mmp_block), except we need to be sure
77	 * that the MD RAID device cache has been bypassed, and that the read
78	 * is not blocked in the elevator. */
79	if (!*bh)
80		*bh = sb_getblk(sb, mmp_block);
81	if (!*bh)
82		return -ENOMEM;
83	if (*bh) {
84		get_bh(*bh);
85		lock_buffer(*bh);
86		(*bh)->b_end_io = end_buffer_read_sync;
87		submit_bh(READ_SYNC | REQ_META | REQ_PRIO, *bh);
88		wait_on_buffer(*bh);
89		if (!buffer_uptodate(*bh)) {
90			brelse(*bh);
91			*bh = NULL;
92		}
93	}
94	if (unlikely(!*bh)) {
95		ext4_warning(sb, "Error while reading MMP block %llu",
96			     mmp_block);
97		return -EIO;
98	}
99
100	mmp = (struct mmp_struct *)((*bh)->b_data);
101	if (le32_to_cpu(mmp->mmp_magic) != EXT4_MMP_MAGIC ||
102	    !ext4_mmp_csum_verify(sb, mmp))
103		return -EINVAL;
104
105	return 0;
106}
107
108/*
109 * Dump as much information as possible to help the admin.
110 */
111void __dump_mmp_msg(struct super_block *sb, struct mmp_struct *mmp,
112		    const char *function, unsigned int line, const char *msg)
113{
114	__ext4_warning(sb, function, line, msg);
115	__ext4_warning(sb, function, line,
116		       "MMP failure info: last update time: %llu, last update "
117		       "node: %s, last update device: %s\n",
118		       (long long unsigned int) le64_to_cpu(mmp->mmp_time),
119		       mmp->mmp_nodename, mmp->mmp_bdevname);
120}
121
122/*
123 * kmmpd will update the MMP sequence every s_mmp_update_interval seconds
124 */
125static int kmmpd(void *data)
126{
127	struct super_block *sb = ((struct mmpd_data *) data)->sb;
128	struct buffer_head *bh = ((struct mmpd_data *) data)->bh;
129	struct ext4_super_block *es = EXT4_SB(sb)->s_es;
130	struct mmp_struct *mmp;
131	ext4_fsblk_t mmp_block;
132	u32 seq = 0;
133	unsigned long failed_writes = 0;
134	int mmp_update_interval = le16_to_cpu(es->s_mmp_update_interval);
135	unsigned mmp_check_interval;
136	unsigned long last_update_time;
137	unsigned long diff;
138	int retval;
139
140	mmp_block = le64_to_cpu(es->s_mmp_block);
141	mmp = (struct mmp_struct *)(bh->b_data);
142	mmp->mmp_time = cpu_to_le64(get_seconds());
143	/*
144	 * Start with the higher mmp_check_interval and reduce it if
145	 * the MMP block is being updated on time.
146	 */
147	mmp_check_interval = max(EXT4_MMP_CHECK_MULT * mmp_update_interval,
148				 EXT4_MMP_MIN_CHECK_INTERVAL);
149	mmp->mmp_check_interval = cpu_to_le16(mmp_check_interval);
150	bdevname(bh->b_bdev, mmp->mmp_bdevname);
151
152	memcpy(mmp->mmp_nodename, init_utsname()->nodename,
153	       sizeof(mmp->mmp_nodename));
154
155	while (!kthread_should_stop()) {
156		if (++seq > EXT4_MMP_SEQ_MAX)
157			seq = 1;
158
159		mmp->mmp_seq = cpu_to_le32(seq);
160		mmp->mmp_time = cpu_to_le64(get_seconds());
161		last_update_time = jiffies;
162
163		retval = write_mmp_block(sb, bh);
164		/*
165		 * Don't spew too many error messages. Print one every
166		 * (s_mmp_update_interval * 60) seconds.
167		 */
168		if (retval) {
169			if ((failed_writes % 60) == 0)
170				ext4_error(sb, "Error writing to MMP block");
171			failed_writes++;
172		}
173
174		if (!(le32_to_cpu(es->s_feature_incompat) &
175		    EXT4_FEATURE_INCOMPAT_MMP)) {
176			ext4_warning(sb, "kmmpd being stopped since MMP feature"
177				     " has been disabled.");
178			EXT4_SB(sb)->s_mmp_tsk = NULL;
179			goto failed;
180		}
181
182		if (sb->s_flags & MS_RDONLY) {
183			ext4_warning(sb, "kmmpd being stopped since filesystem "
184				     "has been remounted as readonly.");
185			EXT4_SB(sb)->s_mmp_tsk = NULL;
186			goto failed;
187		}
188
189		diff = jiffies - last_update_time;
190		if (diff < mmp_update_interval * HZ)
191			schedule_timeout_interruptible(mmp_update_interval *
192						       HZ - diff);
193
194		/*
195		 * We need to make sure that more than mmp_check_interval
196		 * seconds have not passed since writing. If that has happened
197		 * we need to check if the MMP block is as we left it.
198		 */
199		diff = jiffies - last_update_time;
200		if (diff > mmp_check_interval * HZ) {
201			struct buffer_head *bh_check = NULL;
202			struct mmp_struct *mmp_check;
203
204			retval = read_mmp_block(sb, &bh_check, mmp_block);
205			if (retval) {
206				ext4_error(sb, "error reading MMP data: %d",
207					   retval);
208
209				EXT4_SB(sb)->s_mmp_tsk = NULL;
210				goto failed;
211			}
212
213			mmp_check = (struct mmp_struct *)(bh_check->b_data);
214			if (mmp->mmp_seq != mmp_check->mmp_seq ||
215			    memcmp(mmp->mmp_nodename, mmp_check->mmp_nodename,
216				   sizeof(mmp->mmp_nodename))) {
217				dump_mmp_msg(sb, mmp_check,
218					     "Error while updating MMP info. "
219					     "The filesystem seems to have been"
220					     " multiply mounted.");
221				ext4_error(sb, "abort");
222				goto failed;
223			}
224			put_bh(bh_check);
225		}
226
227		 /*
228		 * Adjust the mmp_check_interval depending on how much time
229		 * it took for the MMP block to be written.
230		 */
231		mmp_check_interval = max(min(EXT4_MMP_CHECK_MULT * diff / HZ,
232					     EXT4_MMP_MAX_CHECK_INTERVAL),
233					 EXT4_MMP_MIN_CHECK_INTERVAL);
234		mmp->mmp_check_interval = cpu_to_le16(mmp_check_interval);
235	}
236
237	/*
238	 * Unmount seems to be clean.
239	 */
240	mmp->mmp_seq = cpu_to_le32(EXT4_MMP_SEQ_CLEAN);
241	mmp->mmp_time = cpu_to_le64(get_seconds());
242
243	retval = write_mmp_block(sb, bh);
244
245failed:
246	kfree(data);
247	brelse(bh);
248	return retval;
249}
250
251/*
252 * Get a random new sequence number but make sure it is not greater than
253 * EXT4_MMP_SEQ_MAX.
254 */
255static unsigned int mmp_new_seq(void)
256{
257	u32 new_seq;
258
259	do {
260		new_seq = prandom_u32();
261	} while (new_seq > EXT4_MMP_SEQ_MAX);
262
263	return new_seq;
264}
265
266/*
267 * Protect the filesystem from being mounted more than once.
268 */
269int ext4_multi_mount_protect(struct super_block *sb,
270				    ext4_fsblk_t mmp_block)
271{
272	struct ext4_super_block *es = EXT4_SB(sb)->s_es;
273	struct buffer_head *bh = NULL;
274	struct mmp_struct *mmp = NULL;
275	struct mmpd_data *mmpd_data;
276	u32 seq;
277	unsigned int mmp_check_interval = le16_to_cpu(es->s_mmp_update_interval);
278	unsigned int wait_time = 0;
279	int retval;
280
281	if (mmp_block < le32_to_cpu(es->s_first_data_block) ||
282	    mmp_block >= ext4_blocks_count(es)) {
283		ext4_warning(sb, "Invalid MMP block in superblock");
284		goto failed;
285	}
286
287	retval = read_mmp_block(sb, &bh, mmp_block);
288	if (retval)
289		goto failed;
290
291	mmp = (struct mmp_struct *)(bh->b_data);
292
293	if (mmp_check_interval < EXT4_MMP_MIN_CHECK_INTERVAL)
294		mmp_check_interval = EXT4_MMP_MIN_CHECK_INTERVAL;
295
296	/*
297	 * If check_interval in MMP block is larger, use that instead of
298	 * update_interval from the superblock.
299	 */
300	if (le16_to_cpu(mmp->mmp_check_interval) > mmp_check_interval)
301		mmp_check_interval = le16_to_cpu(mmp->mmp_check_interval);
302
303	seq = le32_to_cpu(mmp->mmp_seq);
304	if (seq == EXT4_MMP_SEQ_CLEAN)
305		goto skip;
306
307	if (seq == EXT4_MMP_SEQ_FSCK) {
308		dump_mmp_msg(sb, mmp, "fsck is running on the filesystem");
309		goto failed;
310	}
311
312	wait_time = min(mmp_check_interval * 2 + 1,
313			mmp_check_interval + 60);
314
315	/* Print MMP interval if more than 20 secs. */
316	if (wait_time > EXT4_MMP_MIN_CHECK_INTERVAL * 4)
317		ext4_warning(sb, "MMP interval %u higher than expected, please"
318			     " wait.\n", wait_time * 2);
319
320	if (schedule_timeout_interruptible(HZ * wait_time) != 0) {
321		ext4_warning(sb, "MMP startup interrupted, failing mount\n");
322		goto failed;
323	}
324
325	retval = read_mmp_block(sb, &bh, mmp_block);
326	if (retval)
327		goto failed;
328	mmp = (struct mmp_struct *)(bh->b_data);
329	if (seq != le32_to_cpu(mmp->mmp_seq)) {
330		dump_mmp_msg(sb, mmp,
331			     "Device is already active on another node.");
332		goto failed;
333	}
334
335skip:
336	/*
337	 * write a new random sequence number.
338	 */
339	seq = mmp_new_seq();
340	mmp->mmp_seq = cpu_to_le32(seq);
341
342	retval = write_mmp_block(sb, bh);
343	if (retval)
344		goto failed;
345
346	/*
347	 * wait for MMP interval and check mmp_seq.
348	 */
349	if (schedule_timeout_interruptible(HZ * wait_time) != 0) {
350		ext4_warning(sb, "MMP startup interrupted, failing mount\n");
351		goto failed;
352	}
353
354	retval = read_mmp_block(sb, &bh, mmp_block);
355	if (retval)
356		goto failed;
357	mmp = (struct mmp_struct *)(bh->b_data);
358	if (seq != le32_to_cpu(mmp->mmp_seq)) {
359		dump_mmp_msg(sb, mmp,
360			     "Device is already active on another node.");
361		goto failed;
362	}
363
364	mmpd_data = kmalloc(sizeof(struct mmpd_data), GFP_KERNEL);
365	if (!mmpd_data) {
366		ext4_warning(sb, "not enough memory for mmpd_data");
367		goto failed;
368	}
369	mmpd_data->sb = sb;
370	mmpd_data->bh = bh;
371
372	/*
373	 * Start a kernel thread to update the MMP block periodically.
374	 */
375	EXT4_SB(sb)->s_mmp_tsk = kthread_run(kmmpd, mmpd_data, "kmmpd-%s",
376					     bdevname(bh->b_bdev,
377						      mmp->mmp_bdevname));
378	if (IS_ERR(EXT4_SB(sb)->s_mmp_tsk)) {
379		EXT4_SB(sb)->s_mmp_tsk = NULL;
380		kfree(mmpd_data);
381		ext4_warning(sb, "Unable to create kmmpd thread for %s.",
382			     sb->s_id);
383		goto failed;
384	}
385
386	return 0;
387
388failed:
389	brelse(bh);
390	return 1;
391}
392
393
394