root/fs/ext4/mmp.c

/* [<][>][^][v][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. ext4_mmp_csum
  2. ext4_mmp_csum_verify
  3. ext4_mmp_csum_set
  4. write_mmp_block
  5. read_mmp_block
  6. __dump_mmp_msg
  7. kmmpd
  8. mmp_new_seq
  9. ext4_multi_mount_protect

   1 // SPDX-License-Identifier: GPL-2.0
   2 #include <linux/fs.h>
   3 #include <linux/random.h>
   4 #include <linux/buffer_head.h>
   5 #include <linux/utsname.h>
   6 #include <linux/kthread.h>
   7 
   8 #include "ext4.h"
   9 
  10 /* Checksumming functions */
  11 static __le32 ext4_mmp_csum(struct super_block *sb, struct mmp_struct *mmp)
  12 {
  13         struct ext4_sb_info *sbi = EXT4_SB(sb);
  14         int offset = offsetof(struct mmp_struct, mmp_checksum);
  15         __u32 csum;
  16 
  17         csum = ext4_chksum(sbi, sbi->s_csum_seed, (char *)mmp, offset);
  18 
  19         return cpu_to_le32(csum);
  20 }
  21 
  22 static int ext4_mmp_csum_verify(struct super_block *sb, struct mmp_struct *mmp)
  23 {
  24         if (!ext4_has_metadata_csum(sb))
  25                 return 1;
  26 
  27         return mmp->mmp_checksum == ext4_mmp_csum(sb, mmp);
  28 }
  29 
  30 static void ext4_mmp_csum_set(struct super_block *sb, struct mmp_struct *mmp)
  31 {
  32         if (!ext4_has_metadata_csum(sb))
  33                 return;
  34 
  35         mmp->mmp_checksum = ext4_mmp_csum(sb, mmp);
  36 }
  37 
  38 /*
  39  * Write the MMP block using REQ_SYNC to try to get the block on-disk
  40  * faster.
  41  */
  42 static int write_mmp_block(struct super_block *sb, struct buffer_head *bh)
  43 {
  44         struct mmp_struct *mmp = (struct mmp_struct *)(bh->b_data);
  45 
  46         /*
  47          * We protect against freezing so that we don't create dirty buffers
  48          * on frozen filesystem.
  49          */
  50         sb_start_write(sb);
  51         ext4_mmp_csum_set(sb, mmp);
  52         lock_buffer(bh);
  53         bh->b_end_io = end_buffer_write_sync;
  54         get_bh(bh);
  55         submit_bh(REQ_OP_WRITE, REQ_SYNC | REQ_META | REQ_PRIO, bh);
  56         wait_on_buffer(bh);
  57         sb_end_write(sb);
  58         if (unlikely(!buffer_uptodate(bh)))
  59                 return 1;
  60 
  61         return 0;
  62 }
  63 
  64 /*
  65  * Read the MMP block. It _must_ be read from disk and hence we clear the
  66  * uptodate flag on the buffer.
  67  */
  68 static int read_mmp_block(struct super_block *sb, struct buffer_head **bh,
  69                           ext4_fsblk_t mmp_block)
  70 {
  71         struct mmp_struct *mmp;
  72         int ret;
  73 
  74         if (*bh)
  75                 clear_buffer_uptodate(*bh);
  76 
  77         /* This would be sb_bread(sb, mmp_block), except we need to be sure
  78          * that the MD RAID device cache has been bypassed, and that the read
  79          * is not blocked in the elevator. */
  80         if (!*bh) {
  81                 *bh = sb_getblk(sb, mmp_block);
  82                 if (!*bh) {
  83                         ret = -ENOMEM;
  84                         goto warn_exit;
  85                 }
  86         }
  87 
  88         get_bh(*bh);
  89         lock_buffer(*bh);
  90         (*bh)->b_end_io = end_buffer_read_sync;
  91         submit_bh(REQ_OP_READ, REQ_META | REQ_PRIO, *bh);
  92         wait_on_buffer(*bh);
  93         if (!buffer_uptodate(*bh)) {
  94                 ret = -EIO;
  95                 goto warn_exit;
  96         }
  97         mmp = (struct mmp_struct *)((*bh)->b_data);
  98         if (le32_to_cpu(mmp->mmp_magic) != EXT4_MMP_MAGIC) {
  99                 ret = -EFSCORRUPTED;
 100                 goto warn_exit;
 101         }
 102         if (!ext4_mmp_csum_verify(sb, mmp)) {
 103                 ret = -EFSBADCRC;
 104                 goto warn_exit;
 105         }
 106         return 0;
 107 warn_exit:
 108         brelse(*bh);
 109         *bh = NULL;
 110         ext4_warning(sb, "Error %d while reading MMP block %llu",
 111                      ret, mmp_block);
 112         return ret;
 113 }
 114 
 115 /*
 116  * Dump as much information as possible to help the admin.
 117  */
 118 void __dump_mmp_msg(struct super_block *sb, struct mmp_struct *mmp,
 119                     const char *function, unsigned int line, const char *msg)
 120 {
 121         __ext4_warning(sb, function, line, "%s", msg);
 122         __ext4_warning(sb, function, line,
 123                        "MMP failure info: last update time: %llu, last update node: %.*s, last update device: %.*s",
 124                        (unsigned long long)le64_to_cpu(mmp->mmp_time),
 125                        (int)sizeof(mmp->mmp_nodename), mmp->mmp_nodename,
 126                        (int)sizeof(mmp->mmp_bdevname), mmp->mmp_bdevname);
 127 }
 128 
 129 /*
 130  * kmmpd will update the MMP sequence every s_mmp_update_interval seconds
 131  */
 132 static int kmmpd(void *data)
 133 {
 134         struct super_block *sb = ((struct mmpd_data *) data)->sb;
 135         struct buffer_head *bh = ((struct mmpd_data *) data)->bh;
 136         struct ext4_super_block *es = EXT4_SB(sb)->s_es;
 137         struct mmp_struct *mmp;
 138         ext4_fsblk_t mmp_block;
 139         u32 seq = 0;
 140         unsigned long failed_writes = 0;
 141         int mmp_update_interval = le16_to_cpu(es->s_mmp_update_interval);
 142         unsigned mmp_check_interval;
 143         unsigned long last_update_time;
 144         unsigned long diff;
 145         int retval;
 146 
 147         mmp_block = le64_to_cpu(es->s_mmp_block);
 148         mmp = (struct mmp_struct *)(bh->b_data);
 149         mmp->mmp_time = cpu_to_le64(ktime_get_real_seconds());
 150         /*
 151          * Start with the higher mmp_check_interval and reduce it if
 152          * the MMP block is being updated on time.
 153          */
 154         mmp_check_interval = max(EXT4_MMP_CHECK_MULT * mmp_update_interval,
 155                                  EXT4_MMP_MIN_CHECK_INTERVAL);
 156         mmp->mmp_check_interval = cpu_to_le16(mmp_check_interval);
 157         BUILD_BUG_ON(sizeof(mmp->mmp_bdevname) < BDEVNAME_SIZE);
 158         bdevname(bh->b_bdev, mmp->mmp_bdevname);
 159 
 160         memcpy(mmp->mmp_nodename, init_utsname()->nodename,
 161                sizeof(mmp->mmp_nodename));
 162 
 163         while (!kthread_should_stop()) {
 164                 if (++seq > EXT4_MMP_SEQ_MAX)
 165                         seq = 1;
 166 
 167                 mmp->mmp_seq = cpu_to_le32(seq);
 168                 mmp->mmp_time = cpu_to_le64(ktime_get_real_seconds());
 169                 last_update_time = jiffies;
 170 
 171                 retval = write_mmp_block(sb, bh);
 172                 /*
 173                  * Don't spew too many error messages. Print one every
 174                  * (s_mmp_update_interval * 60) seconds.
 175                  */
 176                 if (retval) {
 177                         if ((failed_writes % 60) == 0)
 178                                 ext4_error(sb, "Error writing to MMP block");
 179                         failed_writes++;
 180                 }
 181 
 182                 if (!(le32_to_cpu(es->s_feature_incompat) &
 183                     EXT4_FEATURE_INCOMPAT_MMP)) {
 184                         ext4_warning(sb, "kmmpd being stopped since MMP feature"
 185                                      " has been disabled.");
 186                         goto exit_thread;
 187                 }
 188 
 189                 if (sb_rdonly(sb))
 190                         break;
 191 
 192                 diff = jiffies - last_update_time;
 193                 if (diff < mmp_update_interval * HZ)
 194                         schedule_timeout_interruptible(mmp_update_interval *
 195                                                        HZ - diff);
 196 
 197                 /*
 198                  * We need to make sure that more than mmp_check_interval
 199                  * seconds have not passed since writing. If that has happened
 200                  * we need to check if the MMP block is as we left it.
 201                  */
 202                 diff = jiffies - last_update_time;
 203                 if (diff > mmp_check_interval * HZ) {
 204                         struct buffer_head *bh_check = NULL;
 205                         struct mmp_struct *mmp_check;
 206 
 207                         retval = read_mmp_block(sb, &bh_check, mmp_block);
 208                         if (retval) {
 209                                 ext4_error(sb, "error reading MMP data: %d",
 210                                            retval);
 211                                 goto exit_thread;
 212                         }
 213 
 214                         mmp_check = (struct mmp_struct *)(bh_check->b_data);
 215                         if (mmp->mmp_seq != mmp_check->mmp_seq ||
 216                             memcmp(mmp->mmp_nodename, mmp_check->mmp_nodename,
 217                                    sizeof(mmp->mmp_nodename))) {
 218                                 dump_mmp_msg(sb, mmp_check,
 219                                              "Error while updating MMP info. "
 220                                              "The filesystem seems to have been"
 221                                              " multiply mounted.");
 222                                 ext4_error(sb, "abort");
 223                                 put_bh(bh_check);
 224                                 retval = -EBUSY;
 225                                 goto exit_thread;
 226                         }
 227                         put_bh(bh_check);
 228                 }
 229 
 230                  /*
 231                  * Adjust the mmp_check_interval depending on how much time
 232                  * it took for the MMP block to be written.
 233                  */
 234                 mmp_check_interval = max(min(EXT4_MMP_CHECK_MULT * diff / HZ,
 235                                              EXT4_MMP_MAX_CHECK_INTERVAL),
 236                                          EXT4_MMP_MIN_CHECK_INTERVAL);
 237                 mmp->mmp_check_interval = cpu_to_le16(mmp_check_interval);
 238         }
 239 
 240         /*
 241          * Unmount seems to be clean.
 242          */
 243         mmp->mmp_seq = cpu_to_le32(EXT4_MMP_SEQ_CLEAN);
 244         mmp->mmp_time = cpu_to_le64(ktime_get_real_seconds());
 245 
 246         retval = write_mmp_block(sb, bh);
 247 
 248 exit_thread:
 249         EXT4_SB(sb)->s_mmp_tsk = NULL;
 250         kfree(data);
 251         brelse(bh);
 252         return retval;
 253 }
 254 
 255 /*
 256  * Get a random new sequence number but make sure it is not greater than
 257  * EXT4_MMP_SEQ_MAX.
 258  */
 259 static unsigned int mmp_new_seq(void)
 260 {
 261         u32 new_seq;
 262 
 263         do {
 264                 new_seq = prandom_u32();
 265         } while (new_seq > EXT4_MMP_SEQ_MAX);
 266 
 267         return new_seq;
 268 }
 269 
 270 /*
 271  * Protect the filesystem from being mounted more than once.
 272  */
 273 int ext4_multi_mount_protect(struct super_block *sb,
 274                                     ext4_fsblk_t mmp_block)
 275 {
 276         struct ext4_super_block *es = EXT4_SB(sb)->s_es;
 277         struct buffer_head *bh = NULL;
 278         struct mmp_struct *mmp = NULL;
 279         struct mmpd_data *mmpd_data;
 280         u32 seq;
 281         unsigned int mmp_check_interval = le16_to_cpu(es->s_mmp_update_interval);
 282         unsigned int wait_time = 0;
 283         int retval;
 284 
 285         if (mmp_block < le32_to_cpu(es->s_first_data_block) ||
 286             mmp_block >= ext4_blocks_count(es)) {
 287                 ext4_warning(sb, "Invalid MMP block in superblock");
 288                 goto failed;
 289         }
 290 
 291         retval = read_mmp_block(sb, &bh, mmp_block);
 292         if (retval)
 293                 goto failed;
 294 
 295         mmp = (struct mmp_struct *)(bh->b_data);
 296 
 297         if (mmp_check_interval < EXT4_MMP_MIN_CHECK_INTERVAL)
 298                 mmp_check_interval = EXT4_MMP_MIN_CHECK_INTERVAL;
 299 
 300         /*
 301          * If check_interval in MMP block is larger, use that instead of
 302          * update_interval from the superblock.
 303          */
 304         if (le16_to_cpu(mmp->mmp_check_interval) > mmp_check_interval)
 305                 mmp_check_interval = le16_to_cpu(mmp->mmp_check_interval);
 306 
 307         seq = le32_to_cpu(mmp->mmp_seq);
 308         if (seq == EXT4_MMP_SEQ_CLEAN)
 309                 goto skip;
 310 
 311         if (seq == EXT4_MMP_SEQ_FSCK) {
 312                 dump_mmp_msg(sb, mmp, "fsck is running on the filesystem");
 313                 goto failed;
 314         }
 315 
 316         wait_time = min(mmp_check_interval * 2 + 1,
 317                         mmp_check_interval + 60);
 318 
 319         /* Print MMP interval if more than 20 secs. */
 320         if (wait_time > EXT4_MMP_MIN_CHECK_INTERVAL * 4)
 321                 ext4_warning(sb, "MMP interval %u higher than expected, please"
 322                              " wait.\n", wait_time * 2);
 323 
 324         if (schedule_timeout_interruptible(HZ * wait_time) != 0) {
 325                 ext4_warning(sb, "MMP startup interrupted, failing mount\n");
 326                 goto failed;
 327         }
 328 
 329         retval = read_mmp_block(sb, &bh, mmp_block);
 330         if (retval)
 331                 goto failed;
 332         mmp = (struct mmp_struct *)(bh->b_data);
 333         if (seq != le32_to_cpu(mmp->mmp_seq)) {
 334                 dump_mmp_msg(sb, mmp,
 335                              "Device is already active on another node.");
 336                 goto failed;
 337         }
 338 
 339 skip:
 340         /*
 341          * write a new random sequence number.
 342          */
 343         seq = mmp_new_seq();
 344         mmp->mmp_seq = cpu_to_le32(seq);
 345 
 346         retval = write_mmp_block(sb, bh);
 347         if (retval)
 348                 goto failed;
 349 
 350         /*
 351          * wait for MMP interval and check mmp_seq.
 352          */
 353         if (schedule_timeout_interruptible(HZ * wait_time) != 0) {
 354                 ext4_warning(sb, "MMP startup interrupted, failing mount");
 355                 goto failed;
 356         }
 357 
 358         retval = read_mmp_block(sb, &bh, mmp_block);
 359         if (retval)
 360                 goto failed;
 361         mmp = (struct mmp_struct *)(bh->b_data);
 362         if (seq != le32_to_cpu(mmp->mmp_seq)) {
 363                 dump_mmp_msg(sb, mmp,
 364                              "Device is already active on another node.");
 365                 goto failed;
 366         }
 367 
 368         mmpd_data = kmalloc(sizeof(*mmpd_data), GFP_KERNEL);
 369         if (!mmpd_data) {
 370                 ext4_warning(sb, "not enough memory for mmpd_data");
 371                 goto failed;
 372         }
 373         mmpd_data->sb = sb;
 374         mmpd_data->bh = bh;
 375 
 376         /*
 377          * Start a kernel thread to update the MMP block periodically.
 378          */
 379         EXT4_SB(sb)->s_mmp_tsk = kthread_run(kmmpd, mmpd_data, "kmmpd-%.*s",
 380                                              (int)sizeof(mmp->mmp_bdevname),
 381                                              bdevname(bh->b_bdev,
 382                                                       mmp->mmp_bdevname));
 383         if (IS_ERR(EXT4_SB(sb)->s_mmp_tsk)) {
 384                 EXT4_SB(sb)->s_mmp_tsk = NULL;
 385                 kfree(mmpd_data);
 386                 ext4_warning(sb, "Unable to create kmmpd thread for %s.",
 387                              sb->s_id);
 388                 goto failed;
 389         }
 390 
 391         return 0;
 392 
 393 failed:
 394         brelse(bh);
 395         return 1;
 396 }
 397 
 398 

/* [<][>][^][v][top][bottom][index][help] */