root/fs/jfs/jfs_logmgr.c

/* [<][>][^][v][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. write_special_inodes
  2. lmLog
  3. lmWriteRecord
  4. lmNextPage
  5. lmGroupCommit
  6. lmGCwrite
  7. lmPostGC
  8. lmLogSync
  9. jfs_syncpt
  10. lmLogOpen
  11. open_inline_log
  12. open_dummy_log
  13. lmLogInit
  14. lmLogClose
  15. jfs_flush_journal
  16. lmLogShutdown
  17. lmLogFileSystem
  18. lbmLogInit
  19. lbmLogShutdown
  20. lbmAllocate
  21. lbmFree
  22. lbmfree
  23. lbmRedrive
  24. lbmRead
  25. lbmWrite
  26. lbmDirectWrite
  27. lbmStartIO
  28. lbmIOWait
  29. lbmIODone
  30. jfsIOWait
  31. lmLogFormat
  32. jfs_lmstats_proc_show

   1 // SPDX-License-Identifier: GPL-2.0-or-later
   2 /*
   3  *   Copyright (C) International Business Machines Corp., 2000-2004
   4  *   Portions Copyright (C) Christoph Hellwig, 2001-2002
   5  */
   6 
   7 /*
   8  *      jfs_logmgr.c: log manager
   9  *
  10  * for related information, see transaction manager (jfs_txnmgr.c), and
  11  * recovery manager (jfs_logredo.c).
  12  *
  13  * note: for detail, RTFS.
  14  *
  15  *      log buffer manager:
  16  * special purpose buffer manager supporting log i/o requirements.
  17  * per log serial pageout of logpage
  18  * queuing i/o requests and redrive i/o at iodone
  19  * maintain current logpage buffer
  20  * no caching since append only
  21  * appropriate jfs buffer cache buffers as needed
  22  *
  23  *      group commit:
  24  * transactions which wrote COMMIT records in the same in-memory
  25  * log page during the pageout of previous/current log page(s) are
  26  * committed together by the pageout of the page.
  27  *
  28  *      TBD lazy commit:
  29  * transactions are committed asynchronously when the log page
  30  * containing its COMMIT record is paged out when it becomes full;
  31  *
  32  *      serialization:
  33  * . a per log lock serialize log write.
  34  * . a per log lock serialize group commit.
  35  * . a per log lock serialize log open/close;
  36  *
  37  *      TBD log integrity:
  38  * careful-write (ping-pong) of last logpage to recover from crash
  39  * in overwrite.
  40  * detection of split (out-of-order) write of physical sectors
  41  * of last logpage via timestamp at end of each sector
  42  * (with its mirror data array at trailer).
  43  *
  44  *      alternatives:
  45  * lsn - 64-bit monotonically increasing integer vs
  46  * 32-bit lspn and page eor.
  47  */
  48 
  49 #include <linux/fs.h>
  50 #include <linux/blkdev.h>
  51 #include <linux/interrupt.h>
  52 #include <linux/completion.h>
  53 #include <linux/kthread.h>
  54 #include <linux/buffer_head.h>          /* for sync_blockdev() */
  55 #include <linux/bio.h>
  56 #include <linux/freezer.h>
  57 #include <linux/export.h>
  58 #include <linux/delay.h>
  59 #include <linux/mutex.h>
  60 #include <linux/seq_file.h>
  61 #include <linux/slab.h>
  62 #include "jfs_incore.h"
  63 #include "jfs_filsys.h"
  64 #include "jfs_metapage.h"
  65 #include "jfs_superblock.h"
  66 #include "jfs_txnmgr.h"
  67 #include "jfs_debug.h"
  68 
  69 
  70 /*
  71  * lbuf's ready to be redriven.  Protected by log_redrive_lock (jfsIO thread)
      *
      * log_redrive_list points at the first lbuf waiting to be redriven and
      * log_redrive_lock is the spinlock guarding it.  The code that adds to
      * and drains the list is not in this part of the file.
      * NOTE(review): "(jfsIO thread)" presumably names the consumer of the
      * list -- confirm against jfsIOWait().
  72  */
  73 static struct lbuf *log_redrive_list;
  74 static DEFINE_SPINLOCK(log_redrive_lock);
  75 
  76 
  77 /*
  78  *      log read/write serialization (per log)
      *
      * Thin wrappers around the mutex log->loglock.  Because this is a
      * mutex, LOG_LOCK() may sleep.  lmLog() holds it across the whole
      * record write, and lmWriteRecord()/lmNextPage() expect it to be held
      * by their caller.
  79  */
  80 #define LOG_LOCK_INIT(log)      mutex_init(&(log)->loglock)
  81 #define LOG_LOCK(log)           mutex_lock(&((log)->loglock))
  82 #define LOG_UNLOCK(log)         mutex_unlock(&((log)->loglock))
  83 
  84 
  85 /*
  86  *      log group commit serialization (per log)
      *
      * Wrappers around the spinlock log->gclock, which guards the commit
      * queue (log->cqueue) and the group-commit state of the tblocks on it.
      *
      * LOGGC_LOCK/LOGGC_UNLOCK use the _irq spinlock variants, so
      * LOGGC_UNLOCK re-enables interrupts unconditionally.  lmPostGC(),
      * which is documented as running at interrupt time, takes the same
      * lock with spin_lock_irqsave() instead of LOGGC_LOCK().
      *
      * LOGGC_WAKEUP wakes every waiter sleeping on tblk->gcwait.
  87  */
  88
  89 #define LOGGC_LOCK_INIT(log)    spin_lock_init(&(log)->gclock)
  90 #define LOGGC_LOCK(log)         spin_lock_irq(&(log)->gclock)
  91 #define LOGGC_UNLOCK(log)       spin_unlock_irq(&(log)->gclock)
  92 #define LOGGC_WAKEUP(tblk)      wake_up_all(&(tblk)->gcwait)
  93 
  94 /*
  95  *      log sync serialization (per log)
      *
      * LOGSYNC_DELTA(logsize):   the smaller of logsize/8 and 128*LOGPSIZE.
      * LOGSYNC_BARRIER(logsize): logsize/4.
      *
      * Neither macro is used in this part of the file.
      * NOTE(review): presumably they size the sync-point trigger
      * (log->nextsync, tested in lmLog()) and the sync barrier -- confirm
      * in lmLogSync().
  96  */
  97 #define LOGSYNC_DELTA(logsize)          min((logsize)/8, 128*LOGPSIZE)
  98 #define LOGSYNC_BARRIER(logsize)        ((logsize)/4)
  99 /*
      * Alternative (disabled) tuning, kept for reference: twice the delta
      * cap and twice the barrier fraction of the active definitions above.
      *
 100 #define LOGSYNC_DELTA(logsize)          min((logsize)/4, 256*LOGPSIZE)
 101 #define LOGSYNC_BARRIER(logsize)        ((logsize)/2)
 102 */
 103 
 104 
 105 /*
 106  *      log buffer cache synchronization
      *
      * jfsLCacheLock is a single file-wide spinlock (not per log).
      * LCACHE_LOCK/LCACHE_UNLOCK take and release it with the
      * irqsave/irqrestore variants; "flags" must be an unsigned long
      * lvalue owned by the caller.
 107  */
 108 static DEFINE_SPINLOCK(jfsLCacheLock);
 109
 110 #define LCACHE_LOCK(flags)      spin_lock_irqsave(&jfsLCacheLock, flags)
 111 #define LCACHE_UNLOCK(flags)    spin_unlock_irqrestore(&jfsLCacheLock, flags)
 112
 113 /*
 114  * See __SLEEP_COND in jfs_locks.h
      *
      * LCACHE_SLEEP_COND(wq, cond, flags): does nothing when cond is already
      * true; otherwise hands wq and cond to __SLEEP_COND together with
      * LCACHE_LOCK(flags)/LCACHE_UNLOCK(flags) as the lock and unlock
      * commands.  cond is evaluated more than once, so it must be free of
      * side effects.
      * NOTE(review): presumably the caller holds LCACHE_LOCK on entry and
      * still holds it on return -- confirm against __SLEEP_COND.
 115  */
 116 #define LCACHE_SLEEP_COND(wq, cond, flags)      \
 117 do {                                            \
 118         if (cond)                               \
 119                 break;                          \
 120         __SLEEP_COND(wq, cond, LCACHE_LOCK(flags), LCACHE_UNLOCK(flags)); \
 121 } while (0)
 122
      /* wake waiters on the given wait queue (wake_up(), not wake_up_all()) */
 123 #define LCACHE_WAKEUP(event)    wake_up(event)
 124 
 125 
 126 /*
 127  *      lbuf buffer cache (lCache) control
 128  */
 129 /* log buffer manager pageout control (cumulative, inclusive) */
      /*
       * Every value is a distinct bit, so requests are built with '|':
       * lmNextPage() passes lbmWRITE | lbmRELEASE | lbmFREE to lbmWrite(),
       * and lmGCwrite() passes lbmWRITE | lbmGC with or without lbmRELEASE.
       * lmPostGC() tests lbmERROR in bp->l_flag.
       */
 130 #define lbmREAD         0x0001  /* NOTE(review): presumably a read request -- confirm in lbmRead() */
 131 #define lbmWRITE        0x0002  /* enqueue at tail of write queue;
 132                                  * init pageout if at head of queue;
 133                                  */
 134 #define lbmRELEASE      0x0004  /* remove from write queue
 135                                  * at completion of pageout;
 136                                  * do not free/recycle it yet:
 137                                  * caller will free it;
 138                                  */
 139 #define lbmSYNC         0x0008  /* do not return to freelist
 140                                  * when removed from write queue;
 141                                  */
 142 #define lbmFREE         0x0010  /* return to freelist
 143                                  * at completion of pageout;
 144                                  * the buffer may be recycled;
 145                                  */
 146 #define lbmDONE         0x0020  /* NOTE(review): presumably set when the i/o has completed -- confirm in lbmIODone() */
 147 #define lbmERROR        0x0040  /* i/o failed; lmPostGC() turns it into tblkGC_ERROR on each tblk of the page */
 148 #define lbmGC           0x0080  /* lbmIODone to perform post-GC processing
 149                                  * of log page
 150                                  */
 151 #define lbmDIRECT       0x0100  /* NOTE(review): presumably set by lbmDirectWrite() -- confirm */
 152 
 153 /*
 154  * Global list of active external journals
      *
      * jfs_external_logs: head of that list.
      * dummy_log:         a single file-wide jfs_log pointer.
      *                    NOTE(review): presumably the log created by
      *                    open_dummy_log() -- confirm there.
      * jfs_log_mutex:     file-wide mutex.
      *                    NOTE(review): presumably serializes log
      *                    open/close and guards the two objects above --
      *                    confirm in lmLogOpen()/lmLogClose().
 155  */
 156 static LIST_HEAD(jfs_external_logs);
 157 static struct jfs_log *dummy_log;
 158 static DEFINE_MUTEX(jfs_log_mutex);
 159 
 160 /*
 161  * forward references
      *
      * Prototypes for the file-local (static) routines, so that they can be
      * called before their definitions.  Prefixes: lm* = log manager,
      * lbm* = log buffer manager (see the comment at the top of the file).
      *
      * Note: the second parameter of lmGCwrite() is named cant_block here
      * but cant_write in its definition; it is the same argument, and it
      * is forwarded as the last argument of lbmWrite().
 162  */
 163 static int lmWriteRecord(struct jfs_log * log, struct tblock * tblk,
 164                          struct lrd * lrd, struct tlock * tlck);
 165
 166 static int lmNextPage(struct jfs_log * log);
 167 static int lmLogFileSystem(struct jfs_log * log, struct jfs_sb_info *sbi,
 168                            int activate);
 169
 170 static int open_inline_log(struct super_block *sb);
 171 static int open_dummy_log(struct super_block *sb);
 172 static int lbmLogInit(struct jfs_log * log);
 173 static void lbmLogShutdown(struct jfs_log * log);
 174 static struct lbuf *lbmAllocate(struct jfs_log * log, int);
 175 static void lbmFree(struct lbuf * bp);
 176 static void lbmfree(struct lbuf * bp);
 177 static int lbmRead(struct jfs_log * log, int pn, struct lbuf ** bpp);
 178 static void lbmWrite(struct jfs_log * log, struct lbuf * bp, int flag, int cant_block);
 179 static void lbmDirectWrite(struct jfs_log * log, struct lbuf * bp, int flag);
 180 static int lbmIOWait(struct lbuf * bp, int flag);
 181 static bio_end_io_t lbmIODone;
 182 static void lbmStartIO(struct lbuf * bp);
 183 static void lmGCwrite(struct jfs_log * log, int cant_block);
 184 static int lmLogSync(struct jfs_log * log, int hard_sync);
 185 
 186 
 187 
 188 /*
 189  *      statistics
      *
      * File-wide counters, compiled in only with CONFIG_JFS_STATISTICS.
      * They are bumped through INCREMENT():
      *   commit       - lmWriteRecord(), once per LOG_COMMIT record
      *   full_page    - lmGCwrite(), page already full when written
      *   partial_page - lmGCwrite(), page not yet full when written
      * The updates of pagedone and submitted are not in this part of the
      * file.
 190  */
 191 #ifdef CONFIG_JFS_STATISTICS
 192 static struct lmStat {
 193         uint commit;            /* # of commit */
 194         uint pagedone;          /* # of page written */
 195         uint submitted;         /* # of pages submitted */
 196         uint full_page;         /* # of full pages submitted */
 197         uint partial_page;      /* # of partial pages submitted */
 198 } lmStat;
 199 #endif
 200 
 201 static void write_special_inodes(struct jfs_log *log,
 202                                  int (*writer)(struct address_space *))
 203 {
 204         struct jfs_sb_info *sbi;
 205 
 206         list_for_each_entry(sbi, &log->sb_list, log_list) {
 207                 writer(sbi->ipbmap->i_mapping);
 208                 writer(sbi->ipimap->i_mapping);
 209                 writer(sbi->direct_inode->i_mapping);
 210         }
 211 }
 212 
 213 /*
 214  * NAME:        lmLog()
 215  *
 216  * FUNCTION:    write a log record;
 217  *
 218  * PARAMETER:
 219  *
 220  * RETURN:      lsn - offset to the next log record to write (end-of-log);
 221  *              -1  - error;
 222  *
 223  * note: todo: log error handler
 224  */
 225 int lmLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
 226           struct tlock * tlck)
 227 {
 228         int lsn;
 229         int diffp, difft;
 230         struct metapage *mp = NULL;
 231         unsigned long flags;
 232 
 233         jfs_info("lmLog: log:0x%p tblk:0x%p, lrd:0x%p tlck:0x%p",
 234                  log, tblk, lrd, tlck);
 235 
 236         LOG_LOCK(log);
 237 
 238         /* log by (out-of-transaction) JFS ? */
 239         if (tblk == NULL)
 240                 goto writeRecord;
 241 
 242         /* log from page ? */
 243         if (tlck == NULL ||
 244             tlck->type & tlckBTROOT || (mp = tlck->mp) == NULL)
 245                 goto writeRecord;
 246 
 247         /*
 248          *      initialize/update page/transaction recovery lsn
 249          */
 250         lsn = log->lsn;
 251 
 252         LOGSYNC_LOCK(log, flags);
 253 
 254         /*
 255          * initialize page lsn if first log write of the page
 256          */
 257         if (mp->lsn == 0) {
 258                 mp->log = log;
 259                 mp->lsn = lsn;
 260                 log->count++;
 261 
 262                 /* insert page at tail of logsynclist */
 263                 list_add_tail(&mp->synclist, &log->synclist);
 264         }
 265 
 266         /*
 267          *      initialize/update lsn of tblock of the page
 268          *
 269          * transaction inherits oldest lsn of pages associated
 270          * with allocation/deallocation of resources (their
 271          * log records are used to reconstruct allocation map
 272          * at recovery time: inode for inode allocation map,
 273          * B+-tree index of extent descriptors for block
 274          * allocation map);
 275          * allocation map pages inherit transaction lsn at
 276          * commit time to allow forwarding log syncpt past log
 277          * records associated with allocation/deallocation of
 278          * resources only after persistent map of these map pages
 279          * have been updated and propagated to home.
 280          */
 281         /*
 282          * initialize transaction lsn:
 283          */
 284         if (tblk->lsn == 0) {
 285                 /* inherit lsn of its first page logged */
 286                 tblk->lsn = mp->lsn;
 287                 log->count++;
 288 
 289                 /* insert tblock after the page on logsynclist */
 290                 list_add(&tblk->synclist, &mp->synclist);
 291         }
 292         /*
 293          * update transaction lsn:
 294          */
 295         else {
 296                 /* inherit oldest/smallest lsn of page */
 297                 logdiff(diffp, mp->lsn, log);
 298                 logdiff(difft, tblk->lsn, log);
 299                 if (diffp < difft) {
 300                         /* update tblock lsn with page lsn */
 301                         tblk->lsn = mp->lsn;
 302 
 303                         /* move tblock after page on logsynclist */
 304                         list_move(&tblk->synclist, &mp->synclist);
 305                 }
 306         }
 307 
 308         LOGSYNC_UNLOCK(log, flags);
 309 
 310         /*
 311          *      write the log record
 312          */
 313       writeRecord:
 314         lsn = lmWriteRecord(log, tblk, lrd, tlck);
 315 
 316         /*
 317          * forward log syncpt if log reached next syncpt trigger
 318          */
 319         logdiff(diffp, lsn, log);
 320         if (diffp >= log->nextsync)
 321                 lsn = lmLogSync(log, 0);
 322 
 323         /* update end-of-log lsn */
 324         log->lsn = lsn;
 325 
 326         LOG_UNLOCK(log);
 327 
 328         /* return end-of-log address */
 329         return lsn;
 330 }
 331 
 332 /*
 333  * NAME:        lmWriteRecord()
 334  *
 335  * FUNCTION:    move the log record to current log page
      *
      *      The log vectors of tlck's linelock chain (if tlck != NULL) are
      *      copied first, each followed by a 4-byte descriptor holding its
      *      offset and length; the record descriptor lrd is copied last,
      *      with lrd->length set to the number of bytes that precede it.
      *      Whenever the write offset reaches LOGPSIZE - LOGPTLRSIZE the
      *      copy continues at LOGPHDRSIZE of the page set up by
      *      lmNextPage(), so data and descriptors may straddle pages.
      *
      *      For a record whose type has LOG_COMMIT set, tblk->clsn is set
      *      and tblk is queued at the tail of log->cqueue under LOGGC_LOCK.
 336  *
 337  * PARAMETER:   log     - log whose current page (log->bp) is appended to
      *              tblk    - transaction; dereferenced only for LOG_COMMIT
      *              lrd     - record descriptor; its length field is
      *                        overwritten here
      *              tlck    - lock describing the data to log, or NULL for
      *                        a descriptor-only record
 338  *
 339  * RETURN:      end-of-log address
      *              ((log->page << L2LOGPSIZE) + offset just past the
      *              record descriptor); 0 if tlck->flag selects none of
      *              the lock kinds handled below
 340  *
 341  * serialization: LOG_LOCK() held on entry/exit
 342  */
 343 static int
 344 lmWriteRecord(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
 345               struct tlock * tlck)
 346 {
 347         int lsn = 0;            /* end-of-log address */
 348         struct lbuf *bp;        /* dst log page buffer */
 349         struct logpage *lp;     /* dst log page */
 350         caddr_t dst;            /* destination address in log page */
 351         int dstoffset;          /* end-of-log offset in log page */
 352         int freespace;          /* free space in log page */
 353         caddr_t p;              /* src meta-data page */
 354         caddr_t src;
 355         int srclen;
 356         int nbytes;             /* number of bytes to move */
 357         int i;
 358         int len;
 359         struct linelock *linelock;
 360         struct lv *lv;
 361         struct lvd *lvd;
 362         int l2linesize;
 363
             /* len: bytes of data + vector descriptors written before lrd */
 364         len = 0;
 365
 366         /* retrieve destination log page to write */
 367         bp = (struct lbuf *) log->bp;
 368         lp = (struct logpage *) bp->l_ldata;
 369         dstoffset = log->eor;
 370
 371         /* any log data to write ? */
 372         if (tlck == NULL)
 373                 goto moveLrd;
 374
 375         /*
 376          *      move log record data
 377          */
 378         /* retrieve source meta-data page to log */
 379         if (tlck->flag & tlckPAGELOCK) {
 380                 p = (caddr_t) (tlck->mp->data);
 381                 linelock = (struct linelock *) & tlck->lock;
 382         }
 383         /* retrieve source in-memory inode to log */
 384         else if (tlck->flag & tlckINODELOCK) {
 385                 if (tlck->type & tlckDTREE)
 386                         p = (caddr_t) &JFS_IP(tlck->ip)->i_dtroot;
 387                 else
 388                         p = (caddr_t) &JFS_IP(tlck->ip)->i_xtroot;
 389                 linelock = (struct linelock *) & tlck->lock;
 390         }
 391 #ifdef  _JFS_WIP
             /*
              * NOTE(review): work-in-progress branch; no local named
              * inlinelock is declared in this function.
              */
 392         else if (tlck->flag & tlckINLINELOCK) {
 393
 394                 inlinelock = (struct inlinelock *) & tlck;
 395                 p = (caddr_t) & inlinelock->pxd;
 396                 linelock = (struct linelock *) & tlck;
 397         }
 398 #endif                          /* _JFS_WIP */
 399         else {
 400                 jfs_err("lmWriteRecord: UFO tlck:0x%p", tlck);
 401                 return 0;       /* Probably should trap */
 402         }
             /*
              * The line size is read from the first linelock only; it is
              * not re-read when moveData follows linelock->next below.
              */
 403         l2linesize = linelock->l2linesize;
 404
 405       moveData:
 406         ASSERT(linelock->index <= linelock->maxcnt);
 407
 408         lv = linelock->lv;
 409         for (i = 0; i < linelock->index; i++, lv++) {
                     /* empty vectors produce neither data nor descriptor */
 410                 if (lv->length == 0)
 411                         continue;
 412
 413                 /* is page full ? */
 414                 if (dstoffset >= LOGPSIZE - LOGPTLRSIZE) {
 415                         /* page become full: move on to next page */
 416                         lmNextPage(log);
 417
 418                         bp = log->bp;
 419                         lp = (struct logpage *) bp->l_ldata;
 420                         dstoffset = LOGPHDRSIZE;
 421                 }
 422
 423                 /*
 424                  * move log vector data
                      *
                      * lv->offset and lv->length count lines; shifting by
                      * l2linesize converts them to bytes.
 425                  */
 426                 src = (u8 *) p + (lv->offset << l2linesize);
 427                 srclen = lv->length << l2linesize;
 428                 len += srclen;
 429                 while (srclen > 0) {
 430                         freespace = (LOGPSIZE - LOGPTLRSIZE) - dstoffset;
 431                         nbytes = min(freespace, srclen);
 432                         dst = (caddr_t) lp + dstoffset;
 433                         memcpy(dst, src, nbytes);
 434                         dstoffset += nbytes;
 435
 436                         /* is page not full ? */
                             /* (then all of the remaining data was copied) */
 437                         if (dstoffset < LOGPSIZE - LOGPTLRSIZE)
 438                                 break;
 439
 440                         /* page become full: move on to next page */
 441                         lmNextPage(log);
 442
 443                         bp = (struct lbuf *) log->bp;
 444                         lp = (struct logpage *) bp->l_ldata;
 445                         dstoffset = LOGPHDRSIZE;
 446
 447                         srclen -= nbytes;
 448                         src += nbytes;
 449                 }
 450
 451                 /*
 452                  * move log vector descriptor
                      *
                      * The descriptor is stored directly at lp + dstoffset
                      * and accounted as 4 bytes; offset and length are kept
                      * in line units, little-endian.
 453                  */
 454                 len += 4;
 455                 lvd = (struct lvd *) ((caddr_t) lp + dstoffset);
 456                 lvd->offset = cpu_to_le16(lv->offset);
 457                 lvd->length = cpu_to_le16(lv->length);
 458                 dstoffset += 4;
 459                 jfs_info("lmWriteRecord: lv offset:%d length:%d",
 460                          lv->offset, lv->length);
 461         }
 462
             /* a non-zero next is the lock id of a further linelock */
 463         if ((i = linelock->next)) {
 464                 linelock = (struct linelock *) lid_to_tlock(i);
 465                 goto moveData;
 466         }
 467
 468         /*
 469          *      move log record descriptor
 470          */
 471       moveLrd:
 472         lrd->length = cpu_to_le16(len);
 473
 474         src = (caddr_t) lrd;
 475         srclen = LOGRDSIZE;
 476
 477         while (srclen > 0) {
 478                 freespace = (LOGPSIZE - LOGPTLRSIZE) - dstoffset;
 479                 nbytes = min(freespace, srclen);
 480                 dst = (caddr_t) lp + dstoffset;
 481                 memcpy(dst, src, nbytes);
 482
 483                 dstoffset += nbytes;
 484                 srclen -= nbytes;
 485
 486                 /* are there more to move than freespace of page ? */
 487                 if (srclen)
 488                         goto pageFull;
 489
 490                 /*
 491                  * end of log record descriptor
 492                  */
 493
 494                 /* update last log record eor */
 495                 log->eor = dstoffset;
 496                 bp->l_eor = dstoffset;
 497                 lsn = (log->page << L2LOGPSIZE) + dstoffset;
 498
                     /*
                      * NOTE(review): tblk is dereferenced without a NULL
                      * check; lmLog() accepts tblk == NULL, so such
                      * records presumably never carry LOG_COMMIT -- confirm
                      * against callers.
                      */
 499                 if (lrd->type & cpu_to_le16(LOG_COMMIT)) {
 500                         tblk->clsn = lsn;
 501                         jfs_info("wr: tclsn:0x%x, beor:0x%x", tblk->clsn,
 502                                  bp->l_eor);
 503
 504                         INCREMENT(lmStat.commit);       /* # of commit */
 505
 506                         /*
 507                          * enqueue tblock for group commit:
 508                          *
 509                          * enqueue tblock of non-trivial/synchronous COMMIT
 510                          * at tail of group commit queue
 511                          * (trivial/asynchronous COMMITs are ignored by
 512                          * group commit.)
 513                          */
 514                         LOGGC_LOCK(log);
 515
 516                         /* init tblock gc state */
                             /* (plain assignment: earlier flag bits are dropped) */
 517                         tblk->flag = tblkGC_QUEUE;
 518                         tblk->bp = log->bp;
 519                         tblk->pn = log->page;
 520                         tblk->eor = log->eor;
 521
 522                         /* enqueue transaction to commit queue */
 523                         list_add_tail(&tblk->cqueue, &log->cqueue);
 524
 525                         LOGGC_UNLOCK(log);
 526                 }
 527
 528                 jfs_info("lmWriteRecord: lrd:0x%04x bp:0x%p pn:%d eor:0x%x",
 529                         le16_to_cpu(lrd->type), log->bp, log->page, dstoffset);
 530
 531                 /* page not full ? */
 532                 if (dstoffset < LOGPSIZE - LOGPTLRSIZE)
 533                         return lsn;
 534
                     /*
                      * Reached either by the goto above (descriptor split
                      * across pages, srclen > 0, loop continues) or by
                      * falling through when the descriptor ended exactly at
                      * the end of the page (srclen == 0, loop ends and the
                      * lsn computed above is returned).
                      */
 535               pageFull:
 536                 /* page become full: move on to next page */
 537                 lmNextPage(log);
 538
 539                 bp = (struct lbuf *) log->bp;
 540                 lp = (struct logpage *) bp->l_ldata;
 541                 dstoffset = LOGPHDRSIZE;
 542                 src += nbytes;
 543         }
 544
 545         return lsn;
 546 }
 547 
 548 
 549 /*
 550  * NAME:        lmNextPage()
 551  *
 552  * FUNCTION:    write current page and allocate next page.
 553  *
 554  * PARAMETER:   log
 555  *
 556  * RETURN:      0
 557  *
 558  * serialization: LOG_LOCK() held on entry/exit
 559  */
 560 static int lmNextPage(struct jfs_log * log)
 561 {
 562         struct logpage *lp;
 563         int lspn;               /* log sequence page number */
 564         int pn;                 /* current page number */
 565         struct lbuf *bp;
 566         struct lbuf *nextbp;
 567         struct tblock *tblk;
 568 
 569         /* get current log page number and log sequence page number */
 570         pn = log->page;
 571         bp = log->bp;
 572         lp = (struct logpage *) bp->l_ldata;
 573         lspn = le32_to_cpu(lp->h.page);
 574 
 575         LOGGC_LOCK(log);
 576 
 577         /*
 578          *      write or queue the full page at the tail of write queue
 579          */
 580         /* get the tail tblk on commit queue */
 581         if (list_empty(&log->cqueue))
 582                 tblk = NULL;
 583         else
 584                 tblk = list_entry(log->cqueue.prev, struct tblock, cqueue);
 585 
 586         /* every tblk who has COMMIT record on the current page,
 587          * and has not been committed, must be on commit queue
 588          * since tblk is queued at commit queueu at the time
 589          * of writing its COMMIT record on the page before
 590          * page becomes full (even though the tblk thread
 591          * who wrote COMMIT record may have been suspended
 592          * currently);
 593          */
 594 
 595         /* is page bound with outstanding tail tblk ? */
 596         if (tblk && tblk->pn == pn) {
 597                 /* mark tblk for end-of-page */
 598                 tblk->flag |= tblkGC_EOP;
 599 
 600                 if (log->cflag & logGC_PAGEOUT) {
 601                         /* if page is not already on write queue,
 602                          * just enqueue (no lbmWRITE to prevent redrive)
 603                          * buffer to wqueue to ensure correct serial order
 604                          * of the pages since log pages will be added
 605                          * continuously
 606                          */
 607                         if (bp->l_wqnext == NULL)
 608                                 lbmWrite(log, bp, 0, 0);
 609                 } else {
 610                         /*
 611                          * No current GC leader, initiate group commit
 612                          */
 613                         log->cflag |= logGC_PAGEOUT;
 614                         lmGCwrite(log, 0);
 615                 }
 616         }
 617         /* page is not bound with outstanding tblk:
 618          * init write or mark it to be redriven (lbmWRITE)
 619          */
 620         else {
 621                 /* finalize the page */
 622                 bp->l_ceor = bp->l_eor;
 623                 lp->h.eor = lp->t.eor = cpu_to_le16(bp->l_ceor);
 624                 lbmWrite(log, bp, lbmWRITE | lbmRELEASE | lbmFREE, 0);
 625         }
 626         LOGGC_UNLOCK(log);
 627 
 628         /*
 629          *      allocate/initialize next page
 630          */
 631         /* if log wraps, the first data page of log is 2
 632          * (0 never used, 1 is superblock).
 633          */
 634         log->page = (pn == log->size - 1) ? 2 : pn + 1;
 635         log->eor = LOGPHDRSIZE; /* ? valid page empty/full at logRedo() */
 636 
 637         /* allocate/initialize next log page buffer */
 638         nextbp = lbmAllocate(log, log->page);
 639         nextbp->l_eor = log->eor;
 640         log->bp = nextbp;
 641 
 642         /* initialize next log page */
 643         lp = (struct logpage *) nextbp->l_ldata;
 644         lp->h.page = lp->t.page = cpu_to_le32(lspn + 1);
 645         lp->h.eor = lp->t.eor = cpu_to_le16(LOGPHDRSIZE);
 646 
 647         return 0;
 648 }
 649 
 650 
 651 /*
 652  * NAME:        lmGroupCommit()
 653  *
 654  * FUNCTION:    group commit
 655  *      initiate pageout of the pages with COMMIT in the order of
 656  *      page number - redrive pageout of the page at the head of
 657  *      pageout queue until full page has been written.
 658  *
 659  * RETURN:
 660  *
 661  * NOTE:
 662  *      LOGGC_LOCK serializes log group commit queue, and
 663  *      transaction blocks on the commit queue.
 664  *      N.B. LOG_LOCK is NOT held during lmGroupCommit().
 665  */
 666 int lmGroupCommit(struct jfs_log * log, struct tblock * tblk)
 667 {
 668         int rc = 0;
 669 
 670         LOGGC_LOCK(log);
 671 
 672         /* group committed already ? */
 673         if (tblk->flag & tblkGC_COMMITTED) {
 674                 if (tblk->flag & tblkGC_ERROR)
 675                         rc = -EIO;
 676 
 677                 LOGGC_UNLOCK(log);
 678                 return rc;
 679         }
 680         jfs_info("lmGroup Commit: tblk = 0x%p, gcrtc = %d", tblk, log->gcrtc);
 681 
 682         if (tblk->xflag & COMMIT_LAZY)
 683                 tblk->flag |= tblkGC_LAZY;
 684 
 685         if ((!(log->cflag & logGC_PAGEOUT)) && (!list_empty(&log->cqueue)) &&
 686             (!(tblk->xflag & COMMIT_LAZY) || test_bit(log_FLUSH, &log->flag)
 687              || jfs_tlocks_low)) {
 688                 /*
 689                  * No pageout in progress
 690                  *
 691                  * start group commit as its group leader.
 692                  */
 693                 log->cflag |= logGC_PAGEOUT;
 694 
 695                 lmGCwrite(log, 0);
 696         }
 697 
 698         if (tblk->xflag & COMMIT_LAZY) {
 699                 /*
 700                  * Lazy transactions can leave now
 701                  */
 702                 LOGGC_UNLOCK(log);
 703                 return 0;
 704         }
 705 
 706         /* lmGCwrite gives up LOGGC_LOCK, check again */
 707 
 708         if (tblk->flag & tblkGC_COMMITTED) {
 709                 if (tblk->flag & tblkGC_ERROR)
 710                         rc = -EIO;
 711 
 712                 LOGGC_UNLOCK(log);
 713                 return rc;
 714         }
 715 
 716         /* upcount transaction waiting for completion
 717          */
 718         log->gcrtc++;
 719         tblk->flag |= tblkGC_READY;
 720 
 721         __SLEEP_COND(tblk->gcwait, (tblk->flag & tblkGC_COMMITTED),
 722                      LOGGC_LOCK(log), LOGGC_UNLOCK(log));
 723 
 724         /* removed from commit queue */
 725         if (tblk->flag & tblkGC_ERROR)
 726                 rc = -EIO;
 727 
 728         LOGGC_UNLOCK(log);
 729         return rc;
 730 }
 731 
 732 /*
 733  * NAME:        lmGCwrite()
 734  *
 735  * FUNCTION:    group commit write
 736  *      initiate write of log page, building a group of all transactions
 737  *      with commit records on that page.
 738  *
 739  * RETURN:      None
 740  *
 741  * NOTE:
 742  *      LOGGC_LOCK must be held by caller.
 743  *      N.B. LOG_LOCK is NOT held during lmGroupCommit().
 744  */
 745 static void lmGCwrite(struct jfs_log * log, int cant_write)
 746 {
 747         struct lbuf *bp;
 748         struct logpage *lp;
 749         int gcpn;               /* group commit page number */
 750         struct tblock *tblk;
 751         struct tblock *xtblk = NULL;
 752 
 753         /*
 754          * build the commit group of a log page
 755          *
 756          * scan commit queue and make a commit group of all
 757          * transactions with COMMIT records on the same log page.
 758          */
 759         /* get the head tblk on the commit queue */
 760         gcpn = list_entry(log->cqueue.next, struct tblock, cqueue)->pn;
 761 
 762         list_for_each_entry(tblk, &log->cqueue, cqueue) {
 763                 if (tblk->pn != gcpn)
 764                         break;
 765 
 766                 xtblk = tblk;
 767 
 768                 /* state transition: (QUEUE, READY) -> COMMIT */
 769                 tblk->flag |= tblkGC_COMMIT;
 770         }
 771         tblk = xtblk;           /* last tblk of the page */
 772 
 773         /*
 774          * pageout to commit transactions on the log page.
 775          */
 776         bp = (struct lbuf *) tblk->bp;
 777         lp = (struct logpage *) bp->l_ldata;
 778         /* is page already full ? */
 779         if (tblk->flag & tblkGC_EOP) {
 780                 /* mark page to free at end of group commit of the page */
 781                 tblk->flag &= ~tblkGC_EOP;
 782                 tblk->flag |= tblkGC_FREE;
 783                 bp->l_ceor = bp->l_eor;
 784                 lp->h.eor = lp->t.eor = cpu_to_le16(bp->l_ceor);
 785                 lbmWrite(log, bp, lbmWRITE | lbmRELEASE | lbmGC,
 786                          cant_write);
 787                 INCREMENT(lmStat.full_page);
 788         }
 789         /* page is not yet full */
 790         else {
 791                 bp->l_ceor = tblk->eor; /* ? bp->l_ceor = bp->l_eor; */
 792                 lp->h.eor = lp->t.eor = cpu_to_le16(bp->l_ceor);
 793                 lbmWrite(log, bp, lbmWRITE | lbmGC, cant_write);
 794                 INCREMENT(lmStat.partial_page);
 795         }
 796 }
 797 
 798 /*
 799  * NAME:        lmPostGC()
 800  *
 801  * FUNCTION:    group commit post-processing
 802  *      Processes transactions after their commit records have been written
 803  *      to disk, redriving log I/O if necessary.
 804  *
 805  * RETURN:      None
 806  *
 807  * NOTE:
 808  *      This routine is called at interrupt time by lbmIODone
 809  */
 810 static void lmPostGC(struct lbuf * bp)
 811 {
 812         unsigned long flags;
 813         struct jfs_log *log = bp->l_log;
 814         struct logpage *lp;
 815         struct tblock *tblk, *temp;
 816 
 817         //LOGGC_LOCK(log);
 818         spin_lock_irqsave(&log->gclock, flags);
 819         /*
 820          * current pageout of group commit completed.
 821          *
 822          * remove/wakeup transactions from commit queue who were
 823          * group committed with the current log page
 824          */
 825         list_for_each_entry_safe(tblk, temp, &log->cqueue, cqueue) {
 826                 if (!(tblk->flag & tblkGC_COMMIT))
 827                         break;
 828                 /* if transaction was marked GC_COMMIT then
 829                  * it has been shipped in the current pageout
 830                  * and made it to disk - it is committed.
 831                  */
 832 
 833                 if (bp->l_flag & lbmERROR)
 834                         tblk->flag |= tblkGC_ERROR;
 835 
 836                 /* remove it from the commit queue */
 837                 list_del(&tblk->cqueue);
 838                 tblk->flag &= ~tblkGC_QUEUE;
 839 
 840                 if (tblk == log->flush_tblk) {
 841                         /* we can stop flushing the log now */
 842                         clear_bit(log_FLUSH, &log->flag);
 843                         log->flush_tblk = NULL;
 844                 }
 845 
 846                 jfs_info("lmPostGC: tblk = 0x%p, flag = 0x%x", tblk,
 847                          tblk->flag);
 848 
 849                 if (!(tblk->xflag & COMMIT_FORCE))
 850                         /*
 851                          * Hand tblk over to lazy commit thread
 852                          */
 853                         txLazyUnlock(tblk);
 854                 else {
 855                         /* state transition: COMMIT -> COMMITTED */
 856                         tblk->flag |= tblkGC_COMMITTED;
 857 
 858                         if (tblk->flag & tblkGC_READY)
 859                                 log->gcrtc--;
 860 
 861                         LOGGC_WAKEUP(tblk);
 862                 }
 863 
 864                 /* was page full before pageout ?
 865                  * (and this is the last tblk bound with the page)
 866                  */
 867                 if (tblk->flag & tblkGC_FREE)
 868                         lbmFree(bp);
 869                 /* did page become full after pageout ?
 870                  * (and this is the last tblk bound with the page)
 871                  */
 872                 else if (tblk->flag & tblkGC_EOP) {
 873                         /* finalize the page */
 874                         lp = (struct logpage *) bp->l_ldata;
 875                         bp->l_ceor = bp->l_eor;
 876                         lp->h.eor = lp->t.eor = cpu_to_le16(bp->l_eor);
 877                         jfs_info("lmPostGC: calling lbmWrite");
 878                         lbmWrite(log, bp, lbmWRITE | lbmRELEASE | lbmFREE,
 879                                  1);
 880                 }
 881 
 882         }
 883 
 884         /* are there any transactions who have entered lnGroupCommit()
 885          * (whose COMMITs are after that of the last log page written.
 886          * They are waiting for new group commit (above at (SLEEP 1))
 887          * or lazy transactions are on a full (queued) log page,
 888          * select the latest ready transaction as new group leader and
 889          * wake her up to lead her group.
 890          */
 891         if ((!list_empty(&log->cqueue)) &&
 892             ((log->gcrtc > 0) || (tblk->bp->l_wqnext != NULL) ||
 893              test_bit(log_FLUSH, &log->flag) || jfs_tlocks_low))
 894                 /*
 895                  * Call lmGCwrite with new group leader
 896                  */
 897                 lmGCwrite(log, 1);
 898 
 899         /* no transaction are ready yet (transactions are only just
 900          * queued (GC_QUEUE) and not entered for group commit yet).
 901          * the first transaction entering group commit
 902          * will elect herself as new group leader.
 903          */
 904         else
 905                 log->cflag &= ~logGC_PAGEOUT;
 906 
 907         //LOGGC_UNLOCK(log);
 908         spin_unlock_irqrestore(&log->gclock, flags);
 909         return;
 910 }
 911 
 912 /*
 913  * NAME:        lmLogSync()
 914  *
 915  * FUNCTION:    write log SYNCPT record for specified log
 916  *      if new sync address is available
 917  *      (normally the case if sync() is executed by back-ground
 918  *      process).
 919  *      calculate new value of log->nextsync which determines when
 920  *      this code is called again.
 921  *
 922  * PARAMETERS:  log     - log structure
 923  *              hard_sync - 1 to force all metadata to be written
      *                          (filemap_fdatawrite is used instead of
      *                          filemap_flush)
 924  *
 925  * RETURN:      lsn returned by lmWriteRecord() for the SYNCPT record,
      *              or log->lsn if no SYNCPT record was written
 926  *
 927  * serialization: LOG_LOCK() held on entry/exit
 928  */
 929 static int lmLogSync(struct jfs_log * log, int hard_sync)
 930 {
 931         int logsize;
 932         int written;            /* written since last syncpt */
 933         int free;               /* free space left available */
 934         int delta;              /* additional delta to write normally */
 935         int more;               /* additional write granted */
 936         struct lrd lrd;
 937         int lsn;
 938         struct logsyncblk *lp;
 939         unsigned long flags;
 940 
 941         /* push dirty metapages out to disk */
 942         if (hard_sync)
 943                 write_special_inodes(log, filemap_fdatawrite);
 944         else
 945                 write_special_inodes(log, filemap_flush);
 946 
 947         /*
 948          *      forward syncpt
 949          */
 950         /* if last sync is same as last syncpt,
 951          * invoke sync point forward processing to update sync.
 952          */
 953 
 954         if (log->sync == log->syncpt) {
 955                 LOGSYNC_LOCK(log, flags);
             /* empty synclist: sync moves up to the current log->lsn;
              * otherwise it takes the lsn of the first synclist entry
              */
 956                 if (list_empty(&log->synclist))
 957                         log->sync = log->lsn;
 958                 else {
 959                         lp = list_entry(log->synclist.next,
 960                                         struct logsyncblk, synclist);
 961                         log->sync = lp->lsn;
 962                 }
 963                 LOGSYNC_UNLOCK(log, flags);
 964 
 965         }
 966 
 967         /* if sync is different from last syncpt,
 968          * write a SYNCPT record with syncpt = sync.
 969          * reset syncpt = sync
 970          */
 971         if (log->sync != log->syncpt) {
 972                 lrd.logtid = 0;
 973                 lrd.backchain = 0;
 974                 lrd.type = cpu_to_le16(LOG_SYNCPT);
 975                 lrd.length = 0;
 976                 lrd.log.syncpt.sync = cpu_to_le32(log->sync);
 977                 lsn = lmWriteRecord(log, NULL, &lrd, NULL);
 978 
 979                 log->syncpt = log->sync;
 980         } else
 981                 lsn = log->lsn;
 982 
 983         /*
 984          *      setup next syncpt trigger (SWAG)
 985          */
 986         logsize = log->logsize;
 987 
 988         logdiff(written, lsn, log);
 989         free = logsize - written;
 990         delta = LOGSYNC_DELTA(logsize);
 991         more = min(free / 2, delta);
 992         if (more < 2 * LOGPSIZE) {
 993                 jfs_warn("\n ... Log Wrap ... Log Wrap ... Log Wrap ...\n");
 994                 /*
 995                  *      log wrapping
 996                  *
 997                  * option 1 - panic ? No.!
 998                  * option 2 - shutdown file systems
 999                  *            associated with log ?
1000                  * option 3 - extend log ?
1001                  * option 4 - second chance
1002                  *
1003                  * mark log wrapped, and continue.
1004                  * when all active transactions are completed,
1005                  * mark log valid for recovery.
1006                  * if crashed during invalid state, log state
1007                  * implies invalid log, forcing fsck().
1008                  */
1009                 /* mark log state log wrap in log superblock */
1010                 /* log->state = LOGWRAP; */
1011 
1012                 /* reset sync point computation */
1013                 log->syncpt = log->sync = lsn;
1014                 log->nextsync = delta;
1015         } else
1016                 /* next syncpt trigger = written + more */
1017                 log->nextsync = written + more;
1018 
1019         /* if number of bytes written from last sync point is more
1020          * than 1/4 of the log size, stop new transactions from
1021          * starting until all current transactions are completed
1022          * by setting syncbarrier flag.
1023          */
1024         if (!test_bit(log_SYNCBARRIER, &log->flag) &&
1025             (written > LOGSYNC_BARRIER(logsize)) && log->active) {
1026                 set_bit(log_SYNCBARRIER, &log->flag);
1027                 jfs_info("log barrier on: lsn=0x%x syncpt=0x%x", lsn,
1028                          log->syncpt);
1029                 /*
1030                  * We may have to initiate group commit
1031                  */
1032                 jfs_flush_journal(log, 0);
1033         }
1034 
1035         return lsn;
1036 }
1037 
1038 /*
      * NAME:        jfs_syncpt()
      *
      * FUNCTION:    take the log lock and, unless the log_QUIESCE flag is
      *              set, have lmLogSync() write a SYNCPT record
      *
      * PARAMETERS:  log       - log structure
      *              hard_sync - passed through to lmLogSync(); set to 1 to
      *                          force metadata to be written
      *
      * RETURN:      None (the lsn returned by lmLogSync() is discarded)
      */
1046 void jfs_syncpt(struct jfs_log *log, int hard_sync)
     {
             int quiesced;

             LOG_LOCK(log);
             /* the flag is sampled under LOG_LOCK, as is the sync itself */
             quiesced = test_bit(log_QUIESCE, &log->flag);
             if (!quiesced)
                     lmLogSync(log, hard_sync);
             LOG_UNLOCK(log);
1051 }
1052 
1053 /*
1054  * NAME:        lmLogOpen()
1055  *
1056  * FUNCTION:    open the log on first open;
1057  *      insert filesystem in the active list of the log.
1058  *
      *      JFS_NOINTEGRITY mounts are handed to open_dummy_log() and
      *      JFS_INLINELOG mounts to open_inline_log(); everything below
      *      that handles an external log device.
      *
1059  * PARAMETER:   sb      - super block of the file system being mounted;
1060  *                        on success JFS_SBI(sb)->log points to the log
1061  *
1062  * RETURN:      0       - success
      *              -EINVAL - an already open log on the same device has a
      *                        different uuid than sbi->loguuid
      *              -ENOMEM - log descriptor could not be allocated
      *              errors returned by blkdev_get_by_dev(), lmLogInit(),
      *              lmLogFileSystem(), open_dummy_log(), open_inline_log()
1063  *
1064  * serialization: jfs_log_mutex is held while the external log list is
      *      searched/updated; LOG_LOCK() protects the log's sb_list
1065  */
1066 int lmLogOpen(struct super_block *sb)
1067 {
1068         int rc;
1069         struct block_device *bdev;
1070         struct jfs_log *log;
1071         struct jfs_sb_info *sbi = JFS_SBI(sb);
1072 
1073         if (sbi->flag & JFS_NOINTEGRITY)
1074                 return open_dummy_log(sb);
1075 
1076         if (sbi->mntflag & JFS_INLINELOG)
1077                 return open_inline_log(sb);
1078 
1079         mutex_lock(&jfs_log_mutex);
             /* reuse an external log that is already open on this device */
1080         list_for_each_entry(log, &jfs_external_logs, journal_list) {
1081                 if (log->bdev->bd_dev == sbi->logdev) {
1082                         if (!uuid_equal(&log->uuid, &sbi->loguuid)) {
1083                                 jfs_warn("wrong uuid on JFS journal");
1084                                 mutex_unlock(&jfs_log_mutex);
1085                                 return -EINVAL;
1086                         }
1087                         /*
1088                          * add file system to log active file system list
1089                          */
1090                         if ((rc = lmLogFileSystem(log, sbi, 1))) {
1091                                 mutex_unlock(&jfs_log_mutex);
1092                                 return rc;
1093                         }
1094                         goto journal_found;
1095                 }
1096         }
1097 
             /* no open log on this device yet: create a new descriptor */
1098         if (!(log = kzalloc(sizeof(struct jfs_log), GFP_KERNEL))) {
1099                 mutex_unlock(&jfs_log_mutex);
1100                 return -ENOMEM;
1101         }
1102         INIT_LIST_HEAD(&log->sb_list);
1103         init_waitqueue_head(&log->syncwait);
1104 
1105         /*
1106          *      external log as separate logical volume
1107          *
1108          * file systems to log may have n-to-1 relationship;
1109          */
1110 
1111         bdev = blkdev_get_by_dev(sbi->logdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL,
1112                                  log);
1113         if (IS_ERR(bdev)) {
1114                 rc = PTR_ERR(bdev);
1115                 goto free;
1116         }
1117 
1118         log->bdev = bdev;
1119         uuid_copy(&log->uuid, &sbi->loguuid);
1120 
1121         /*
1122          * initialize log:
1123          */
1124         if ((rc = lmLogInit(log)))
1125                 goto close;
1126 
1127         list_add(&log->journal_list, &jfs_external_logs);
1128 
1129         /*
1130          * add file system to log active file system list
1131          */
1132         if ((rc = lmLogFileSystem(log, sbi, 1)))
1133                 goto shutdown;
1134 
1135 journal_found:
             /* link the file system to the log (both directions) */
1136         LOG_LOCK(log);
1137         list_add(&sbi->log_list, &log->sb_list);
1138         sbi->log = log;
1139         LOG_UNLOCK(log);
1140 
1141         mutex_unlock(&jfs_log_mutex);
1142         return 0;
1143 
1144         /*
1145          *      unwind on error
              *
              * the labels fall through, so each entry point also performs
              * every cleanup step listed after it
1146          */
1147       shutdown:         /* unwind lbmLogInit() */
1148         list_del(&log->journal_list);
1149         lbmLogShutdown(log);
1150 
1151       close:            /* close external log device */
1152         blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
1153 
1154       free:             /* free log descriptor */
1155         mutex_unlock(&jfs_log_mutex);
1156         kfree(log);
1157 
1158         jfs_warn("lmLogOpen: exit(%d)", rc);
1159         return rc;
1160 }
1161 
1162 static int open_inline_log(struct super_block *sb)
     {
             /*
              * Set up a private log descriptor for a log that lives inside
              * the file system's own block device (sb->s_bdev), described
              * by the logpxd extent in the JFS superblock info.
              *
              * Returns 0 on success, -ENOMEM if the descriptor cannot be
              * allocated, or the error returned by lmLogInit().
              */
             struct jfs_log *log;
             int rc;

             log = kzalloc(sizeof(struct jfs_log), GFP_KERNEL);
             if (log == NULL)
                     return -ENOMEM;

             INIT_LIST_HEAD(&log->sb_list);
             init_waitqueue_head(&log->syncwait);
             set_bit(log_INLINELOG, &log->flag);

             /* geometry: logpxd length is shifted into log-page units */
             log->bdev = sb->s_bdev;
             log->base = addressPXD(&JFS_SBI(sb)->logpxd);
             log->size = lengthPXD(&JFS_SBI(sb)->logpxd) >>
                 (L2LOGPSIZE - sb->s_blocksize_bits);
             log->l2bsize = sb->s_blocksize_bits;
             ASSERT(L2LOGPSIZE >= sb->s_blocksize_bits);

             /*
              * initialize log.
              */
             rc = lmLogInit(log);
             if (rc != 0) {
                     kfree(log);
                     jfs_warn("lmLogOpen: exit(%d)", rc);
                     return rc;
             }

             /* attach the file system to its (unshared) log */
             list_add(&JFS_SBI(sb)->log_list, &log->sb_list);
             JFS_SBI(sb)->log = log;

             return 0;
1193 }
1194 
1195 static int open_dummy_log(struct super_block *sb)
     {
             /*
              * Attach the file system to the shared dummy_log used for
              * no-integrity mounts, creating and initializing dummy_log
              * on first use.  Everything runs under jfs_log_mutex.
              *
              * Returns 0 on success, -ENOMEM if dummy_log cannot be
              * allocated, or the error returned by lmLogInit().
              */
             int rc = 0;

             mutex_lock(&jfs_log_mutex);

             if (dummy_log == NULL) {
                     dummy_log = kzalloc(sizeof(struct jfs_log), GFP_KERNEL);
                     if (dummy_log == NULL) {
                             rc = -ENOMEM;
                             goto unlock;
                     }

                     INIT_LIST_HEAD(&dummy_log->sb_list);
                     init_waitqueue_head(&dummy_log->syncwait);
                     dummy_log->no_integrity = 1;
                     /* Make up some stuff */
                     dummy_log->base = 0;
                     dummy_log->size = 1024;

                     rc = lmLogInit(dummy_log);
                     if (rc != 0) {
                             /* leave no half-initialized dummy_log behind */
                             kfree(dummy_log);
                             dummy_log = NULL;
                             goto unlock;
                     }
             }

             LOG_LOCK(dummy_log);
             list_add(&JFS_SBI(sb)->log_list, &dummy_log->sb_list);
             JFS_SBI(sb)->log = dummy_log;
             LOG_UNLOCK(dummy_log);

     unlock:
             mutex_unlock(&jfs_log_mutex);
             return rc;
1228 }
1229 
1230 /*
1231  * NAME:        lmLogInit()
1232  *
1233  * FUNCTION:    log initialization at first log open.
1234  *
1235  *      logredo() (or logformat()) should have been run previously.
1236  *      initialize the log from log superblock.
1237  *      set the log state in the superblock to LOGMOUNT and
1238  *      write SYNCPT log record.
1239  *
      *      For a no_integrity log no superblock is read or written;
      *      only a log page buffer is allocated.
      *
1240  * PARAMETER:   log     - log structure
1241  *
1242  * RETURN:      0       - if ok
1243  *              -EINVAL - bad log magic number, log state not LOGREDONE,
      *                        inline log size mismatch, or external log
      *                        uuid mismatch
1244  *              errors returned from lbmLogInit(), lbmRead() and
      *              lbmIOWait()
1245  *
1246  * serialization: single first open thread
1247  */
1248 int lmLogInit(struct jfs_log * log)
1249 {
1250         int rc = 0;
1251         struct lrd lrd;
1252         struct logsuper *logsuper;
1253         struct lbuf *bpsuper;
1254         struct lbuf *bp;
1255         struct logpage *lp;
1256         int lsn = 0;
1257 
1258         jfs_info("lmLogInit: log:0x%p", log);
1259 
1260         /* initialize the group commit serialization lock */
1261         LOGGC_LOCK_INIT(log);
1262 
1263         /* allocate/initialize the log write serialization lock */
1264         LOG_LOCK_INIT(log);
1265 
1266         LOGSYNC_LOCK_INIT(log);
1267 
1268         INIT_LIST_HEAD(&log->synclist);
1269 
1270         INIT_LIST_HEAD(&log->cqueue);
1271         log->flush_tblk = NULL;
1272 
1273         log->count = 0;
1274 
1275         /*
1276          * initialize log i/o
1277          */
1278         if ((rc = lbmLogInit(log)))
1279                 return rc;
1280 
1281         if (!test_bit(log_INLINELOG, &log->flag))
1282                 log->l2bsize = L2LOGPSIZE;
1283 
1284         /* check for disabled journaling to disk */
1285         if (log->no_integrity) {
1286                 /*
1287                  * Journal pages will still be filled.  When the time comes
1288                  * to actually do the I/O, the write is not done, and the
1289                  * endio routine is called directly.
1290                  */
1291                 bp = lbmAllocate(log , 0);
1292                 log->bp = bp;
1293                 bp->l_pn = bp->l_eor = 0;
1294         } else {
1295                 /*
1296                  * validate log superblock
1297                  */
1298                 if ((rc = lbmRead(log, 1, &bpsuper)))
1299                         goto errout10;
1300 
1301                 logsuper = (struct logsuper *) bpsuper->l_ldata;
1302 
1303                 if (logsuper->magic != cpu_to_le32(LOGMAGIC)) {
1304                         jfs_warn("*** Log Format Error ! ***");
1305                         rc = -EINVAL;
1306                         goto errout20;
1307                 }
1308 
1309                 /* logredo() should have been run successfully. */
1310                 if (logsuper->state != cpu_to_le32(LOGREDONE)) {
1311                         jfs_warn("*** Log Is Dirty ! ***");
1312                         rc = -EINVAL;
1313                         goto errout20;
1314                 }
1315 
1316                 /* initialize log from log superblock */
1317                 if (test_bit(log_INLINELOG,&log->flag)) {
1318                         if (log->size != le32_to_cpu(logsuper->size)) {
1319                                 rc = -EINVAL;
1320                                 goto errout20;
1321                         }
1322                         jfs_info("lmLogInit: inline log:0x%p base:0x%Lx size:0x%x",
1323                                  log, (unsigned long long)log->base, log->size);
1324                 } else {
1325                         if (!uuid_equal(&logsuper->uuid, &log->uuid)) {
1326                                 jfs_warn("wrong uuid on JFS log device");
                                     /*
                                      * rc is still 0 from the successful
                                      * lbmRead() above; set an error so the
                                      * unwind path does not report success
                                      * for a log whose i/o was shut down.
                                      */
                                     rc = -EINVAL;
1327                                 goto errout20;
1328                         }
1329                         log->size = le32_to_cpu(logsuper->size);
1330                         log->l2bsize = le32_to_cpu(logsuper->l2bsize);
1331                         jfs_info("lmLogInit: external log:0x%p base:0x%Lx size:0x%x",
1332                                  log, (unsigned long long)log->base, log->size);
1333                 }
1334 
                     /* split the superblock's end address into page + offset */
1335                 log->page = le32_to_cpu(logsuper->end) / LOGPSIZE;
1336                 log->eor = le32_to_cpu(logsuper->end) - (LOGPSIZE * log->page);
1337 
1338                 /*
1339                  * initialize for log append write mode
1340                  */
1341                 /* establish current/end-of-log page/buffer */
1342                 if ((rc = lbmRead(log, log->page, &bp)))
1343                         goto errout20;
1344 
1345                 lp = (struct logpage *) bp->l_ldata;
1346 
1347                 jfs_info("lmLogInit: lsn:0x%x page:%d eor:%d:%d",
1348                          le32_to_cpu(logsuper->end), log->page, log->eor,
1349                          le16_to_cpu(lp->h.eor));
1350 
1351                 log->bp = bp;
1352                 bp->l_pn = log->page;
1353                 bp->l_eor = log->eor;
1354 
1355                 /* if current page is full, move on to next page */
1356                 if (log->eor >= LOGPSIZE - LOGPTLRSIZE)
1357                         lmNextPage(log);
1358 
1359                 /*
1360                  * initialize log syncpoint
1361                  */
1362                 /*
1363                  * write the first SYNCPT record with syncpoint = 0
1364                  * (i.e., log redo up to HERE !);
1365                  * remove current page from lbm write queue at end of pageout
1366                  * (to write log superblock update), but do not release to
1367                  * freelist;
1368                  */
1369                 lrd.logtid = 0;
1370                 lrd.backchain = 0;
1371                 lrd.type = cpu_to_le16(LOG_SYNCPT);
1372                 lrd.length = 0;
1373                 lrd.log.syncpt.sync = 0;
1374                 lsn = lmWriteRecord(log, NULL, &lrd, NULL);
                     /* reload: log->bp may differ from the page read above */
1375                 bp = log->bp;
1376                 bp->l_ceor = bp->l_eor;
1377                 lp = (struct logpage *) bp->l_ldata;
1378                 lp->h.eor = lp->t.eor = cpu_to_le16(bp->l_eor);
1379                 lbmWrite(log, bp, lbmWRITE | lbmSYNC, 0);
1380                 if ((rc = lbmIOWait(bp, 0)))
1381                         goto errout30;
1382 
1383                 /*
1384                  * update/write superblock
1385                  */
1386                 logsuper->state = cpu_to_le32(LOGMOUNT);
1387                 log->serial = le32_to_cpu(logsuper->serial) + 1;
1388                 logsuper->serial = cpu_to_le32(log->serial);
1389                 lbmDirectWrite(log, bpsuper, lbmWRITE | lbmRELEASE | lbmSYNC);
1390                 if ((rc = lbmIOWait(bpsuper, lbmFREE)))
1391                         goto errout30;
1392         }
1393 
1394         /* initialize logsync parameters */
1395         log->logsize = (log->size - 2) << L2LOGPSIZE;
1396         log->lsn = lsn;
1397         log->syncpt = lsn;
1398         log->sync = log->syncpt;
1399         log->nextsync = LOGSYNC_DELTA(log->logsize);
1400 
1401         jfs_info("lmLogInit: lsn:0x%x syncpt:0x%x sync:0x%x",
1402                  log->lsn, log->syncpt, log->sync);
1403 
1404         /*
1405          * initialize for lazy/group commit
1406          */
1407         log->clsn = lsn;
1408 
1409         return 0;
1410 
1411         /*
1412          *      unwind on error
              *
              * the labels fall through: errout30 also runs errout20 and
              * errout10, errout20 also runs errout10.
1413          */
1414       errout30:         /* release log page */
1415         log->wqueue = NULL;
1416         bp->l_wqnext = NULL;
1417         lbmFree(bp);
1418 
1419       errout20:         /* release log superblock */
1420         lbmFree(bpsuper);
1421 
1422       errout10:         /* unwind lbmLogInit() */
1423         lbmLogShutdown(log);
1424 
1425         jfs_warn("lmLogInit: exit(%d)", rc);
1426         return rc;
1427 }
1428 
1429 
1430 /*
1431  * NAME:        lmLogClose()
1432  *
1433  * FUNCTION:    remove the file system of <sb> from the active list of
1434  *              its log (sbi->log) and close the log on last close.
1435  *
1436  * PARAMETER:   sb      - superblock
1437  *
1438  * RETURN:      0, or the error returned by lmLogShutdown(); the result
      *              of lmLogFileSystem() is not propagated
1439  *
1440  * serialization: jfs_log_mutex is held for the whole operation;
      *      LOG_LOCK() protects the removal from the log's sb_list
1441  */
1442 int lmLogClose(struct super_block *sb)
1443 {
1444         struct jfs_sb_info *sbi = JFS_SBI(sb);
1445         struct jfs_log *log = sbi->log;
1446         struct block_device *bdev;
1447         int rc = 0;
1448 
1449         jfs_info("lmLogClose: log:0x%p", log);
1450 
1451         mutex_lock(&jfs_log_mutex);
1452         LOG_LOCK(log);
1453         list_del(&sbi->log_list);
1454         LOG_UNLOCK(log);
1455         sbi->log = NULL;
1456 
1457         /*
1458          * We need to make sure all of the "written" metapages
1459          * actually make it to disk
1460          */
1461         sync_blockdev(sb->s_bdev);
1462 
1463         if (test_bit(log_INLINELOG, &log->flag)) {
1464                 /*
1465                  *      in-line log in host file system
                      *
                      * an inline log is created per mount by
                      * open_inline_log(), so it is always shut down and
                      * freed here
1466                  */
1467                 rc = lmLogShutdown(log);
1468                 kfree(log);
1469                 goto out;
1470         }
1471 
             /* drop this file system from the log superblock's active list */
1472         if (!log->no_integrity)
1473                 lmLogFileSystem(log, sbi, 0);
1474 
             /* other file systems still use this log: keep it open */
1475         if (!list_empty(&log->sb_list))
1476                 goto out;
1477 
1478         /*
1479          * TODO: ensure that the dummy_log is in a state to allow
1480          * lbmLogShutdown to deallocate all the buffers and call
1481          * kfree against dummy_log.  For now, leave dummy_log & its
1482          * buffers in memory, and reuse if another no-integrity mount
1483          * is requested.
1484          */
1485         if (log->no_integrity)
1486                 goto out;
1487 
1488         /*
1489          *      external log as separate logical volume
1490          */
1491         list_del(&log->journal_list);
1492         bdev = log->bdev;
1493         rc = lmLogShutdown(log);
1494 
1495         blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
1496 
1497         kfree(log);
1498 
1499       out:
1500         mutex_unlock(&jfs_log_mutex);
1501         jfs_info("lmLogClose: exit(%d)", rc);
1502         return rc;
1503 }
1504 
1505 
1506 /*
1507  * NAME:        jfs_flush_journal()
1508  *
1509  * FUNCTION:    initiate write of any outstanding transactions to the journal
1510  *              and optionally wait until they are all written to disk
1511  *
1512  *              wait == 0  flush until latest txn is committed, don't wait
1513  *              wait == 1  flush until latest txn is committed, wait
1514  *              wait > 1   flush until all txn's are complete, wait
      *
      * PARAMETERS:  log     - log structure; NULL is accepted and ignored
      *              wait    - see above
      *
      * RETURN:      None
1515  */
1516 void jfs_flush_journal(struct jfs_log *log, int wait)
1517 {
1518         int i;
1519         struct tblock *target = NULL;
1520 
1521         /* jfs_write_inode may call us during read-only mount */
1522         if (!log)
1523                 return;
1524 
1525         jfs_info("jfs_flush_journal: log:0x%p wait=%d", log, wait);
1526 
1527         LOGGC_LOCK(log);
1528 
1529         if (!list_empty(&log->cqueue)) {
1530                 /*
1531                  * This ensures that we will keep writing to the journal as long
1532                  * as there are unwritten commit records
1533                  */
                     /* target = last (most recently queued) entry of cqueue */
1534                 target = list_entry(log->cqueue.prev, struct tblock, cqueue);
1535 
1536                 if (test_bit(log_FLUSH, &log->flag)) {
1537                         /*
1538                          * We're already flushing.
1539                          * if flush_tblk is NULL, we are flushing everything,
1540                          * so leave it that way.  Otherwise, update it to the
1541                          * latest transaction
1542                          */
1543                         if (log->flush_tblk)
1544                                 log->flush_tblk = target;
1545                 } else {
1546                         /* Only flush until latest transaction is committed */
1547                         log->flush_tblk = target;
1548                         set_bit(log_FLUSH, &log->flag);
1549 
1550                         /*
1551                          * Initiate I/O on outstanding transactions
1552                          */
1553                         if (!(log->cflag & logGC_PAGEOUT)) {
1554                                 log->cflag |= logGC_PAGEOUT;
1555                                 lmGCwrite(log, 0);
1556                         }
1557                 }
1558         }
1559         if ((wait > 1) || test_bit(log_SYNCBARRIER, &log->flag)) {
1560                 /* Flush until all activity complete */
1561                 set_bit(log_FLUSH, &log->flag);
1562                 log->flush_tblk = NULL;
1563         }
1564 
1565         if (wait && target && !(target->flag & tblkGC_COMMITTED)) {
1566                 DECLARE_WAITQUEUE(__wait, current);
1567 
                     /*
                      * queue ourselves and set the task state before
                      * dropping the lock; this sleeps once and does not
                      * re-check tblkGC_COMMITTED after waking
                      */
1568                 add_wait_queue(&target->gcwait, &__wait);
1569                 set_current_state(TASK_UNINTERRUPTIBLE);
1570                 LOGGC_UNLOCK(log);
1571                 schedule();
1572                 LOGGC_LOCK(log);
1573                 remove_wait_queue(&target->gcwait, &__wait);
1574         }
1575         LOGGC_UNLOCK(log);
1576 
1577         if (wait < 2)
1578                 return;
1579 
1580         write_special_inodes(log, filemap_fdatawrite);
1581 
1582         /*
1583          * If there was recent activity, we may need to wait
1584          * for the lazycommit thread to catch up
              *
              * (polls up to 200 times with a 250 ms sleep each; the lists
              * are examined here without taking the log locks)
1585          */
1586         if ((!list_empty(&log->cqueue)) || !list_empty(&log->synclist)) {
1587                 for (i = 0; i < 200; i++) {     /* Too much? */
1588                         msleep(250);
1589                         write_special_inodes(log, filemap_fdatawrite);
1590                         if (list_empty(&log->cqueue) &&
1591                             list_empty(&log->synclist))
1592                                 break;
1593                 }
1594         }
1595         assert(list_empty(&log->cqueue));
1596 
1597 #ifdef CONFIG_JFS_DEBUG
             /* debug builds dump whatever is still on the synclist */
1598         if (!list_empty(&log->synclist)) {
1599                 struct logsyncblk *lp;
1600 
1601                 printk(KERN_ERR "jfs_flush_journal: synclist not empty\n");
1602                 list_for_each_entry(lp, &log->synclist, synclist) {
1603                         if (lp->xflag & COMMIT_PAGE) {
1604                                 struct metapage *mp = (struct metapage *)lp;
1605                                 print_hex_dump(KERN_ERR, "metapage: ",
1606                                                DUMP_PREFIX_ADDRESS, 16, 4,
1607                                                mp, sizeof(struct metapage), 0);
1608                                 print_hex_dump(KERN_ERR, "page: ",
1609                                                DUMP_PREFIX_ADDRESS, 16,
1610                                                sizeof(long), mp->page,
1611                                                sizeof(struct page), 0);
1612                         } else
1613                                 print_hex_dump(KERN_ERR, "tblock:",
1614                                                DUMP_PREFIX_ADDRESS, 16, 4,
1615                                                lp, sizeof(struct tblock), 0);
1616                 }
1617         }
1618 #else
1619         WARN_ON(!list_empty(&log->synclist));
1620 #endif
1621         clear_bit(log_FLUSH, &log->flag);
1622 }
1623 
1624 /*
1625  * NAME:        lmLogShutdown()
1626  *
1627  * FUNCTION:    log shutdown at last LogClose().
1628  *
1629  *              flush the journal, write the final log SYNCPT record,
1630  *              update the log superblock (state = LOGREDONE, end = lsn
      *              of the final SYNCPT record) and shut down log i/o.
1631  *
1632  * PARAMETER:   log     - log structure
1633  *
1634  * RETURN:      0       - success
      *              error returned by lbmRead() of the log superblock or
      *              by lbmIOWait() on the superblock write
1635  *
1636  * serialization: single last close thread
1637  */
1638 int lmLogShutdown(struct jfs_log * log)
1639 {
1640         int rc;
1641         struct lrd lrd;
1642         int lsn;
1643         struct logsuper *logsuper;
1644         struct lbuf *bpsuper;
1645         struct lbuf *bp;
1646         struct logpage *lp;
1647 
1648         jfs_info("lmLogShutdown: log:0x%p", log);
1649 
             /* wait > 1: flush until all transactions are complete */
1650         jfs_flush_journal(log, 2);
1651 
1652         /*
1653          * write the last SYNCPT record with syncpoint = 0
1654          * (i.e., log redo up to HERE !)
1655          */
1656         lrd.logtid = 0;
1657         lrd.backchain = 0;
1658         lrd.type = cpu_to_le16(LOG_SYNCPT);
1659         lrd.length = 0;
1660         lrd.log.syncpt.sync = 0;
1661 
1662         lsn = lmWriteRecord(log, NULL, &lrd, NULL);
1663         bp = log->bp;
1664         lp = (struct logpage *) bp->l_ldata;
1665         lp->h.eor = lp->t.eor = cpu_to_le16(bp->l_eor);
1666         lbmWrite(log, log->bp, lbmWRITE | lbmRELEASE | lbmSYNC, 0);
             /* NOTE: the result of this wait is not checked */
1667         lbmIOWait(log->bp, lbmFREE);
1668         log->bp = NULL;
1669 
1670         /*
1671          * synchronous update log superblock
1672          * mark log state as shutdown cleanly
1673          * (i.e., Log does not need to be replayed).
1674          */
1675         if ((rc = lbmRead(log, 1, &bpsuper)))
1676                 goto out;
1677 
1678         logsuper = (struct logsuper *) bpsuper->l_ldata;
1679         logsuper->state = cpu_to_le32(LOGREDONE);
1680         logsuper->end = cpu_to_le32(lsn);
1681         lbmDirectWrite(log, bpsuper, lbmWRITE | lbmRELEASE | lbmSYNC);
1682         rc = lbmIOWait(bpsuper, lbmFREE);
1683 
1684         jfs_info("lmLogShutdown: lsn:0x%x page:%d eor:%d",
1685                  lsn, log->page, log->eor);
1686 
1687       out:
1688         /*
1689          * shutdown per log i/o
              * (done on both the success and the error path)
1690          */
1691         lbmLogShutdown(log);
1692 
1693         if (rc) {
1694                 jfs_warn("lmLogShutdown: exit(%d)", rc);
1695         }
1696         return rc;
1697 }
1698 
1699 
1700 /*
1701  * NAME:        lmLogFileSystem()
1702  *
1703  * FUNCTION:    insert (<activate> = true)/remove (<activate> = false)
1704  *      file system into/from log active file system list.
1705  *
1706  * PARAMETE:    log     - pointer to logs inode.
1707  *              fsdev   - kdev_t of filesystem.
1708  *              serial  - pointer to returned log serial number
1709  *              activate - insert/remove device from active list.
1710  *
1711  * RETURN:      0       - success
1712  *              errors returned by vms_iowait().
1713  */
1714 static int lmLogFileSystem(struct jfs_log * log, struct jfs_sb_info *sbi,
1715                            int activate)
1716 {
1717         int rc = 0;
1718         int i;
1719         struct logsuper *logsuper;
1720         struct lbuf *bpsuper;
1721         uuid_t *uuid = &sbi->uuid;
1722 
1723         /*
1724          * insert/remove file system device to log active file system list.
1725          */
1726         if ((rc = lbmRead(log, 1, &bpsuper)))
1727                 return rc;
1728 
1729         logsuper = (struct logsuper *) bpsuper->l_ldata;
1730         if (activate) {
1731                 for (i = 0; i < MAX_ACTIVE; i++)
1732                         if (uuid_is_null(&logsuper->active[i].uuid)) {
1733                                 uuid_copy(&logsuper->active[i].uuid, uuid);
1734                                 sbi->aggregate = i;
1735                                 break;
1736                         }
1737                 if (i == MAX_ACTIVE) {
1738                         jfs_warn("Too many file systems sharing journal!");
1739                         lbmFree(bpsuper);
1740                         return -EMFILE; /* Is there a better rc? */
1741                 }
1742         } else {
1743                 for (i = 0; i < MAX_ACTIVE; i++)
1744                         if (uuid_equal(&logsuper->active[i].uuid, uuid)) {
1745                                 uuid_copy(&logsuper->active[i].uuid,
1746                                           &uuid_null);
1747                                 break;
1748                         }
1749                 if (i == MAX_ACTIVE) {
1750                         jfs_warn("Somebody stomped on the journal!");
1751                         lbmFree(bpsuper);
1752                         return -EIO;
1753                 }
1754 
1755         }
1756 
1757         /*
1758          * synchronous write log superblock:
1759          *
1760          * write sidestream bypassing write queue:
1761          * at file system mount, log super block is updated for
1762          * activation of the file system before any log record
1763          * (MOUNT record) of the file system, and at file system
1764          * unmount, all meta data for the file system has been
1765          * flushed before log super block is updated for deactivation
1766          * of the file system.
1767          */
1768         lbmDirectWrite(log, bpsuper, lbmWRITE | lbmRELEASE | lbmSYNC);
1769         rc = lbmIOWait(bpsuper, lbmFREE);
1770 
1771         return rc;
1772 }
1773 
/*
 *		log buffer manager (lbm)
 *		------------------------
 *
 * special purpose buffer manager supporting log i/o requirements.
 *
 * per log write queue:
 * log pageout occurs in serial order by fifo write queue and
 * restricting to a single i/o in progress at any one time.
 * a circular singly-linked list
 * (log->wqueue points to the tail, and buffers are linked via
 * bp->l_wqnext field), and
 * maintains log page in pageout or waiting for pageout in serial pageout.
 */
1788 
1789 /*
1790  *      lbmLogInit()
1791  *
1792  * initialize per log I/O setup at lmLogInit()
1793  */
1794 static int lbmLogInit(struct jfs_log * log)
1795 {                               /* log inode */
1796         int i;
1797         struct lbuf *lbuf;
1798 
1799         jfs_info("lbmLogInit: log:0x%p", log);
1800 
1801         /* initialize current buffer cursor */
1802         log->bp = NULL;
1803 
1804         /* initialize log device write queue */
1805         log->wqueue = NULL;
1806 
1807         /*
1808          * Each log has its own buffer pages allocated to it.  These are
1809          * not managed by the page cache.  This ensures that a transaction
1810          * writing to the log does not block trying to allocate a page from
1811          * the page cache (for the log).  This would be bad, since page
1812          * allocation waits on the kswapd thread that may be committing inodes
1813          * which would cause log activity.  Was that clear?  I'm trying to
1814          * avoid deadlock here.
1815          */
1816         init_waitqueue_head(&log->free_wait);
1817 
1818         log->lbuf_free = NULL;
1819 
1820         for (i = 0; i < LOGPAGES;) {
1821                 char *buffer;
1822                 uint offset;
1823                 struct page *page = alloc_page(GFP_KERNEL | __GFP_ZERO);
1824 
1825                 if (!page)
1826                         goto error;
1827                 buffer = page_address(page);
1828                 for (offset = 0; offset < PAGE_SIZE; offset += LOGPSIZE) {
1829                         lbuf = kmalloc(sizeof(struct lbuf), GFP_KERNEL);
1830                         if (lbuf == NULL) {
1831                                 if (offset == 0)
1832                                         __free_page(page);
1833                                 goto error;
1834                         }
1835                         if (offset) /* we already have one reference */
1836                                 get_page(page);
1837                         lbuf->l_offset = offset;
1838                         lbuf->l_ldata = buffer + offset;
1839                         lbuf->l_page = page;
1840                         lbuf->l_log = log;
1841                         init_waitqueue_head(&lbuf->l_ioevent);
1842 
1843                         lbuf->l_freelist = log->lbuf_free;
1844                         log->lbuf_free = lbuf;
1845                         i++;
1846                 }
1847         }
1848 
1849         return (0);
1850 
1851       error:
1852         lbmLogShutdown(log);
1853         return -ENOMEM;
1854 }
1855 
1856 
1857 /*
1858  *      lbmLogShutdown()
1859  *
1860  * finalize per log I/O setup at lmLogShutdown()
1861  */
1862 static void lbmLogShutdown(struct jfs_log * log)
1863 {
1864         struct lbuf *lbuf;
1865 
1866         jfs_info("lbmLogShutdown: log:0x%p", log);
1867 
1868         lbuf = log->lbuf_free;
1869         while (lbuf) {
1870                 struct lbuf *next = lbuf->l_freelist;
1871                 __free_page(lbuf->l_page);
1872                 kfree(lbuf);
1873                 lbuf = next;
1874         }
1875 }
1876 
1877 
1878 /*
1879  *      lbmAllocate()
1880  *
1881  * allocate an empty log buffer
1882  */
1883 static struct lbuf *lbmAllocate(struct jfs_log * log, int pn)
1884 {
1885         struct lbuf *bp;
1886         unsigned long flags;
1887 
1888         /*
1889          * recycle from log buffer freelist if any
1890          */
1891         LCACHE_LOCK(flags);
1892         LCACHE_SLEEP_COND(log->free_wait, (bp = log->lbuf_free), flags);
1893         log->lbuf_free = bp->l_freelist;
1894         LCACHE_UNLOCK(flags);
1895 
1896         bp->l_flag = 0;
1897 
1898         bp->l_wqnext = NULL;
1899         bp->l_freelist = NULL;
1900 
1901         bp->l_pn = pn;
1902         bp->l_blkno = log->base + (pn << (L2LOGPSIZE - log->l2bsize));
1903         bp->l_ceor = 0;
1904 
1905         return bp;
1906 }
1907 
1908 
/*
 *	lbmFree()
 *
 * release a log buffer to freelist
 *
 * locking wrapper: takes LCACHE_LOCK around lbmfree().
 */
static void lbmFree(struct lbuf * bp)
{
	unsigned long flags;

	LCACHE_LOCK(flags);	/* disable+lock */
	lbmfree(bp);
	LCACHE_UNLOCK(flags);	/* unlock+enable */
}
1924 
1925 static void lbmfree(struct lbuf * bp)
1926 {
1927         struct jfs_log *log = bp->l_log;
1928 
1929         assert(bp->l_wqnext == NULL);
1930 
1931         /*
1932          * return the buffer to head of freelist
1933          */
1934         bp->l_freelist = log->lbuf_free;
1935         log->lbuf_free = bp;
1936 
1937         wake_up(&log->free_wait);
1938         return;
1939 }
1940 
1941 
1942 /*
1943  * NAME:        lbmRedrive
1944  *
1945  * FUNCTION:    add a log buffer to the log redrive list
1946  *
1947  * PARAMETER:
1948  *      bp      - log buffer
1949  *
1950  * NOTES:
1951  *      Takes log_redrive_lock.
1952  */
1953 static inline void lbmRedrive(struct lbuf *bp)
1954 {
1955         unsigned long flags;
1956 
1957         spin_lock_irqsave(&log_redrive_lock, flags);
1958         bp->l_redrive_next = log_redrive_list;
1959         log_redrive_list = bp;
1960         spin_unlock_irqrestore(&log_redrive_lock, flags);
1961 
1962         wake_up_process(jfsIOthread);
1963 }
1964 
1965 
1966 /*
1967  *      lbmRead()
1968  */
1969 static int lbmRead(struct jfs_log * log, int pn, struct lbuf ** bpp)
1970 {
1971         struct bio *bio;
1972         struct lbuf *bp;
1973 
1974         /*
1975          * allocate a log buffer
1976          */
1977         *bpp = bp = lbmAllocate(log, pn);
1978         jfs_info("lbmRead: bp:0x%p pn:0x%x", bp, pn);
1979 
1980         bp->l_flag |= lbmREAD;
1981 
1982         bio = bio_alloc(GFP_NOFS, 1);
1983 
1984         bio->bi_iter.bi_sector = bp->l_blkno << (log->l2bsize - 9);
1985         bio_set_dev(bio, log->bdev);
1986 
1987         bio_add_page(bio, bp->l_page, LOGPSIZE, bp->l_offset);
1988         BUG_ON(bio->bi_iter.bi_size != LOGPSIZE);
1989 
1990         bio->bi_end_io = lbmIODone;
1991         bio->bi_private = bp;
1992         bio->bi_opf = REQ_OP_READ;
1993         /*check if journaling to disk has been disabled*/
1994         if (log->no_integrity) {
1995                 bio->bi_iter.bi_size = 0;
1996                 lbmIODone(bio);
1997         } else {
1998                 submit_bio(bio);
1999         }
2000 
2001         wait_event(bp->l_ioevent, (bp->l_flag != lbmREAD));
2002 
2003         return 0;
2004 }
2005 
2006 
2007 /*
2008  *      lbmWrite()
2009  *
2010  * buffer at head of pageout queue stays after completion of
2011  * partial-page pageout and redriven by explicit initiation of
2012  * pageout by caller until full-page pageout is completed and
2013  * released.
2014  *
2015  * device driver i/o done redrives pageout of new buffer at
2016  * head of pageout queue when current buffer at head of pageout
2017  * queue is released at the completion of its full-page pageout.
2018  *
2019  * LOGGC_LOCK() serializes lbmWrite() by lmNextPage() and lmGroupCommit().
2020  * LCACHE_LOCK() serializes xflag between lbmWrite() and lbmIODone()
2021  */
2022 static void lbmWrite(struct jfs_log * log, struct lbuf * bp, int flag,
2023                      int cant_block)
2024 {
2025         struct lbuf *tail;
2026         unsigned long flags;
2027 
2028         jfs_info("lbmWrite: bp:0x%p flag:0x%x pn:0x%x", bp, flag, bp->l_pn);
2029 
2030         /* map the logical block address to physical block address */
2031         bp->l_blkno =
2032             log->base + (bp->l_pn << (L2LOGPSIZE - log->l2bsize));
2033 
2034         LCACHE_LOCK(flags);             /* disable+lock */
2035 
2036         /*
2037          * initialize buffer for device driver
2038          */
2039         bp->l_flag = flag;
2040 
2041         /*
2042          *      insert bp at tail of write queue associated with log
2043          *
2044          * (request is either for bp already/currently at head of queue
2045          * or new bp to be inserted at tail)
2046          */
2047         tail = log->wqueue;
2048 
2049         /* is buffer not already on write queue ? */
2050         if (bp->l_wqnext == NULL) {
2051                 /* insert at tail of wqueue */
2052                 if (tail == NULL) {
2053                         log->wqueue = bp;
2054                         bp->l_wqnext = bp;
2055                 } else {
2056                         log->wqueue = bp;
2057                         bp->l_wqnext = tail->l_wqnext;
2058                         tail->l_wqnext = bp;
2059                 }
2060 
2061                 tail = bp;
2062         }
2063 
2064         /* is buffer at head of wqueue and for write ? */
2065         if ((bp != tail->l_wqnext) || !(flag & lbmWRITE)) {
2066                 LCACHE_UNLOCK(flags);   /* unlock+enable */
2067                 return;
2068         }
2069 
2070         LCACHE_UNLOCK(flags);   /* unlock+enable */
2071 
2072         if (cant_block)
2073                 lbmRedrive(bp);
2074         else if (flag & lbmSYNC)
2075                 lbmStartIO(bp);
2076         else {
2077                 LOGGC_UNLOCK(log);
2078                 lbmStartIO(bp);
2079                 LOGGC_LOCK(log);
2080         }
2081 }
2082 
2083 
2084 /*
2085  *      lbmDirectWrite()
2086  *
2087  * initiate pageout bypassing write queue for sidestream
2088  * (e.g., log superblock) write;
2089  */
2090 static void lbmDirectWrite(struct jfs_log * log, struct lbuf * bp, int flag)
2091 {
2092         jfs_info("lbmDirectWrite: bp:0x%p flag:0x%x pn:0x%x",
2093                  bp, flag, bp->l_pn);
2094 
2095         /*
2096          * initialize buffer for device driver
2097          */
2098         bp->l_flag = flag | lbmDIRECT;
2099 
2100         /* map the logical block address to physical block address */
2101         bp->l_blkno =
2102             log->base + (bp->l_pn << (L2LOGPSIZE - log->l2bsize));
2103 
2104         /*
2105          *      initiate pageout of the page
2106          */
2107         lbmStartIO(bp);
2108 }
2109 
2110 
2111 /*
2112  * NAME:        lbmStartIO()
2113  *
2114  * FUNCTION:    Interface to DD strategy routine
2115  *
2116  * RETURN:      none
2117  *
2118  * serialization: LCACHE_LOCK() is NOT held during log i/o;
2119  */
2120 static void lbmStartIO(struct lbuf * bp)
2121 {
2122         struct bio *bio;
2123         struct jfs_log *log = bp->l_log;
2124 
2125         jfs_info("lbmStartIO");
2126 
2127         bio = bio_alloc(GFP_NOFS, 1);
2128         bio->bi_iter.bi_sector = bp->l_blkno << (log->l2bsize - 9);
2129         bio_set_dev(bio, log->bdev);
2130 
2131         bio_add_page(bio, bp->l_page, LOGPSIZE, bp->l_offset);
2132         BUG_ON(bio->bi_iter.bi_size != LOGPSIZE);
2133 
2134         bio->bi_end_io = lbmIODone;
2135         bio->bi_private = bp;
2136         bio->bi_opf = REQ_OP_WRITE | REQ_SYNC;
2137 
2138         /* check if journaling to disk has been disabled */
2139         if (log->no_integrity) {
2140                 bio->bi_iter.bi_size = 0;
2141                 lbmIODone(bio);
2142         } else {
2143                 submit_bio(bio);
2144                 INCREMENT(lmStat.submitted);
2145         }
2146 }
2147 
2148 
2149 /*
2150  *      lbmIOWait()
2151  */
2152 static int lbmIOWait(struct lbuf * bp, int flag)
2153 {
2154         unsigned long flags;
2155         int rc = 0;
2156 
2157         jfs_info("lbmIOWait1: bp:0x%p flag:0x%x:0x%x", bp, bp->l_flag, flag);
2158 
2159         LCACHE_LOCK(flags);             /* disable+lock */
2160 
2161         LCACHE_SLEEP_COND(bp->l_ioevent, (bp->l_flag & lbmDONE), flags);
2162 
2163         rc = (bp->l_flag & lbmERROR) ? -EIO : 0;
2164 
2165         if (flag & lbmFREE)
2166                 lbmfree(bp);
2167 
2168         LCACHE_UNLOCK(flags);   /* unlock+enable */
2169 
2170         jfs_info("lbmIOWait2: bp:0x%p flag:0x%x:0x%x", bp, bp->l_flag, flag);
2171         return rc;
2172 }
2173 
2174 /*
2175  *      lbmIODone()
2176  *
2177  * executed at INTIODONE level
2178  */
2179 static void lbmIODone(struct bio *bio)
2180 {
2181         struct lbuf *bp = bio->bi_private;
2182         struct lbuf *nextbp, *tail;
2183         struct jfs_log *log;
2184         unsigned long flags;
2185 
2186         /*
2187          * get back jfs buffer bound to the i/o buffer
2188          */
2189         jfs_info("lbmIODone: bp:0x%p flag:0x%x", bp, bp->l_flag);
2190 
2191         LCACHE_LOCK(flags);             /* disable+lock */
2192 
2193         bp->l_flag |= lbmDONE;
2194 
2195         if (bio->bi_status) {
2196                 bp->l_flag |= lbmERROR;
2197 
2198                 jfs_err("lbmIODone: I/O error in JFS log");
2199         }
2200 
2201         bio_put(bio);
2202 
2203         /*
2204          *      pagein completion
2205          */
2206         if (bp->l_flag & lbmREAD) {
2207                 bp->l_flag &= ~lbmREAD;
2208 
2209                 LCACHE_UNLOCK(flags);   /* unlock+enable */
2210 
2211                 /* wakeup I/O initiator */
2212                 LCACHE_WAKEUP(&bp->l_ioevent);
2213 
2214                 return;
2215         }
2216 
2217         /*
2218          *      pageout completion
2219          *
2220          * the bp at the head of write queue has completed pageout.
2221          *
2222          * if single-commit/full-page pageout, remove the current buffer
2223          * from head of pageout queue, and redrive pageout with
2224          * the new buffer at head of pageout queue;
2225          * otherwise, the partial-page pageout buffer stays at
2226          * the head of pageout queue to be redriven for pageout
2227          * by lmGroupCommit() until full-page pageout is completed.
2228          */
2229         bp->l_flag &= ~lbmWRITE;
2230         INCREMENT(lmStat.pagedone);
2231 
2232         /* update committed lsn */
2233         log = bp->l_log;
2234         log->clsn = (bp->l_pn << L2LOGPSIZE) + bp->l_ceor;
2235 
2236         if (bp->l_flag & lbmDIRECT) {
2237                 LCACHE_WAKEUP(&bp->l_ioevent);
2238                 LCACHE_UNLOCK(flags);
2239                 return;
2240         }
2241 
2242         tail = log->wqueue;
2243 
2244         /* single element queue */
2245         if (bp == tail) {
2246                 /* remove head buffer of full-page pageout
2247                  * from log device write queue
2248                  */
2249                 if (bp->l_flag & lbmRELEASE) {
2250                         log->wqueue = NULL;
2251                         bp->l_wqnext = NULL;
2252                 }
2253         }
2254         /* multi element queue */
2255         else {
2256                 /* remove head buffer of full-page pageout
2257                  * from log device write queue
2258                  */
2259                 if (bp->l_flag & lbmRELEASE) {
2260                         nextbp = tail->l_wqnext = bp->l_wqnext;
2261                         bp->l_wqnext = NULL;
2262 
2263                         /*
2264                          * redrive pageout of next page at head of write queue:
2265                          * redrive next page without any bound tblk
2266                          * (i.e., page w/o any COMMIT records), or
2267                          * first page of new group commit which has been
2268                          * queued after current page (subsequent pageout
2269                          * is performed synchronously, except page without
2270                          * any COMMITs) by lmGroupCommit() as indicated
2271                          * by lbmWRITE flag;
2272                          */
2273                         if (nextbp->l_flag & lbmWRITE) {
2274                                 /*
2275                                  * We can't do the I/O at interrupt time.
2276                                  * The jfsIO thread can do it
2277                                  */
2278                                 lbmRedrive(nextbp);
2279                         }
2280                 }
2281         }
2282 
2283         /*
2284          *      synchronous pageout:
2285          *
2286          * buffer has not necessarily been removed from write queue
2287          * (e.g., synchronous write of partial-page with COMMIT):
2288          * leave buffer for i/o initiator to dispose
2289          */
2290         if (bp->l_flag & lbmSYNC) {
2291                 LCACHE_UNLOCK(flags);   /* unlock+enable */
2292 
2293                 /* wakeup I/O initiator */
2294                 LCACHE_WAKEUP(&bp->l_ioevent);
2295         }
2296 
2297         /*
2298          *      Group Commit pageout:
2299          */
2300         else if (bp->l_flag & lbmGC) {
2301                 LCACHE_UNLOCK(flags);
2302                 lmPostGC(bp);
2303         }
2304 
2305         /*
2306          *      asynchronous pageout:
2307          *
2308          * buffer must have been removed from write queue:
2309          * insert buffer at head of freelist where it can be recycled
2310          */
2311         else {
2312                 assert(bp->l_flag & lbmRELEASE);
2313                 assert(bp->l_flag & lbmFREE);
2314                 lbmfree(bp);
2315 
2316                 LCACHE_UNLOCK(flags);   /* unlock+enable */
2317         }
2318 }
2319 
2320 int jfsIOWait(void *arg)
2321 {
2322         struct lbuf *bp;
2323 
2324         do {
2325                 spin_lock_irq(&log_redrive_lock);
2326                 while ((bp = log_redrive_list)) {
2327                         log_redrive_list = bp->l_redrive_next;
2328                         bp->l_redrive_next = NULL;
2329                         spin_unlock_irq(&log_redrive_lock);
2330                         lbmStartIO(bp);
2331                         spin_lock_irq(&log_redrive_lock);
2332                 }
2333 
2334                 if (freezing(current)) {
2335                         spin_unlock_irq(&log_redrive_lock);
2336                         try_to_freeze();
2337                 } else {
2338                         set_current_state(TASK_INTERRUPTIBLE);
2339                         spin_unlock_irq(&log_redrive_lock);
2340                         schedule();
2341                 }
2342         } while (!kthread_should_stop());
2343 
2344         jfs_info("jfsIOWait being killed!");
2345         return 0;
2346 }
2347 
2348 /*
2349  * NAME:        lmLogFormat()/jfs_logform()
2350  *
2351  * FUNCTION:    format file system log
2352  *
2353  * PARAMETERS:
2354  *      log     - volume log
2355  *      logAddress - start address of log space in FS block
2356  *      logSize - length of log space in FS block;
2357  *
2358  * RETURN:      0       - success
2359  *              -EIO    - i/o error
2360  *
2361  * XXX: We're synchronously writing one page at a time.  This needs to
2362  *      be improved by writing multiple pages at once.
2363  */
2364 int lmLogFormat(struct jfs_log *log, s64 logAddress, int logSize)
2365 {
2366         int rc = -EIO;
2367         struct jfs_sb_info *sbi;
2368         struct logsuper *logsuper;
2369         struct logpage *lp;
2370         int lspn;               /* log sequence page number */
2371         struct lrd *lrd_ptr;
2372         int npages = 0;
2373         struct lbuf *bp;
2374 
2375         jfs_info("lmLogFormat: logAddress:%Ld logSize:%d",
2376                  (long long)logAddress, logSize);
2377 
2378         sbi = list_entry(log->sb_list.next, struct jfs_sb_info, log_list);
2379 
2380         /* allocate a log buffer */
2381         bp = lbmAllocate(log, 1);
2382 
2383         npages = logSize >> sbi->l2nbperpage;
2384 
2385         /*
2386          *      log space:
2387          *
2388          * page 0 - reserved;
2389          * page 1 - log superblock;
2390          * page 2 - log data page: A SYNC log record is written
2391          *          into this page at logform time;
2392          * pages 3-N - log data page: set to empty log data pages;
2393          */
2394         /*
2395          *      init log superblock: log page 1
2396          */
2397         logsuper = (struct logsuper *) bp->l_ldata;
2398 
2399         logsuper->magic = cpu_to_le32(LOGMAGIC);
2400         logsuper->version = cpu_to_le32(LOGVERSION);
2401         logsuper->state = cpu_to_le32(LOGREDONE);
2402         logsuper->flag = cpu_to_le32(sbi->mntflag);     /* ? */
2403         logsuper->size = cpu_to_le32(npages);
2404         logsuper->bsize = cpu_to_le32(sbi->bsize);
2405         logsuper->l2bsize = cpu_to_le32(sbi->l2bsize);
2406         logsuper->end = cpu_to_le32(2 * LOGPSIZE + LOGPHDRSIZE + LOGRDSIZE);
2407 
2408         bp->l_flag = lbmWRITE | lbmSYNC | lbmDIRECT;
2409         bp->l_blkno = logAddress + sbi->nbperpage;
2410         lbmStartIO(bp);
2411         if ((rc = lbmIOWait(bp, 0)))
2412                 goto exit;
2413 
2414         /*
2415          *      init pages 2 to npages-1 as log data pages:
2416          *
2417          * log page sequence number (lpsn) initialization:
2418          *
2419          * pn:   0     1     2     3                 n-1
2420          *       +-----+-----+=====+=====+===.....===+=====+
2421          * lspn:             N-1   0     1           N-2
2422          *                   <--- N page circular file ---->
2423          *
2424          * the N (= npages-2) data pages of the log is maintained as
2425          * a circular file for the log records;
2426          * lpsn grows by 1 monotonically as each log page is written
2427          * to the circular file of the log;
2428          * and setLogpage() will not reset the page number even if
2429          * the eor is equal to LOGPHDRSIZE. In order for binary search
2430          * still work in find log end process, we have to simulate the
2431          * log wrap situation at the log format time.
2432          * The 1st log page written will have the highest lpsn. Then
2433          * the succeeding log pages will have ascending order of
2434          * the lspn starting from 0, ... (N-2)
2435          */
2436         lp = (struct logpage *) bp->l_ldata;
2437         /*
2438          * initialize 1st log page to be written: lpsn = N - 1,
2439          * write a SYNCPT log record is written to this page
2440          */
2441         lp->h.page = lp->t.page = cpu_to_le32(npages - 3);
2442         lp->h.eor = lp->t.eor = cpu_to_le16(LOGPHDRSIZE + LOGRDSIZE);
2443 
2444         lrd_ptr = (struct lrd *) &lp->data;
2445         lrd_ptr->logtid = 0;
2446         lrd_ptr->backchain = 0;
2447         lrd_ptr->type = cpu_to_le16(LOG_SYNCPT);
2448         lrd_ptr->length = 0;
2449         lrd_ptr->log.syncpt.sync = 0;
2450 
2451         bp->l_blkno += sbi->nbperpage;
2452         bp->l_flag = lbmWRITE | lbmSYNC | lbmDIRECT;
2453         lbmStartIO(bp);
2454         if ((rc = lbmIOWait(bp, 0)))
2455                 goto exit;
2456 
2457         /*
2458          *      initialize succeeding log pages: lpsn = 0, 1, ..., (N-2)
2459          */
2460         for (lspn = 0; lspn < npages - 3; lspn++) {
2461                 lp->h.page = lp->t.page = cpu_to_le32(lspn);
2462                 lp->h.eor = lp->t.eor = cpu_to_le16(LOGPHDRSIZE);
2463 
2464                 bp->l_blkno += sbi->nbperpage;
2465                 bp->l_flag = lbmWRITE | lbmSYNC | lbmDIRECT;
2466                 lbmStartIO(bp);
2467                 if ((rc = lbmIOWait(bp, 0)))
2468                         goto exit;
2469         }
2470 
2471         rc = 0;
2472 exit:
2473         /*
2474          *      finalize log
2475          */
2476         /* release the buffer */
2477         lbmFree(bp);
2478 
2479         return rc;
2480 }
2481 
2482 #ifdef CONFIG_JFS_STATISTICS
2483 int jfs_lmstats_proc_show(struct seq_file *m, void *v)
2484 {
2485         seq_printf(m,
2486                        "JFS Logmgr stats\n"
2487                        "================\n"
2488                        "commits = %d\n"
2489                        "writes submitted = %d\n"
2490                        "writes completed = %d\n"
2491                        "full pages submitted = %d\n"
2492                        "partial pages submitted = %d\n",
2493                        lmStat.commit,
2494                        lmStat.submitted,
2495                        lmStat.pagedone,
2496                        lmStat.full_page,
2497                        lmStat.partial_page);
2498         return 0;
2499 }
2500 #endif /* CONFIG_JFS_STATISTICS */

/* [<][>][^][v][top][bottom][index][help] */