root/fs/jfs/jfs_txnmgr.c


DEFINITIONS

This source file includes the following definitions.
  1. TXN_SLEEP_DROP_LOCK
  2. txLockAlloc
  3. txLockFree
  4. txInit
  5. txExit
  6. txBegin
  7. txBeginAnon
  8. txEnd
  9. txLock
  10. txRelease
  11. txUnlock
  12. txMaplock
  13. txLinelock
  14. txCommit
  15. txLog
  16. diLog
  17. dataLog
  18. dtLog
  19. xtLog
  20. mapLog
  21. txEA
  22. txForce
  23. txUpdateMap
  24. txAllocPMap
  25. txFreeMap
  26. txFreelock
  27. txAbort
  28. txLazyCommit
  29. jfs_lazycommit
  30. txLazyUnlock
  31. LogSyncRelease
  32. txQuiesce
  33. txResume
  34. jfs_sync
  35. jfs_txanchor_proc_show
  36. jfs_txstats_proc_show

   1 // SPDX-License-Identifier: GPL-2.0-or-later
   2 /*
   3  *   Copyright (C) International Business Machines Corp., 2000-2005
   4  *   Portions Copyright (C) Christoph Hellwig, 2001-2002
   5  */
   6 
   7 /*
   8  *      jfs_txnmgr.c: transaction manager
   9  *
  10  * notes:
  11  * transaction starts with txBegin() and ends with txCommit()
  12  * or txAbort().
  13  *
  14  * tlock is acquired at the time of update;
  15  * (obviates a scan at commit time for xtree and dtree)
  16  * tlock and mp point to each other;
  17  * (no hashlist for mp -> tlock).
  18  *
  19  * special cases:
  20  * tlock on in-memory inode:
  21  * in-place tlock in the in-memory inode itself;
  22  * converted to page lock by iWrite() at commit time.
  23  *
  24  * tlock during write()/mmap() under anonymous transaction (tid = 0):
  25  * transferred (?) to transaction at commit time.
  26  *
  27  * use the page itself to update allocation maps
  28  * (obviates intermediate replication of allocation/deallocation data)
  29  * hold on to mp+lock through the update of the maps
  30  */
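/*
 * Illustrative sketch (not part of the original source): the typical
 * life cycle of a synchronous transaction as driven by the jfs_*
 * operation code, assuming an inode <ip> on superblock <sb>:
 *
 *	tid_t tid;
 *	struct inode *iplist[1];
 *	int rc;
 *
 *	tid = txBegin(sb, 0);
 *	(take tlocks via txLock()/txMaplock(), modify pages)
 *	iplist[0] = ip;
 *	rc = txCommit(tid, 1, iplist, 0);
 *	txEnd(tid);
 *
 * write()/mmap() updates instead run under the anonymous tid 0 via
 * txBeginAnon() and are inherited by a later txCommit().
 */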
  31 
  32 #include <linux/fs.h>
  33 #include <linux/vmalloc.h>
  34 #include <linux/completion.h>
  35 #include <linux/freezer.h>
  36 #include <linux/module.h>
  37 #include <linux/moduleparam.h>
  38 #include <linux/kthread.h>
  39 #include <linux/seq_file.h>
  40 #include "jfs_incore.h"
  41 #include "jfs_inode.h"
  42 #include "jfs_filsys.h"
  43 #include "jfs_metapage.h"
  44 #include "jfs_dinode.h"
  45 #include "jfs_imap.h"
  46 #include "jfs_dmap.h"
  47 #include "jfs_superblock.h"
  48 #include "jfs_debug.h"
  49 
  50 /*
  51  *      transaction management structures
  52  */
  53 static struct {
  54         int freetid;            /* index of a free tid structure */
  55         int freelock;           /* index of first free lock word */
  56         wait_queue_head_t freewait;     /* eventlist of free tblock */
  57         wait_queue_head_t freelockwait; /* eventlist of free tlock */
  58         wait_queue_head_t lowlockwait;  /* eventlist of ample tlocks */
  59         int tlocksInUse;        /* Number of tlocks in use */
  60         spinlock_t LazyLock;    /* synchronize sync_queue & unlock_queue */
  61 /*      struct tblock *sync_queue; * Transactions waiting for data sync */
  62         struct list_head unlock_queue;  /* Txns waiting to be released */
  63         struct list_head anon_list;     /* inodes having anonymous txns */
  64         struct list_head anon_list2;    /* inodes having anonymous txns
  65                                            that couldn't be sync'ed */
  66 } TxAnchor;
  67 
  68 int jfs_tlocks_low;             /* Indicates low number of available tlocks */
  69 
  70 #ifdef CONFIG_JFS_STATISTICS
  71 static struct {
  72         uint txBegin;
  73         uint txBegin_barrier;
  74         uint txBegin_lockslow;
  75         uint txBegin_freetid;
  76         uint txBeginAnon;
  77         uint txBeginAnon_barrier;
  78         uint txBeginAnon_lockslow;
  79         uint txLockAlloc;
  80         uint txLockAlloc_freelock;
  81 } TxStat;
  82 #endif
  83 
  84 static int nTxBlock = -1;       /* number of transaction blocks */
  85 module_param(nTxBlock, int, 0);
  86 MODULE_PARM_DESC(nTxBlock,
  87                  "Number of transaction blocks (max:65536)");
  88 
  89 static int nTxLock = -1;        /* number of transaction locks */
  90 module_param(nTxLock, int, 0);
  91 MODULE_PARM_DESC(nTxLock,
  92                  "Number of transaction locks (max:65536)");
  93 
  94 struct tblock *TxBlock; /* transaction block table */
  95 static int TxLockLWM;   /* Low water mark for number of txLocks used */
  96 static int TxLockHWM;   /* High water mark for number of txLocks used */
  97 static int TxLockVHWM;  /* Very High water mark */
  98 struct tlock *TxLock;   /* transaction lock table */
  99 
 100 /*
 101  *      transaction management lock
 102  */
 103 static DEFINE_SPINLOCK(jfsTxnLock);
 104 
 105 #define TXN_LOCK()              spin_lock(&jfsTxnLock)
 106 #define TXN_UNLOCK()            spin_unlock(&jfsTxnLock)
 107 
 108 #define LAZY_LOCK_INIT()        spin_lock_init(&TxAnchor.LazyLock)
 109 #define LAZY_LOCK(flags)        spin_lock_irqsave(&TxAnchor.LazyLock, flags)
 110 #define LAZY_UNLOCK(flags) spin_unlock_irqrestore(&TxAnchor.LazyLock, flags)
 111 
 112 static DECLARE_WAIT_QUEUE_HEAD(jfs_commit_thread_wait);
 113 static int jfs_commit_thread_waking;
 114 
 115 /*
 116  * Retry logic exists outside these macros to protect against spurious wakeups.
 117  */
 118 static inline void TXN_SLEEP_DROP_LOCK(wait_queue_head_t * event)
 119 {
 120         DECLARE_WAITQUEUE(wait, current);
 121 
 122         add_wait_queue(event, &wait);
 123         set_current_state(TASK_UNINTERRUPTIBLE);
 124         TXN_UNLOCK();
 125         io_schedule();
 126         remove_wait_queue(event, &wait);
 127 }
 128 
 129 #define TXN_SLEEP(event)\
 130 {\
 131         TXN_SLEEP_DROP_LOCK(event);\
 132         TXN_LOCK();\
 133 }
 134 
 135 #define TXN_WAKEUP(event) wake_up_all(event)
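/*
 * Sketch (added for illustration): since TXN_SLEEP() can return on a
 * spurious wakeup, callers retest their predicate in a loop while
 * holding TXN_LOCK, as txLockAlloc() does below:
 *
 *	while (!(lid = TxAnchor.freelock))
 *		TXN_SLEEP(&TxAnchor.freelockwait);
 */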
 136 
 137 /*
 138  *      statistics
 139  */
 140 static struct {
 141         tid_t maxtid;           /* 4: biggest tid ever used */
 142         lid_t maxlid;           /* 4: biggest lid ever used */
 143         int ntid;               /* 4: # of transactions performed */
 144         int nlid;               /* 4: # of tlocks acquired */
 145         int waitlock;           /* 4: # of tlock wait */
 146 } stattx;
 147 
 148 /*
 149  * forward references
 150  */
 151 static int diLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
 152                 struct tlock * tlck, struct commit * cd);
 153 static int dataLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
 154                 struct tlock * tlck);
 155 static void dtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
 156                 struct tlock * tlck);
 157 static void mapLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
 158                 struct tlock * tlck);
 159 static void txAllocPMap(struct inode *ip, struct maplock * maplock,
 160                 struct tblock * tblk);
 161 static void txForce(struct tblock * tblk);
 162 static int txLog(struct jfs_log * log, struct tblock * tblk,
 163                 struct commit * cd);
 164 static void txUpdateMap(struct tblock * tblk);
 165 static void txRelease(struct tblock * tblk);
 166 static void xtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
 167            struct tlock * tlck);
 168 static void LogSyncRelease(struct metapage * mp);
 169 
 170 /*
 171  *              transaction block/lock management
 172  *              ---------------------------------
 173  */
 174 
 175 /*
 176  * Get a transaction lock from the free list.  If the number in use is
 177  * greater than the high water mark, wake up the sync daemon.  This should
 178  * free some anonymous transaction locks.  (TXN_LOCK must be held.)
 179  */
 180 static lid_t txLockAlloc(void)
 181 {
 182         lid_t lid;
 183 
 184         INCREMENT(TxStat.txLockAlloc);
 185         if (!TxAnchor.freelock) {
 186                 INCREMENT(TxStat.txLockAlloc_freelock);
 187         }
 188 
 189         while (!(lid = TxAnchor.freelock))
 190                 TXN_SLEEP(&TxAnchor.freelockwait);
 191         TxAnchor.freelock = TxLock[lid].next;
 192         HIGHWATERMARK(stattx.maxlid, lid);
 193         if ((++TxAnchor.tlocksInUse > TxLockHWM) && (jfs_tlocks_low == 0)) {
 194                 jfs_info("txLockAlloc tlocks low");
 195                 jfs_tlocks_low = 1;
 196                 wake_up_process(jfsSyncThread);
 197         }
 198 
 199         return lid;
 200 }
 201 
 202 static void txLockFree(lid_t lid)
 203 {
 204         TxLock[lid].tid = 0;
 205         TxLock[lid].next = TxAnchor.freelock;
 206         TxAnchor.freelock = lid;
 207         TxAnchor.tlocksInUse--;
 208         if (jfs_tlocks_low && (TxAnchor.tlocksInUse < TxLockLWM)) {
 209                 jfs_info("txLockFree jfs_tlocks_low no more");
 210                 jfs_tlocks_low = 0;
 211                 TXN_WAKEUP(&TxAnchor.lowlockwait);
 212         }
 213         TXN_WAKEUP(&TxAnchor.freelockwait);
 214 }
 215 
 216 /*
 217  * NAME:        txInit()
 218  *
 219  * FUNCTION:    initialize transaction management structures
 220  *
 221  * RETURN:
 222  *
 223  * serialization: single thread at jfs_init()
 224  */
 225 int txInit(void)
 226 {
 227         int k, size;
 228         struct sysinfo si;
 229 
 230         /* Set defaults for nTxLock and nTxBlock if unset */
 231 
 232         if (nTxLock == -1) {
 233                 if (nTxBlock == -1) {
 234                         /* Base default on memory size */
 235                         si_meminfo(&si);
 236                         if (si.totalram > (256 * 1024)) /* 1 GB */
 237                                 nTxLock = 64 * 1024;
 238                         else
 239                                 nTxLock = si.totalram >> 2;
 240                 } else if (nTxBlock > (8 * 1024))
 241                         nTxLock = 64 * 1024;
 242                 else
 243                         nTxLock = nTxBlock << 3;
 244         }
 245         if (nTxBlock == -1)
 246                 nTxBlock = nTxLock >> 3;
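        /*
         * Worked example (illustrative, assuming 4K pages): with 2 GB
         * of RAM, si.totalram is roughly 512K pages, above the
         * 256K-page (1 GB) threshold, so nTxLock defaults to 64K and
         * nTxBlock to 64K >> 3 = 8192; both are still clamped by the
         * checks below.
         */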
 247 
 248         /* Verify tunable parameters */
 249         if (nTxBlock < 16)
 250                 nTxBlock = 16;  /* No one should set it this low */
 251         if (nTxBlock > 65536)
 252                 nTxBlock = 65536;
 253         if (nTxLock < 256)
 254                 nTxLock = 256;  /* No one should set it this low */
 255         if (nTxLock > 65536)
 256                 nTxLock = 65536;
 257 
 258         printk(KERN_INFO "JFS: nTxBlock = %d, nTxLock = %d\n",
 259                nTxBlock, nTxLock);
 260         /*
 261          * initialize transaction block (tblock) table
 262          *
 263          * transaction id (tid) = tblock index
 264          * tid = 0 is reserved.
 265          */
 266         TxLockLWM = (nTxLock * 4) / 10;
 267         TxLockHWM = (nTxLock * 7) / 10;
 268         TxLockVHWM = (nTxLock * 8) / 10;
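        /*
         * Worked example (illustrative): at the nTxLock maximum of
         * 65536, these come out to TxLockLWM = 26214 (40%),
         * TxLockHWM = 45875 (70%) and TxLockVHWM = 52428 (80%).
         */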
 269 
 270         size = sizeof(struct tblock) * nTxBlock;
 271         TxBlock = vmalloc(size);
 272         if (TxBlock == NULL)
 273                 return -ENOMEM;
 274 
 275         for (k = 1; k < nTxBlock - 1; k++) {
 276                 TxBlock[k].next = k + 1;
 277                 init_waitqueue_head(&TxBlock[k].gcwait);
 278                 init_waitqueue_head(&TxBlock[k].waitor);
 279         }
 280         TxBlock[k].next = 0;
 281         init_waitqueue_head(&TxBlock[k].gcwait);
 282         init_waitqueue_head(&TxBlock[k].waitor);
 283 
 284         TxAnchor.freetid = 1;
 285         init_waitqueue_head(&TxAnchor.freewait);
 286 
 287         stattx.maxtid = 1;      /* statistics */
 288 
 289         /*
 290          * initialize transaction lock (tlock) table
 291          *
 292          * transaction lock id = tlock index
 293          * tlock id = 0 is reserved.
 294          */
 295         size = sizeof(struct tlock) * nTxLock;
 296         TxLock = vmalloc(size);
 297         if (TxLock == NULL) {
 298                 vfree(TxBlock);
 299                 return -ENOMEM;
 300         }
 301 
 302         /* initialize tlock table */
 303         for (k = 1; k < nTxLock - 1; k++)
 304                 TxLock[k].next = k + 1;
 305         TxLock[k].next = 0;
 306         init_waitqueue_head(&TxAnchor.freelockwait);
 307         init_waitqueue_head(&TxAnchor.lowlockwait);
 308 
 309         TxAnchor.freelock = 1;
 310         TxAnchor.tlocksInUse = 0;
 311         INIT_LIST_HEAD(&TxAnchor.anon_list);
 312         INIT_LIST_HEAD(&TxAnchor.anon_list2);
 313 
 314         LAZY_LOCK_INIT();
 315         INIT_LIST_HEAD(&TxAnchor.unlock_queue);
 316 
 317         stattx.maxlid = 1;      /* statistics */
 318 
 319         return 0;
 320 }
 321 
 322 /*
 323  * NAME:        txExit()
 324  *
 325  * FUNCTION:    clean up when module is unloaded
 326  */
 327 void txExit(void)
 328 {
 329         vfree(TxLock);
 330         TxLock = NULL;
 331         vfree(TxBlock);
 332         TxBlock = NULL;
 333 }
 334 
 335 /*
 336  * NAME:        txBegin()
 337  *
 338  * FUNCTION:    start a transaction.
 339  *
 340  * PARAMETER:   sb      - superblock
 341  *              flag    - force for nested tx;
 342  *
 343  * RETURN:      tid     - transaction id
 344  *
 345  * note: the force flag allows starting a tx for a nested tx
 346  * to prevent deadlock on logsync barrier;
 347  */
 348 tid_t txBegin(struct super_block *sb, int flag)
 349 {
 350         tid_t t;
 351         struct tblock *tblk;
 352         struct jfs_log *log;
 353 
 354         jfs_info("txBegin: flag = 0x%x", flag);
 355         log = JFS_SBI(sb)->log;
 356 
 357         TXN_LOCK();
 358 
 359         INCREMENT(TxStat.txBegin);
 360 
 361       retry:
 362         if (!(flag & COMMIT_FORCE)) {
 363                 /*
 364                  * synchronize with logsync barrier
 365                  */
 366                 if (test_bit(log_SYNCBARRIER, &log->flag) ||
 367                     test_bit(log_QUIESCE, &log->flag)) {
 368                         INCREMENT(TxStat.txBegin_barrier);
 369                         TXN_SLEEP(&log->syncwait);
 370                         goto retry;
 371                 }
 372         }
 373         if (flag == 0) {
 374                 /*
 375                  * Don't begin transaction if we're getting starved for tlocks
 376                  * unless COMMIT_FORCE or COMMIT_INODE (which may ultimately
 377                  * free tlocks)
 378                  */
 379                 if (TxAnchor.tlocksInUse > TxLockVHWM) {
 380                         INCREMENT(TxStat.txBegin_lockslow);
 381                         TXN_SLEEP(&TxAnchor.lowlockwait);
 382                         goto retry;
 383                 }
 384         }
 385 
 386         /*
 387          * allocate transaction id/block
 388          */
 389         if ((t = TxAnchor.freetid) == 0) {
 390                 jfs_info("txBegin: waiting for free tid");
 391                 INCREMENT(TxStat.txBegin_freetid);
 392                 TXN_SLEEP(&TxAnchor.freewait);
 393                 goto retry;
 394         }
 395 
 396         tblk = tid_to_tblock(t);
 397 
 398         if ((tblk->next == 0) && !(flag & COMMIT_FORCE)) {
 399                 /* Don't let a non-forced transaction take the last tblk */
 400                 jfs_info("txBegin: waiting for free tid");
 401                 INCREMENT(TxStat.txBegin_freetid);
 402                 TXN_SLEEP(&TxAnchor.freewait);
 403                 goto retry;
 404         }
 405 
 406         TxAnchor.freetid = tblk->next;
 407 
 408         /*
 409          * initialize transaction
 410          */
 411 
 412         /*
 413          * We can't zero the whole thing or we screw up another thread being
 414          * awakened after sleeping on tblk->waitor
 415          *
 416          * memset(tblk, 0, sizeof(struct tblock));
 417          */
 418         tblk->next = tblk->last = tblk->xflag = tblk->flag = tblk->lsn = 0;
 419 
 420         tblk->sb = sb;
 421         ++log->logtid;
 422         tblk->logtid = log->logtid;
 423 
 424         ++log->active;
 425 
 426         HIGHWATERMARK(stattx.maxtid, t);        /* statistics */
 427         INCREMENT(stattx.ntid); /* statistics */
 428 
 429         TXN_UNLOCK();
 430 
 431         jfs_info("txBegin: returning tid = %d", t);
 432 
 433         return t;
 434 }
 435 
 436 /*
 437  * NAME:        txBeginAnon()
 438  *
 439  * FUNCTION:    start an anonymous transaction.
 440  *              Blocks if logsync or available tlocks are low to prevent
 441  *              anonymous tlocks from depleting supply.
 442  *
 443  * PARAMETER:   sb      - superblock
 444  *
 445  * RETURN:      none
 446  */
 447 void txBeginAnon(struct super_block *sb)
 448 {
 449         struct jfs_log *log;
 450 
 451         log = JFS_SBI(sb)->log;
 452 
 453         TXN_LOCK();
 454         INCREMENT(TxStat.txBeginAnon);
 455 
 456       retry:
 457         /*
 458          * synchronize with logsync barrier
 459          */
 460         if (test_bit(log_SYNCBARRIER, &log->flag) ||
 461             test_bit(log_QUIESCE, &log->flag)) {
 462                 INCREMENT(TxStat.txBeginAnon_barrier);
 463                 TXN_SLEEP(&log->syncwait);
 464                 goto retry;
 465         }
 466 
 467         /*
 468          * Don't begin transaction if we're getting starved for tlocks
 469          */
 470         if (TxAnchor.tlocksInUse > TxLockVHWM) {
 471                 INCREMENT(TxStat.txBeginAnon_lockslow);
 472                 TXN_SLEEP(&TxAnchor.lowlockwait);
 473                 goto retry;
 474         }
 475         TXN_UNLOCK();
 476 }
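/*
 * Sketch (illustrative; commit_mutex and the extAlloc() call are
 * assumptions from elsewhere in JFS, not from this file): a buffered
 * write path pairs txBeginAnon() with tid 0 tlock acquisition,
 * roughly:
 *
 *	txBeginAnon(ip->i_sb);
 *	mutex_lock(&JFS_IP(ip)->commit_mutex);
 *	rc = extAlloc(ip, xlen, pno, &xad, false);
 *	mutex_unlock(&JFS_IP(ip)->commit_mutex);
 *
 * where the allocator reaches txLock() with tid = 0, so the tlocks
 * land on the inode's anonymous list until a later txCommit()
 * inherits them.
 */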
 477 
 478 /*
 479  *      txEnd()
 480  *
 481  * function: free specified transaction block.
 482  *
 483  *      logsync barrier processing:
 484  *
 485  * serialization:
 486  */
 487 void txEnd(tid_t tid)
 488 {
 489         struct tblock *tblk = tid_to_tblock(tid);
 490         struct jfs_log *log;
 491 
 492         jfs_info("txEnd: tid = %d", tid);
 493         TXN_LOCK();
 494 
 495         /*
 496          * wakeup transactions waiting on the page locked
 497          * by the current transaction
 498          */
 499         TXN_WAKEUP(&tblk->waitor);
 500 
 501         log = JFS_SBI(tblk->sb)->log;
 502 
 503         /*
 504          * The lazy commit thread can't free this tblock until we mark it UNLOCKED;
 505          * otherwise, we would be left with a transaction that may have been
 506          * reused.
 507          *
 508          * Lazy commit thread will turn off tblkGC_LAZY before calling this
 509          * routine.
 510          */
 511         if (tblk->flag & tblkGC_LAZY) {
 512                 jfs_info("txEnd called w/lazy tid: %d, tblk = 0x%p", tid, tblk);
 513                 TXN_UNLOCK();
 514 
 515                 spin_lock_irq(&log->gclock);    // LOGGC_LOCK
 516                 tblk->flag |= tblkGC_UNLOCKED;
 517                 spin_unlock_irq(&log->gclock);  // LOGGC_UNLOCK
 518                 return;
 519         }
 520 
 521         jfs_info("txEnd: tid: %d, tblk = 0x%p", tid, tblk);
 522 
 523         assert(tblk->next == 0);
 524 
 525         /*
 526          * insert tblock back on freelist
 527          */
 528         tblk->next = TxAnchor.freetid;
 529         TxAnchor.freetid = tid;
 530 
 531         /*
 532          * mark the tblock not active
 533          */
 534         if (--log->active == 0) {
 535                 clear_bit(log_FLUSH, &log->flag);
 536 
 537                 /*
 538                  * synchronize with logsync barrier
 539                  */
 540                 if (test_bit(log_SYNCBARRIER, &log->flag)) {
 541                         TXN_UNLOCK();
 542 
 543                         /* write dirty metadata & forward log syncpt */
 544                         jfs_syncpt(log, 1);
 545 
 546                         jfs_info("log barrier off: 0x%x", log->lsn);
 547 
 548                         /* enable new transactions start */
 549                         clear_bit(log_SYNCBARRIER, &log->flag);
 550 
 551                         /* wake up all waiters for the logsync barrier */
 552                         TXN_WAKEUP(&log->syncwait);
 553 
 554                         goto wakeup;
 555                 }
 556         }
 557 
 558         TXN_UNLOCK();
 559 wakeup:
 560         /*
 561          * wake up all waiters for a free tblock
 562          */
 563         TXN_WAKEUP(&TxAnchor.freewait);
 564 }
 565 
 566 /*
 567  *      txLock()
 568  *
 569  * function: acquire a transaction lock on the specified <mp>
 570  *
 571  * parameter:
 572  *
 573  * return:      transaction lock id
 574  *
 575  * serialization:
 576  */
 577 struct tlock *txLock(tid_t tid, struct inode *ip, struct metapage * mp,
 578                      int type)
 579 {
 580         struct jfs_inode_info *jfs_ip = JFS_IP(ip);
 581         int dir_xtree = 0;
 582         lid_t lid;
 583         tid_t xtid;
 584         struct tlock *tlck;
 585         struct xtlock *xtlck;
 586         struct linelock *linelock;
 587         xtpage_t *p;
 588         struct tblock *tblk;
 589 
 590         TXN_LOCK();
 591 
 592         if (S_ISDIR(ip->i_mode) && (type & tlckXTREE) &&
 593             !(mp->xflag & COMMIT_PAGE)) {
 594                 /*
 595                  * Directory inode is special.  It can have both an xtree tlock
 596                  * and a dtree tlock associated with it.
 597                  */
 598                 dir_xtree = 1;
 599                 lid = jfs_ip->xtlid;
 600         } else
 601                 lid = mp->lid;
 602 
 603         /* is page not locked by a transaction ? */
 604         if (lid == 0)
 605                 goto allocateLock;
 606 
 607         jfs_info("txLock: tid:%d ip:0x%p mp:0x%p lid:%d", tid, ip, mp, lid);
 608 
 609         /* is page locked by the requester transaction ? */
 610         tlck = lid_to_tlock(lid);
 611         if ((xtid = tlck->tid) == tid) {
 612                 TXN_UNLOCK();
 613                 goto grantLock;
 614         }
 615 
 616         /*
 617          * is page locked by anonymous transaction/lock ?
 618          *
 619          * (page update without transaction (i.e., file write) is
 620          * locked under anonymous transaction tid = 0:
 621          * anonymous tlocks maintained on anonymous tlock list of
 622          * the inode of the page and available to all anonymous
 623          * transactions until txCommit() time at which point
 624          * they are transferred to the transaction tlock list of
 625          * the committing transaction of the inode)
 626          */
 627         if (xtid == 0) {
 628                 tlck->tid = tid;
 629                 TXN_UNLOCK();
 630                 tblk = tid_to_tblock(tid);
 631                 /*
 632                  * The order of the tlocks in the transaction is important
 633                  * (during truncate, child xtree pages must be freed before
 634                  * parent's tlocks change the working map).
 635                  * Take tlock off anonymous list and add to tail of
 636                  * transaction list
 637                  *
 638                  * Note:  We really need to get rid of the tid & lid and
 639                  * use list_head's.  This code is getting UGLY!
 640                  */
 641                 if (jfs_ip->atlhead == lid) {
 642                         if (jfs_ip->atltail == lid) {
 643                                 /* only anonymous txn.
 644                                  * Remove from anon_list
 645                                  */
 646                                 TXN_LOCK();
 647                                 list_del_init(&jfs_ip->anon_inode_list);
 648                                 TXN_UNLOCK();
 649                         }
 650                         jfs_ip->atlhead = tlck->next;
 651                 } else {
 652                         lid_t last;
 653                         for (last = jfs_ip->atlhead;
 654                              lid_to_tlock(last)->next != lid;
 655                              last = lid_to_tlock(last)->next) {
 656                                 assert(last);
 657                         }
 658                         lid_to_tlock(last)->next = tlck->next;
 659                         if (jfs_ip->atltail == lid)
 660                                 jfs_ip->atltail = last;
 661                 }
 662 
 663                 /* insert the tlock at tail of transaction tlock list */
 664 
 665                 if (tblk->next)
 666                         lid_to_tlock(tblk->last)->next = lid;
 667                 else
 668                         tblk->next = lid;
 669                 tlck->next = 0;
 670                 tblk->last = lid;
 671 
 672                 goto grantLock;
 673         }
 674 
 675         goto waitLock;
 676 
 677         /*
 678          * allocate a tlock
 679          */
 680       allocateLock:
 681         lid = txLockAlloc();
 682         tlck = lid_to_tlock(lid);
 683 
 684         /*
 685          * initialize tlock
 686          */
 687         tlck->tid = tid;
 688 
 689         TXN_UNLOCK();
 690 
 691         /* mark tlock for meta-data page */
 692         if (mp->xflag & COMMIT_PAGE) {
 693 
 694                 tlck->flag = tlckPAGELOCK;
 695 
 696                 /* mark the page dirty and nohomeok */
 697                 metapage_nohomeok(mp);
 698 
 699                 jfs_info("locking mp = 0x%p, nohomeok = %d tid = %d tlck = 0x%p",
 700                          mp, mp->nohomeok, tid, tlck);
 701 
 702                 /* if anonymous transaction, and buffer is on the group
 703                  * commit synclist, mark inode to show this.  This will
 704                  * prevent the buffer from being marked nohomeok for too
 705                  * long a time.
 706                  */
 707                 if ((tid == 0) && mp->lsn)
 708                         set_cflag(COMMIT_Synclist, ip);
 709         }
 710         /* mark tlock for in-memory inode */
 711         else
 712                 tlck->flag = tlckINODELOCK;
 713 
 714         if (S_ISDIR(ip->i_mode))
 715                 tlck->flag |= tlckDIRECTORY;
 716 
 717         tlck->type = 0;
 718 
 719         /* bind the tlock and the page */
 720         tlck->ip = ip;
 721         tlck->mp = mp;
 722         if (dir_xtree)
 723                 jfs_ip->xtlid = lid;
 724         else
 725                 mp->lid = lid;
 726 
 727         /*
 728          * enqueue transaction lock to transaction/inode
 729          */
 730         /* insert the tlock at tail of transaction tlock list */
 731         if (tid) {
 732                 tblk = tid_to_tblock(tid);
 733                 if (tblk->next)
 734                         lid_to_tlock(tblk->last)->next = lid;
 735                 else
 736                         tblk->next = lid;
 737                 tlck->next = 0;
 738                 tblk->last = lid;
 739         }
 740         /* anonymous transaction:
 741          * insert the tlock at head of inode anonymous tlock list
 742          */
 743         else {
 744                 tlck->next = jfs_ip->atlhead;
 745                 jfs_ip->atlhead = lid;
 746                 if (tlck->next == 0) {
 747                         /* This inode's first anonymous transaction */
 748                         jfs_ip->atltail = lid;
 749                         TXN_LOCK();
 750                         list_add_tail(&jfs_ip->anon_inode_list,
 751                                       &TxAnchor.anon_list);
 752                         TXN_UNLOCK();
 753                 }
 754         }
 755 
 756         /* initialize type dependent area for linelock */
 757         linelock = (struct linelock *) & tlck->lock;
 758         linelock->next = 0;
 759         linelock->flag = tlckLINELOCK;
 760         linelock->maxcnt = TLOCKSHORT;
 761         linelock->index = 0;
 762 
 763         switch (type & tlckTYPE) {
 764         case tlckDTREE:
 765                 linelock->l2linesize = L2DTSLOTSIZE;
 766                 break;
 767 
 768         case tlckXTREE:
 769                 linelock->l2linesize = L2XTSLOTSIZE;
 770 
 771                 xtlck = (struct xtlock *) linelock;
 772                 xtlck->header.offset = 0;
 773                 xtlck->header.length = 2;
 774 
 775                 if (type & tlckNEW) {
 776                         xtlck->lwm.offset = XTENTRYSTART;
 777                 } else {
 778                         if (mp->xflag & COMMIT_PAGE)
 779                                 p = (xtpage_t *) mp->data;
 780                         else
 781                                 p = &jfs_ip->i_xtroot;
 782                         xtlck->lwm.offset =
 783                             le16_to_cpu(p->header.nextindex);
 784                 }
 785                 xtlck->lwm.length = 0;  /* ! */
 786                 xtlck->twm.offset = 0;
 787                 xtlck->hwm.offset = 0;
 788 
 789                 xtlck->index = 2;
 790                 break;
 791 
 792         case tlckINODE:
 793                 linelock->l2linesize = L2INODESLOTSIZE;
 794                 break;
 795 
 796         case tlckDATA:
 797                 linelock->l2linesize = L2DATASLOTSIZE;
 798                 break;
 799 
 800         default:
 801                 jfs_err("UFO tlock:0x%p", tlck);
 802         }
 803 
 804         /*
 805          * update tlock vector
 806          */
 807       grantLock:
 808         tlck->type |= type;
 809 
 810         return tlck;
 811 
 812         /*
 813          * page is being locked by another transaction:
 814          */
 815       waitLock:
 816         /* Only locks on ipimap or ipaimap should reach here */
 817         /* assert(jfs_ip->fileset == AGGREGATE_I); */
 818         if (jfs_ip->fileset != AGGREGATE_I) {
 819                 printk(KERN_ERR "txLock: trying to lock locked page!\n");
 820                 print_hex_dump(KERN_ERR, "ip: ", DUMP_PREFIX_ADDRESS, 16, 4,
 821                                ip, sizeof(*ip), 0);
 822                 print_hex_dump(KERN_ERR, "mp: ", DUMP_PREFIX_ADDRESS, 16, 4,
 823                                mp, sizeof(*mp), 0);
 824                 print_hex_dump(KERN_ERR, "Locker's tblock: ",
 825                                DUMP_PREFIX_ADDRESS, 16, 4, tid_to_tblock(tid),
 826                                sizeof(struct tblock), 0);
 827                 print_hex_dump(KERN_ERR, "Tlock: ", DUMP_PREFIX_ADDRESS, 16, 4,
 828                                tlck, sizeof(*tlck), 0);
 829                 BUG();
 830         }
 831         INCREMENT(stattx.waitlock);     /* statistics */
 832         TXN_UNLOCK();
 833         release_metapage(mp);
 834         TXN_LOCK();
 835         xtid = tlck->tid;       /* reacquire after dropping TXN_LOCK */
 836 
 837         jfs_info("txLock: in waitLock, tid = %d, xtid = %d, lid = %d",
 838                  tid, xtid, lid);
 839 
 840         /* Recheck everything since dropping TXN_LOCK */
 841         if (xtid && (tlck->mp == mp) && (mp->lid == lid))
 842                 TXN_SLEEP_DROP_LOCK(&tid_to_tblock(xtid)->waitor);
 843         else
 844                 TXN_UNLOCK();
 845         jfs_info("txLock: awakened     tid = %d, lid = %d", tid, lid);
 846 
 847         return NULL;
 848 }
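/*
 * Sketch (illustrative): an xtree updater records the lowest slot it
 * modified in the tlock's embedded linelock right after txLock()
 * returns, along the lines of the btree code:
 *
 *	tlck = txLock(tid, ip, mp, tlckXTREE | tlckGROW);
 *	xtlck = (struct xtlock *) &tlck->lock;
 *	if (!xtlck->lwm.offset || xtlck->lwm.offset > index)
 *		xtlck->lwm.offset = index;
 *
 * xtLog() later turns the low-water mark into the after-image log
 * vector, so only dirty slots get logged at commit.
 */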
 849 
 850 /*
 851  * NAME:        txRelease()
 852  *
 853  * FUNCTION:    Release buffers associated with transaction locks, but don't
 854  *              mark homeok yet.  This allows other transactions to modify
 855  *              buffers, but won't let them go to disk until the commit record
 856  *              actually gets written.
 857  *
 858  * PARAMETER:
 859  *              tblk    -
 860  *
 861  * RETURN:      none
 862  */
 863 static void txRelease(struct tblock * tblk)
 864 {
 865         struct metapage *mp;
 866         lid_t lid;
 867         struct tlock *tlck;
 868 
 869         TXN_LOCK();
 870 
 871         for (lid = tblk->next; lid; lid = tlck->next) {
 872                 tlck = lid_to_tlock(lid);
 873                 if ((mp = tlck->mp) != NULL &&
 874                     (tlck->type & tlckBTROOT) == 0) {
 875                         assert(mp->xflag & COMMIT_PAGE);
 876                         mp->lid = 0;
 877                 }
 878         }
 879 
 880         /*
 881          * wakeup transactions waiting on a page locked
 882          * by the current transaction
 883          */
 884         TXN_WAKEUP(&tblk->waitor);
 885 
 886         TXN_UNLOCK();
 887 }
 888 
 889 /*
 890  * NAME:        txUnlock()
 891  *
 892  * FUNCTION:    Initiates pageout of pages modified by tid in journalled
 893  *              objects and frees their lockwords.
 894  */
 895 static void txUnlock(struct tblock * tblk)
 896 {
 897         struct tlock *tlck;
 898         struct linelock *linelock;
 899         lid_t lid, next, llid, k;
 900         struct metapage *mp;
 901         struct jfs_log *log;
 902         int difft, diffp;
 903         unsigned long flags;
 904 
 905         jfs_info("txUnlock: tblk = 0x%p", tblk);
 906         log = JFS_SBI(tblk->sb)->log;
 907 
 908         /*
 909          * mark page under tlock homeok (its log has been written):
 910          */
 911         for (lid = tblk->next; lid; lid = next) {
 912                 tlck = lid_to_tlock(lid);
 913                 next = tlck->next;
 914 
 915                 jfs_info("unlocking lid = %d, tlck = 0x%p", lid, tlck);
 916 
 917                 /* unbind page from tlock */
 918                 if ((mp = tlck->mp) != NULL &&
 919                     (tlck->type & tlckBTROOT) == 0) {
 920                         assert(mp->xflag & COMMIT_PAGE);
 921 
 922                         /* hold buffer
 923                          */
 924                         hold_metapage(mp);
 925 
 926                         assert(mp->nohomeok > 0);
 927                         _metapage_homeok(mp);
 928 
 929                         /* inherit younger/larger clsn */
 930                         LOGSYNC_LOCK(log, flags);
 931                         if (mp->clsn) {
 932                                 logdiff(difft, tblk->clsn, log);
 933                                 logdiff(diffp, mp->clsn, log);
 934                                 if (difft > diffp)
 935                                         mp->clsn = tblk->clsn;
 936                         } else
 937                                 mp->clsn = tblk->clsn;
 938                         LOGSYNC_UNLOCK(log, flags);
 939 
 940                         assert(!(tlck->flag & tlckFREEPAGE));
 941 
 942                         put_metapage(mp);
 943                 }
 944 
 945                 /* insert tlock, and linelock(s) of the tlock if any,
 946                  * at head of freelist
 947                  */
 948                 TXN_LOCK();
 949 
 950                 llid = ((struct linelock *) & tlck->lock)->next;
 951                 while (llid) {
 952                         linelock = (struct linelock *) lid_to_tlock(llid);
 953                         k = linelock->next;
 954                         txLockFree(llid);
 955                         llid = k;
 956                 }
 957                 txLockFree(lid);
 958 
 959                 TXN_UNLOCK();
 960         }
 961         tblk->next = tblk->last = 0;
 962 
 963         /*
 964          * remove tblock from logsynclist
 965          * (allocation map pages inherited the lsn of tblk and
 966          * were inserted on the logsync list at txUpdateMap())
 967          */
 968         if (tblk->lsn) {
 969                 LOGSYNC_LOCK(log, flags);
 970                 log->count--;
 971                 list_del(&tblk->synclist);
 972                 LOGSYNC_UNLOCK(log, flags);
 973         }
 974 }
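/*
 * Worked case for the clsn inheritance above (illustrative; logdiff()
 * is defined elsewhere as the wrapped distance from the log sync
 * point): with log->syncpt at 0x1000, tblk->clsn 0x3000 and mp->clsn
 * 0x2000, difft (0x2000) exceeds diffp (0x1000), so the metapage
 * inherits the younger clsn 0x3000.
 */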
 975 
 976 /*
 977  *      txMaplock()
 978  *
 979  * function: allocate a transaction lock for freed page/entry;
 980  *      for freed page, maplock is used as xtlock/dtlock type;
 981  */
 982 struct tlock *txMaplock(tid_t tid, struct inode *ip, int type)
 983 {
 984         struct jfs_inode_info *jfs_ip = JFS_IP(ip);
 985         lid_t lid;
 986         struct tblock *tblk;
 987         struct tlock *tlck;
 988         struct maplock *maplock;
 989 
 990         TXN_LOCK();
 991 
 992         /*
 993          * allocate a tlock
 994          */
 995         lid = txLockAlloc();
 996         tlck = lid_to_tlock(lid);
 997 
 998         /*
 999          * initialize tlock
1000          */
1001         tlck->tid = tid;
1002 
1003         /* bind the tlock and the object */
1004         tlck->flag = tlckINODELOCK;
1005         if (S_ISDIR(ip->i_mode))
1006                 tlck->flag |= tlckDIRECTORY;
1007         tlck->ip = ip;
1008         tlck->mp = NULL;
1009 
1010         tlck->type = type;
1011 
1012         /*
1013          * enqueue transaction lock to transaction/inode
1014          */
1015         /* insert the tlock at tail of transaction tlock list */
1016         if (tid) {
1017                 tblk = tid_to_tblock(tid);
1018                 if (tblk->next)
1019                         lid_to_tlock(tblk->last)->next = lid;
1020                 else
1021                         tblk->next = lid;
1022                 tlck->next = 0;
1023                 tblk->last = lid;
1024         }
1025         /* anonymous transaction:
1026          * insert the tlock at head of inode anonymous tlock list
1027          */
1028         else {
1029                 tlck->next = jfs_ip->atlhead;
1030                 jfs_ip->atlhead = lid;
1031                 if (tlck->next == 0) {
1032                         /* This inode's first anonymous transaction */
1033                         jfs_ip->atltail = lid;
1034                         list_add_tail(&jfs_ip->anon_inode_list,
1035                                       &TxAnchor.anon_list);
1036                 }
1037         }
1038 
1039         TXN_UNLOCK();
1040 
1041         /* initialize type dependent area for maplock */
1042         maplock = (struct maplock *) & tlck->lock;
1043         maplock->next = 0;
1044         maplock->maxcnt = 0;
1045         maplock->index = 0;
1046 
1047         return tlck;
1048 }
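/*
 * Sketch (illustrative): a truncate path that frees an extent formats
 * the maplock returned here as a pxd_lock, roughly:
 *
 *	tlck = txMaplock(tid, ip, tlckMAP | tlckFREE);
 *	pxdlock = (struct pxd_lock *) &tlck->lock;
 *	pxdlock->flag = mlckFREEPXD;
 *	PXDaddress(&pxdlock->pxd, xaddr);
 *	PXDlength(&pxdlock->pxd, xlen);
 *	pxdlock->index = 1;
 *
 * txUpdateMap() then frees the extent in the block map after the
 * commit record is logged.
 */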
1049 
1050 /*
1051  *      txLinelock()
1052  *
1053  * function: allocate a transaction lock for log vector list
1054  */
1055 struct linelock *txLinelock(struct linelock * tlock)
1056 {
1057         lid_t lid;
1058         struct tlock *tlck;
1059         struct linelock *linelock;
1060 
1061         TXN_LOCK();
1062 
1063         /* allocate a TxLock structure */
1064         lid = txLockAlloc();
1065         tlck = lid_to_tlock(lid);
1066 
1067         TXN_UNLOCK();
1068 
1069         /* initialize linelock */
1070         linelock = (struct linelock *) tlck;
1071         linelock->next = 0;
1072         linelock->flag = tlckLINELOCK;
1073         linelock->maxcnt = TLOCKLONG;
1074         linelock->index = 0;
1075         if (tlck->flag & tlckDIRECTORY)
1076                 linelock->flag |= tlckDIRECTORY;
1077 
1078         /* append linelock after tlock */
1079         linelock->next = tlock->next;
1080         tlock->next = lid;
1081 
1082         return linelock;
1083 }
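/*
 * Sketch (illustrative; the lv array layout is an assumption from the
 * linelock definition elsewhere): the dtree/xtree logging paths chain
 * an overflow linelock once the embedded one fills up, roughly:
 *
 *	if (linelock->index >= linelock->maxcnt)
 *		linelock = txLinelock(linelock);
 *	lv = &linelock->lv[linelock->index];
 *	lv->offset = offset;
 *	lv->length = length;
 *	linelock->index++;
 */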
1084 
1085 /*
1086  *              transaction commit management
1087  *              -----------------------------
1088  */
1089 
1090 /*
1091  * NAME:        txCommit()
1092  *
1093  * FUNCTION:    commit the changes to the objects specified in
1094  *              clist.  For journalled segments only the
1095  *              changes of the caller are committed, i.e., by tid.
1096  *              For non-journalled segments the data are flushed to
1097  *              disk and then the change to the disk inode and indirect
1098  *              blocks committed (so blocks newly allocated to the
1099  *              segment will be made a part of the segment atomically).
1100  *
1101  *              all of the segments specified in clist must be in
1102  *              one file system. no more than 6 segments are needed
1103  *              to handle all unix svcs.
1104  *
1105  *              if the i_nlink field (i.e. disk inode link count)
1106  *              is zero, and the type of inode is a regular file or
1107  *              directory, or symbolic link, the inode is truncated
1108  *              to zero length. the truncation is committed but the
1109  *              VM resources are unaffected until it is closed (see
1110  *              iput and iclose).
1111  *
1112  * PARAMETER:
1113  *
1114  * RETURN:
1115  *
1116  * serialization:
1117  *              on entry the inode lock on each segment is assumed
1118  *              to be held.
1119  *
1120  * i/o error:
1121  */
1122 int txCommit(tid_t tid,         /* transaction identifier */
1123              int nip,           /* number of inodes to commit */
1124              struct inode **iplist,     /* list of inode to commit */
1125              int flag)
1126 {
1127         int rc = 0;
1128         struct commit cd;
1129         struct jfs_log *log;
1130         struct tblock *tblk;
1131         struct lrd *lrd;
1132         struct inode *ip;
1133         struct jfs_inode_info *jfs_ip;
1134         int k, n;
1135         ino_t top;
1136         struct super_block *sb;
1137 
1138         jfs_info("txCommit, tid = %d, flag = %d", tid, flag);
1139         /* is read-only file system ? */
1140         if (isReadOnly(iplist[0])) {
1141                 rc = -EROFS;
1142                 goto TheEnd;
1143         }
1144 
1145         sb = cd.sb = iplist[0]->i_sb;
1146         cd.tid = tid;
1147 
1148         if (tid == 0)
1149                 tid = txBegin(sb, 0);
1150         tblk = tid_to_tblock(tid);
1151 
1152         /*
1153          * initialize commit structure
1154          */
1155         log = JFS_SBI(sb)->log;
1156         cd.log = log;
1157 
1158         /* initialize log record descriptor in commit */
1159         lrd = &cd.lrd;
1160         lrd->logtid = cpu_to_le32(tblk->logtid);
1161         lrd->backchain = 0;
1162 
1163         tblk->xflag |= flag;
1164 
1165         if ((flag & (COMMIT_FORCE | COMMIT_SYNC)) == 0)
1166                 tblk->xflag |= COMMIT_LAZY;
1167         /*
1168          *      prepare non-journaled objects for commit
1169          *
1170          * flush data pages of a non-journaled file
1171          * to prevent the file from getting uninitialized disk blocks
1172          * in case of a crash.
1173          * (new blocks - )
1174          */
1175         cd.iplist = iplist;
1176         cd.nip = nip;
1177 
1178         /*
1179          *      acquire transaction lock on (on-disk) inodes
1180          *
1181          * update on-disk inode from in-memory inode
1182          * acquiring transaction locks for AFTER records
1183          * on the on-disk inode of file object
1184          *
1185          * sort the inodes array by inode number in descending order
1186          * to prevent deadlock when acquiring transaction lock
1187          * of on-disk inodes on multiple on-disk inode pages by
1188          * multiple concurrent transactions
1189          */
1190         for (k = 0; k < cd.nip; k++) {
1191                 top = (cd.iplist[k])->i_ino;
1192                 for (n = k + 1; n < cd.nip; n++) {
1193                         ip = cd.iplist[n];
1194                         if (ip->i_ino > top) {
1195                                 top = ip->i_ino;
1196                                 cd.iplist[n] = cd.iplist[k];
1197                                 cd.iplist[k] = ip;
1198                         }
1199                 }
1200 
1201                 ip = cd.iplist[k];
1202                 jfs_ip = JFS_IP(ip);
1203 
1204                 /*
1205                  * BUGBUG - This code has temporarily been removed.  The
1206                  * intent is to ensure that any file data is written before
1207                  * the metadata is committed to the journal.  This prevents
1208                  * uninitialized data from appearing in a file after the
1209                  * journal has been replayed.  (The uninitialized data
1210                  * could be sensitive data removed by another user.)
1211                  *
1212                  * The problem now is that we are holding the IWRITELOCK
1213                  * on the inode, and calling filemap_fdatawrite on an
1214                  * unmapped page will cause a deadlock in jfs_get_block.
1215                  *
1216                  * The long term solution is to pare down the use of
1217                  * IWRITELOCK.  We are currently holding it too long.
1218                  * We could also be smarter about which data pages need
1219                  * to be written before the transaction is committed and
1220                  * when we don't need to worry about it at all.
1221                  *
1222                  * if ((!S_ISDIR(ip->i_mode))
1223                  *    && (tblk->flag & COMMIT_DELETE) == 0)
1224                  *      filemap_write_and_wait(ip->i_mapping);
1225                  */
1226 
1227                 /*
1228                  * Mark inode as not dirty.  It will still be on the dirty
1229                  * inode list, but we'll know not to commit it again unless
1230                  * it gets marked dirty again
1231                  */
1232                 clear_cflag(COMMIT_Dirty, ip);
1233 
1234                 /* inherit anonymous tlock(s) of inode */
1235                 if (jfs_ip->atlhead) {
1236                         lid_to_tlock(jfs_ip->atltail)->next = tblk->next;
1237                         tblk->next = jfs_ip->atlhead;
1238                         if (!tblk->last)
1239                                 tblk->last = jfs_ip->atltail;
1240                         jfs_ip->atlhead = jfs_ip->atltail = 0;
1241                         TXN_LOCK();
1242                         list_del_init(&jfs_ip->anon_inode_list);
1243                         TXN_UNLOCK();
1244                 }
1245 
1246                 /*
1247                  * acquire transaction lock on on-disk inode page
1248                  * (become first tlock of the tblk's tlock list)
1249                  */
1250                 if ((rc = diWrite(tid, ip)))
1251                         goto out;
1252         }
1253 
1254         /*
1255          *      write log records from transaction locks
1256          *
1257          * txUpdateMap() resets XAD_NEW in XAD.
1258          */
1259         if ((rc = txLog(log, tblk, &cd)))
1260                 goto TheEnd;
1261 
1262         /*
1263          * Ensure that inode isn't reused before
1264          * lazy commit thread finishes processing
1265          */
1266         if (tblk->xflag & COMMIT_DELETE) {
1267                 ihold(tblk->u.ip);
1268                 /*
1269                  * Avoid a rare deadlock
1270                  *
1271                  * If the inode is locked, we may be blocked in
1272                  * jfs_commit_inode.  If so, we don't want the
1273                  * lazy_commit thread doing the last iput() on the inode
1274                  * since that may block on the locked inode.  Instead,
1275                  * commit the transaction synchronously, so the last iput
1276                  * will be done by the calling thread (or later)
1277                  */
1278                 /*
1279                  * I believe this code is no longer needed.  Splitting I_LOCK
1280                  * into two bits, I_NEW and I_SYNC should prevent this
1281                  * deadlock as well.  But since I don't have a JFS testload
1282                  * to verify this, only a trivial s/I_LOCK/I_SYNC/ was done.
1283                  * Joern
1284                  */
1285                 if (tblk->u.ip->i_state & I_SYNC)
1286                         tblk->xflag &= ~COMMIT_LAZY;
1287         }
1288 
1289         ASSERT((!(tblk->xflag & COMMIT_DELETE)) ||
1290                ((tblk->u.ip->i_nlink == 0) &&
1291                 !test_cflag(COMMIT_Nolink, tblk->u.ip)));
1292 
1293         /*
1294          *      write COMMIT log record
1295          */
1296         lrd->type = cpu_to_le16(LOG_COMMIT);
1297         lrd->length = 0;
1298         lmLog(log, tblk, lrd, NULL);
1299 
1300         lmGroupCommit(log, tblk);
1301 
1302         /*
1303          *      - transaction is now committed -
1304          */
1305 
1306         /*
1307          * force pages in careful update
1308          * (imap addressing structure update)
1309          */
1310         if (flag & COMMIT_FORCE)
1311                 txForce(tblk);
1312 
1313         /*
1314          *      update allocation map.
1315          *
1316          * update inode allocation map and inode:
1317          * free pager lock on memory object of inode if any.
1318          * update block allocation map.
1319          *
1320          * txUpdateMap() resets XAD_NEW in XAD.
1321          */
1322         if (tblk->xflag & COMMIT_FORCE)
1323                 txUpdateMap(tblk);
1324 
1325         /*
1326          *      free transaction locks and pageout/free pages
1327          */
1328         txRelease(tblk);
1329 
1330         if ((tblk->flag & tblkGC_LAZY) == 0)
1331                 txUnlock(tblk);
1332 
1333 
1334         /*
1335          *      reset in-memory object state
1336          */
1337         for (k = 0; k < cd.nip; k++) {
1338                 ip = cd.iplist[k];
1339                 jfs_ip = JFS_IP(ip);
1340 
1341                 /*
1342                  * reset in-memory inode state
1343                  */
1344                 jfs_ip->bxflag = 0;
1345                 jfs_ip->blid = 0;
1346         }
1347 
1348       out:
1349         if (rc != 0)
1350                 txAbort(tid, 1);
1351 
1352       TheEnd:
1353         jfs_info("txCommit: tid = %d, returning %d", tid, rc);
1354         return rc;
1355 }
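/*
 * Usage sketch (illustrative): a typical namespace operation commits
 * the parent directory and the target inode in one transaction, e.g.
 *
 *	struct inode *iplist[2];
 *
 *	iplist[0] = dip;
 *	iplist[1] = ip;
 *	rc = txCommit(tid, 2, iplist, 0);
 *
 * txCommit() re-sorts iplist by inode number itself, so callers need
 * not order the array.
 */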
1356 
1357 /*
1358  * NAME:        txLog()
1359  *
1360  * FUNCTION:    Writes AFTER log records for all lines modified
1361  *              by tid for segments specified by inodes in comdata.
1362  *              Code assumes only WRITELOCKS are recorded in lockwords.
1363  *
1364  * PARAMETERS:
1365  *
1366  * RETURN :
1367  */
1368 static int txLog(struct jfs_log * log, struct tblock * tblk, struct commit * cd)
1369 {
1370         int rc = 0;
1371         struct inode *ip;
1372         lid_t lid;
1373         struct tlock *tlck;
1374         struct lrd *lrd = &cd->lrd;
1375 
1376         /*
1377          * write log record(s) for each tlock of transaction,
1378          */
1379         for (lid = tblk->next; lid; lid = tlck->next) {
1380                 tlck = lid_to_tlock(lid);
1381 
1382                 tlck->flag |= tlckLOG;
1383 
1384                 /* initialize lrd common */
1385                 ip = tlck->ip;
1386                 lrd->aggregate = cpu_to_le32(JFS_SBI(ip->i_sb)->aggregate);
1387                 lrd->log.redopage.fileset = cpu_to_le32(JFS_IP(ip)->fileset);
1388                 lrd->log.redopage.inode = cpu_to_le32(ip->i_ino);
1389 
1390                 /* write log record of page from the tlock */
1391                 switch (tlck->type & tlckTYPE) {
1392                 case tlckXTREE:
1393                         xtLog(log, tblk, lrd, tlck);
1394                         break;
1395 
1396                 case tlckDTREE:
1397                         dtLog(log, tblk, lrd, tlck);
1398                         break;
1399 
1400                 case tlckINODE:
1401                         diLog(log, tblk, lrd, tlck, cd);
1402                         break;
1403 
1404                 case tlckMAP:
1405                         mapLog(log, tblk, lrd, tlck);
1406                         break;
1407 
1408                 case tlckDATA:
1409                         dataLog(log, tblk, lrd, tlck);
1410                         break;
1411 
1412                 default:
1413                         jfs_err("UFO tlock:0x%p", tlck);
1414                 }
1415         }
1416 
1417         return rc;
1418 }
1419 
1420 /*
1421  *      diLog()
1422  *
1423  * function:    log inode tlock and format maplock to update bmap;
1424  */
1425 static int diLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
1426                  struct tlock * tlck, struct commit * cd)
1427 {
1428         int rc = 0;
1429         struct metapage *mp;
1430         pxd_t *pxd;
1431         struct pxd_lock *pxdlock;
1432 
1433         mp = tlck->mp;
1434 
1435         /* initialize as REDOPAGE record format */
1436         lrd->log.redopage.type = cpu_to_le16(LOG_INODE);
1437         lrd->log.redopage.l2linesize = cpu_to_le16(L2INODESLOTSIZE);
1438 
1439         pxd = &lrd->log.redopage.pxd;
1440 
1441         /*
1442          *      inode after image
1443          */
1444         if (tlck->type & tlckENTRY) {
1445                 /* log after-image for logredo(): */
1446                 lrd->type = cpu_to_le16(LOG_REDOPAGE);
1447                 PXDaddress(pxd, mp->index);
1448                 PXDlength(pxd,
1449                           mp->logical_size >> tblk->sb->s_blocksize_bits);
1450                 lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, tlck));
1451 
1452                 /* mark page as homeward bound */
1453                 tlck->flag |= tlckWRITEPAGE;
1454         } else if (tlck->type & tlckFREE) {
1455                 /*
1456                  *      free inode extent
1457                  *
1458                  * (pages of the freed inode extent have been invalidated and
1459                  * a maplock for free of the extent has been formatted at
1460                  * txLock() time);
1461                  *
1462                  * the tlock had been acquired on the inode allocation map page
1463                  * (iag) that specifies the freed extent, even though the map
1464                  * page is not itself logged, to prevent pageout of the map
1465                  * page before the log;
1466                  */
1467 
1468                 /* log LOG_NOREDOINOEXT of the freed inode extent for
1469                  * logredo() to start NoRedoPage filters, and to update
1470                  * imap and bmap for free of the extent;
1471                  */
1472                 lrd->type = cpu_to_le16(LOG_NOREDOINOEXT);
1473                 /*
1474                  * For the LOG_NOREDOINOEXT record, we need
1475                  * to pass the IAG number and inode extent
1476                  * index (within that IAG) from which the
1477                  * extent is being released.  These have been
1478                  * passed to us in iplist[1] and iplist[2].
1479                  */
1480                 lrd->log.noredoinoext.iagnum =
1481                     cpu_to_le32((u32) (size_t) cd->iplist[1]);
1482                 lrd->log.noredoinoext.inoext_idx =
1483                     cpu_to_le32((u32) (size_t) cd->iplist[2]);
1484 
1485                 pxdlock = (struct pxd_lock *) & tlck->lock;
1486                 *pxd = pxdlock->pxd;
1487                 lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, NULL));
1488 
1489                 /* update bmap */
1490                 tlck->flag |= tlckUPDATEMAP;
1491 
1492                 /* mark page as homeward bound */
1493                 tlck->flag |= tlckWRITEPAGE;
1494         } else
1495                 jfs_err("diLog: UFO type tlck:0x%p", tlck);
1496 #ifdef  _JFS_WIP
1497         /*
1498          *      alloc/free external EA extent
1499          *
1500          * a maplock for txUpdateMap() to update bPWMAP for alloc/free
1501          * of the extent has been formatted at txLock() time;
1502          */
1503         else {
1504                 assert(tlck->type & tlckEA);
1505 
1506                 /* log LOG_UPDATEMAP for logredo() to update bmap for
1507                  * alloc of new (and free of old) external EA extent;
1508                  */
1509                 lrd->type = cpu_to_le16(LOG_UPDATEMAP);
1510                 pxdlock = (struct pxd_lock *) & tlck->lock;
1511                 nlock = pxdlock->index;
1512                 for (i = 0; i < nlock; i++, pxdlock++) {
1513                         if (pxdlock->flag & mlckALLOCPXD)
1514                                 lrd->log.updatemap.type =
1515                                     cpu_to_le16(LOG_ALLOCPXD);
1516                         else
1517                                 lrd->log.updatemap.type =
1518                                     cpu_to_le16(LOG_FREEPXD);
1519                         lrd->log.updatemap.nxd = cpu_to_le16(1);
1520                         lrd->log.updatemap.pxd = pxdlock->pxd;
1521                         lrd->backchain =
1522                             cpu_to_le32(lmLog(log, tblk, lrd, NULL));
1523                 }
1524 
1525                 /* update bmap */
1526                 tlck->flag |= tlckUPDATEMAP;
1527         }
1528 #endif                          /* _JFS_WIP */
1529 
1530         return rc;
1531 }
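/*
 * A minimal stand-alone sketch (not kernel code; kept inert under #if 0
 * like the _JFS_WIP block above) of the iplist[] trick diLog() relies on:
 * small integers are smuggled through pointer slots and recovered with a
 * (u32)(size_t) double cast.  All values below are illustrative.
 */
#if 0
#include <stdio.h>
#include <stdint.h>

int main(void)
{
	void *iplist[3] = { NULL, NULL, NULL };
	uint32_t iagnum, inoext_idx;

	/* the caller stores the numbers as fake pointers ... */
	iplist[1] = (void *) (size_t) 17;	/* IAG number */
	iplist[2] = (void *) (size_t) 3;	/* extent index within the IAG */

	/* ... and the logger recovers them exactly as diLog() does */
	iagnum = (uint32_t) (size_t) iplist[1];
	inoext_idx = (uint32_t) (size_t) iplist[2];

	printf("iagnum=%u inoext_idx=%u\n", iagnum, inoext_idx);
	return 0;
}
#endif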
1532 
1533 /*
1534  *      dataLog()
1535  *
1536  * function:    log data tlock
1537  */
1538 static int dataLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
1539             struct tlock * tlck)
1540 {
1541         struct metapage *mp;
1542         pxd_t *pxd;
1543 
1544         mp = tlck->mp;
1545 
1546         /* initialize as REDOPAGE record format */
1547         lrd->log.redopage.type = cpu_to_le16(LOG_DATA);
1548         lrd->log.redopage.l2linesize = cpu_to_le16(L2DATASLOTSIZE);
1549 
1550         pxd = &lrd->log.redopage.pxd;
1551 
1552         /* log after-image for logredo(): */
1553         lrd->type = cpu_to_le16(LOG_REDOPAGE);
1554 
1555         if (jfs_dirtable_inline(tlck->ip)) {
1556                 /*
1557                  * The table has been truncated, so we must have deleted
1558                  * the last entry; don't bother logging this.
1559                  */
1560                 mp->lid = 0;
1561                 grab_metapage(mp);
1562                 metapage_homeok(mp);
1563                 discard_metapage(mp);
1564                 tlck->mp = NULL;
1565                 return 0;
1566         }
1567 
1568         PXDaddress(pxd, mp->index);
1569         PXDlength(pxd, mp->logical_size >> tblk->sb->s_blocksize_bits);
1570 
1571         lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, tlck));
1572 
1573         /* mark page as homeward bound */
1574         tlck->flag |= tlckWRITEPAGE;
1575 
1576         return 0;
1577 }
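/*
 * A stand-alone sketch (inert under #if 0) of what PXDaddress() and
 * PXDlength() build in dataLog() above: a page extent descriptor carrying
 * a block address and a length in filesystem blocks.  The struct below is
 * a simplified model, not the on-disk pxd_t layout from jfs_types.h, and
 * it ignores endianness.
 */
#if 0
#include <stdio.h>
#include <stdint.h>

struct pxd_model {
	uint64_t addr;		/* starting block number (mp->index) */
	uint32_t len;		/* extent length in blocks */
};

int main(void)
{
	struct pxd_model pxd;
	uint64_t logical_size = 4096;	/* metapage size in bytes */
	unsigned int blkbits = 12;	/* 4K blocks: sb->s_blocksize_bits */

	pxd.addr = 123456;			/* PXDaddress() */
	pxd.len = logical_size >> blkbits;	/* PXDlength()  */

	printf("redopage extent: addr=%llu len=%u\n",
	       (unsigned long long) pxd.addr, pxd.len);
	return 0;
}
#endif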
1578 
1579 /*
1580  *      dtLog()
1581  *
1582  * function:    log dtree tlock and format maplock to update bmap;
1583  */
1584 static void dtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
1585            struct tlock * tlck)
1586 {
1587         struct metapage *mp;
1588         struct pxd_lock *pxdlock;
1589         pxd_t *pxd;
1590 
1591         mp = tlck->mp;
1592 
1593         /* initialize as REDOPAGE/NOREDOPAGE record format */
1594         lrd->log.redopage.type = cpu_to_le16(LOG_DTREE);
1595         lrd->log.redopage.l2linesize = cpu_to_le16(L2DTSLOTSIZE);
1596 
1597         pxd = &lrd->log.redopage.pxd;
1598 
1599         if (tlck->type & tlckBTROOT)
1600                 lrd->log.redopage.type |= cpu_to_le16(LOG_BTROOT);
1601 
1602         /*
1603          *      page extension via relocation: entry insertion;
1604          *      page extension in-place: entry insertion;
1605          *      new right page from page split, reinitialized in-line
1606          *      root from root page split: entry insertion;
1607          */
1608         if (tlck->type & (tlckNEW | tlckEXTEND)) {
1609                 /* log after-image of the new page for logredo():
1610                  * mark log (LOG_NEW) for logredo() to initialize
1611                  * freelist and update bmap for alloc of the new page;
1612                  */
1613                 lrd->type = cpu_to_le16(LOG_REDOPAGE);
1614                 if (tlck->type & tlckEXTEND)
1615                         lrd->log.redopage.type |= cpu_to_le16(LOG_EXTEND);
1616                 else
1617                         lrd->log.redopage.type |= cpu_to_le16(LOG_NEW);
1618                 PXDaddress(pxd, mp->index);
1619                 PXDlength(pxd,
1620                           mp->logical_size >> tblk->sb->s_blocksize_bits);
1621                 lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, tlck));
1622 
1623                 /* format a maplock for txUpdateMap() to update bPMAP for
1624                  * alloc of the new page;
1625                  */
1626                 if (tlck->type & tlckBTROOT)
1627                         return;
1628                 tlck->flag |= tlckUPDATEMAP;
1629                 pxdlock = (struct pxd_lock *) & tlck->lock;
1630                 pxdlock->flag = mlckALLOCPXD;
1631                 pxdlock->pxd = *pxd;
1632 
1633                 pxdlock->index = 1;
1634 
1635                 /* mark page as homeward bound */
1636                 tlck->flag |= tlckWRITEPAGE;
1637                 return;
1638         }
1639 
1640         /*
1641          *      entry insertion/deletion,
1642          *      sibling page link update (old right page before split);
1643          */
1644         if (tlck->type & (tlckENTRY | tlckRELINK)) {
1645                 /* log after-image for logredo(): */
1646                 lrd->type = cpu_to_le16(LOG_REDOPAGE);
1647                 PXDaddress(pxd, mp->index);
1648                 PXDlength(pxd,
1649                           mp->logical_size >> tblk->sb->s_blocksize_bits);
1650                 lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, tlck));
1651 
1652                 /* mark page as homeward bound */
1653                 tlck->flag |= tlckWRITEPAGE;
1654                 return;
1655         }
1656 
1657         /*
1658          *      page deletion: page has been invalidated
1659          *      page relocation: source extent
1660          *
1661          *      (a maplock for free of the page has been formatted
1662          *      at txLock() time);
1663          */
1664         if (tlck->type & (tlckFREE | tlckRELOCATE)) {
1665                 /* log LOG_NOREDOPAGE of the deleted page for logredo()
1666                  * to start NoRedoPage filter and to update bmap for free
1667                  * of the deleted page
1668                  */
1669                 lrd->type = cpu_to_le16(LOG_NOREDOPAGE);
1670                 pxdlock = (struct pxd_lock *) & tlck->lock;
1671                 *pxd = pxdlock->pxd;
1672                 lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, NULL));
1673 
1674                 /* a maplock for txUpdateMap() for free of the page
1675                  * has been formatted at txLock() time;
1676                  */
1677                 tlck->flag |= tlckUPDATEMAP;
1678         }
1679         return;
1680 }
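/*
 * A stand-alone sketch (inert under #if 0) of the dispatch pattern in
 * dtLog() above: the tlock operation lives in ->type as bit flags, so one
 * mask test can cover several related cases.  The flag values and names
 * here are illustrative, not the real tlck* constants.
 */
#if 0
#include <stdio.h>

#define T_NEW		0x01
#define T_EXTEND	0x02
#define T_ENTRY		0x04
#define T_RELINK	0x08
#define T_FREE		0x10
#define T_RELOCATE	0x20

static void dispatch(unsigned int type)
{
	if (type & (T_NEW | T_EXTEND))
		printf("REDOPAGE + alloc maplock\n");
	else if (type & (T_ENTRY | T_RELINK))
		printf("REDOPAGE only\n");
	else if (type & (T_FREE | T_RELOCATE))
		printf("NOREDOPAGE + free maplock\n");
}

int main(void)
{
	dispatch(T_NEW);		/* new page from split   */
	dispatch(T_ENTRY | T_RELINK);	/* in-place entry update */
	dispatch(T_FREE);		/* page deletion         */
	return 0;
}
#endif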
1681 
1682 /*
1683  *      xtLog()
1684  *
1685  * function:    log xtree tlock and format maplock to update bmap;
1686  */
1687 static void xtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
1688            struct tlock * tlck)
1689 {
1690         struct inode *ip;
1691         struct metapage *mp;
1692         xtpage_t *p;
1693         struct xtlock *xtlck;
1694         struct maplock *maplock;
1695         struct xdlistlock *xadlock;
1696         struct pxd_lock *pxdlock;
1697         pxd_t *page_pxd;
1698         int next, lwm, hwm;
1699 
1700         ip = tlck->ip;
1701         mp = tlck->mp;
1702 
1703         /* initialize as REDOPAGE/NOREDOPAGE record format */
1704         lrd->log.redopage.type = cpu_to_le16(LOG_XTREE);
1705         lrd->log.redopage.l2linesize = cpu_to_le16(L2XTSLOTSIZE);
1706 
1707         page_pxd = &lrd->log.redopage.pxd;
1708 
1709         if (tlck->type & tlckBTROOT) {
1710                 lrd->log.redopage.type |= cpu_to_le16(LOG_BTROOT);
1711                 p = &JFS_IP(ip)->i_xtroot;
1712                 if (S_ISDIR(ip->i_mode))
1713                         lrd->log.redopage.type |=
1714                             cpu_to_le16(LOG_DIR_XTREE);
1715         } else
1716                 p = (xtpage_t *) mp->data;
1717         next = le16_to_cpu(p->header.nextindex);
1718 
1719         xtlck = (struct xtlock *) & tlck->lock;
1720 
1721         maplock = (struct maplock *) & tlck->lock;
1722         xadlock = (struct xdlistlock *) maplock;
1723 
1724         /*
1725          *      entry insertion/extension;
1726          *      sibling page link update (old right page before split);
1727          */
1728         if (tlck->type & (tlckNEW | tlckGROW | tlckRELINK)) {
1729                 /* log after-image for logredo():
1730                  * logredo() will update bmap for alloc of new/extended
1731                  * extents (XAD_NEW|XAD_EXTEND) of XAD[lwm:next) from
1732                  * after-image of XADlist;
1733                  * logredo() resets (XAD_NEW|XAD_EXTEND) flag when
1734                  * applying the after-image to the meta-data page.
1735                  */
1736                 lrd->type = cpu_to_le16(LOG_REDOPAGE);
1737                 PXDaddress(page_pxd, mp->index);
1738                 PXDlength(page_pxd,
1739                           mp->logical_size >> tblk->sb->s_blocksize_bits);
1740                 lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, tlck));
1741 
1742                 /* format a maplock for txUpdateMap() to update bPMAP
1743                  * for alloc of new/extended extents of XAD[lwm:next)
1744                  * from the page itself;
1745                  * txUpdateMap() resets (XAD_NEW|XAD_EXTEND) flag.
1746                  */
1747                 lwm = xtlck->lwm.offset;
1748                 if (lwm == 0)
1749                         lwm = XTPAGEMAXSLOT;
1750 
1751                 if (lwm == next)
1752                         goto out;
1753                 if (lwm > next) {
1754                         jfs_err("xtLog: lwm > next");
1755                         goto out;
1756                 }
1757                 tlck->flag |= tlckUPDATEMAP;
1758                 xadlock->flag = mlckALLOCXADLIST;
1759                 xadlock->count = next - lwm;
1760                 if ((xadlock->count <= 4) && (tblk->xflag & COMMIT_LAZY)) {
1761                         int i;
1762                         pxd_t *pxd;
1763                         /*
1764                          * Lazy commit may allow xtree to be modified before
1765                          * txUpdateMap runs.  Copy xad into linelock to
1766                          * preserve correct data.
1767                          *
1768                          * We can fit twice as many pxd's as xads in the lock
1769                          */
1770                         xadlock->flag = mlckALLOCPXDLIST;
1771                         pxd = xadlock->xdlist = &xtlck->pxdlock;
1772                         for (i = 0; i < xadlock->count; i++) {
1773                                 PXDaddress(pxd, addressXAD(&p->xad[lwm + i]));
1774                                 PXDlength(pxd, lengthXAD(&p->xad[lwm + i]));
1775                                 p->xad[lwm + i].flag &=
1776                                     ~(XAD_NEW | XAD_EXTENDED);
1777                                 pxd++;
1778                         }
1779                 } else {
1780                         /*
1781                          * xdlist will point into the inode's xtree; ensure
1782                          * that the transaction is not committed lazily.
1783                          */
1784                         xadlock->flag = mlckALLOCXADLIST;
1785                         xadlock->xdlist = &p->xad[lwm];
1786                         tblk->xflag &= ~COMMIT_LAZY;
1787                 }
1788                 jfs_info("xtLog: alloc ip:0x%p mp:0x%p tlck:0x%p lwm:%d count:%d",
1789                          tlck->ip, mp, tlck, lwm, xadlock->count);
1790 
1791                 maplock->index = 1;
1792 
1793               out:
1794                 /* mark page as homeward bound */
1795                 tlck->flag |= tlckWRITEPAGE;
1796 
1797                 return;
1798         }
1799 
1800         /*
1801          *      page deletion: file deletion/truncation (ref. xtTruncate())
1802          *
1803          * (page will be invalidated after log is written and bmap
1804          * is updated from the page);
1805          */
1806         if (tlck->type & tlckFREE) {
1807                 /* LOG_NOREDOPAGE log for NoRedoPage filter:
1808                  * if page free from file delete, NoRedoFile filter from
1809                  * inode image of zero link count will subsume NoRedoPage
1810                  * filters for each page;
1811                  * if page free from file truncation, write NoRedoPage
1812                  * filter;
1813                  *
1814                  * update of block allocation map for the page itself:
1815                  * if page free from deletion and truncation, LOG_UPDATEMAP
1816                  * log for the page itself is generated from processing
1817                  * its parent page xad entries;
1818                  */
1819                 /* if page free from file truncation, log LOG_NOREDOPAGE
1820                  * of the deleted page for logredo() to start NoRedoPage
1821                  * filter for the page;
1822                  */
1823                 if (tblk->xflag & COMMIT_TRUNCATE) {
1824                         /* write NOREDOPAGE for the page */
1825                         lrd->type = cpu_to_le16(LOG_NOREDOPAGE);
1826                         PXDaddress(page_pxd, mp->index);
1827                         PXDlength(page_pxd,
1828                                   mp->logical_size >> tblk->sb->
1829                                   s_blocksize_bits);
1830                         lrd->backchain =
1831                             cpu_to_le32(lmLog(log, tblk, lrd, NULL));
1832 
1833                         if (tlck->type & tlckBTROOT) {
1834                                 /* Empty xtree must be logged */
1835                                 lrd->type = cpu_to_le16(LOG_REDOPAGE);
1836                                 lrd->backchain =
1837                                     cpu_to_le32(lmLog(log, tblk, lrd, tlck));
1838                         }
1839                 }
1840 
1841                 /* init LOG_UPDATEMAP of the freed extents
1842                  * XAD[XTENTRYSTART:hwm) from the deleted page itself
1843                  * for logredo() to update bmap;
1844                  */
1845                 lrd->type = cpu_to_le16(LOG_UPDATEMAP);
1846                 lrd->log.updatemap.type = cpu_to_le16(LOG_FREEXADLIST);
1847                 xtlck = (struct xtlock *) & tlck->lock;
1848                 hwm = xtlck->hwm.offset;
1849                 lrd->log.updatemap.nxd =
1850                     cpu_to_le16(hwm - XTENTRYSTART + 1);
1851                 /* reformat linelock for lmLog() */
1852                 xtlck->header.offset = XTENTRYSTART;
1853                 xtlck->header.length = hwm - XTENTRYSTART + 1;
1854                 xtlck->index = 1;
1855                 lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, tlck));
1856 
1857                 /* format a maplock for txUpdateMap() to update bmap
1858                  * to free extents of XAD[XTENTRYSTART:hwm) from the
1859                  * deleted page itself;
1860                  */
1861                 tlck->flag |= tlckUPDATEMAP;
1862                 xadlock->count = hwm - XTENTRYSTART + 1;
1863                 if ((xadlock->count <= 4) && (tblk->xflag & COMMIT_LAZY)) {
1864                         int i;
1865                         pxd_t *pxd;
1866                         /*
1867                          * Lazy commit may allow xtree to be modified before
1868                          * txUpdateMap runs.  Copy xad into linelock to
1869                          * preserve correct data.
1870                          *
1871                          * We can fit twice as many pxd's as xads in the lock
1872                          */
1873                         xadlock->flag = mlckFREEPXDLIST;
1874                         pxd = xadlock->xdlist = &xtlck->pxdlock;
1875                         for (i = 0; i < xadlock->count; i++) {
1876                                 PXDaddress(pxd,
1877                                         addressXAD(&p->xad[XTENTRYSTART + i]));
1878                                 PXDlength(pxd,
1879                                         lengthXAD(&p->xad[XTENTRYSTART + i]));
1880                                 pxd++;
1881                         }
1882                 } else {
1883                         /*
1884                          * xdlist will point into the inode's xtree; ensure
1885                          * that the transaction is not committed lazily.
1886                          */
1887                         xadlock->flag = mlckFREEXADLIST;
1888                         xadlock->xdlist = &p->xad[XTENTRYSTART];
1889                         tblk->xflag &= ~COMMIT_LAZY;
1890                 }
1891                 jfs_info("xtLog: free ip:0x%p mp:0x%p count:%d lwm:2",
1892                          tlck->ip, mp, xadlock->count);
1893 
1894                 maplock->index = 1;
1895 
1896                 /* mark page as invalid */
1897                 if (((tblk->xflag & COMMIT_PWMAP) || S_ISDIR(ip->i_mode))
1898                     && !(tlck->type & tlckBTROOT))
1899                         tlck->flag |= tlckFREEPAGE;
1900                 /*
1901                    else (tblk->xflag & COMMIT_PMAP)
1902                    ? release the page;
1903                  */
1904                 return;
1905         }
1906 
1907         /*
1908          *      page/entry truncation: file truncation (ref. xtTruncate())
1909          *
1910          *      |----------+------+------+---------------|
1911          *                 |      |      |
1912          *                 |      |     hwm - hwm before truncation
1913          *                 |     next - truncation point
1914          *                lwm - lwm before truncation
1915          * header ?
1916          */
1917         if (tlck->type & tlckTRUNCATE) {
1918                 pxd_t pxd;      /* truncated extent of xad */
1919                 int twm;
1920 
1921                 /*
1922                  * For truncation the entire linelock may be used, so it would
1923                  * be difficult to store the xad list in the linelock itself.
1924                  * Therefore, we'll just force the transaction to be committed
1925                  * synchronously, so that xtree pages won't be changed before
1926                  * txUpdateMap runs.
1927                  */
1928                 tblk->xflag &= ~COMMIT_LAZY;
1929                 lwm = xtlck->lwm.offset;
1930                 if (lwm == 0)
1931                         lwm = XTPAGEMAXSLOT;
1932                 hwm = xtlck->hwm.offset;
1933                 twm = xtlck->twm.offset;
1934 
1935                 /*
1936                  *      write log records
1937                  */
1938                 /* log after-image for logredo():
1939                  *
1940                  * logredo() will update bmap for alloc of new/extended
1941                  * extents (XAD_NEW|XAD_EXTEND) of XAD[lwm:next) from
1942                  * after-image of XADlist;
1943                  * logredo() resets (XAD_NEW|XAD_EXTEND) flag when
1944                  * applying the after-image to the meta-data page.
1945                  */
1946                 lrd->type = cpu_to_le16(LOG_REDOPAGE);
1947                 PXDaddress(page_pxd, mp->index);
1948                 PXDlength(page_pxd,
1949                           mp->logical_size >> tblk->sb->s_blocksize_bits);
1950                 lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, tlck));
1951 
1952                 /*
1953                  * truncate entry XAD[twm == next - 1]:
1954                  */
1955                 if (twm == next - 1) {
1956                         /* init LOG_UPDATEMAP for logredo() to update bmap for
1957                          * free of truncated delta extent of the truncated
1958                          * entry XAD[next - 1]:
1959                          * (xtlck->pxdlock = truncated delta extent);
1960                          */
1961                         pxdlock = (struct pxd_lock *) & xtlck->pxdlock;
1962                         /* assert(pxdlock->type & tlckTRUNCATE); */
1963                         lrd->type = cpu_to_le16(LOG_UPDATEMAP);
1964                         lrd->log.updatemap.type = cpu_to_le16(LOG_FREEPXD);
1965                         lrd->log.updatemap.nxd = cpu_to_le16(1);
1966                         lrd->log.updatemap.pxd = pxdlock->pxd;
1967                         pxd = pxdlock->pxd;     /* save to format maplock */
1968                         lrd->backchain =
1969                             cpu_to_le32(lmLog(log, tblk, lrd, NULL));
1970                 }
1971 
1972                 /*
1973                  * free entries XAD[next:hwm]:
1974                  */
1975                 if (hwm >= next) {
1976                         /* init LOG_UPDATEMAP of the freed extents
1977                          * XAD[next:hwm] from the deleted page itself
1978                          * for logredo() to update bmap;
1979                          */
1980                         lrd->type = cpu_to_le16(LOG_UPDATEMAP);
1981                         lrd->log.updatemap.type =
1982                             cpu_to_le16(LOG_FREEXADLIST);
1983                         xtlck = (struct xtlock *) & tlck->lock;
1984                         hwm = xtlck->hwm.offset;
1985                         lrd->log.updatemap.nxd =
1986                             cpu_to_le16(hwm - next + 1);
1987                         /* reformat linelock for lmLog() */
1988                         xtlck->header.offset = next;
1989                         xtlck->header.length = hwm - next + 1;
1990                         xtlck->index = 1;
1991                         lrd->backchain =
1992                             cpu_to_le32(lmLog(log, tblk, lrd, tlck));
1993                 }
1994 
1995                 /*
1996                  *      format maplock(s) for txUpdateMap() to update bmap
1997                  */
1998                 maplock->index = 0;
1999 
2000                 /*
2001                  * allocate entries XAD[lwm:next):
2002                  */
2003                 if (lwm < next) {
2004                         /* format a maplock for txUpdateMap() to update bPMAP
2005                          * for alloc of new/extended extents of XAD[lwm:next)
2006                          * from the page itself;
2007                          * txUpdateMap() resets (XAD_NEW|XAD_EXTEND) flag.
2008                          */
2009                         tlck->flag |= tlckUPDATEMAP;
2010                         xadlock->flag = mlckALLOCXADLIST;
2011                         xadlock->count = next - lwm;
2012                         xadlock->xdlist = &p->xad[lwm];
2013 
2014                         jfs_info("xtLog: alloc ip:0x%p mp:0x%p count:%d lwm:%d next:%d",
2015                                  tlck->ip, mp, xadlock->count, lwm, next);
2016                         maplock->index++;
2017                         xadlock++;
2018                 }
2019 
2020                 /*
2021                  * truncate entry XAD[twm == next - 1]:
2022                  */
2023                 if (twm == next - 1) {
2024                         /* format a maplock for txUpdateMap() to update bmap
2025                          * to free truncated delta extent of the truncated
2026                          * entry XAD[next - 1];
2027                          * (xtlck->pxdlock = truncated delta extent);
2028                          */
2029                         tlck->flag |= tlckUPDATEMAP;
2030                         pxdlock = (struct pxd_lock *) xadlock;
2031                         pxdlock->flag = mlckFREEPXD;
2032                         pxdlock->count = 1;
2033                         pxdlock->pxd = pxd;
2034 
2035                         jfs_info("xtLog: truncate ip:0x%p mp:0x%p count:%d hwm:%d",
2036                                  ip, mp, pxdlock->count, hwm);
2037                         maplock->index++;
2038                         xadlock++;
2039                 }
2040 
2041                 /*
2042                  * free entries XAD[next:hwm]:
2043                  */
2044                 if (hwm >= next) {
2045                         /* format a maplock for txUpdateMap() to update bmap
2046                          * to free extents of XAD[next:hwm] from the deleted
2047                          * page itself;
2048                          */
2049                         tlck->flag |= tlckUPDATEMAP;
2050                         xadlock->flag = mlckFREEXADLIST;
2051                         xadlock->count = hwm - next + 1;
2052                         xadlock->xdlist = &p->xad[next];
2053 
2054                         jfs_info("xtLog: free ip:0x%p mp:0x%p count:%d next:%d hwm:%d",
2055                                  tlck->ip, mp, xadlock->count, next, hwm);
2056                         maplock->index++;
2057                 }
2058 
2059                 /* mark page as homeward bound */
2060                 tlck->flag |= tlckWRITEPAGE;
2061         }
2062         return;
2063 }
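/*
 * A stand-alone sketch (inert under #if 0) of the truncation bookkeeping
 * xtLog() performs above: from the linelock water marks it derives up to
 * three map updates.  The slot numbers are made up for illustration.
 */
#if 0
#include <stdio.h>

int main(void)
{
	int lwm = 3;	/* low water mark: first slot dirtied */
	int twm = 5;	/* truncate mark: partially truncated entry */
	int next = 6;	/* truncation point: new nextindex */
	int hwm = 8;	/* high water mark: last slot dirtied */

	if (lwm < next)		/* XAD[lwm:next) were (re)allocated */
		printf("alloc maplock for XAD[%d:%d)\n", lwm, next);
	if (twm == next - 1)	/* delta extent of the truncated entry */
		printf("free maplock for delta extent of XAD[%d]\n", twm);
	if (hwm >= next)	/* XAD[next:hwm] are wholly freed */
		printf("free maplock for XAD[%d:%d]\n", next, hwm);
	return 0;
}
#endif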
2064 
2065 /*
2066  *      mapLog()
2067  *
2068  * function:    log from maplock of freed data extents;
2069  */
2070 static void mapLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
2071                    struct tlock * tlck)
2072 {
2073         struct pxd_lock *pxdlock;
2074         int i, nlock;
2075         pxd_t *pxd;
2076 
2077         /*
2078          *      page relocation: free the source page extent
2079          *
2080          * a maplock for txUpdateMap() for free of the page
2081          * has been formatted at txLock() time saving the src
2082          * relocated page address;
2083          */
2084         if (tlck->type & tlckRELOCATE) {
2085                 /* log LOG_NOREDOPAGE of the old relocated page
2086                  * for logredo() to start NoRedoPage filter;
2087                  */
2088                 lrd->type = cpu_to_le16(LOG_NOREDOPAGE);
2089                 pxdlock = (struct pxd_lock *) & tlck->lock;
2090                 pxd = &lrd->log.redopage.pxd;
2091                 *pxd = pxdlock->pxd;
2092                 lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, NULL));
2093 
2094                 /* (N.B. currently, logredo() does NOT update bmap
2095                  * for free of the page itself for (LOG_XTREE|LOG_NOREDOPAGE);
2096                  * if page free from relocation, LOG_UPDATEMAP log is
2097                  * specifically generated now for logredo()
2098                  * to update bmap for free of src relocated page;
2099                  * (new flag LOG_RELOCATE may be introduced which will
2100                  * inform logredo() to start NORedoPage filter and also
2101                  * update block allocation map at the same time, thus
2102                  * avoiding an extra log write);
2103                  */
2104                 lrd->type = cpu_to_le16(LOG_UPDATEMAP);
2105                 lrd->log.updatemap.type = cpu_to_le16(LOG_FREEPXD);
2106                 lrd->log.updatemap.nxd = cpu_to_le16(1);
2107                 lrd->log.updatemap.pxd = pxdlock->pxd;
2108                 lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, NULL));
2109 
2110                 /* a maplock for txUpdateMap() for free of the page
2111                  * has been formatted at txLock() time;
2112                  */
2113                 tlck->flag |= tlckUPDATEMAP;
2114                 return;
2115         }
2116 
2117 
2118         /*
2119          * Otherwise it's not a relocate request
2120          */
2121         else {
2122                 /* log LOG_UPDATEMAP for logredo() to update bmap for
2123                  * free of truncated/relocated delta extent of the data;
2124                  * e.g.: external EA extent, relocated/truncated extent
2125                  * from xtTailgate();
2126                  */
2127                 lrd->type = cpu_to_le16(LOG_UPDATEMAP);
2128                 pxdlock = (struct pxd_lock *) & tlck->lock;
2129                 nlock = pxdlock->index;
2130                 for (i = 0; i < nlock; i++, pxdlock++) {
2131                         if (pxdlock->flag & mlckALLOCPXD)
2132                                 lrd->log.updatemap.type =
2133                                     cpu_to_le16(LOG_ALLOCPXD);
2134                         else
2135                                 lrd->log.updatemap.type =
2136                                     cpu_to_le16(LOG_FREEPXD);
2137                         lrd->log.updatemap.nxd = cpu_to_le16(1);
2138                         lrd->log.updatemap.pxd = pxdlock->pxd;
2139                         lrd->backchain =
2140                             cpu_to_le32(lmLog(log, tblk, lrd, NULL));
2141                         jfs_info("mapLog: xaddr:0x%lx xlen:0x%x",
2142                                  (ulong) addressPXD(&pxdlock->pxd),
2143                                  lengthPXD(&pxdlock->pxd));
2144                 }
2145 
2146                 /* update bmap */
2147                 tlck->flag |= tlckUPDATEMAP;
2148         }
2149 }
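/*
 * A stand-alone sketch (inert under #if 0) of the loop at the end of
 * mapLog() above: walk an array of extent locks and emit one UPDATEMAP
 * record per extent, choosing ALLOCPXD or FREEPXD from the lock's flag.
 * The "log" is just stdout and the flag value is illustrative.
 */
#if 0
#include <stdio.h>

#define M_ALLOCPXD	0x1	/* stands in for mlckALLOCPXD */

struct pxd_lock_model {
	int flag;
	unsigned long addr;	/* extent address */
	unsigned int len;	/* extent length */
};

int main(void)
{
	struct pxd_lock_model locks[] = {
		{ M_ALLOCPXD, 1000, 8 },	/* newly allocated extent */
		{ 0,          2000, 4 },	/* extent being freed */
	};
	int i, nlock = 2;

	for (i = 0; i < nlock; i++)
		printf("UPDATEMAP %s xaddr:0x%lx xlen:0x%x\n",
		       (locks[i].flag & M_ALLOCPXD) ? "ALLOCPXD" : "FREEPXD",
		       locks[i].addr, locks[i].len);
	return 0;
}
#endif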
2150 
2151 /*
2152  *      txEA()
2153  *
2154  * function:    acquire maplock for EA/ACL extents or
2155  *              set COMMIT_INLINE flag;
2156  */
2157 void txEA(tid_t tid, struct inode *ip, dxd_t * oldea, dxd_t * newea)
2158 {
2159         struct tlock *tlck = NULL;
2160         struct pxd_lock *maplock = NULL, *pxdlock = NULL;
2161 
2162         /*
2163          * format maplock for alloc of new EA extent
2164          */
2165         if (newea) {
2166                 /* Since the newea could be a completely zeroed entry, we need to
2167                  * check for the two flags which indicate we should actually
2168                  * commit new EA data
2169                  */
2170                 if (newea->flag & DXD_EXTENT) {
2171                         tlck = txMaplock(tid, ip, tlckMAP);
2172                         maplock = (struct pxd_lock *) & tlck->lock;
2173                         pxdlock = (struct pxd_lock *) maplock;
2174                         pxdlock->flag = mlckALLOCPXD;
2175                         PXDaddress(&pxdlock->pxd, addressDXD(newea));
2176                         PXDlength(&pxdlock->pxd, lengthDXD(newea));
2177                         pxdlock++;
2178                         maplock->index = 1;
2179                 } else if (newea->flag & DXD_INLINE) {
2180                         tlck = NULL;
2181 
2182                         set_cflag(COMMIT_Inlineea, ip);
2183                 }
2184         }
2185 
2186         /*
2187          * format maplock for free of old EA extent
2188          */
2189         if (!test_cflag(COMMIT_Nolink, ip) && oldea->flag & DXD_EXTENT) {
2190                 if (tlck == NULL) {
2191                         tlck = txMaplock(tid, ip, tlckMAP);
2192                         maplock = (struct pxd_lock *) & tlck->lock;
2193                         pxdlock = (struct pxd_lock *) maplock;
2194                         maplock->index = 0;
2195                 }
2196                 pxdlock->flag = mlckFREEPXD;
2197                 PXDaddress(&pxdlock->pxd, addressDXD(oldea));
2198                 PXDlength(&pxdlock->pxd, lengthDXD(oldea));
2199                 maplock->index++;
2200         }
2201 }
2202 
2203 /*
2204  *      txForce()
2205  *
2206  * function: synchronously write pages locked by transaction
2207  *           after txLog() but before txUpdateMap();
2208  */
2209 static void txForce(struct tblock * tblk)
2210 {
2211         struct tlock *tlck;
2212         lid_t lid, next;
2213         struct metapage *mp;
2214 
2215         /*
2216          * reverse the order of transaction tlocks in
2217          * careful update order of address index pages
2218          * (right to left, bottom up)
2219          */
2220         tlck = lid_to_tlock(tblk->next);
2221         lid = tlck->next;
2222         tlck->next = 0;
2223         while (lid) {
2224                 tlck = lid_to_tlock(lid);
2225                 next = tlck->next;
2226                 tlck->next = tblk->next;
2227                 tblk->next = lid;
2228                 lid = next;
2229         }
2230 
2231         /*
2232          * synchronously write the page, and
2233          * hold the page for txUpdateMap();
2234          */
2235         for (lid = tblk->next; lid; lid = next) {
2236                 tlck = lid_to_tlock(lid);
2237                 next = tlck->next;
2238 
2239                 if ((mp = tlck->mp) != NULL &&
2240                     (tlck->type & tlckBTROOT) == 0) {
2241                         assert(mp->xflag & COMMIT_PAGE);
2242 
2243                         if (tlck->flag & tlckWRITEPAGE) {
2244                                 tlck->flag &= ~tlckWRITEPAGE;
2245 
2246                                 /* do not release page to freelist */
2247                                 force_metapage(mp);
2248 #if 0
2249                                 /*
2250                                  * The "right" thing to do here is to
2251                                  * synchronously write the metadata.
2252                                  * With the current implementation this
2253                                  * is hard since write_metapage requires
2254                                  * us to kunmap & remap the page.  If we
2255                                  * have tlocks pointing into the metadata
2256                                  * pages, we don't want to do this.  I think
2257                                  * we can get by with synchronously writing
2258                                  * the pages when they are released.
2259                                  */
2260                                 assert(mp->nohomeok);
2261                                 set_bit(META_dirty, &mp->flag);
2262                                 set_bit(META_sync, &mp->flag);
2263 #endif
2264                         }
2265                 }
2266         }
2267 }
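/*
 * A stand-alone sketch (inert under #if 0) of the reversal loop at the
 * top of txForce(): tlocks are chained by small lid indices into a global
 * table rather than by pointers, and the chain is reversed by rethreading
 * those indices.  The pool contents are illustrative.
 */
#if 0
#include <stdio.h>

#define NIL 0			/* lid 0 terminates a chain */

static struct { int next; } pool[8];

int main(void)
{
	int head, lid, next;

	/* build the chain 1 -> 2 -> 3 */
	head = 1;
	pool[1].next = 2;
	pool[2].next = 3;
	pool[3].next = NIL;

	/* reverse it the way txForce() reverses tblk->next */
	lid = pool[head].next;
	pool[head].next = NIL;
	while (lid != NIL) {
		next = pool[lid].next;
		pool[lid].next = head;
		head = lid;
		lid = next;
	}

	for (lid = head; lid != NIL; lid = pool[lid].next)
		printf("%d ", lid);		/* prints: 3 2 1 */
	printf("\n");
	return 0;
}
#endif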
2268 
2269 /*
2270  *      txUpdateMap()
2271  *
2272  * function:    update persistent allocation map (and working map
2273  *              if appropriate);
2274  *
2275  * parameter:
2276  */
2277 static void txUpdateMap(struct tblock * tblk)
2278 {
2279         struct inode *ip;
2280         struct inode *ipimap;
2281         lid_t lid;
2282         struct tlock *tlck;
2283         struct maplock *maplock;
2284         struct pxd_lock pxdlock;
2285         int maptype;
2286         int k, nlock;
2287         struct metapage *mp = NULL;
2288 
2289         ipimap = JFS_SBI(tblk->sb)->ipimap;
2290 
2291         maptype = (tblk->xflag & COMMIT_PMAP) ? COMMIT_PMAP : COMMIT_PWMAP;
2292 
2293 
2294         /*
2295          *      update block allocation map
2296          *
2297          * update allocation state in pmap (and wmap) and
2298          * update lsn of the pmap page;
2299          */
2300         /*
2301          * scan each tlock/page of transaction for block allocation/free:
2302          *
2303          * for each tlock/page of transaction, update map.
2304          *  ? are there tlocks for pmap and pwmap at the same time ?
2305          */
2306         for (lid = tblk->next; lid; lid = tlck->next) {
2307                 tlck = lid_to_tlock(lid);
2308 
2309                 if ((tlck->flag & tlckUPDATEMAP) == 0)
2310                         continue;
2311 
2312                 if (tlck->flag & tlckFREEPAGE) {
2313                         /*
2314                          * Another thread may attempt to reuse freed space
2315                          * immediately, so we want to get rid of the metapage
2316                          * before anyone else has a chance to get it.
2317                          * Lock metapage, update maps, then invalidate
2318                          * the metapage.
2319                          */
2320                         mp = tlck->mp;
2321                         ASSERT(mp->xflag & COMMIT_PAGE);
2322                         grab_metapage(mp);
2323                 }
2324 
2325                 /*
2326                  * extent list:
2327                  * . in-line PXD list:
2328                  * . out-of-line XAD list:
2329                  */
2330                 maplock = (struct maplock *) & tlck->lock;
2331                 nlock = maplock->index;
2332 
2333                 for (k = 0; k < nlock; k++, maplock++) {
2334                         /*
2335                          * allocate blocks in persistent map:
2336                          *
2337                          * blocks have been allocated from wmap at alloc time;
2338                          */
2339                         if (maplock->flag & mlckALLOC) {
2340                                 txAllocPMap(ipimap, maplock, tblk);
2341                         }
2342                         /*
2343                          * free blocks in persistent and working map:
2344                          * blocks will be freed in pmap and then in wmap;
2345                          *
2346                          * ? tblock specifies the PMAP/PWMAP based upon
2347                          * transaction
2348                          *
2349                          * free blocks in persistent map:
2350                          * blocks will be freed from wmap at last reference
2351                          * release of the object for regular files;
2352                          *
2353                          * Always free blocks from both persistent & working
2354                          * maps for directories
2355                          */
2356                         else {  /* (maplock->flag & mlckFREE) */
2357 
2358                                 if (tlck->flag & tlckDIRECTORY)
2359                                         txFreeMap(ipimap, maplock,
2360                                                   tblk, COMMIT_PWMAP);
2361                                 else
2362                                         txFreeMap(ipimap, maplock,
2363                                                   tblk, maptype);
2364                         }
2365                 }
2366                 if (tlck->flag & tlckFREEPAGE) {
2367                         if (!(tblk->flag & tblkGC_LAZY)) {
2368                                 /* This is equivalent to txRelease */
2369                                 ASSERT(mp->lid == lid);
2370                                 tlck->mp->lid = 0;
2371                         }
2372                         assert(mp->nohomeok == 1);
2373                         metapage_homeok(mp);
2374                         discard_metapage(mp);
2375                         tlck->mp = NULL;
2376                 }
2377         }
2378         /*
2379          *      update inode allocation map
2380          *
2381          * update allocation state in pmap and
2382          * update lsn of the pmap page;
2383          * update in-memory inode flag/state
2384          *
2385          * unlock mapper/write lock
2386          */
2387         if (tblk->xflag & COMMIT_CREATE) {
2388                 diUpdatePMap(ipimap, tblk->ino, false, tblk);
2389                 /* update persistent block allocation map
2390                  * for the allocation of inode extent;
2391                  */
2392                 pxdlock.flag = mlckALLOCPXD;
2393                 pxdlock.pxd = tblk->u.ixpxd;
2394                 pxdlock.index = 1;
2395                 txAllocPMap(ipimap, (struct maplock *) & pxdlock, tblk);
2396         } else if (tblk->xflag & COMMIT_DELETE) {
2397                 ip = tblk->u.ip;
2398                 diUpdatePMap(ipimap, ip->i_ino, true, tblk);
2399                 iput(ip);
2400         }
2401 }
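/*
 * A stand-alone sketch (inert under #if 0) of the two-map discipline
 * applied above: blocks are taken from the working map (wmap) at
 * allocation time, recorded in the persistent map (pmap) at commit, and
 * on free are cleared from pmap before wmap so a crash can never hand
 * the blocks back early.  An 8-block volume is modeled as two bytes.
 */
#if 0
#include <stdio.h>

static unsigned char wmap, pmap;	/* bit n == block n in use */

static void alloc_commit(int blk)
{
	wmap |= 1 << blk;	/* done at allocation time (working map) */
	pmap |= 1 << blk;	/* done by txAllocPMap() at commit */
}

static void free_commit(int blk)
{
	pmap &= ~(1 << blk);	/* txFreeMap(): persistent map first */
	wmap &= ~(1 << blk);	/* then the working map (dbFree) */
}

int main(void)
{
	alloc_commit(3);
	printf("after alloc: wmap=%02x pmap=%02x\n", wmap, pmap);
	free_commit(3);
	printf("after free:  wmap=%02x pmap=%02x\n", wmap, pmap);
	return 0;
}
#endif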
2402 
2403 /*
2404  *      txAllocPMap()
2405  *
2406  * function: allocate from persistent map;
2407  *
2408  * parameter:
2409  *      ipbmap  -
2410  *      maplock -
2411  *              xad list:
2412  *              pxd:
2413  *
2414  *      maptype -
2415  *              allocate from persistent map;
2416  *              free from persistent map;
2417  *              (e.g., tmp file - free from working map at release
2418  *               of last reference);
2419  *              free from persistent and working map;
2420  *
2421  *      lsn     - log sequence number;
2422  */
2423 static void txAllocPMap(struct inode *ip, struct maplock * maplock,
2424                         struct tblock * tblk)
2425 {
2426         struct inode *ipbmap = JFS_SBI(ip->i_sb)->ipbmap;
2427         struct xdlistlock *xadlistlock;
2428         xad_t *xad;
2429         s64 xaddr;
2430         int xlen;
2431         struct pxd_lock *pxdlock;
2432         struct xdlistlock *pxdlistlock;
2433         pxd_t *pxd;
2434         int n;
2435 
2436         /*
2437          * allocate from persistent map;
2438          */
2439         if (maplock->flag & mlckALLOCXADLIST) {
2440                 xadlistlock = (struct xdlistlock *) maplock;
2441                 xad = xadlistlock->xdlist;
2442                 for (n = 0; n < xadlistlock->count; n++, xad++) {
2443                         if (xad->flag & (XAD_NEW | XAD_EXTENDED)) {
2444                                 xaddr = addressXAD(xad);
2445                                 xlen = lengthXAD(xad);
2446                                 dbUpdatePMap(ipbmap, false, xaddr,
2447                                              (s64) xlen, tblk);
2448                                 xad->flag &= ~(XAD_NEW | XAD_EXTENDED);
2449                                 jfs_info("allocPMap: xaddr:0x%lx xlen:%d",
2450                                          (ulong) xaddr, xlen);
2451                         }
2452                 }
2453         } else if (maplock->flag & mlckALLOCPXD) {
2454                 pxdlock = (struct pxd_lock *) maplock;
2455                 xaddr = addressPXD(&pxdlock->pxd);
2456                 xlen = lengthPXD(&pxdlock->pxd);
2457                 dbUpdatePMap(ipbmap, false, xaddr, (s64) xlen, tblk);
2458                 jfs_info("allocPMap: xaddr:0x%lx xlen:%d", (ulong) xaddr, xlen);
2459         } else {                /* (maplock->flag & mlckALLOCPXDLIST) */
2460 
2461                 pxdlistlock = (struct xdlistlock *) maplock;
2462                 pxd = pxdlistlock->xdlist;
2463                 for (n = 0; n < pxdlistlock->count; n++, pxd++) {
2464                         xaddr = addressPXD(pxd);
2465                         xlen = lengthPXD(pxd);
2466                         dbUpdatePMap(ipbmap, false, xaddr, (s64) xlen,
2467                                      tblk);
2468                         jfs_info("allocPMap: xaddr:0x%lx xlen:%d",
2469                                  (ulong) xaddr, xlen);
2470                 }
2471         }
2472 }
2473 
2474 /*
2475  *      txFreeMap()
2476  *
2477  * function:    free from persistent and/or working map;
2478  *
2479  * todo: optimization
2480  */
2481 void txFreeMap(struct inode *ip,
2482                struct maplock * maplock, struct tblock * tblk, int maptype)
2483 {
2484         struct inode *ipbmap = JFS_SBI(ip->i_sb)->ipbmap;
2485         struct xdlistlock *xadlistlock;
2486         xad_t *xad;
2487         s64 xaddr;
2488         int xlen;
2489         struct pxd_lock *pxdlock;
2490         struct xdlistlock *pxdlistlock;
2491         pxd_t *pxd;
2492         int n;
2493 
2494         jfs_info("txFreeMap: tblk:0x%p maplock:0x%p maptype:0x%x",
2495                  tblk, maplock, maptype);
2496 
2497         /*
2498          * free from persistent map;
2499          */
2500         if (maptype == COMMIT_PMAP || maptype == COMMIT_PWMAP) {
2501                 if (maplock->flag & mlckFREEXADLIST) {
2502                         xadlistlock = (struct xdlistlock *) maplock;
2503                         xad = xadlistlock->xdlist;
2504                         for (n = 0; n < xadlistlock->count; n++, xad++) {
2505                                 if (!(xad->flag & XAD_NEW)) {
2506                                         xaddr = addressXAD(xad);
2507                                         xlen = lengthXAD(xad);
2508                                         dbUpdatePMap(ipbmap, true, xaddr,
2509                                                      (s64) xlen, tblk);
2510                                         jfs_info("freePMap: xaddr:0x%lx xlen:%d",
2511                                                  (ulong) xaddr, xlen);
2512                                 }
2513                         }
2514                 } else if (maplock->flag & mlckFREEPXD) {
2515                         pxdlock = (struct pxd_lock *) maplock;
2516                         xaddr = addressPXD(&pxdlock->pxd);
2517                         xlen = lengthPXD(&pxdlock->pxd);
2518                         dbUpdatePMap(ipbmap, true, xaddr, (s64) xlen,
2519                                      tblk);
2520                         jfs_info("freePMap: xaddr:0x%lx xlen:%d",
2521                                  (ulong) xaddr, xlen);
2522                 } else {        /* (maplock->flag & mlckFREEPXDLIST) */
2523 
2524                         pxdlistlock = (struct xdlistlock *) maplock;
2525                         pxd = pxdlistlock->xdlist;
2526                         for (n = 0; n < pxdlistlock->count; n++, pxd++) {
2527                                 xaddr = addressPXD(pxd);
2528                                 xlen = lengthPXD(pxd);
2529                                 dbUpdatePMap(ipbmap, true, xaddr,
2530                                              (s64) xlen, tblk);
2531                                 jfs_info("freePMap: xaddr:0x%lx xlen:%d",
2532                                          (ulong) xaddr, xlen);
2533                         }
2534                 }
2535         }
2536 
2537         /*
2538          * free from working map;
2539          */
2540         if (maptype == COMMIT_PWMAP || maptype == COMMIT_WMAP) {
2541                 if (maplock->flag & mlckFREEXADLIST) {
2542                         xadlistlock = (struct xdlistlock *) maplock;
2543                         xad = xadlistlock->xdlist;
2544                         for (n = 0; n < xadlistlock->count; n++, xad++) {
2545                                 xaddr = addressXAD(xad);
2546                                 xlen = lengthXAD(xad);
2547                                 dbFree(ip, xaddr, (s64) xlen);
2548                                 xad->flag = 0;
2549                                 jfs_info("freeWMap: xaddr:0x%lx xlen:%d",
2550                                          (ulong) xaddr, xlen);
2551                         }
2552                 } else if (maplock->flag & mlckFREEPXD) {
2553                         pxdlock = (struct pxd_lock *) maplock;
2554                         xaddr = addressPXD(&pxdlock->pxd);
2555                         xlen = lengthPXD(&pxdlock->pxd);
2556                         dbFree(ip, xaddr, (s64) xlen);
2557                         jfs_info("freeWMap: xaddr:0x%lx xlen:%d",
2558                                  (ulong) xaddr, xlen);
2559                 } else {        /* (maplock->flag & mlckFREEPXDLIST) */
2560 
2561                         pxdlistlock = (struct xdlistlock *) maplock;
2562                         pxd = pxdlistlock->xdlist;
2563                         for (n = 0; n < pxdlistlock->count; n++, pxd++) {
2564                                 xaddr = addressPXD(pxd);
2565                                 xlen = lengthPXD(pxd);
2566                                 dbFree(ip, xaddr, (s64) xlen);
2567                                 jfs_info("freeWMap: xaddr:0x%lx xlen:%d",
2568                                          (ulong) xaddr, xlen);
2569                         }
2570                 }
2571         }
2572 }
2573 
2574 /*
2575  *      txFreelock()
2576  *
2577  * function:    remove tlock from inode anonymous locklist
2578  */
2579 void txFreelock(struct inode *ip)
2580 {
2581         struct jfs_inode_info *jfs_ip = JFS_IP(ip);
2582         struct tlock *xtlck, *tlck;
2583         lid_t xlid = 0, lid;
2584 
2585         if (!jfs_ip->atlhead)
2586                 return;
2587 
2588         TXN_LOCK();
2589         xtlck = (struct tlock *) &jfs_ip->atlhead;
2590 
2591         while ((lid = xtlck->next) != 0) {
2592                 tlck = lid_to_tlock(lid);
2593                 if (tlck->flag & tlckFREELOCK) {
2594                         xtlck->next = tlck->next;
2595                         txLockFree(lid);
2596                 } else {
2597                         xtlck = tlck;
2598                         xlid = lid;
2599                 }
2600         }
2601 
2602         if (jfs_ip->atlhead)
2603                 jfs_ip->atltail = xlid;
2604         else {
2605                 jfs_ip->atltail = 0;
2606                 /*
2607                  * If inode was on anon_list, remove it
2608                  */
2609                 list_del_init(&jfs_ip->anon_inode_list);
2610         }
2611         TXN_UNLOCK();
2612 }
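/*
 * A stand-alone sketch (inert under #if 0) of the unlink trick in
 * txFreelock() above: the list head is cast to a fake first node (valid
 * because ->next is the first field), so removing an entry needs no
 * special case at the front of the chain.  The layout and pool contents
 * are illustrative.
 */
#if 0
#include <stdio.h>

#define NIL 0

struct node {
	int next;	/* must stay the first member for the cast */
	int dead;	/* stands in for tlckFREELOCK */
};

static struct node pool[8];

int main(void)
{
	int head, lid;
	struct node *prev;

	/* chain 1 -> 2 -> 3, with node 2 marked for removal */
	head = 1;
	pool[1].next = 2;
	pool[2].next = 3;
	pool[2].dead = 1;
	pool[3].next = NIL;

	/* mirrors: xtlck = (struct tlock *) &jfs_ip->atlhead; */
	prev = (struct node *) &head;
	while ((lid = prev->next) != NIL) {
		if (pool[lid].dead)
			prev->next = pool[lid].next;	/* unlink */
		else
			prev = &pool[lid];
	}

	for (lid = head; lid != NIL; lid = pool[lid].next)
		printf("%d ", lid);		/* prints: 1 3 */
	printf("\n");
	return 0;
}
#endif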
2613 
2614 /*
2615  *      txAbort()
2616  *
2617  * function: abort tx before commit;
2618  *
2619  * frees line-locks and segment locks for all
2620  * segments in comdata structure.
2621  * Optionally sets state of file-system to FM_DIRTY in super-block.
2622  * log ages of in-memory page frames held by the caller
2623  * are reset to 0 (to avoid logwrap).
2624  */
2625 void txAbort(tid_t tid, int dirty)
2626 {
2627         lid_t lid, next;
2628         struct metapage *mp;
2629         struct tblock *tblk = tid_to_tblock(tid);
2630         struct tlock *tlck;
2631 
2632         /*
2633          * free tlocks of the transaction
2634          */
2635         for (lid = tblk->next; lid; lid = next) {
2636                 tlck = lid_to_tlock(lid);
2637                 next = tlck->next;
2638                 mp = tlck->mp;
2639                 JFS_IP(tlck->ip)->xtlid = 0;
2640 
2641                 if (mp) {
2642                         mp->lid = 0;
2643 
2644                         /*
2645                          * reset lsn of page to avoid logwarap:
2646                          *
2647                          * (page may have been previously committed by another
2648                          * transaction(s) but has not been paged, i.e.,
2649                          * it may be on logsync list even though it has not
2650                          * been logged for the current tx.)
2651                          */
2652                         if (mp->xflag & COMMIT_PAGE && mp->lsn)
2653                                 LogSyncRelease(mp);
2654                 }
2655                 /* insert tlock at head of freelist */
2656                 TXN_LOCK();
2657                 txLockFree(lid);
2658                 TXN_UNLOCK();
2659         }
2660 
2661         /* caller will free the transaction block */
2662 
2663         tblk->next = tblk->last = 0;
2664 
2665         /*
2666          * mark filesystem dirty
2667          */
2668         if (dirty)
2669                 jfs_error(tblk->sb, "\n");
2670 
2671         return;
2672 }
2673 
2674 /*
2675  *      txLazyCommit(void)
2676  *
2677  *      All transactions except those changing ipimap (COMMIT_FORCE) are
2678  *      processed by this routine.  This ensures that the inode and block
2679  *      allocation maps are updated in order.  For synchronous transactions,
2680  *      let the user thread finish processing after txUpdateMap() is called.
2681  */
2682 static void txLazyCommit(struct tblock * tblk)
2683 {
2684         struct jfs_log *log;
2685 
2686         while (((tblk->flag & tblkGC_READY) == 0) &&
2687                ((tblk->flag & tblkGC_UNLOCKED) == 0)) {
2688                 /* We must have gotten ahead of the user thread
2689                  */
2690                 jfs_info("jfs_lazycommit: tblk 0x%p not unlocked", tblk);
2691                 yield();
2692         }
2693 
2694         jfs_info("txLazyCommit: processing tblk 0x%p", tblk);
2695 
2696         txUpdateMap(tblk);
2697 
2698         log = (struct jfs_log *) JFS_SBI(tblk->sb)->log;
2699 
2700         spin_lock_irq(&log->gclock);    // LOGGC_LOCK
2701 
2702         tblk->flag |= tblkGC_COMMITTED;
2703 
2704         if (tblk->flag & tblkGC_READY)
2705                 log->gcrtc--;
2706 
2707         wake_up_all(&tblk->gcwait);     // LOGGC_WAKEUP
2708 
2709         /*
2710          * Can't release log->gclock until we've tested tblk->flag
2711          */
2712         if (tblk->flag & tblkGC_LAZY) {
2713                 spin_unlock_irq(&log->gclock);  // LOGGC_UNLOCK
2714                 txUnlock(tblk);
2715                 tblk->flag &= ~tblkGC_LAZY;
2716                 txEnd(tblk - TxBlock);  /* Convert back to tid */
2717         } else
2718                 spin_unlock_irq(&log->gclock);  // LOGGC_UNLOCK
2719 
2720         jfs_info("txLazyCommit: done: tblk = 0x%p", tblk);
2721 }
2722 
2723 /*
2724  *      jfs_lazycommit(void)
2725  *
2726  *      To be run as a kernel daemon.  If lbmIODone is called in an interrupt
2727  *      context, or where blocking is not wanted, this routine will process
2728  *      committed transactions from the unlock queue.
2729  */
2730 int jfs_lazycommit(void *arg)
2731 {
2732         int WorkDone;
2733         struct tblock *tblk;
2734         unsigned long flags;
2735         struct jfs_sb_info *sbi;
2736 
2737         do {
2738                 LAZY_LOCK(flags);
2739                 jfs_commit_thread_waking = 0;   /* OK to wake another thread */
2740                 while (!list_empty(&TxAnchor.unlock_queue)) {
2741                         WorkDone = 0;
2742                         list_for_each_entry(tblk, &TxAnchor.unlock_queue,
2743                                             cqueue) {
2744 
2745                                 sbi = JFS_SBI(tblk->sb);
2746                                 /*
2747                                  * For each volume, the transactions must be
2748                                  * handled in order.  If another commit thread
2749                                  * is handling a tblk for this superblock,
2750                                  * skip it
2751                                  */
2752                                 if (sbi->commit_state & IN_LAZYCOMMIT)
2753                                         continue;
2754 
2755                                 sbi->commit_state |= IN_LAZYCOMMIT;
2756                                 WorkDone = 1;
2757 
2758                                 /*
2759                                  * Remove transaction from queue
2760                                  */
2761                                 list_del(&tblk->cqueue);
2762 
2763                                 LAZY_UNLOCK(flags);
2764                                 txLazyCommit(tblk);
2765                                 LAZY_LOCK(flags);
2766 
2767                                 sbi->commit_state &= ~IN_LAZYCOMMIT;
2768                                 /*
2769                                  * Don't continue in the for loop.  (We can't
2770                                  * anyway, it's unsafe!)  We want to go back to
2771                                  * the beginning of the list.
2772                                  */
2773                                 break;
2774                         }
2775 
2776                         /* If there was nothing to do, don't continue */
2777                         if (!WorkDone)
2778                                 break;
2779                 }
2780                 /* In case a wakeup came while all threads were active */
2781                 jfs_commit_thread_waking = 0;
2782 
2783                 if (freezing(current)) {
2784                         LAZY_UNLOCK(flags);
2785                         try_to_freeze();
2786                 } else {
2787                         DECLARE_WAITQUEUE(wq, current);
2788 
2789                         add_wait_queue(&jfs_commit_thread_wait, &wq);
2790                         set_current_state(TASK_INTERRUPTIBLE);
2791                         LAZY_UNLOCK(flags);
2792                         schedule();
2793                         remove_wait_queue(&jfs_commit_thread_wait, &wq);
2794                 }
2795         } while (!kthread_should_stop());
2796 
2797         if (!list_empty(&TxAnchor.unlock_queue))
2798                 jfs_err("jfs_lazycommit being killed w/pending transactions!");
2799         else
2800                 jfs_info("jfs_lazycommit being killed");
2801         return 0;
2802 }
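/*
 * A stand-alone sketch (inert under #if 0) of the restart discipline in
 * jfs_lazycommit() above: once the queue lock is dropped to do the real
 * work, any saved iterator may be stale, so each pass scans from the head
 * again and stops only when a full scan finds nothing to do.  The lock is
 * imagined away and "committing" just prints.
 */
#if 0
#include <stdio.h>

struct tb {
	int id;
	int sb_busy;		/* stands in for IN_LAZYCOMMIT */
	struct tb *next;
};

static struct tb c = { 3, 0, NULL };
static struct tb b = { 2, 1, &c };	/* its superblock is busy: skip */
static struct tb a = { 1, 0, &b };
static struct tb *queue = &a;

int main(void)
{
	int work_done;

	do {
		struct tb **pp, *t;

		work_done = 0;
		/* LAZY_LOCK(); scan from the head of the queue */
		for (pp = &queue; *pp; pp = &(*pp)->next) {
			if ((*pp)->sb_busy)
				continue;
			t = *pp;
			*pp = t->next;		/* list_del() */
			/* LAZY_UNLOCK(); do the slow work unlocked */
			printf("committed tblk %d\n", t->id);
			work_done = 1;
			break;		/* iterator stale: restart scan */
		}
	} while (work_done);
	return 0;
}
#endif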
2803 
2804 void txLazyUnlock(struct tblock * tblk)
2805 {
2806         unsigned long flags;
2807 
2808         LAZY_LOCK(flags);
2809 
2810         list_add_tail(&tblk->cqueue, &TxAnchor.unlock_queue);
2811         /*
2812          * Don't wake up a commit thread if there is already one servicing
2813          * this superblock, or if the last one we woke up hasn't started yet.
2814          */
2815         if (!(JFS_SBI(tblk->sb)->commit_state & IN_LAZYCOMMIT) &&
2816             !jfs_commit_thread_waking) {
2817                 jfs_commit_thread_waking = 1;
2818                 wake_up(&jfs_commit_thread_wait);
2819         }
2820         LAZY_UNLOCK(flags);
2821 }
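/*
 * A stand-alone sketch (inert under #if 0) of the wake-up throttle in
 * txLazyUnlock() above: one flag, only ever written under the queue lock,
 * suppresses redundant wake_up() calls until the last thread woken has
 * actually started running.  Names are illustrative.
 */
#if 0
#include <stdio.h>

static int thread_waking;	/* models jfs_commit_thread_waking */

static void queue_tblk(int sb_busy)
{
	/* caller holds the queue lock here */
	if (!sb_busy && !thread_waking) {
		thread_waking = 1;
		printf("wake_up()\n");
	} else {
		printf("no wake_up() needed\n");
	}
}

int main(void)
{
	queue_tblk(0);		/* first tblk: wake a commit thread */
	queue_tblk(0);		/* woken thread not running yet: skip */
	thread_waking = 0;	/* cleared when the thread runs */
	queue_tblk(0);		/* wake again */
	return 0;
}
#endif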
2822 
2823 static void LogSyncRelease(struct metapage * mp)
2824 {
2825         struct jfs_log *log = mp->log;
2826 
2827         assert(mp->nohomeok);
2828         assert(log);
2829         metapage_homeok(mp);
2830 }
2831 
2832 /*
2833  *      txQuiesce
2834  *
2835  *      Block all new transactions and push anonymous transactions to
2836  *      completion
2837  *
2838  *      This does almost the same thing as jfs_sync below.  We don't
2839  *      worry about deadlocking when jfs_tlocks_low is set, since we
2840  *      expect jfs_sync to free up tlocks and resolve that condition.
2841  */
2842 void txQuiesce(struct super_block *sb)
2843 {
2844         struct inode *ip;
2845         struct jfs_inode_info *jfs_ip;
2846         struct jfs_log *log = JFS_SBI(sb)->log;
2847         tid_t tid;
2848 
2849         set_bit(log_QUIESCE, &log->flag);
2850 
2851         TXN_LOCK();
2852 restart:
2853         while (!list_empty(&TxAnchor.anon_list)) {
2854                 jfs_ip = list_entry(TxAnchor.anon_list.next,
2855                                     struct jfs_inode_info,
2856                                     anon_inode_list);
2857                 ip = &jfs_ip->vfs_inode;
2858 
2859                 /*
2860                  * inode will be removed from anonymous list
2861                  * when it is committed
2862                  */
2863                 TXN_UNLOCK();
2864                 tid = txBegin(ip->i_sb, COMMIT_INODE | COMMIT_FORCE);
2865                 mutex_lock(&jfs_ip->commit_mutex);
2866                 txCommit(tid, 1, &ip, 0);
2867                 txEnd(tid);
2868                 mutex_unlock(&jfs_ip->commit_mutex);
2869                 /*
2870                  * Yield the CPU periodically; this loop may
2871                  * otherwise run a long time without blocking.
2872                  */
2873                 cond_resched();
2874                 TXN_LOCK();
2875         }
2876 
2877         /*
2878          * If jfs_sync is running in parallel, there could be some inodes
2879          * on anon_list2.  Let's check.
2880          */
2881         if (!list_empty(&TxAnchor.anon_list2)) {
2882                 list_splice_init(&TxAnchor.anon_list2, &TxAnchor.anon_list);
2883                 goto restart;
2884         }
2885         TXN_UNLOCK();
2886 
2887         /*
2888          * We may need to kick off the group commit
2889          */
2890         jfs_flush_journal(log, 0);
2891 }
2892 
2893 /*
2894  * txResume()
2895  *
2896  * Allows transactions to start again following txQuiesce
2897  */
2898 void txResume(struct super_block *sb)
2899 {
2900         struct jfs_log *log = JFS_SBI(sb)->log;
2901 
2902         clear_bit(log_QUIESCE, &log->flag);
2903         TXN_WAKEUP(&log->syncwait);
2904 }
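
/*
 * Sketch of a typical txQuiesce()/txResume() caller, e.g. a freeze/thaw
 * path.  The function names and the step between the calls are
 * assumptions; the real callers live elsewhere in jfs.
 */
#if 0
static int example_freeze(struct super_block *sb)
{
        txQuiesce(sb);  /* block new txns, force out anonymous txns */
        /* ... volume metadata is now consistent on disk ... */
        return 0;
}

static int example_thaw(struct super_block *sb)
{
        txResume(sb);   /* clear log_QUIESCE, wake sleepers on syncwait */
        return 0;
}
#endif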
2905 
2906 /*
2907  *      jfs_sync(void)
2908  *
2909  *      To be run as a kernel daemon.  This is awakened when tlocks run low.
2910  *      We write out any inodes holding anonymous tlocks so that those
2911  *      tlocks become available again.
2912  */
2913 int jfs_sync(void *arg)
2914 {
2915         struct inode *ip;
2916         struct jfs_inode_info *jfs_ip;
2917         tid_t tid;
2918 
2919         do {
2920                 /*
2921                  * write each inode on the anonymous inode list
2922                  */
2923                 TXN_LOCK();
2924                 while (jfs_tlocks_low && !list_empty(&TxAnchor.anon_list)) {
2925                         jfs_ip = list_entry(TxAnchor.anon_list.next,
2926                                             struct jfs_inode_info,
2927                                             anon_inode_list);
2928                         ip = &jfs_ip->vfs_inode;
2929 
2930                         if (!igrab(ip)) {
2931                                 /*
2932                                  * Inode is being freed
2933                                  */
2934                                 list_del_init(&jfs_ip->anon_inode_list);
2935                         } else if (mutex_trylock(&jfs_ip->commit_mutex)) {
2936                                 /*
2937                                  * inode will be removed from anonymous list
2938                                  * when it is committed
2939                                  */
2940                                 TXN_UNLOCK();
2941                                 tid = txBegin(ip->i_sb, COMMIT_INODE);
2942                                 txCommit(tid, 1, &ip, 0);
2943                                 txEnd(tid);
2944                                 mutex_unlock(&jfs_ip->commit_mutex);
2945 
2946                                 iput(ip);
2947                                 /*
2948                                  * Yield the CPU periodically; this loop may
2949                                  * otherwise run a long time without blocking.
2950                                  */
2951                                 cond_resched();
2952                                 TXN_LOCK();
2953                         } else {
2954                                 /* We can't get the commit mutex.  It may
2955                                  * be held by a thread waiting for tlocks,
2956                                  * so don't block here.  Save the inode to
2957                                  * be put back on the anon_list.
2958                                  */
2959 
2960                                 /* Move from anon_list to anon_list2 */
2961                                 list_move(&jfs_ip->anon_inode_list,
2962                                           &TxAnchor.anon_list2);
2963 
2964                                 TXN_UNLOCK();
2965                                 iput(ip);
2966                                 TXN_LOCK();
2967                         }
2968                 }
2969                 /* Add anon_list2 back to anon_list */
2970                 list_splice_init(&TxAnchor.anon_list2, &TxAnchor.anon_list);
2971 
2972                 if (freezing(current)) {
2973                         TXN_UNLOCK();
2974                         try_to_freeze();
2975                 } else {
2976                         set_current_state(TASK_INTERRUPTIBLE);
2977                         TXN_UNLOCK();
2978                         schedule();
2979                 }
2980         } while (!kthread_should_stop());
2981 
2982         jfs_info("jfs_sync being killed");
2983         return 0;
2984 }
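
/*
 * Sketch of how a daemon like this is started and stopped with the
 * kthread API.  The helper and variable names are assumptions; the
 * actual setup is done at init time elsewhere in jfs.
 */
#if 0
static struct task_struct *jfs_sync_task;

static int start_jfs_sync(void)
{
        jfs_sync_task = kthread_run(jfs_sync, NULL, "jfsSync");
        if (IS_ERR(jfs_sync_task))
                return PTR_ERR(jfs_sync_task);
        return 0;
}

static void stop_jfs_sync(void)
{
        /* sets kthread_should_stop() and wakes the thread */
        kthread_stop(jfs_sync_task);
}
#endif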
2985 
2986 #if defined(CONFIG_PROC_FS) && defined(CONFIG_JFS_DEBUG)
2987 int jfs_txanchor_proc_show(struct seq_file *m, void *v)
2988 {
2989         char *freewait;
2990         char *freelockwait;
2991         char *lowlockwait;
2992 
2993         freewait =
2994             waitqueue_active(&TxAnchor.freewait) ? "active" : "empty";
2995         freelockwait =
2996             waitqueue_active(&TxAnchor.freelockwait) ? "active" : "empty";
2997         lowlockwait =
2998             waitqueue_active(&TxAnchor.lowlockwait) ? "active" : "empty";
2999 
3000         seq_printf(m,
3001                        "JFS TxAnchor\n"
3002                        "============\n"
3003                        "freetid = %d\n"
3004                        "freewait = %s\n"
3005                        "freelock = %d\n"
3006                        "freelockwait = %s\n"
3007                        "lowlockwait = %s\n"
3008                        "tlocksInUse = %d\n"
3009                        "jfs_tlocks_low = %d\n"
3010                        "unlock_queue is %sempty\n",
3011                        TxAnchor.freetid,
3012                        freewait,
3013                        TxAnchor.freelock,
3014                        freelockwait,
3015                        lowlockwait,
3016                        TxAnchor.tlocksInUse,
3017                        jfs_tlocks_low,
3018                        list_empty(&TxAnchor.unlock_queue) ? "" : "not ");
3019         return 0;
3020 }
3021 #endif
3022 
3023 #if defined(CONFIG_PROC_FS) && defined(CONFIG_JFS_STATISTICS)
3024 int jfs_txstats_proc_show(struct seq_file *m, void *v)
3025 {
3026         seq_printf(m,
3027                        "JFS TxStats\n"
3028                        "===========\n"
3029                        "calls to txBegin = %d\n"
3030                        "txBegin blocked by sync barrier = %d\n"
3031                        "txBegin blocked by tlocks low = %d\n"
3032                        "txBegin blocked by no free tid = %d\n"
3033                        "calls to txBeginAnon = %d\n"
3034                        "txBeginAnon blocked by sync barrier = %d\n"
3035                        "txBeginAnon blocked by tlocks low = %d\n"
3036                        "calls to txLockAlloc = %d\n"
3037                        "txLockAlloc blocked by no free lock = %d\n",
3038                        TxStat.txBegin,
3039                        TxStat.txBegin_barrier,
3040                        TxStat.txBegin_lockslow,
3041                        TxStat.txBegin_freetid,
3042                        TxStat.txBeginAnon,
3043                        TxStat.txBeginAnon_barrier,
3044                        TxStat.txBeginAnon_lockslow,
3045                        TxStat.txLockAlloc,
3046                        TxStat.txLockAlloc_freelock);
3047         return 0;
3048 }
3049 #endif
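
/*
 * Sketch of wiring these show routines into procfs with
 * proc_create_single().  The directory name, parent, and helper are
 * assumptions; the real registration is done in jfs's proc setup code.
 */
#if 0
static void jfs_register_proc_example(void)
{
        struct proc_dir_entry *base = proc_mkdir("fs/jfs", NULL);

        if (!base)
                return;
        proc_create_single("TxAnchor", 0, base, jfs_txanchor_proc_show);
        proc_create_single("txstats", 0, base, jfs_txstats_proc_show);
}
#endif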
