1/* 2 * linux/fs/ext3/dir.c 3 * 4 * Copyright (C) 1992, 1993, 1994, 1995 5 * Remy Card (card@masi.ibp.fr) 6 * Laboratoire MASI - Institut Blaise Pascal 7 * Universite Pierre et Marie Curie (Paris VI) 8 * 9 * from 10 * 11 * linux/fs/minix/dir.c 12 * 13 * Copyright (C) 1991, 1992 Linus Torvalds 14 * 15 * ext3 directory handling functions 16 * 17 * Big-endian to little-endian byte-swapping/bitmaps by 18 * David S. Miller (davem@caip.rutgers.edu), 1995 19 * 20 * Hash Tree Directory indexing (c) 2001 Daniel Phillips 21 * 22 */ 23 24#include <linux/compat.h> 25#include "ext3.h" 26 27static unsigned char ext3_filetype_table[] = { 28 DT_UNKNOWN, DT_REG, DT_DIR, DT_CHR, DT_BLK, DT_FIFO, DT_SOCK, DT_LNK 29}; 30 31static int ext3_dx_readdir(struct file *, struct dir_context *); 32 33static unsigned char get_dtype(struct super_block *sb, int filetype) 34{ 35 if (!EXT3_HAS_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_FILETYPE) || 36 (filetype >= EXT3_FT_MAX)) 37 return DT_UNKNOWN; 38 39 return (ext3_filetype_table[filetype]); 40} 41 42/** 43 * Check if the given dir-inode refers to an htree-indexed directory 44 * (or a directory which could potentially get converted to use htree 45 * indexing). 46 * 47 * Return 1 if it is a dx dir, 0 if not 48 */ 49static int is_dx_dir(struct inode *inode) 50{ 51 struct super_block *sb = inode->i_sb; 52 53 if (EXT3_HAS_COMPAT_FEATURE(inode->i_sb, 54 EXT3_FEATURE_COMPAT_DIR_INDEX) && 55 ((EXT3_I(inode)->i_flags & EXT3_INDEX_FL) || 56 ((inode->i_size >> sb->s_blocksize_bits) == 1))) 57 return 1; 58 59 return 0; 60} 61 62int ext3_check_dir_entry (const char * function, struct inode * dir, 63 struct ext3_dir_entry_2 * de, 64 struct buffer_head * bh, 65 unsigned long offset) 66{ 67 const char * error_msg = NULL; 68 const int rlen = ext3_rec_len_from_disk(de->rec_len); 69 70 if (unlikely(rlen < EXT3_DIR_REC_LEN(1))) 71 error_msg = "rec_len is smaller than minimal"; 72 else if (unlikely(rlen % 4 != 0)) 73 error_msg = "rec_len % 4 != 0"; 74 else if (unlikely(rlen < EXT3_DIR_REC_LEN(de->name_len))) 75 error_msg = "rec_len is too small for name_len"; 76 else if (unlikely((((char *) de - bh->b_data) + rlen > dir->i_sb->s_blocksize))) 77 error_msg = "directory entry across blocks"; 78 else if (unlikely(le32_to_cpu(de->inode) > 79 le32_to_cpu(EXT3_SB(dir->i_sb)->s_es->s_inodes_count))) 80 error_msg = "inode out of bounds"; 81 82 if (unlikely(error_msg != NULL)) 83 ext3_error (dir->i_sb, function, 84 "bad entry in directory #%lu: %s - " 85 "offset=%lu, inode=%lu, rec_len=%d, name_len=%d", 86 dir->i_ino, error_msg, offset, 87 (unsigned long) le32_to_cpu(de->inode), 88 rlen, de->name_len); 89 90 return error_msg == NULL ? 1 : 0; 91} 92 93static int ext3_readdir(struct file *file, struct dir_context *ctx) 94{ 95 unsigned long offset; 96 int i; 97 struct ext3_dir_entry_2 *de; 98 int err; 99 struct inode *inode = file_inode(file); 100 struct super_block *sb = inode->i_sb; 101 int dir_has_error = 0; 102 103 if (is_dx_dir(inode)) { 104 err = ext3_dx_readdir(file, ctx); 105 if (err != ERR_BAD_DX_DIR) 106 return err; 107 /* 108 * We don't set the inode dirty flag since it's not 109 * critical that it get flushed back to the disk. 110 */ 111 EXT3_I(inode)->i_flags &= ~EXT3_INDEX_FL; 112 } 113 offset = ctx->pos & (sb->s_blocksize - 1); 114 115 while (ctx->pos < inode->i_size) { 116 unsigned long blk = ctx->pos >> EXT3_BLOCK_SIZE_BITS(sb); 117 struct buffer_head map_bh; 118 struct buffer_head *bh = NULL; 119 120 map_bh.b_state = 0; 121 err = ext3_get_blocks_handle(NULL, inode, blk, 1, &map_bh, 0); 122 if (err > 0) { 123 pgoff_t index = map_bh.b_blocknr >> 124 (PAGE_CACHE_SHIFT - inode->i_blkbits); 125 if (!ra_has_index(&file->f_ra, index)) 126 page_cache_sync_readahead( 127 sb->s_bdev->bd_inode->i_mapping, 128 &file->f_ra, file, 129 index, 1); 130 file->f_ra.prev_pos = (loff_t)index << PAGE_CACHE_SHIFT; 131 bh = ext3_bread(NULL, inode, blk, 0, &err); 132 } 133 134 /* 135 * We ignore I/O errors on directories so users have a chance 136 * of recovering data when there's a bad sector 137 */ 138 if (!bh) { 139 if (!dir_has_error) { 140 ext3_error(sb, __func__, "directory #%lu " 141 "contains a hole at offset %lld", 142 inode->i_ino, ctx->pos); 143 dir_has_error = 1; 144 } 145 /* corrupt size? Maybe no more blocks to read */ 146 if (ctx->pos > inode->i_blocks << 9) 147 break; 148 ctx->pos += sb->s_blocksize - offset; 149 continue; 150 } 151 152 /* If the dir block has changed since the last call to 153 * readdir(2), then we might be pointing to an invalid 154 * dirent right now. Scan from the start of the block 155 * to make sure. */ 156 if (offset && file->f_version != inode->i_version) { 157 for (i = 0; i < sb->s_blocksize && i < offset; ) { 158 de = (struct ext3_dir_entry_2 *) 159 (bh->b_data + i); 160 /* It's too expensive to do a full 161 * dirent test each time round this 162 * loop, but we do have to test at 163 * least that it is non-zero. A 164 * failure will be detected in the 165 * dirent test below. */ 166 if (ext3_rec_len_from_disk(de->rec_len) < 167 EXT3_DIR_REC_LEN(1)) 168 break; 169 i += ext3_rec_len_from_disk(de->rec_len); 170 } 171 offset = i; 172 ctx->pos = (ctx->pos & ~(sb->s_blocksize - 1)) 173 | offset; 174 file->f_version = inode->i_version; 175 } 176 177 while (ctx->pos < inode->i_size 178 && offset < sb->s_blocksize) { 179 de = (struct ext3_dir_entry_2 *) (bh->b_data + offset); 180 if (!ext3_check_dir_entry ("ext3_readdir", inode, de, 181 bh, offset)) { 182 /* On error, skip the to the 183 next block. */ 184 ctx->pos = (ctx->pos | 185 (sb->s_blocksize - 1)) + 1; 186 break; 187 } 188 offset += ext3_rec_len_from_disk(de->rec_len); 189 if (le32_to_cpu(de->inode)) { 190 if (!dir_emit(ctx, de->name, de->name_len, 191 le32_to_cpu(de->inode), 192 get_dtype(sb, de->file_type))) { 193 brelse(bh); 194 return 0; 195 } 196 } 197 ctx->pos += ext3_rec_len_from_disk(de->rec_len); 198 } 199 offset = 0; 200 brelse (bh); 201 if (ctx->pos < inode->i_size) 202 if (!dir_relax(inode)) 203 return 0; 204 } 205 return 0; 206} 207 208static inline int is_32bit_api(void) 209{ 210#ifdef CONFIG_COMPAT 211 return is_compat_task(); 212#else 213 return (BITS_PER_LONG == 32); 214#endif 215} 216 217/* 218 * These functions convert from the major/minor hash to an f_pos 219 * value for dx directories 220 * 221 * Upper layer (for example NFS) should specify FMODE_32BITHASH or 222 * FMODE_64BITHASH explicitly. On the other hand, we allow ext3 to be mounted 223 * directly on both 32-bit and 64-bit nodes, under such case, neither 224 * FMODE_32BITHASH nor FMODE_64BITHASH is specified. 225 */ 226static inline loff_t hash2pos(struct file *filp, __u32 major, __u32 minor) 227{ 228 if ((filp->f_mode & FMODE_32BITHASH) || 229 (!(filp->f_mode & FMODE_64BITHASH) && is_32bit_api())) 230 return major >> 1; 231 else 232 return ((__u64)(major >> 1) << 32) | (__u64)minor; 233} 234 235static inline __u32 pos2maj_hash(struct file *filp, loff_t pos) 236{ 237 if ((filp->f_mode & FMODE_32BITHASH) || 238 (!(filp->f_mode & FMODE_64BITHASH) && is_32bit_api())) 239 return (pos << 1) & 0xffffffff; 240 else 241 return ((pos >> 32) << 1) & 0xffffffff; 242} 243 244static inline __u32 pos2min_hash(struct file *filp, loff_t pos) 245{ 246 if ((filp->f_mode & FMODE_32BITHASH) || 247 (!(filp->f_mode & FMODE_64BITHASH) && is_32bit_api())) 248 return 0; 249 else 250 return pos & 0xffffffff; 251} 252 253/* 254 * Return 32- or 64-bit end-of-file for dx directories 255 */ 256static inline loff_t ext3_get_htree_eof(struct file *filp) 257{ 258 if ((filp->f_mode & FMODE_32BITHASH) || 259 (!(filp->f_mode & FMODE_64BITHASH) && is_32bit_api())) 260 return EXT3_HTREE_EOF_32BIT; 261 else 262 return EXT3_HTREE_EOF_64BIT; 263} 264 265 266/* 267 * ext3_dir_llseek() calls generic_file_llseek[_size]() to handle both 268 * non-htree and htree directories, where the "offset" is in terms 269 * of the filename hash value instead of the byte offset. 270 * 271 * Because we may return a 64-bit hash that is well beyond s_maxbytes, 272 * we need to pass the max hash as the maximum allowable offset in 273 * the htree directory case. 274 * 275 * NOTE: offsets obtained *before* ext3_set_inode_flag(dir, EXT3_INODE_INDEX) 276 * will be invalid once the directory was converted into a dx directory 277 */ 278static loff_t ext3_dir_llseek(struct file *file, loff_t offset, int whence) 279{ 280 struct inode *inode = file->f_mapping->host; 281 int dx_dir = is_dx_dir(inode); 282 loff_t htree_max = ext3_get_htree_eof(file); 283 284 if (likely(dx_dir)) 285 return generic_file_llseek_size(file, offset, whence, 286 htree_max, htree_max); 287 else 288 return generic_file_llseek(file, offset, whence); 289} 290 291/* 292 * This structure holds the nodes of the red-black tree used to store 293 * the directory entry in hash order. 294 */ 295struct fname { 296 __u32 hash; 297 __u32 minor_hash; 298 struct rb_node rb_hash; 299 struct fname *next; 300 __u32 inode; 301 __u8 name_len; 302 __u8 file_type; 303 char name[0]; 304}; 305 306/* 307 * This functoin implements a non-recursive way of freeing all of the 308 * nodes in the red-black tree. 309 */ 310static void free_rb_tree_fname(struct rb_root *root) 311{ 312 struct fname *fname, *next; 313 314 rbtree_postorder_for_each_entry_safe(fname, next, root, rb_hash) 315 do { 316 struct fname *old = fname; 317 fname = fname->next; 318 kfree(old); 319 } while (fname); 320 321 *root = RB_ROOT; 322} 323 324static struct dir_private_info *ext3_htree_create_dir_info(struct file *filp, 325 loff_t pos) 326{ 327 struct dir_private_info *p; 328 329 p = kzalloc(sizeof(struct dir_private_info), GFP_KERNEL); 330 if (!p) 331 return NULL; 332 p->curr_hash = pos2maj_hash(filp, pos); 333 p->curr_minor_hash = pos2min_hash(filp, pos); 334 return p; 335} 336 337void ext3_htree_free_dir_info(struct dir_private_info *p) 338{ 339 free_rb_tree_fname(&p->root); 340 kfree(p); 341} 342 343/* 344 * Given a directory entry, enter it into the fname rb tree. 345 */ 346int ext3_htree_store_dirent(struct file *dir_file, __u32 hash, 347 __u32 minor_hash, 348 struct ext3_dir_entry_2 *dirent) 349{ 350 struct rb_node **p, *parent = NULL; 351 struct fname * fname, *new_fn; 352 struct dir_private_info *info; 353 int len; 354 355 info = (struct dir_private_info *) dir_file->private_data; 356 p = &info->root.rb_node; 357 358 /* Create and allocate the fname structure */ 359 len = sizeof(struct fname) + dirent->name_len + 1; 360 new_fn = kzalloc(len, GFP_KERNEL); 361 if (!new_fn) 362 return -ENOMEM; 363 new_fn->hash = hash; 364 new_fn->minor_hash = minor_hash; 365 new_fn->inode = le32_to_cpu(dirent->inode); 366 new_fn->name_len = dirent->name_len; 367 new_fn->file_type = dirent->file_type; 368 memcpy(new_fn->name, dirent->name, dirent->name_len); 369 new_fn->name[dirent->name_len] = 0; 370 371 while (*p) { 372 parent = *p; 373 fname = rb_entry(parent, struct fname, rb_hash); 374 375 /* 376 * If the hash and minor hash match up, then we put 377 * them on a linked list. This rarely happens... 378 */ 379 if ((new_fn->hash == fname->hash) && 380 (new_fn->minor_hash == fname->minor_hash)) { 381 new_fn->next = fname->next; 382 fname->next = new_fn; 383 return 0; 384 } 385 386 if (new_fn->hash < fname->hash) 387 p = &(*p)->rb_left; 388 else if (new_fn->hash > fname->hash) 389 p = &(*p)->rb_right; 390 else if (new_fn->minor_hash < fname->minor_hash) 391 p = &(*p)->rb_left; 392 else /* if (new_fn->minor_hash > fname->minor_hash) */ 393 p = &(*p)->rb_right; 394 } 395 396 rb_link_node(&new_fn->rb_hash, parent, p); 397 rb_insert_color(&new_fn->rb_hash, &info->root); 398 return 0; 399} 400 401 402 403/* 404 * This is a helper function for ext3_dx_readdir. It calls filldir 405 * for all entres on the fname linked list. (Normally there is only 406 * one entry on the linked list, unless there are 62 bit hash collisions.) 407 */ 408static bool call_filldir(struct file *file, struct dir_context *ctx, 409 struct fname *fname) 410{ 411 struct dir_private_info *info = file->private_data; 412 struct inode *inode = file_inode(file); 413 struct super_block *sb = inode->i_sb; 414 415 if (!fname) { 416 printk("call_filldir: called with null fname?!?\n"); 417 return true; 418 } 419 ctx->pos = hash2pos(file, fname->hash, fname->minor_hash); 420 while (fname) { 421 if (!dir_emit(ctx, fname->name, fname->name_len, 422 fname->inode, 423 get_dtype(sb, fname->file_type))) { 424 info->extra_fname = fname; 425 return false; 426 } 427 fname = fname->next; 428 } 429 return true; 430} 431 432static int ext3_dx_readdir(struct file *file, struct dir_context *ctx) 433{ 434 struct dir_private_info *info = file->private_data; 435 struct inode *inode = file_inode(file); 436 struct fname *fname; 437 int ret; 438 439 if (!info) { 440 info = ext3_htree_create_dir_info(file, ctx->pos); 441 if (!info) 442 return -ENOMEM; 443 file->private_data = info; 444 } 445 446 if (ctx->pos == ext3_get_htree_eof(file)) 447 return 0; /* EOF */ 448 449 /* Some one has messed with f_pos; reset the world */ 450 if (info->last_pos != ctx->pos) { 451 free_rb_tree_fname(&info->root); 452 info->curr_node = NULL; 453 info->extra_fname = NULL; 454 info->curr_hash = pos2maj_hash(file, ctx->pos); 455 info->curr_minor_hash = pos2min_hash(file, ctx->pos); 456 } 457 458 /* 459 * If there are any leftover names on the hash collision 460 * chain, return them first. 461 */ 462 if (info->extra_fname) { 463 if (!call_filldir(file, ctx, info->extra_fname)) 464 goto finished; 465 info->extra_fname = NULL; 466 goto next_node; 467 } else if (!info->curr_node) 468 info->curr_node = rb_first(&info->root); 469 470 while (1) { 471 /* 472 * Fill the rbtree if we have no more entries, 473 * or the inode has changed since we last read in the 474 * cached entries. 475 */ 476 if ((!info->curr_node) || 477 (file->f_version != inode->i_version)) { 478 info->curr_node = NULL; 479 free_rb_tree_fname(&info->root); 480 file->f_version = inode->i_version; 481 ret = ext3_htree_fill_tree(file, info->curr_hash, 482 info->curr_minor_hash, 483 &info->next_hash); 484 if (ret < 0) 485 return ret; 486 if (ret == 0) { 487 ctx->pos = ext3_get_htree_eof(file); 488 break; 489 } 490 info->curr_node = rb_first(&info->root); 491 } 492 493 fname = rb_entry(info->curr_node, struct fname, rb_hash); 494 info->curr_hash = fname->hash; 495 info->curr_minor_hash = fname->minor_hash; 496 if (!call_filldir(file, ctx, fname)) 497 break; 498 next_node: 499 info->curr_node = rb_next(info->curr_node); 500 if (info->curr_node) { 501 fname = rb_entry(info->curr_node, struct fname, 502 rb_hash); 503 info->curr_hash = fname->hash; 504 info->curr_minor_hash = fname->minor_hash; 505 } else { 506 if (info->next_hash == ~0) { 507 ctx->pos = ext3_get_htree_eof(file); 508 break; 509 } 510 info->curr_hash = info->next_hash; 511 info->curr_minor_hash = 0; 512 } 513 } 514finished: 515 info->last_pos = ctx->pos; 516 return 0; 517} 518 519static int ext3_release_dir (struct inode * inode, struct file * filp) 520{ 521 if (filp->private_data) 522 ext3_htree_free_dir_info(filp->private_data); 523 524 return 0; 525} 526 527const struct file_operations ext3_dir_operations = { 528 .llseek = ext3_dir_llseek, 529 .read = generic_read_dir, 530 .iterate = ext3_readdir, 531 .unlocked_ioctl = ext3_ioctl, 532#ifdef CONFIG_COMPAT 533 .compat_ioctl = ext3_compat_ioctl, 534#endif 535 .fsync = ext3_sync_file, 536 .release = ext3_release_dir, 537}; 538