2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
11 * Implements Extendible Hashing as described in:
12 * "Extendible Hashing" by Fagin, et al in
13 * __ACM Trans. on Database Systems__, Sept 1979.
16 * Here's the layout of dirents which is essentially the same as that of ext2
17 * within a single block. The field de_name_len is the number of bytes
18 * actually required for the name (no null terminator). The field de_rec_len
19 * is the number of bytes allocated to the dirent. The offset of the next
20 * dirent in the block is (dirent + dirent->de_rec_len). When a dirent is
21 * deleted, the preceding dirent inherits its allocated space, ie
22 * prev->de_rec_len += deleted->de_rec_len. Since the next dirent is obtained
23 * by adding de_rec_len to the current dirent, this essentially causes the
24 * deleted dirent to get jumped over when iterating through all the dirents.
26 * When deleting the first dirent in a block, there is no previous dirent so
27 * the field de_ino is set to zero to designate it as deleted. When allocating
28 * a dirent, gfs2_dirent_alloc iterates through the dirents in a block. If the
29 * first dirent has (de_ino == 0) and de_rec_len is large enough, this first
30 * dirent is allocated. Otherwise it must go through all the 'used' dirents
31 * searching for one in which the amount of total space minus the amount of
32 * used space will provide enough space for the new dirent.
34 * There are two types of blocks in which dirents reside. In a stuffed dinode,
35 * the dirents begin at offset sizeof(struct gfs2_dinode) from the beginning of
36 * the block. In leaves, they begin at offset sizeof(struct gfs2_leaf) from the
37 * beginning of the leaf block. The dirents reside in leaves when
39 * dip->i_di.di_flags & GFS2_DIF_EXHASH is true
41 * Otherwise, the dirents are "linear", within a single stuffed dinode block.
43 * When the dirents are in leaves, the actual contents of the directory file are
44 * used as an array of 64-bit block pointers pointing to the leaf blocks. The
45 * dirents are NOT in the directory file itself. There can be more than one block
46 * pointer in the array that points to the same leaf. In fact, when a directory
47 * is first converted from linear to exhash, all of the pointers point to the same leaf.
50 * When a leaf is completely full, the size of the hash table can be
51 * doubled unless it is already at the maximum size which is hard coded into
52 * GFS2_DIR_MAX_DEPTH. After that, leaves are chained together in a linked list,
53 * but never before the maximum hash table size has been reached.
56 #include <linux/sched.h>
57 #include <linux/slab.h>
58 #include <linux/spinlock.h>
59 #include <linux/completion.h>
60 #include <linux/buffer_head.h>
61 #include <linux/sort.h>
62 #include <asm/semaphore.h>
/* Block kinds reported by dirent_first() for the buffer it was given. */
74 #define IS_LEAF 1 /* Hashed (leaf) directory */
75 #define IS_DINODE 2 /* Linear (stuffed dinode block) directory */
/*
 * Conversions between a 32-bit on-disk name hash and the 64-bit offset
 * handed to readdir callers.
 * NOTE(review): two alternative pairs are defined below (one shifts by a
 * single bit, the other is the identity); presumably a preprocessor
 * conditional that is not visible here selects one of them -- confirm.
 */
78 #define gfs2_disk_hash2offset(h) (((uint64_t)(h)) >> 1)
79 #define gfs2_dir_offset2hash(p) ((uint32_t)(((uint64_t)(p)) << 1))
81 #define gfs2_disk_hash2offset(h) (((uint64_t)(h)))
82 #define gfs2_dir_offset2hash(p) ((uint32_t)(((uint64_t)(p))))
85 typedef int (*leaf_call_t) (struct gfs2_inode *dip,
86 uint32_t index, uint32_t len, uint64_t leaf_no,
/*
 * Obtain the buffer head for directory block @block of inode @ip.
 *
 * Two ways of getting the buffer are visible below:
 *  - gfs2_meta_new(), after which the buffer is added to the transaction,
 *    stamped as GFS2_METATYPE_JD and cleared past the meta header;
 *  - gfs2_meta_read() with DIO_START | DIO_WAIT, after which the block is
 *    checked to be of type GFS2_METATYPE_JD.
 * NOTE(review): presumably @new selects between the two paths and *@bhp
 * receives the resulting buffer -- verify against the full function.
 */
89 int gfs2_dir_get_buffer(struct gfs2_inode *ip, uint64_t block, int new,
90 struct buffer_head **bhp)
92 struct buffer_head *bh;
96 bh = gfs2_meta_new(ip->i_gl, block);
97 gfs2_trans_add_bh(ip->i_gl, bh, 1);
98 gfs2_metatype_set(bh, GFS2_METATYPE_JD, GFS2_FORMAT_JD);
99 gfs2_buffer_clear_tail(bh, sizeof(struct gfs2_meta_header));
101 error = gfs2_meta_read(ip->i_gl, block, DIO_START | DIO_WAIT, &bh);
104 if (gfs2_metatype_check(ip->i_sbd, bh, GFS2_METATYPE_JD)) {
/*
 * gfs2_dir_write_stuffed - copy directory data into a stuffed dinode block
 * @ip: the directory inode
 * @buf: the data to copy in
 * @offset: byte offset within the directory data, i.e. relative to the end
 *          of the on-disk dinode header
 * @size: number of bytes to copy
 *
 * The data area of a stuffed dinode starts sizeof(struct gfs2_dinode) bytes
 * into the block.  That is the base gfs2_dir_read_stuffed() reads from and
 * the one gfs2_dir_write_data() uses when deciding whether a write still
 * fits in the dinode block, so the copy below must use it as well.  The
 * size of the in-core struct gfs2_inode is unrelated to the on-disk layout
 * and must not be used as the base.
 */
116 static int gfs2_dir_write_stuffed(struct gfs2_inode *ip, const char *buf,
117 unsigned int offset, unsigned int size)
120 struct buffer_head *dibh;
123 error = gfs2_meta_inode_buffer(ip, &dibh);
/* Add the dinode buffer to the transaction before modifying it. */
127 gfs2_trans_add_bh(ip->i_gl, dibh, 1);
128 memcpy(dibh->b_data + offset + sizeof(struct gfs2_dinode), buf, size);
/* Grow the recorded directory size if this write extended it. */
129 if (ip->i_di.di_size < offset + size)
130 ip->i_di.di_size = offset + size;
131 ip->i_di.di_mtime = ip->i_di.di_ctime = get_seconds();
/* Serialise the updated in-core dinode fields back into the block. */
132 gfs2_dinode_out(&ip->i_di, dibh->b_data);
142 * gfs2_dir_write_data - Write directory information to the inode
143 * @ip: The GFS2 inode
144 * @buf: The buffer containing information to be written
145 * @offset: The file offset to start writing at
146 * @size: The amount of data to write
148 * Returns: The number of bytes correctly written or error code
/*
 * Write @size bytes from @buf into the directory's data at byte @offset.
 * A stuffed directory whose write still fits inside the dinode block is
 * handled by gfs2_dir_write_stuffed(); otherwise the dinode is unstuffed
 * and the data is copied block by block into journaled-data blocks, each of
 * which carries a struct gfs2_meta_header in front of the payload.
 */
150 static int gfs2_dir_write_data(struct gfs2_inode *ip, const char *buf,
151 uint64_t offset, unsigned int size)
153 struct gfs2_sbd *sdp = ip->i_sbd;
154 struct buffer_head *dibh;
155 uint64_t lblock, dblock;
/* Fast path: the whole write lands inside the stuffed dinode block. */
164 if (gfs2_is_stuffed(ip) &&
165 offset + size <= sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode))
166 return gfs2_dir_write_stuffed(ip, buf, (unsigned int)offset, size);
/* Directories are expected to be journaled-data inodes. */
168 if (gfs2_assert_warn(sdp, gfs2_is_jdata(ip)))
/* Still stuffed but the write does not fit: move the data out first. */
171 if (gfs2_is_stuffed(ip)) {
172 error = gfs2_unstuff_dinode(ip, NULL, NULL);
/*
 * do_div() divides lblock by the per-block payload size (sd_jbsize) in
 * place and yields the remainder; adding the header size turns that
 * remainder into a byte position inside the block.
 */
178 o = do_div(lblock, sdp->sd_jbsize) + sizeof(struct gfs2_meta_header);
180 while (copied < size) {
182 struct buffer_head *bh;
/* Copy at most up to the end of the current block. */
185 amount = size - copied;
186 if (amount > sdp->sd_sb.sb_bsize - o)
187 amount = sdp->sd_sb.sb_bsize - o;
191 error = gfs2_block_map(ip, lblock, &new, &dblock, &extlen);
195 if (gfs2_assert_withdraw(sdp, dblock))
/* A block whose entire payload is being overwritten is requested as new. */
199 error = gfs2_dir_get_buffer(ip, dblock, (amount == sdp->sd_jbsize) ? 1 : new, &bh);
203 gfs2_trans_add_bh(ip->i_gl, bh, 1);
204 memcpy(bh->b_data + o, buf, amount);
/* Later blocks are written starting right after their meta header. */
214 o = sizeof(struct gfs2_meta_header);
218 error = gfs2_meta_inode_buffer(ip, &dibh);
/* Extend the recorded size by what was actually copied, then stamp times. */
222 if (ip->i_di.di_size < offset + copied)
223 ip->i_di.di_size = offset + copied;
224 ip->i_di.di_mtime = ip->i_di.di_ctime = get_seconds();
226 gfs2_trans_add_bh(ip->i_gl, dibh, 1);
227 gfs2_dinode_out(&ip->i_di, dibh->b_data);
/*
 * Copy @size bytes of directory data, starting at data offset @offset, out
 * of a stuffed dinode block into @buf.  Returns @size, or the error from
 * fetching the dinode buffer.
 */
237 static int gfs2_dir_read_stuffed(struct gfs2_inode *ip, char *buf,
238 unsigned int offset, unsigned int size)
240 struct buffer_head *dibh;
243 error = gfs2_meta_inode_buffer(ip, &dibh);
/* Directory data begins immediately after the on-disk dinode header. */
245 offset += sizeof(struct gfs2_dinode);
246 memcpy(buf, dibh->b_data + offset, size);
250 return (error) ? error : size;
255 * gfs2_dir_read_data - Read data from a directory inode
256 * @ip: The GFS2 Inode
257 * @buf: The buffer to place result into
258 * @offset: File offset to begin reading from
259 * @size: Amount of data to transfer
261 * Returns: The amount of data actually copied or the error
/*
 * Read up to @size bytes of directory data starting at byte @offset into
 * @buf.  The request is clipped to the directory size.  Stuffed directories
 * are served from the dinode block; otherwise the data is gathered block by
 * block, skipping the struct gfs2_meta_header at the front of each block.
 */
263 static int gfs2_dir_read_data(struct gfs2_inode *ip, char *buf,
264 uint64_t offset, unsigned int size)
266 struct gfs2_sbd *sdp = ip->i_sbd;
267 uint64_t lblock, dblock;
/* Requests starting at or beyond the end of the data are special-cased. */
273 if (offset >= ip->i_di.di_size)
/* Clip the request so it does not run past the end of the data. */
276 if ((offset + size) > ip->i_di.di_size)
277 size = ip->i_di.di_size - offset;
282 if (gfs2_is_stuffed(ip))
283 return gfs2_dir_read_stuffed(ip, buf, (unsigned int)offset, size);
285 if (gfs2_assert_warn(sdp, gfs2_is_jdata(ip)))
/*
 * do_div() divides lblock by the per-block payload size in place and
 * yields the remainder; adding the header size gives the byte position
 * inside the first block.
 */
289 o = do_div(lblock, sdp->sd_jbsize) + sizeof(struct gfs2_meta_header);
291 while (copied < size) {
293 struct buffer_head *bh;
/* Copy at most up to the end of the current block. */
296 amount = size - copied;
297 if (amount > sdp->sd_sb.sb_bsize - o)
298 amount = sdp->sd_sb.sb_bsize - o;
302 error = gfs2_block_map(ip, lblock, &new, &dblock, &extlen);
/* Start read-ahead over the mapped extent. */
308 gfs2_meta_ra(ip->i_gl, dblock, extlen);
311 error = gfs2_dir_get_buffer(ip, dblock, new, &bh);
319 memcpy(buf, bh->b_data + o, amount);
/* Later blocks are read starting right after their meta header. */
327 o = sizeof(struct gfs2_meta_header);
/* Report the bytes copied if there were any, otherwise the error. */
332 return (copied) ? copied : error;
336 * int gfs2_filecmp - Compare two filenames
337 * @file1: The first filename
338 * @file2: The second filename
339 * @len_of_file2: The length of the second file
341 * This routine compares two filenames and returns 1 if they are equal.
343 * Returns: 1 if the files are the same, otherwise 0.
/*
 * Compare the name in @file1 with the @len_of_file2 bytes at @file2.
 * The lengths are compared first; the bytes are compared with memcmp()
 * over file1->len bytes.
 */
346 int gfs2_filecmp(struct qstr *file1, char *file2, int len_of_file2)
348 if (file1->len != len_of_file2)
350 if (memcmp(file1->name, file2, file1->len))
356 * dirent_first - Return the first dirent
357 * @dip: the directory
359 * @dent: Pointer to list of dirents
361 * return first dirent whether bh points to leaf or stuffed dinode
363 * Returns: IS_LEAF, IS_DINODE, or -errno
/*
 * Point *@dent at the first dirent stored in @bh.  The block type is read
 * from the meta header at the start of the buffer: in a leaf
 * (GFS2_METATYPE_LF) the dirents follow struct gfs2_leaf; otherwise the
 * block is checked to be a dinode (GFS2_METATYPE_DI) and the dirents follow
 * struct gfs2_dinode.
 */
366 static int dirent_first(struct gfs2_inode *dip, struct buffer_head *bh,
367 struct gfs2_dirent **dent)
369 struct gfs2_meta_header *h = (struct gfs2_meta_header *)bh->b_data;
371 if (be16_to_cpu(h->mh_type) == GFS2_METATYPE_LF) {
372 if (gfs2_meta_check(dip->i_sbd, bh))
374 *dent = (struct gfs2_dirent *)(bh->b_data +
375 sizeof(struct gfs2_leaf));
378 if (gfs2_metatype_check(dip->i_sbd, bh, GFS2_METATYPE_DI))
380 *dent = (struct gfs2_dirent *)(bh->b_data +
381 sizeof(struct gfs2_dinode));
387 * dirent_next - Next dirent
388 * @dip: the directory
390 * @dent: Pointer to list of dirents
392 * Returns: 0 on success, error code otherwise
/*
 * Step from the current dirent to the one that follows it in @bh, found by
 * adding the current dirent's de_rec_len to its address.  Callers loop while
 * this returns 0.  The on-disk record lengths are validated against the end
 * of the buffer, and inconsistencies are reported via gfs2_consist_inode().
 */
395 static int dirent_next(struct gfs2_inode *dip, struct buffer_head *bh,
396 struct gfs2_dirent **dent)
398 struct gfs2_dirent *tmp, *cur;
400 uint32_t cur_rec_len;
403 bh_end = bh->b_data + bh->b_size;
404 cur_rec_len = be32_to_cpu(cur->de_rec_len);
/*
 * Reaching exactly the end of the buffer means there is no further dirent;
 * overshooting the end means the record length is corrupt.
 */
406 if ((char *)cur + cur_rec_len >= bh_end) {
407 if ((char *)cur + cur_rec_len > bh_end) {
408 gfs2_consist_inode(dip);
414 tmp = (struct gfs2_dirent *)((char *)cur + cur_rec_len);
/* The following dirent must itself fit inside the buffer. */
416 if ((char *)tmp + be32_to_cpu(tmp->de_rec_len) > bh_end) {
417 gfs2_consist_inode(dip);
420 /* Only the first dent could ever have de_inum.no_addr == 0 */
421 if (!tmp->de_inum.no_addr) {
422 gfs2_consist_inode(dip);
432 * dirent_del - Delete a dirent
433 * @dip: The GFS2 inode
435 * @prev: The previous dirent
436 * @cur: The current dirent
/*
 * Remove dirent @cur from block @bh.  With no previous dirent the entry is
 * only marked unused by zeroing its inode address; otherwise its space is
 * folded into @prev by adding cur's de_rec_len to prev's.  The buffer is
 * added to the transaction before it is changed.
 */
440 static void dirent_del(struct gfs2_inode *dip, struct buffer_head *bh,
441 struct gfs2_dirent *prev, struct gfs2_dirent *cur)
443 uint32_t cur_rec_len, prev_rec_len;
/* Deleting an entry that is already marked unused is an inconsistency. */
445 if (!cur->de_inum.no_addr) {
446 gfs2_consist_inode(dip);
450 gfs2_trans_add_bh(dip->i_gl, bh, 1);
452 /* If there is no prev entry, this is the first entry in the block.
453 The de_rec_len is already as big as it needs to be. Just zero
454 out the inode number and return. */
457 cur->de_inum.no_addr = 0; /* No endianness worries */
461 /* Combine this dentry with the previous one. */
463 prev_rec_len = be32_to_cpu(prev->de_rec_len);
464 cur_rec_len = be32_to_cpu(cur->de_rec_len);
/* prev must end exactly where cur starts, and cur must end inside the block. */
466 if ((char *)prev + prev_rec_len != (char *)cur)
467 gfs2_consist_inode(dip);
468 if ((char *)cur + cur_rec_len > bh->b_data + bh->b_size)
469 gfs2_consist_inode(dip);
471 prev_rec_len += cur_rec_len;
472 prev->de_rec_len = cpu_to_be32(prev_rec_len);
476 * gfs2_dirent_alloc - Allocate a directory entry
477 * @dip: The GFS2 inode
479 * @name_len: The length of the name
480 * @dent_out: Pointer to list of dirents
482 * Returns: 0 on success, error code otherwise
/*
 * Find room in block @bh for a dirent whose name is @name_len bytes long.
 * The space needed is GFS2_DIRENT_SIZE(name_len).  The entry count and the
 * offset of the dirent area are taken from the leaf or dinode header,
 * depending on what dirent_first() reports.  In the scan, an unused dirent
 * that is large enough is reused as is; a used dirent with enough slack
 * behind its name is split, and the new dirent is carved out of the slack.
 */
485 int gfs2_dirent_alloc(struct gfs2_inode *dip, struct buffer_head *bh,
486 int name_len, struct gfs2_dirent **dent_out)
488 struct gfs2_dirent *dent, *new;
489 unsigned int rec_len = GFS2_DIRENT_SIZE(name_len);
490 unsigned int entries = 0, offset = 0;
493 type = dirent_first(dip, bh, &dent);
497 if (type == IS_LEAF) {
498 struct gfs2_leaf *leaf = (struct gfs2_leaf *)bh->b_data;
499 entries = be16_to_cpu(leaf->lf_entries);
500 offset = sizeof(struct gfs2_leaf);
502 struct gfs2_dinode *dinode = (struct gfs2_dinode *)bh->b_data;
503 entries = be32_to_cpu(dinode->di_entries);
504 offset = sizeof(struct gfs2_dinode);
/*
 * NOTE(review): the statements from here to the de_name_len store look like
 * the handling of a block holding no entries -- the first dirent must be
 * unused and is given all the space after the header; confirm the guarding
 * condition in the full function.
 */
508 if (dent->de_inum.no_addr) {
509 gfs2_consist_inode(dip);
513 gfs2_trans_add_bh(dip->i_gl, bh, 1);
515 dent->de_rec_len = bh->b_size - offset;
516 dent->de_rec_len = cpu_to_be32(dent->de_rec_len);
517 dent->de_name_len = name_len;
524 uint32_t cur_rec_len, cur_name_len;
526 cur_rec_len = be32_to_cpu(dent->de_rec_len);
527 cur_name_len = dent->de_name_len;
/* Either an unused dirent that is big enough, or a used one with enough slack. */
529 if ((!dent->de_inum.no_addr && cur_rec_len >= rec_len) ||
530 (cur_rec_len >= GFS2_DIRENT_SIZE(cur_name_len) + rec_len)) {
531 gfs2_trans_add_bh(dip->i_gl, bh, 1);
533 if (dent->de_inum.no_addr) {
/* Split: the new dirent starts right after the bytes the current one uses. */
534 new = (struct gfs2_dirent *)((char *)dent +
535 GFS2_DIRENT_SIZE(cur_name_len));
536 memset(new, 0, sizeof(struct gfs2_dirent));
/* The new dirent takes over the slack; the current one shrinks accordingly. */
538 new->de_rec_len = cur_rec_len - GFS2_DIRENT_SIZE(cur_name_len);
539 new->de_rec_len = cpu_to_be32(new->de_rec_len);
540 new->de_name_len = name_len;
542 dent->de_rec_len = cur_rec_len - be32_to_cpu(new->de_rec_len);
543 dent->de_rec_len = cpu_to_be32(dent->de_rec_len);
549 dent->de_name_len = name_len;
554 } while (dirent_next(dip, bh, &dent) == 0);
560 * dirent_fits - See if we can fit an entry in this buffer
561 * @dip: The GFS2 inode
563 * @name_len: The length of the name
565 * Returns: 1 if it can fit, 0 otherwise
/*
 * Check whether block @bh has room for a dirent needing
 * GFS2_DIRENT_SIZE(name_len) bytes, using the same fit test as
 * gfs2_dirent_alloc().
 */
568 static int dirent_fits(struct gfs2_inode *dip, struct buffer_head *bh,
571 struct gfs2_dirent *dent;
572 unsigned int rec_len = GFS2_DIRENT_SIZE(name_len);
573 unsigned int entries = 0;
576 type = dirent_first(dip, bh, &dent);
/* The entry count comes from the leaf or the dinode header. */
580 if (type == IS_LEAF) {
581 struct gfs2_leaf *leaf = (struct gfs2_leaf *)bh->b_data;
582 entries = be16_to_cpu(leaf->lf_entries);
584 struct gfs2_dinode *dinode = (struct gfs2_dinode *)bh->b_data;
585 entries = be32_to_cpu(dinode->di_entries);
592 uint32_t cur_rec_len, cur_name_len;
594 cur_rec_len = be32_to_cpu(dent->de_rec_len);
595 cur_name_len = dent->de_name_len;
/* Either an unused dirent that is big enough, or a used one with enough slack. */
597 if ((!dent->de_inum.no_addr && cur_rec_len >= rec_len) ||
598 (cur_rec_len >= GFS2_DIRENT_SIZE(cur_name_len) + rec_len))
600 } while (dirent_next(dip, bh, &dent) == 0);
/*
 * Search one block (@bh, a leaf or a stuffed dinode) for a dirent named
 * @filename.  Candidates are matched first on the stored hash and then on
 * the name bytes, which sit directly after the dirent header.  @prev tracks
 * a preceding dirent and starts out NULL.
 */
605 static int leaf_search(struct gfs2_inode *dip, struct buffer_head *bh,
606 struct qstr *filename, struct gfs2_dirent **dent_out,
607 struct gfs2_dirent **dent_prev)
610 struct gfs2_dirent *dent, *prev = NULL;
611 unsigned int entries = 0;
614 type = dirent_first(dip, bh, &dent);
/* The entry count comes from the leaf or the dinode header. */
618 if (type == IS_LEAF) {
619 struct gfs2_leaf *leaf = (struct gfs2_leaf *)bh->b_data;
620 entries = be16_to_cpu(leaf->lf_entries);
621 } else if (type == IS_DINODE) {
622 struct gfs2_dinode *dinode = (struct gfs2_dinode *)bh->b_data;
623 entries = be32_to_cpu(dinode->di_entries);
626 hash = gfs2_disk_hash(filename->name, filename->len);
/* Dirents with a zero inode address are unused slots. */
629 if (!dent->de_inum.no_addr) {
634 if (be32_to_cpu(dent->de_hash) == hash &&
635 gfs2_filecmp(filename, (char *)(dent + 1),
636 dent->de_name_len)) {
645 } while (dirent_next(dip, bh, &dent) == 0);
/*
 * Read block @leaf_no into *@bhp and verify that it is a leaf block
 * (GFS2_METATYPE_LF).
 */
650 static int get_leaf(struct gfs2_inode *dip, uint64_t leaf_no,
651 struct buffer_head **bhp)
655 error = gfs2_meta_read(dip->i_gl, leaf_no, DIO_START | DIO_WAIT, bhp);
656 if (!error && gfs2_metatype_check(dip->i_sbd, *bhp, GFS2_METATYPE_LF))
663 * get_leaf_nr - Get a leaf number associated with the index
664 * @dip: The GFS2 inode
668 * Returns: 0 on success, error code otherwise
/*
 * Look up hash-table slot @index: read the 64-bit big-endian block pointer
 * stored at byte offset index * sizeof(uint64_t) of the directory data and
 * hand it back in CPU byte order through *leaf_out.
 */
671 static int get_leaf_nr(struct gfs2_inode *dip, uint32_t index,
677 error = gfs2_dir_read_data(dip, (char *)&leaf_no,
678 index * sizeof(uint64_t),
/* A short read is turned into -EIO unless a real error code came back. */
680 if (error != sizeof(uint64_t))
681 return (error < 0) ? error : -EIO;
683 *leaf_out = be64_to_cpu(leaf_no);
/*
 * Fetch the leaf block that hash-table slot @index points at: resolve the
 * slot to a block number with get_leaf_nr(), then read it with get_leaf().
 */
688 static int get_first_leaf(struct gfs2_inode *dip, uint32_t index,
689 struct buffer_head **bh_out)
694 error = get_leaf_nr(dip, index, &leaf_no);
696 error = get_leaf(dip, leaf_no, bh_out);
/*
 * Follow the chain from leaf @bh_in: read the block named by its lf_next
 * field (stored big-endian) into *@bh_out.
 */
701 static int get_next_leaf(struct gfs2_inode *dip, struct buffer_head *bh_in,
702 struct buffer_head **bh_out)
704 struct gfs2_leaf *leaf;
707 leaf = (struct gfs2_leaf *)bh_in->b_data;
712 error = get_leaf(dip, be64_to_cpu(leaf->lf_next), bh_out);
/*
 * Search an exhash directory for @filename: pick the hash-table slot from
 * the name's hash, then search that leaf with leaf_search() and move along
 * the leaf chain with get_next_leaf().
 */
717 static int linked_leaf_search(struct gfs2_inode *dip, struct qstr *filename,
718 struct gfs2_dirent **dent_out,
719 struct gfs2_dirent **dent_prev,
720 struct buffer_head **bh_out)
722 struct buffer_head *bh = NULL, *bh_next;
723 uint32_t hsize, index;
/* The directory data must be exactly 2^di_depth 64-bit pointers. */
727 hsize = 1 << dip->i_di.di_depth;
728 if (hsize * sizeof(uint64_t) != dip->i_di.di_size) {
729 gfs2_consist_inode(dip);
733 /* Figure out the address of the leaf node. */
735 hash = gfs2_disk_hash(filename->name, filename->len);
/* The top di_depth bits of the hash select the slot. */
736 index = hash >> (32 - dip->i_di.di_depth);
738 error = get_first_leaf(dip, index, &bh_next);
749 error = leaf_search(dip, bh, filename, dent_out, dent_prev);
763 error = get_next_leaf(dip, bh, &bh_next);
773 * dir_make_exhash - Convert a stuffed directory into an ExHash directory
774 * @dip: The GFS2 inode
776 * Returns: 0 on success, error code otherwise
/*
 * Convert a stuffed (linear) directory to exhash form.  A new leaf block is
 * allocated and the dirents are copied out of the dinode block into it.
 * The dinode's data area is then cleared and filled with sd_hash_ptrs
 * pointers that all name the new leaf.  Finally di_size, di_blocks,
 * di_flags (GFS2_DIF_EXHASH), di_payload_format and di_depth are updated
 * and written back.  The closing for-loop with the empty body counts how
 * many times sd_hash_ptrs can be halved before reaching zero, starting the
 * count at -1; for a power-of-two sd_hash_ptrs that is its base-2 logarithm,
 * which becomes di_depth.
 */
779 static int dir_make_exhash(struct gfs2_inode *dip)
781 struct gfs2_sbd *sdp = dip->i_sbd;
782 struct gfs2_dirent *dent;
783 struct buffer_head *bh, *dibh;
784 struct gfs2_leaf *leaf;
790 error = gfs2_meta_inode_buffer(dip, &dibh);
794 /* Allocate a new block for the first leaf node */
796 bn = gfs2_alloc_meta(dip);
798 /* Turn over a new leaf */
800 bh = gfs2_meta_new(dip->i_gl, bn);
801 gfs2_trans_add_bh(dip->i_gl, bh, 1);
802 gfs2_metatype_set(bh, GFS2_METATYPE_LF, GFS2_FORMAT_LF);
803 gfs2_buffer_clear_tail(bh, sizeof(struct gfs2_meta_header));
805 /* Fill in the leaf structure */
807 leaf = (struct gfs2_leaf *)bh->b_data;
/* lf_entries is a 16-bit field, so the entry count must fit in it. */
809 gfs2_assert(sdp, dip->i_di.di_entries < (1 << 16));
811 leaf->lf_dirent_format = cpu_to_be32(GFS2_FORMAT_DE);
812 leaf->lf_entries = cpu_to_be16(dip->i_di.di_entries);
/* Copy the dirents from behind the dinode header to behind the leaf header. */
816 gfs2_buffer_copy_tail(bh, sizeof(struct gfs2_leaf), dibh,
817 sizeof(struct gfs2_dinode));
819 /* Find last entry */
822 dirent_first(dip, bh, &dent);
825 if (!dent->de_inum.no_addr)
827 if (++x == dip->i_di.di_entries)
830 while (dirent_next(dip, bh, &dent) == 0);
832 /* Adjust the last dirent's record length
833 (Remember that dent still points to the last entry.) */
/* The adjustment is the difference between the two header sizes. */
835 dent->de_rec_len = be32_to_cpu(dent->de_rec_len) +
836 sizeof(struct gfs2_dinode) -
837 sizeof(struct gfs2_leaf);
838 dent->de_rec_len = cpu_to_be32(dent->de_rec_len);
842 /* We're done with the new leaf block, now setup the new
845 gfs2_trans_add_bh(dip->i_gl, dibh, 1);
846 gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode));
848 lp = (uint64_t *)(dibh->b_data + sizeof(struct gfs2_dinode));
850 for (x = sdp->sd_hash_ptrs; x--; lp++)
851 *lp = cpu_to_be64(bn);
853 dip->i_di.di_size = sdp->sd_sb.sb_bsize / 2;
854 dip->i_di.di_blocks++;
855 dip->i_di.di_flags |= GFS2_DIF_EXHASH;
856 dip->i_di.di_payload_format = 0;
858 for (x = sdp->sd_hash_ptrs, y = -1; x; x >>= 1, y++) ;
859 dip->i_di.di_depth = y;
861 gfs2_dinode_out(&dip->i_di, dibh->b_data);
869 * dir_split_leaf - Split a leaf block into two
870 * @dip: The GFS2 inode
874 * Returns: 0 on success, error code on failure
/*
 * Split the leaf at block @leaf_no, reached through hash-table slot @index.
 * A new leaf block is allocated, and half_len hash-table pointers starting
 * at slot "start" are rewritten to name it.  Dirents whose hash is below
 * the divider are then moved from the old leaf to the new one.  Both leaves
 * end up with the old leaf's depth plus one, and the directory's block
 * count is bumped.
 */
877 static int dir_split_leaf(struct gfs2_inode *dip, uint32_t index,
880 struct buffer_head *nbh, *obh, *dibh;
881 struct gfs2_leaf *nleaf, *oleaf;
882 struct gfs2_dirent *dent, *prev = NULL, *next = NULL, *new;
883 uint32_t start, len, half_len, divider;
889 /* Allocate the new leaf block */
891 bn = gfs2_alloc_meta(dip);
893 /* Get the new leaf block */
895 nbh = gfs2_meta_new(dip->i_gl, bn);
896 gfs2_trans_add_bh(dip->i_gl, nbh, 1);
897 gfs2_metatype_set(nbh, GFS2_METATYPE_LF, GFS2_FORMAT_LF);
898 gfs2_buffer_clear_tail(nbh, sizeof(struct gfs2_meta_header));
900 nleaf = (struct gfs2_leaf *)nbh->b_data;
902 nleaf->lf_dirent_format = cpu_to_be32(GFS2_FORMAT_DE);
904 /* Get the old leaf block */
906 error = get_leaf(dip, leaf_no, &obh);
910 gfs2_trans_add_bh(dip->i_gl, obh, 1);
912 oleaf = (struct gfs2_leaf *)obh->b_data;
914 /* Compute the start and len of leaf pointers in the hash table. */
/* len is two to the power of the gap between directory depth and leaf depth. */
916 len = 1 << (dip->i_di.di_depth - be16_to_cpu(oleaf->lf_depth));
919 gfs2_consist_inode(dip);
/* Round index down to a multiple of len. */
924 start = (index & ~(len - 1));
926 /* Change the pointers.
927 Don't bother distinguishing stuffed from non-stuffed.
928 This code is complicated enough already. */
930 lp = kcalloc(half_len, sizeof(uint64_t), GFP_KERNEL | __GFP_NOFAIL);
932 error = gfs2_dir_read_data(dip, (char *)lp, start * sizeof(uint64_t),
933 half_len * sizeof(uint64_t));
934 if (error != half_len * sizeof(uint64_t)) {
940 /* Change the pointers */
942 for (x = 0; x < half_len; x++)
943 lp[x] = cpu_to_be64(bn);
945 error = gfs2_dir_write_data(dip, (char *)lp, start * sizeof(uint64_t),
946 half_len * sizeof(uint64_t));
947 if (error != half_len * sizeof(uint64_t)) {
955 /* Compute the divider */
/* Slot number (start + half_len) shifted into the top di_depth bits of a hash. */
957 divider = (start + half_len) << (32 - dip->i_di.di_depth);
959 /* Copy the entries */
961 dirent_first(dip, obh, &dent);
/* The successor is fetched before the current dirent may be deleted. */
965 if (dirent_next(dip, obh, &next))
968 if (dent->de_inum.no_addr &&
969 be32_to_cpu(dent->de_hash) < divider) {
970 name_len = dent->de_name_len;
972 gfs2_dirent_alloc(dip, nbh, name_len, &new);
974 new->de_inum = dent->de_inum; /* No endian worries */
975 new->de_hash = dent->de_hash; /* No endian worries */
976 new->de_type = dent->de_type; /* No endian worries */
977 memcpy((char *)(new + 1), (char *)(dent + 1),
980 nleaf->lf_entries = be16_to_cpu(nleaf->lf_entries)+1;
981 nleaf->lf_entries = cpu_to_be16(nleaf->lf_entries);
983 dirent_del(dip, obh, prev, dent);
/* The old leaf's count must not already be zero before it is decremented. */
985 if (!oleaf->lf_entries)
986 gfs2_consist_inode(dip);
987 oleaf->lf_entries = be16_to_cpu(oleaf->lf_entries)-1;
988 oleaf->lf_entries = cpu_to_be16(oleaf->lf_entries);
1001 /* If none of the entries got moved into the new leaf,
1002 artificially fill in the first entry. */
1005 gfs2_dirent_alloc(dip, nbh, 0, &new);
1006 new->de_inum.no_addr = 0;
1009 oleaf->lf_depth = be16_to_cpu(oleaf->lf_depth) + 1;
1010 oleaf->lf_depth = cpu_to_be16(oleaf->lf_depth);
1011 nleaf->lf_depth = oleaf->lf_depth;
1013 error = gfs2_meta_inode_buffer(dip, &dibh);
1014 if (!gfs2_assert_withdraw(dip->i_sbd, !error)) {
1015 dip->i_di.di_blocks++;
1016 gfs2_dinode_out(&dip->i_di, dibh->b_data);
1037 * dir_double_exhash - Double size of ExHash table
1038 * @dip: The GFS2 dinode
1040 * Returns: 0 on success, error code on failure
/*
 * Double the directory's hash table.  The table is processed one
 * sd_hash_bsize chunk at a time, from the last chunk down to the first.
 * Each chunk is read into the start of buf, expanded into the region of buf
 * that follows it, and written back as sb_bsize bytes at block * sb_bsize.
 * On completion di_depth is incremented and the dinode is written out.
 */
1043 static int dir_double_exhash(struct gfs2_inode *dip)
1045 struct gfs2_sbd *sdp = dip->i_sbd;
1046 struct buffer_head *dibh;
1049 uint64_t *from, *to;
/* The directory data must be exactly 2^di_depth 64-bit pointers. */
1054 hsize = 1 << dip->i_di.di_depth;
1055 if (hsize * sizeof(uint64_t) != dip->i_di.di_size) {
1056 gfs2_consist_inode(dip);
1060 /* Allocate both the "from" and "to" buffers in one big chunk */
1062 buf = kcalloc(3, sdp->sd_hash_bsize, GFP_KERNEL | __GFP_NOFAIL);
1064 for (block = dip->i_di.di_size >> sdp->sd_hash_bsize_shift; block--;) {
1065 error = gfs2_dir_read_data(dip, (char *)buf,
1066 block * sdp->sd_hash_bsize,
1067 sdp->sd_hash_bsize);
1068 if (error != sdp->sd_hash_bsize) {
/* The output region starts one chunk into buf. */
1075 to = (uint64_t *)((char *)buf + sdp->sd_hash_bsize);
1077 for (x = sdp->sd_hash_ptrs; x--; from++) {
1078 *to++ = *from; /* No endianness worries */
1082 error = gfs2_dir_write_data(dip,
1083 (char *)buf + sdp->sd_hash_bsize,
1084 block * sdp->sd_sb.sb_bsize,
1085 sdp->sd_sb.sb_bsize);
1086 if (error != sdp->sd_sb.sb_bsize) {
1095 error = gfs2_meta_inode_buffer(dip, &dibh);
1096 if (!gfs2_assert_withdraw(sdp, !error)) {
1097 dip->i_di.di_depth++;
1098 gfs2_dinode_out(&dip->i_di, dibh->b_data);
1111 * compare_dents - compare directory entries by hash value
1115 * When comparing the hash entries of @a to @b:
/*
 * Comparison callback handed to sort() by do_filldir_main().  @a and @b
 * each point at an element of an array of struct gfs2_dirent pointers.
 * Entries are compared by hash (converted to CPU byte order); when the
 * hashes are equal, the name lengths and the name bytes that follow the
 * dirent header are compared.
 */
1121 static int compare_dents(const void *a, const void *b)
1123 struct gfs2_dirent *dent_a, *dent_b;
1124 uint32_t hash_a, hash_b;
1127 dent_a = *(struct gfs2_dirent **)a;
1128 hash_a = dent_a->de_hash;
1129 hash_a = be32_to_cpu(hash_a);
1131 dent_b = *(struct gfs2_dirent **)b;
1132 hash_b = dent_b->de_hash;
1133 hash_b = be32_to_cpu(hash_b);
1135 if (hash_a > hash_b)
1137 else if (hash_a < hash_b)
/* Equal hashes: compare the name lengths, then the names. */
1140 unsigned int len_a = dent_a->de_name_len;
1141 unsigned int len_b = dent_b->de_name_len;
1145 else if (len_a < len_b)
1148 ret = memcmp((char *)(dent_a + 1),
1149 (char *)(dent_b + 1),
1157 * do_filldir_main - read out directory entries
1158 * @dip: The GFS2 inode
1159 * @offset: The offset in the file to read from
1160 * @opaque: opaque data to pass to filldir
1161 * @filldir: The function to pass entries to
1162 * @darr: an array of struct gfs2_dirent pointers to read
1163 * @entries: the number of entries in darr
1164 * @copied: pointer to int that's non-zero if an entry has been copied out
1166 * Jump through some hoops to make sure that if there are hash collisions,
1167 * they are read out at the beginning of a buffer. We want to minimize
1168 * the possibility that they will fall into different readdir buffers or
1169 * that someone will want to seek to that location.
1171 * Returns: errno, >0 on exception from filldir
/*
 * Sort the @entries dirent pointers in @darr with compare_dents() and pass
 * them to @filldir in that order.  Each entry's readdir offset is its hash
 * converted with gfs2_disk_hash2offset().  The offset of the following
 * entry is computed ahead of time so that runs of equal offsets can be
 * detected.
 */
1174 static int do_filldir_main(struct gfs2_inode *dip, uint64_t *offset,
1175 void *opaque, gfs2_filldir_t filldir,
1176 struct gfs2_dirent **darr, uint32_t entries,
1179 struct gfs2_dirent *dent, *dent_next;
1180 struct gfs2_inum inum;
1181 uint64_t off, off_next;
1186 sort(darr, entries, sizeof(struct gfs2_dirent *), compare_dents, NULL);
/* Prime the look-ahead with the first sorted entry. */
1188 dent_next = darr[0];
1189 off_next = be32_to_cpu(dent_next->de_hash);
1190 off_next = gfs2_disk_hash2offset(off_next);
/* x indexes the current entry, y the one after it. */
1192 for (x = 0, y = 1; x < entries; x++, y++) {
1197 dent_next = darr[y];
1198 off_next = be32_to_cpu(dent_next->de_hash);
1199 off_next = gfs2_disk_hash2offset(off_next);
/* The next entry has the same offset as the current one. */
1205 if (off_next == off) {
1206 if (*copied && !run)
/* Convert the on-disk inode number before handing the entry to filldir. */
1217 gfs2_inum_in(&inum, (char *)&dent->de_inum);
1219 error = filldir(opaque, (char *)(dent + 1),
1229 /* Increment the *offset by one, so the next time we come into the
1230 do_filldir fxn, we get the next entry instead of the last one in the
1239 * do_filldir_single - Read directory entries out of a single block
1240 * @dip: The GFS2 inode
1241 * @offset: The offset in the file to read from
1242 * @opaque: opaque data to pass to filldir
1243 * @filldir: The function to pass entries to
1245 * @entries: the number of entries in the block
1246 * @copied: pointer to int that's non-zero if an entry has been copied out
1248 * Returns: errno, >0 on exception from filldir
/*
 * Collect pointers to the dirents of a single block (@bh) into a
 * temporary array sized for @entries elements and pass them to
 * do_filldir_main().  Mismatches found during the walk are reported
 * through gfs2_consist_inode().
 */
1251 static int do_filldir_single(struct gfs2_inode *dip, uint64_t *offset,
1252 void *opaque, gfs2_filldir_t filldir,
1253 struct buffer_head *bh, uint32_t entries,
1256 struct gfs2_dirent **darr;
1257 struct gfs2_dirent *de;
1264 darr = kcalloc(entries, sizeof(struct gfs2_dirent *), GFP_KERNEL);
1268 dirent_first(dip, bh, &de);
/* Dirents with a zero inode address are unused slots. */
1270 if (!de->de_inum.no_addr)
1273 gfs2_consist_inode(dip);
1279 while (dirent_next(dip, bh, &de) == 0);
1282 gfs2_consist_inode(dip);
1287 error = do_filldir_main(dip, offset, opaque, filldir, darr,
1297 * do_filldir_multi - Read directory entries out of a linked leaf list
1298 * @dip: The GFS2 inode
1299 * @offset: The offset in the file to read from
1300 * @opaque: opaque data to pass to filldir
1301 * @filldir: The function to pass entries to
1302 * @bh: the first leaf in the list
1303 * @copied: pointer to int that's non-zero if an entry has been copied out
1305 * Returns: errno, >0 on exception from filldir
/*
 * Read out the entries of a chain of leaves that starts at @bh.  A first
 * pass along the chain adds up the lf_entries counts.  Arrays are then
 * allocated for the leaf buffers (larr) and for the dirent pointers (darr),
 * a second pass gathers the dirents of every leaf that has entries, and the
 * collected array is passed to do_filldir_main().
 */
1308 static int do_filldir_multi(struct gfs2_inode *dip, uint64_t *offset,
1309 void *opaque, gfs2_filldir_t filldir,
1310 struct buffer_head *bh, int *copied)
1312 struct buffer_head **larr = NULL;
1313 struct gfs2_dirent **darr;
1314 struct gfs2_leaf *leaf;
1315 struct buffer_head *tmp_bh;
1316 struct gfs2_dirent *de;
1317 unsigned int entries, e = 0;
1318 unsigned int leaves = 0, l = 0;
1323 /* Count leaves and entries */
1325 leaf = (struct gfs2_leaf *)bh->b_data;
1326 entries = be16_to_cpu(leaf->lf_entries);
1330 ln = be64_to_cpu(ln);
1332 error = get_leaf(dip, ln, &tmp_bh);
1336 leaf = (struct gfs2_leaf *)tmp_bh->b_data;
1337 if (leaf->lf_entries) {
1338 entries += be16_to_cpu(leaf->lf_entries);
1350 larr = kcalloc(leaves, sizeof(struct buffer_head *),GFP_KERNEL);
1355 darr = kcalloc(entries, sizeof(struct gfs2_dirent *), GFP_KERNEL);
/* Gather the dirents of the first leaf. */
1361 leaf = (struct gfs2_leaf *)bh->b_data;
1362 if (leaf->lf_entries) {
1363 dirent_first(dip, bh, &de);
1365 if (!de->de_inum.no_addr)
1368 gfs2_consist_inode(dip);
1374 while (dirent_next(dip, bh, &de) == 0);
/* Gather the dirents of the leaves further along the chain. */
1379 ln = be64_to_cpu(ln);
1381 error = get_leaf(dip, ln, &tmp_bh);
1385 leaf = (struct gfs2_leaf *)tmp_bh->b_data;
1386 if (leaf->lf_entries) {
1387 dirent_first(dip, tmp_bh, &de);
1389 if (!de->de_inum.no_addr)
1392 gfs2_consist_inode(dip);
1398 while (dirent_next(dip, tmp_bh, &de) == 0);
/* The second pass must have seen as many leaves as the first pass counted. */
1409 if (gfs2_assert_withdraw(dip->i_sbd, l == leaves)) {
1414 gfs2_consist_inode(dip);
1419 error = do_filldir_main(dip, offset, opaque, filldir, darr,
1424 for (x = 0; x < l; x++)
1432 * dir_e_search - Search exhash (leaf) dir for inode matching name
1433 * @dip: The GFS2 inode
1434 * @filename: Filename string
1435 * @inode: If non-NULL, function fills with formal inode # and block address
1436 * @type: If non-NULL, function fills with DT_... dinode type
/*
 * Look @filename up in an exhash directory with linked_leaf_search() and
 * copy the matching dirent's inode number (converted by gfs2_inum_in())
 * and de_type to the caller.
 */
1441 static int dir_e_search(struct gfs2_inode *dip, struct qstr *filename,
1442 struct gfs2_inum *inum, unsigned int *type)
1444 struct buffer_head *bh;
1445 struct gfs2_dirent *dent;
1448 error = linked_leaf_search(dip, filename, &dent, NULL, &bh);
1453 gfs2_inum_in(inum, (char *)&dent->de_inum);
1455 *type = dent->de_type;
/*
 * Add an entry named @filename, referring to @inum with type @type, to an
 * exhash directory.  The target leaf is chosen from the name's hash.  When
 * gfs2_dirent_alloc() reports failure for that leaf, the fallbacks are
 * tried in this order: split the leaf if its depth is below the directory
 * depth, double the hash table if the directory depth is below
 * GFS2_DIR_MAX_DEPTH, move on to the next leaf in the chain, or allocate a
 * new leaf and link it in through lf_next.  Once a dirent has been obtained
 * it is filled in, and the leaf and dinode entry counts are incremented.
 */
1462 static int dir_e_add(struct gfs2_inode *dip, struct qstr *filename,
1463 struct gfs2_inum *inum, unsigned int type)
1465 struct buffer_head *bh, *nbh, *dibh;
1466 struct gfs2_leaf *leaf, *nleaf;
1467 struct gfs2_dirent *dent;
1468 uint32_t hsize, index;
1470 uint64_t leaf_no, bn;
/* The directory data must be exactly 2^di_depth 64-bit pointers. */
1474 hsize = 1 << dip->i_di.di_depth;
1475 if (hsize * sizeof(uint64_t) != dip->i_di.di_size) {
1476 gfs2_consist_inode(dip);
1480 /* Figure out the address of the leaf node. */
1482 hash = gfs2_disk_hash(filename->name, filename->len);
/* The top di_depth bits of the hash select the slot. */
1483 index = hash >> (32 - dip->i_di.di_depth);
1485 error = get_leaf_nr(dip, index, &leaf_no);
1489 /* Add entry to the leaf */
1492 error = get_leaf(dip, leaf_no, &bh);
1496 leaf = (struct gfs2_leaf *)bh->b_data;
/* A non-zero return means no dirent could be allocated in this leaf. */
1498 if (gfs2_dirent_alloc(dip, bh, filename->len, &dent)) {
1500 if (be16_to_cpu(leaf->lf_depth) < dip->i_di.di_depth) {
1501 /* Can we split the leaf? */
1505 error = dir_split_leaf(dip, index, leaf_no);
1511 } else if (dip->i_di.di_depth < GFS2_DIR_MAX_DEPTH) {
1512 /* Can we double the hash table? */
1516 error = dir_double_exhash(dip);
1522 } else if (leaf->lf_next) {
1523 /* Can we try the next leaf in the list? */
1524 leaf_no = be64_to_cpu(leaf->lf_next);
1529 /* Create a new leaf and add it to the list. */
1531 bn = gfs2_alloc_meta(dip);
1533 nbh = gfs2_meta_new(dip->i_gl, bn);
1534 gfs2_trans_add_bh(dip->i_gl, nbh, 1);
1535 gfs2_metatype_set(nbh,
1538 gfs2_buffer_clear_tail(nbh,
1539 sizeof(struct gfs2_meta_header));
/* Link the new leaf behind the current one. */
1541 gfs2_trans_add_bh(dip->i_gl, bh, 1);
1542 leaf->lf_next = cpu_to_be64(bn);
1544 nleaf = (struct gfs2_leaf *)nbh->b_data;
1545 nleaf->lf_depth = leaf->lf_depth;
1546 nleaf->lf_dirent_format = cpu_to_be32(GFS2_FORMAT_DE);
1548 gfs2_dirent_alloc(dip, nbh, filename->len,
1551 dip->i_di.di_blocks++;
1560 /* If the gfs2_dirent_alloc() succeeded, it pinned the "bh" */
/* Fill in the new dirent; the name bytes go right after the dirent header. */
1562 gfs2_inum_out(inum, (char *)&dent->de_inum);
1563 dent->de_hash = cpu_to_be32(hash);
1564 dent->de_type = type;
1565 memcpy((char *)(dent + 1), filename->name, filename->len);
1567 leaf->lf_entries = be16_to_cpu(leaf->lf_entries) + 1;
1568 leaf->lf_entries = cpu_to_be16(leaf->lf_entries);
1572 error = gfs2_meta_inode_buffer(dip, &dibh);
1576 dip->i_di.di_entries++;
1577 dip->i_di.di_mtime = dip->i_di.di_ctime = get_seconds();
1579 gfs2_trans_add_bh(dip->i_gl, dibh, 1);
1580 gfs2_dinode_out(&dip->i_di, dibh->b_data);
/*
 * Delete the entry named @filename from an exhash directory.  The dirent
 * and its predecessor are located with linked_leaf_search(), the dirent is
 * removed with dirent_del(), and the entry counts in the leaf header and in
 * the dinode are updated.  A name that cannot be found (-ENOENT) is
 * reported as an inconsistency.
 */
1589 static int dir_e_del(struct gfs2_inode *dip, struct qstr *filename)
1591 struct buffer_head *bh, *dibh;
1592 struct gfs2_dirent *dent, *prev;
1593 struct gfs2_leaf *leaf;
1594 unsigned int entries;
1597 error = linked_leaf_search(dip, filename, &dent, &prev, &bh);
1598 if (error == -ENOENT) {
1599 gfs2_consist_inode(dip);
1605 dirent_del(dip, bh, prev, dent); /* Pins bh */
1607 leaf = (struct gfs2_leaf *)bh->b_data;
1608 entries = be16_to_cpu(leaf->lf_entries);
1610 gfs2_consist_inode(dip);
1612 leaf->lf_entries = cpu_to_be16(entries);
1616 error = gfs2_meta_inode_buffer(dip, &dibh);
/* The dinode's count must not already be zero before it is decremented. */
1620 if (!dip->i_di.di_entries)
1621 gfs2_consist_inode(dip);
1622 dip->i_di.di_entries--;
1623 dip->i_di.di_mtime = dip->i_di.di_ctime = get_seconds();
1625 gfs2_trans_add_bh(dip->i_gl, dibh, 1);
1626 gfs2_dinode_out(&dip->i_di, dibh->b_data);
1633 * dir_e_read - Reads the entries from a directory into a filldir buffer
1634 * @dip: dinode pointer
1635 * @offset: the hash of the last entry read shifted to the right once
1636 * @opaque: buffer for the filldir function to fill
1637 * @filldir: points to the filldir function to use
1642 static int dir_e_read(struct gfs2_inode *dip, uint64_t *offset, void *opaque,
1643 gfs2_filldir_t filldir)
/*
 * Walks the directory hash table starting at the slot selected by *offset
 * and hands each leaf to do_filldir_multi() or do_filldir_single().
 * NOTE(review): this listing omits some lines (declarations of lp, copied
 * and error, error checks, cleanup); comments describe only what is shown.
 */
1645 struct gfs2_sbd *sdp = dip->i_sbd;
1646 struct buffer_head *bh;
1647 struct gfs2_leaf leaf;
1648 uint32_t hsize, len;
/* ht_offset_cur starts as -1 (all bits set) so the first pass always reads. */
1649 uint32_t ht_offset, lp_offset, ht_offset_cur = -1;
1650 uint32_t hash, index;
/*
 * The hash table holds 2^di_depth pointers of sizeof(uint64_t) bytes each;
 * a di_size that does not match is flagged through gfs2_consist_inode().
 */
1655 hsize = 1 << dip->i_di.di_depth;
1656 if (hsize * sizeof(uint64_t) != dip->i_di.di_size) {
1657 gfs2_consist_inode(dip);
/* The top di_depth bits of the 32-bit hash give the starting table index. */
1661 hash = gfs2_dir_offset2hash(*offset);
1662 index = hash >> (32 - dip->i_di.di_depth);
/*
 * lp buffers one sd_hash_bsize-byte chunk of the hash table.
 * NOTE(review): neither the allocation check nor the matching kfree() is
 * visible in this listing - confirm both exist.
 */
1664 lp = kmalloc(sdp->sd_hash_bsize, GFP_KERNEL);
1668 while (index < hsize) {
/*
 * lp_offset is the position inside the buffered chunk (the mask assumes
 * sd_hash_ptrs is a power of two - TODO confirm); ht_offset is the table
 * index of the chunk's first pointer.
 */
1669 lp_offset = index & (sdp->sd_hash_ptrs - 1);
1670 ht_offset = index - lp_offset;
/* Re-read the chunk only when the walk has moved into a different one. */
1672 if (ht_offset_cur != ht_offset) {
1673 error = gfs2_dir_read_data(dip, (char *)lp,
1674 ht_offset * sizeof(uint64_t),
1675 sdp->sd_hash_bsize);
/* Any return other than the full chunk size takes this branch (body not shown). */
1676 if (error != sdp->sd_hash_bsize) {
1681 ht_offset_cur = ht_offset;
/* Table pointers are stored big-endian; convert before fetching the leaf. */
1684 error = get_leaf(dip, be64_to_cpu(lp[lp_offset]), &bh);
/* Decode the leaf header from the buffer into the local CPU-order copy. */
1688 gfs2_leaf_in(&leaf, bh->b_data);
/* Which of the two fill helpers runs depends on a condition not shown here. */
1691 error = do_filldir_multi(dip, offset, opaque, filldir,
1694 error = do_filldir_single(dip, offset, opaque, filldir,
1695 bh, leaf.lf_entries, &copied);
/*
 * len = 2^(di_depth - lf_depth); the index is rounded down to a multiple of
 * len and advanced by len, i.e. to the first slot after the aligned group
 * containing the current one.
 */
1705 len = 1 << (dip->i_di.di_depth - leaf.lf_depth);
1706 index = (index & ~(len - 1)) + len;
/*
 * dir_e_mvino - point the entry named @filename in @dip at a new inode
 * number @inum and set its type to @new_type.
 * Called from gfs2_dir_mvino() when GFS2_DIF_EXHASH is set in di_flags.
 * NOTE(review): this listing omits some lines (declaration of error, error
 * checks, buffer release, returns); comments describe only what is shown.
 */
1715 static int dir_e_mvino(struct gfs2_inode *dip, struct qstr *filename,
1716 struct gfs2_inum *inum, unsigned int new_type)
1718 struct buffer_head *bh, *dibh;
1719 struct gfs2_dirent *dent;
/* No predecessor is requested here (NULL), unlike the delete path. */
1722 error = linked_leaf_search(dip, filename, &dent, NULL, &bh);
/* A name that cannot be found is flagged through gfs2_consist_inode(). */
1723 if (error == -ENOENT) {
1724 gfs2_consist_inode(dip);
/* The leaf buffer is added to the transaction before the dirent is edited. */
1730 gfs2_trans_add_bh(dip->i_gl, bh, 1);
/* Overwrite the inode number stored in the dirent, then its type byte. */
1732 gfs2_inum_out(inum, (char *)&dent->de_inum);
1733 dent->de_type = new_type;
1737 error = gfs2_meta_inode_buffer(dip, &dibh);
/* Both directory timestamps are set to the current time in seconds. */
1741 dip->i_di.di_mtime = dip->i_di.di_ctime = get_seconds();
1743 gfs2_trans_add_bh(dip->i_gl, dibh, 1);
1744 gfs2_dinode_out(&dip->i_di, dibh->b_data);
1751 * dir_l_search - Search linear (stuffed dinode) dir for inode matching name
1752 * @dip: The GFS2 inode
1753 * @filename: Filename string
1754 * @inum: If non-NULL, function fills with formal inode # and block address
1755 * @type: If non-NULL, function fills with DT_... dinode type
1760 static int dir_l_search(struct gfs2_inode *dip, struct qstr *filename,
1761 struct gfs2_inum *inum, unsigned int *type)
/*
 * NOTE(review): this listing omits some lines (declaration of error, error
 * checks, buffer release, returns); comments describe only what is shown.
 */
1763 struct buffer_head *dibh;
1764 struct gfs2_dirent *dent;
/* This path is only valid for a stuffed dinode; anything else is flagged. */
1767 if (!gfs2_is_stuffed(dip)) {
1768 gfs2_consist_inode(dip);
1772 error = gfs2_meta_inode_buffer(dip, &dibh);
/* The dinode buffer itself is searched; no predecessor is requested (NULL). */
1776 error = leaf_search(dip, dibh, filename, &dent, NULL);
/*
 * Copy the results out of the matching dirent. The doc comment says both
 * outputs are optional; the non-NULL guards are not visible in this listing.
 */
1779 gfs2_inum_in(inum, (char *)&dent->de_inum);
1781 *type = dent->de_type;
/*
 * dir_l_add - add the entry (@filename, @inum, @type) to a stuffed
 * directory @dip. Called from gfs2_dir_add() when GFS2_DIF_EXHASH is not
 * selected. When gfs2_dirent_alloc() reports failure for the dinode block,
 * the visible code calls dir_make_exhash() and dir_e_add() instead.
 * NOTE(review): this listing omits some lines (declaration of error, error
 * checks, buffer release, returns); comments describe only what is shown.
 */
1789 static int dir_l_add(struct gfs2_inode *dip, struct qstr *filename,
1790 struct gfs2_inum *inum, unsigned int type)
1792 struct buffer_head *dibh;
1793 struct gfs2_dirent *dent;
/* This path is only valid for a stuffed dinode; anything else is flagged. */
1796 if (!gfs2_is_stuffed(dip)) {
1797 gfs2_consist_inode(dip);
1801 error = gfs2_meta_inode_buffer(dip, &dibh);
/*
 * A non-zero return from gfs2_dirent_alloc() enters the fallback branch:
 * convert the directory with dir_make_exhash() and add through dir_e_add()
 * (the checks between the two calls are not visible here).
 */
1805 if (gfs2_dirent_alloc(dip, dibh, filename->len, &dent)) {
1808 error = dir_make_exhash(dip);
1810 error = dir_e_add(dip, filename, inum, type);
1815 /* gfs2_dirent_alloc() pins */
/* Fill in the freshly allocated dirent. */
1817 gfs2_inum_out(inum, (char *)&dent->de_inum);
/* The hash is computed in CPU order, then converted in place to big-endian. */
1818 dent->de_hash = gfs2_disk_hash(filename->name, filename->len);
1819 dent->de_hash = cpu_to_be32(dent->de_hash);
1820 dent->de_type = type;
/* The name bytes are stored immediately after the dirent header. */
1821 memcpy((char *)(dent + 1), filename->name, filename->len);
1823 dip->i_di.di_entries++;
1824 dip->i_di.di_mtime = dip->i_di.di_ctime = get_seconds();
1826 gfs2_dinode_out(&dip->i_di, dibh->b_data);
/*
 * dir_l_del - remove the entry named @filename from a stuffed directory
 * @dip. Called from gfs2_dir_del() when GFS2_DIF_EXHASH is not selected.
 * NOTE(review): this listing omits some lines (declaration of error, error
 * checks, buffer release, returns); comments describe only what is shown.
 */
1832 static int dir_l_del(struct gfs2_inode *dip, struct qstr *filename)
1834 struct buffer_head *dibh;
1835 struct gfs2_dirent *dent, *prev;
/* This path is only valid for a stuffed dinode; anything else is flagged. */
1838 if (!gfs2_is_stuffed(dip)) {
1839 gfs2_consist_inode(dip);
1843 error = gfs2_meta_inode_buffer(dip, &dibh);
/* The predecessor is requested as well because dirent_del() takes it. */
1847 error = leaf_search(dip, dibh, filename, &dent, &prev);
/* A name that cannot be found is flagged through gfs2_consist_inode(). */
1848 if (error == -ENOENT) {
1849 gfs2_consist_inode(dip);
1856 dirent_del(dip, dibh, prev, dent);
1858 /* dirent_del() pins */
/* A count that is already zero is flagged before the decrement below. */
1860 if (!dip->i_di.di_entries)
1861 gfs2_consist_inode(dip);
1862 dip->i_di.di_entries--;
1864 dip->i_di.di_mtime = dip->i_di.di_ctime = get_seconds();
/* No separate gfs2_trans_add_bh() is visible here; see the pin note above. */
1866 gfs2_dinode_out(&dip->i_di, dibh->b_data);
/*
 * dir_l_read - feed the entries of a stuffed directory @dip to @filldir
 * through do_filldir_single(). Called from gfs2_dir_read() when
 * GFS2_DIF_EXHASH is not selected.
 * NOTE(review): this listing omits some lines (declarations, returns, the
 * remaining do_filldir_single() arguments); comments describe only what is
 * shown.
 */
1874 static int dir_l_read(struct gfs2_inode *dip, uint64_t *offset, void *opaque,
1875 gfs2_filldir_t filldir)
1877 struct buffer_head *dibh;
/* This path is only valid for a stuffed dinode; anything else is flagged. */
1881 if (!gfs2_is_stuffed(dip)) {
1882 gfs2_consist_inode(dip);
/* Empty directory: the statement taken here is not visible (presumably an early return). */
1886 if (!dip->i_di.di_entries)
1889 error = gfs2_meta_inode_buffer(dip, &dibh);
/* The dinode buffer and the dinode's entry count are passed to the helper. */
1893 error = do_filldir_single(dip, offset,
1895 dibh, dip->i_di.di_entries,
/*
 * dir_l_mvino - point the entry named @filename in a stuffed directory @dip
 * at a new inode number @inum and set its type to @new_type. Called from
 * gfs2_dir_mvino() when GFS2_DIF_EXHASH is not selected.
 * NOTE(review): this listing omits some lines (declaration of error, error
 * checks, buffer release, returns); comments describe only what is shown.
 */
1905 static int dir_l_mvino(struct gfs2_inode *dip, struct qstr *filename,
1906 struct gfs2_inum *inum, unsigned int new_type)
1908 struct buffer_head *dibh;
1909 struct gfs2_dirent *dent;
/* This path is only valid for a stuffed dinode; anything else is flagged. */
1912 if (!gfs2_is_stuffed(dip)) {
1913 gfs2_consist_inode(dip);
1917 error = gfs2_meta_inode_buffer(dip, &dibh);
1921 error = leaf_search(dip, dibh, filename, &dent, NULL);
/* A name that cannot be found is flagged through gfs2_consist_inode(). */
1922 if (error == -ENOENT) {
1923 gfs2_consist_inode(dip);
/* The dirent lives in the dinode buffer, so one buffer covers both updates. */
1930 gfs2_trans_add_bh(dip->i_gl, dibh, 1);
1932 gfs2_inum_out(inum, (char *)&dent->de_inum);
1933 dent->de_type = new_type;
1935 dip->i_di.di_mtime = dip->i_di.di_ctime = get_seconds();
1937 gfs2_dinode_out(&dip->i_di, dibh->b_data);
1946 * gfs2_dir_search - Search a directory
1947 * @dip: The GFS2 inode
1951 * This routine searches a directory for a file or another directory.
1952 * Assumes a glock is held on dip.
1957 int gfs2_dir_search(struct gfs2_inode *dip, struct qstr *filename,
1958 struct gfs2_inum *inum, unsigned int *type)
/*
 * Dispatch on the directory layout: dir_e_search() when GFS2_DIF_EXHASH is
 * set in di_flags, dir_l_search() for the other case. All four arguments
 * are forwarded unchanged.
 * NOTE(review): the line separating the two calls (presumably "else") and
 * the return are not visible in this listing.
 */
1962 if (dip->i_di.di_flags & GFS2_DIF_EXHASH)
1963 error = dir_e_search(dip, filename, inum, type);
1965 error = dir_l_search(dip, filename, inum, type);
1971 * gfs2_dir_add - Add new filename into directory
1972 * @dip: The GFS2 inode
1973 * @filename: The new name
1974 * @inum: The inode number of the entry
1975 * @type: The type of the entry
1977 * Returns: 0 on success, error code on failure
1980 int gfs2_dir_add(struct gfs2_inode *dip, struct qstr *filename,
1981 struct gfs2_inum *inum, unsigned int type)
/*
 * Dispatch on the directory layout: dir_e_add() when GFS2_DIF_EXHASH is set
 * in di_flags, dir_l_add() for the other case. All four arguments are
 * forwarded unchanged.
 * NOTE(review): the line separating the two calls (presumably "else") and
 * the return are not visible in this listing.
 */
1985 if (dip->i_di.di_flags & GFS2_DIF_EXHASH)
1986 error = dir_e_add(dip, filename, inum, type);
1988 error = dir_l_add(dip, filename, inum, type);
1994 * gfs2_dir_del - Delete a directory entry
1995 * @dip: The GFS2 inode
1996 * @filename: The filename
1998 * Returns: 0 on success, error code on failure
2001 int gfs2_dir_del(struct gfs2_inode *dip, struct qstr *filename)
/*
 * Dispatch on the directory layout: dir_e_del() when GFS2_DIF_EXHASH is set
 * in di_flags, dir_l_del() for the other case.
 * NOTE(review): the line separating the two calls (presumably "else") and
 * the return are not visible in this listing.
 */
2005 if (dip->i_di.di_flags & GFS2_DIF_EXHASH)
2006 error = dir_e_del(dip, filename);
2008 error = dir_l_del(dip, filename);
/*
 * gfs2_dir_read - read directory entries from @dip into a filldir buffer.
 * @dip: the directory inode
 * @offset: read position, forwarded unchanged to the layout-specific reader
 * @opaque: buffer handed through to @filldir
 * @filldir: callback that receives the entries
 *
 * Dispatches to dir_e_read() when GFS2_DIF_EXHASH is set in di_flags and to
 * dir_l_read() for the other case.
 * NOTE(review): the line separating the two calls (presumably "else") and
 * the return are not visible in this listing.
 */
2013 int gfs2_dir_read(struct gfs2_inode *dip, uint64_t *offset, void *opaque,
2014 gfs2_filldir_t filldir)
2018 if (dip->i_di.di_flags & GFS2_DIF_EXHASH)
2019 error = dir_e_read(dip, offset, opaque, filldir);
2021 error = dir_l_read(dip, offset, opaque, filldir);
2027 * gfs2_dir_mvino - Change inode number of directory entry
2028 * @dip: The GFS2 inode
2032 * This routine changes the inode number of a directory entry. It's used
2033 * by rename to change ".." when a directory is moved.
2034 * Assumes a glock is held on dip.
2039 int gfs2_dir_mvino(struct gfs2_inode *dip, struct qstr *filename,
2040 struct gfs2_inum *inum, unsigned int new_type)
/*
 * Dispatch on the directory layout: dir_e_mvino() when GFS2_DIF_EXHASH is
 * set in di_flags, dir_l_mvino() for the other case. All four arguments are
 * forwarded unchanged.
 * NOTE(review): the line separating the two calls (presumably "else") and
 * the return are not visible in this listing.
 */
2044 if (dip->i_di.di_flags & GFS2_DIF_EXHASH)
2045 error = dir_e_mvino(dip, filename, inum, new_type);
2047 error = dir_l_mvino(dip, filename, inum, new_type);
2053 * foreach_leaf - call a function for each leaf in a directory
2054 * @dip: the directory
2055 * @lc: the function to call for each leaf
2056 * @data: private data to pass to it
2061 static int foreach_leaf(struct gfs2_inode *dip, leaf_call_t lc, void *data)
/*
 * Walks the whole hash table and invokes @lc for each leaf with the current
 * table index, the pointer-group size len, the leaf block number and @data.
 * NOTE(review): this listing omits some lines (declarations of lp, index,
 * leaf_no and error, error checks, cleanup, returns); comments describe
 * only what is shown.
 */
2063 struct gfs2_sbd *sdp = dip->i_sbd;
2064 struct buffer_head *bh;
2065 struct gfs2_leaf leaf;
2066 uint32_t hsize, len;
/* ht_offset_cur starts as -1 (all bits set) so the first pass always reads. */
2067 uint32_t ht_offset, lp_offset, ht_offset_cur = -1;
/*
 * The hash table holds 2^di_depth pointers of sizeof(uint64_t) bytes each;
 * a di_size that does not match is flagged through gfs2_consist_inode().
 */
2073 hsize = 1 << dip->i_di.di_depth;
2074 if (hsize * sizeof(uint64_t) != dip->i_di.di_size) {
2075 gfs2_consist_inode(dip);
/*
 * lp buffers one sd_hash_bsize-byte chunk of the hash table.
 * NOTE(review): neither the allocation check nor the matching kfree() is
 * visible in this listing - confirm both exist.
 */
2079 lp = kmalloc(sdp->sd_hash_bsize, GFP_KERNEL);
2083 while (index < hsize) {
/*
 * lp_offset is the position inside the buffered chunk (the mask assumes
 * sd_hash_ptrs is a power of two - TODO confirm); ht_offset is the table
 * index of the chunk's first pointer.
 */
2084 lp_offset = index & (sdp->sd_hash_ptrs - 1);
2085 ht_offset = index - lp_offset;
/* Re-read the chunk only when the walk has moved into a different one. */
2087 if (ht_offset_cur != ht_offset) {
2088 error = gfs2_dir_read_data(dip, (char *)lp,
2089 ht_offset * sizeof(uint64_t),
2090 sdp->sd_hash_bsize);
/* Any return other than the full chunk size takes this branch (body not shown). */
2091 if (error != sdp->sd_hash_bsize) {
2096 ht_offset_cur = ht_offset;
/* Table pointers are stored big-endian; convert to get the leaf block number. */
2099 leaf_no = be64_to_cpu(lp[lp_offset]);
/* The leaf is read only to learn lf_depth, which determines len below. */
2101 error = get_leaf(dip, leaf_no, &bh);
2104 gfs2_leaf_in(&leaf, bh->b_data);
/* len = 2^(di_depth - lf_depth), passed to the callback alongside index. */
2107 len = 1 << (dip->i_di.di_depth - leaf.lf_depth);
2109 error = lc(dip, index, len, leaf_no, data);
/* Round down to a multiple of len and advance by len to the next group. */
2113 index = (index & ~(len - 1)) + len;
/* The walk must land exactly on the table size; otherwise the inode is flagged. */
2118 if (index != hsize) {
2119 gfs2_consist_inode(dip);
2130 * leaf_dealloc - Deallocate a directory leaf
2131 * @dip: the directory
2132 * @index: the hash table offset in the directory
2133 * @len: the number of pointers to this leaf
2134 * @leaf_no: the leaf number
2140 static int leaf_dealloc(struct gfs2_inode *dip, uint32_t index, uint32_t len,
2141 uint64_t leaf_no, void *data)
/*
 * Frees every block in the leaf chain that starts at @leaf_no and rewrites
 * the @len hash-table slots beginning at @index. Passed as the callback to
 * foreach_leaf() by gfs2_dir_exhash_dealloc(); @data is not used in the
 * visible lines.
 * NOTE(review): this listing omits some lines (declarations of ht, blk and
 * error, error checks, brelse calls, cleanup labels, the return); comments
 * describe only what is shown.
 */
2143 struct gfs2_sbd *sdp = dip->i_sbd;
2144 struct gfs2_leaf tmp_leaf;
2145 struct gfs2_rgrp_list rlist;
2146 struct buffer_head *bh, *dibh;
2148 unsigned int rg_blocks = 0, l_blocks = 0;
/* size is the byte length of the @len hash-table pointers covered by this leaf. */
2150 unsigned int x, size = len * sizeof(uint64_t);
2153 memset(&rlist, 0, sizeof(struct gfs2_rgrp_list));
/*
 * ht is a zero-filled buffer of "size" bytes that is later written over the
 * hash-table slots. No store into ht is visible in this listing, so zeros
 * are presumably what gets written.
 * NOTE(review): no kfree(ht) is visible either - confirm the buffer is
 * released on every exit path.
 */
2155 ht = kzalloc(size, GFP_KERNEL);
/* Acquisitions below are mirrored by the release calls at the end. */
2159 gfs2_alloc_get(dip);
2161 error = gfs2_quota_hold(dip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE);
2165 error = gfs2_rindex_hold(sdp, &dip->i_alloc.al_ri_gh);
2169 /* Count the number of leaves */
/* First pass: follow lf_next from leaf_no and add each block to rlist. */
2171 for (blk = leaf_no; blk; blk = tmp_leaf.lf_next) {
2172 error = get_leaf(dip, blk, &bh);
2175 gfs2_leaf_in(&tmp_leaf, (bh)->b_data);
2178 gfs2_rlist_add(sdp, &rlist, blk);
2182 gfs2_rlist_alloc(&rlist, LM_ST_EXCLUSIVE, 0);
/* Sum ri_length over every resource group in rlist for the reservation below. */
2184 for (x = 0; x < rlist.rl_rgrps; x++) {
2185 struct gfs2_rgrpd *rgd;
2186 rgd = get_gl2rgd(rlist.rl_ghs[x].gh_gl);
2187 rg_blocks += rgd->rd_ri.ri_length;
2190 error = gfs2_glock_nq_m(rlist.rl_rgrps, rlist.rl_ghs);
/*
 * The first count passed to gfs2_trans_begin() adds up rg_blocks, the
 * journaled blocks spanned by the hash-table write (rounded up, plus one),
 * and the RES_DINODE, RES_STATFS and RES_QUOTA constants; the second count
 * is l_blocks.
 */
2194 error = gfs2_trans_begin(sdp,
2195 rg_blocks + (DIV_RU(size, sdp->sd_jbsize) + 1) +
2196 RES_DINODE + RES_STATFS + RES_QUOTA, l_blocks);
2198 goto out_rg_gunlock;
/* Second pass: walk the same chain again, freeing one block per leaf. */
2200 for (blk = leaf_no; blk; blk = tmp_leaf.lf_next) {
2201 error = get_leaf(dip, blk, &bh);
2204 gfs2_leaf_in(&tmp_leaf, bh->b_data);
2207 gfs2_free_meta(dip, blk, 1);
/* A block count that is already zero is flagged before the decrement. */
2209 if (!dip->i_di.di_blocks)
2210 gfs2_consist_inode(dip);
2211 dip->i_di.di_blocks--;
/* Overwrite slots [index, index + len) of the hash table with ht. */
2214 error = gfs2_dir_write_data(dip, ht, index * sizeof(uint64_t), size);
/* Any return other than the full size takes this branch (body not shown). */
2215 if (error != size) {
2221 error = gfs2_meta_inode_buffer(dip, &dibh);
2225 gfs2_trans_add_bh(dip->i_gl, dibh, 1);
2226 gfs2_dinode_out(&dip->i_di, dibh->b_data);
/*
 * Release in reverse order of acquisition: transaction, resource-group
 * glocks, rlist, rindex holder, quota hold, allocation structure.
 */
2230 gfs2_trans_end(sdp);
2233 gfs2_glock_dq_m(rlist.rl_rgrps, rlist.rl_ghs);
2236 gfs2_rlist_free(&rlist);
2237 gfs2_glock_dq_uninit(&dip->i_alloc.al_ri_gh);
2240 gfs2_quota_unhold(dip);
2243 gfs2_alloc_put(dip);
2250 * gfs2_dir_exhash_dealloc - free all the leaf blocks in a directory
2251 * @dip: the directory
2253 * Dealloc all on-disk directory leaves to FREEMETA state
2254 * Change on-disk inode type to "regular file"
2259 int gfs2_dir_exhash_dealloc(struct gfs2_inode *dip)
/*
 * NOTE(review): this listing omits some lines (declaration of error, error
 * checks, buffer release, the return); comments describe only what is shown.
 */
2261 struct gfs2_sbd *sdp = dip->i_sbd;
2262 struct buffer_head *bh;
2265 /* Dealloc on-disk leaves to FREEMETA state */
/* leaf_dealloc() is run for each leaf; no private data is passed (NULL). */
2266 error = foreach_leaf(dip, leaf_dealloc, NULL);
2270 /* Make this a regular file in case we crash.
2271 (We don't want to free these blocks a second time.) */
/* A separate transaction reserving RES_DINODE covers the mode change. */
2273 error = gfs2_trans_begin(sdp, RES_DINODE, 0);
2277 error = gfs2_meta_inode_buffer(dip, &bh);
/*
 * di_mode is overwritten directly in the buffer image, stored big-endian;
 * the in-core dip->i_di is not modified in the visible lines.
 */
2279 gfs2_trans_add_bh(dip->i_gl, bh, 1);
2280 ((struct gfs2_dinode *)bh->b_data)->di_mode = cpu_to_be32(S_IFREG);
2284 gfs2_trans_end(sdp);
2290 * gfs2_diradd_alloc_required - find if adding entry will require an allocation
2291 * @dip: the directory being written to
2292 * @filename: the filename that's going to be added
2293 * @alloc_required: set to 1 if an alloc is required, 0 otherwise
2298 int gfs2_diradd_alloc_required(struct gfs2_inode *dip, struct qstr *filename,
2299 int *alloc_required)
2301 struct buffer_head *bh = NULL, *bh_next;
2302 uint32_t hsize, hash, index;
2305 *alloc_required = 0;
2307 if (dip->i_di.di_flags & GFS2_DIF_EXHASH) {
2308 hsize = 1 << dip->i_di.di_depth;
2309 if (hsize * sizeof(uint64_t) != dip->i_di.di_size) {
2310 gfs2_consist_inode(dip);
2314 hash = gfs2_disk_hash(filename->name, filename->len);
2315 index = hash >> (32 - dip->i_di.di_depth);
2317 error = get_first_leaf(dip, index, &bh_next);
2326 if (dirent_fits(dip, bh, filename->len))
2329 error = get_next_leaf(dip, bh, &bh_next);
2330 if (error == -ENOENT) {
2331 *alloc_required = 1;
2340 error = gfs2_meta_inode_buffer(dip, &bh);
2344 if (!dirent_fits(dip, bh, filename->len))
2345 *alloc_required = 1;