patch-2.4.1 linux/fs/reiserfs/namei.c

Next file: linux/fs/reiserfs/objectid.c
Previous file: linux/fs/reiserfs/lbalance.c
Back to the patch index
Back to the overall index

diff -u --recursive --new-file v2.4.0/linux/fs/reiserfs/namei.c linux/fs/reiserfs/namei.c
@@ -0,0 +1,1221 @@
+/*
+ * Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README
+ */
+
+#ifdef __KERNEL__
+
+#include <linux/config.h>
+#include <linux/sched.h>
+#include <linux/reiserfs_fs.h>
+#include <linux/smp_lock.h>
+
+#else
+
+#include "nokernel.h"
+
+#endif
+
+				/* there should be an overview right
+                                   here, as there should be in every
+                                   conceptual grouping of code.  This
+                                   should be combined with dir.c and
+                                   called dir.c (naming will become
+                                   too large to be called one file in
+                                   a few years), stop senselessly
+                                   imitating the incoherent
+                                   structuring of code used by other
+                                   filesystems.  */
+
+#define INC_DIR_INODE_NLINK(i) if (i->i_nlink != 1) { i->i_nlink++; if (i->i_nlink >= REISERFS_LINK_MAX) i->i_nlink=1; }
+#define DEC_DIR_INODE_NLINK(i) if (i->i_nlink != 1) i->i_nlink--;
+
+// directory item contains array of entry headers. This performs
+// binary search through that array
+static int bin_search_in_dir_item (struct reiserfs_dir_entry * de, loff_t off)
+{
+    struct item_head * ih = de->de_ih;
+    struct reiserfs_de_head * deh = de->de_deh;
+    int rbound, lbound, j;
+
+    lbound = 0;
+    rbound = I_ENTRY_COUNT (ih) - 1;
+
+    for (j = (rbound + lbound) / 2; lbound <= rbound; j = (rbound + lbound) / 2) {
+	if (off < deh_offset (deh + j)) {
+	    rbound = j - 1;
+	    continue;
+	}
+	if (off > deh_offset (deh + j)) {
+	    lbound = j + 1;
+	    continue;
+	}
+	// this is not name found, but matched third key component
+	de->de_entry_num = j;
+	return NAME_FOUND;
+    }
+
+    de->de_entry_num = lbound;
+    return NAME_NOT_FOUND;
+}
+
+
+// comment?  maybe something like set de to point to what the path points to?
+static inline void set_de_item_location (struct reiserfs_dir_entry * de, struct path * path)
+{
+    de->de_bh = get_bh (path);
+    de->de_ih = get_ih (path);
+    de->de_deh = B_I_DEH (de->de_bh, de->de_ih);
+    de->de_item_num = PATH_LAST_POSITION (path);
+} 
+
+
+// de_bh, de_ih, de_deh (points to first element of array), de_item_num is set
+inline void set_de_name_and_namelen (struct reiserfs_dir_entry * de)
+{
+    struct reiserfs_de_head * deh = de->de_deh + de->de_entry_num;
+
+    if (de->de_entry_num >= ih_entry_count (de->de_ih))
+	BUG ();
+
+    de->de_entrylen = entry_length (de->de_bh, de->de_ih, de->de_entry_num);
+    de->de_namelen = de->de_entrylen - (de_with_sd (deh) ? SD_SIZE : 0);
+    de->de_name = B_I_PITEM (de->de_bh, de->de_ih) + le16_to_cpu (deh->deh_location);
+    if (de->de_name[de->de_namelen - 1] == 0)
+	de->de_namelen = strlen (de->de_name);
+}
+
+
+// what entry points to
+static inline void set_de_object_key (struct reiserfs_dir_entry * de)
+{
+    if (de->de_entry_num >= ih_entry_count (de->de_ih))
+	BUG ();
+    de->de_dir_id = le32_to_cpu (de->de_deh[de->de_entry_num].deh_dir_id);
+    de->de_objectid = le32_to_cpu (de->de_deh[de->de_entry_num].deh_objectid);
+}
+
+
+static inline void store_de_entry_key (struct reiserfs_dir_entry * de)
+{
+    struct reiserfs_de_head * deh = de->de_deh + de->de_entry_num;
+
+    if (de->de_entry_num >= ih_entry_count (de->de_ih))
+	BUG ();
+
+    /* store key of the found entry */
+    de->de_entry_key.version = ITEM_VERSION_1;
+    de->de_entry_key.on_disk_key.k_dir_id = le32_to_cpu (de->de_ih->ih_key.k_dir_id);
+    de->de_entry_key.on_disk_key.k_objectid = le32_to_cpu (de->de_ih->ih_key.k_objectid);
+    set_cpu_key_k_offset (&(de->de_entry_key), deh_offset (deh));
+    set_cpu_key_k_type (&(de->de_entry_key), TYPE_DIRENTRY);
+}
+
+
+/* We assign a key to each directory item, and place multiple entries
+in a single directory item.  A directory item has a key equal to the
+key of the first directory entry in it.
+
+This function first calls search_by_key, then, if item whose first
+entry matches is not found it looks for the entry inside directory
+item found by search_by_key. Fills the path to the entry, and to the
+entry position in the item 
+
+*/
+
+/* The function is NOT SCHEDULE-SAFE! */
+int search_by_entry_key (struct super_block * sb, struct cpu_key * key,
+			 struct path * path, struct reiserfs_dir_entry * de)
+{
+    int retval;
+
+    retval = search_item (sb, key, path);
+    switch (retval) {
+    case ITEM_NOT_FOUND:
+	if (!PATH_LAST_POSITION (path)) {
+	    reiserfs_warning ("vs-7000: search_by_entry_key: search_by_key returned item position == 0");
+	    pathrelse(path) ;
+	    return IO_ERROR ;
+	}
+	PATH_LAST_POSITION (path) --;
+
+    case ITEM_FOUND:
+	break;
+
+    case IO_ERROR:
+	return retval;
+
+    default:
+	pathrelse (path);
+	reiserfs_warning ("vs-7002: search_by_entry_key: no path to here");
+	return IO_ERROR;
+    }
+
+    set_de_item_location (de, path);
+
+#ifdef CONFIG_REISERFS_CHECK
+    if (!is_direntry_le_ih (de->de_ih) || 
+	COMP_SHORT_KEYS (&(de->de_ih->ih_key), key)) {
+	print_block (de->de_bh, 0, -1, -1);
+	reiserfs_panic (sb, "vs-7005: search_by_entry_key: found item %h is not directory item or "
+			"does not belong to the same directory as key %k", de->de_ih, key);
+    }
+#endif /* CONFIG_REISERFS_CHECK */
+
+    /* binary search in directory item by third componen t of the
+       key. sets de->de_entry_num of de */
+    retval = bin_search_in_dir_item (de, cpu_key_k_offset (key));
+    path->pos_in_item = de->de_entry_num;
+    if (retval != NAME_NOT_FOUND) {
+	// ugly, but rename needs de_bh, de_deh, de_name, de_namelen, de_objectid set
+	set_de_name_and_namelen (de);
+	set_de_object_key (de);
+    }
+    return retval;
+}
+
+
+
+/* Keyed 32-bit hash function using TEA in a Davis-Meyer function */
+
+/* The third component is hashed, and you can choose from more than
+   one hash function.  Per directory hashes are not yet implemented
+   but are thought about. This function should be moved to hashes.c
+   Jedi, please do so.  -Hans */
+
+static __u32 get_third_component (struct super_block * s, 
+				  const char * name, int len)
+{
+    __u32 res;
+
+    if (!len || (len == 1 && name[0] == '.'))
+	return DOT_OFFSET;
+    if (len == 2 && name[0] == '.' && name[1] == '.')
+	return DOT_DOT_OFFSET;
+
+    res = s->u.reiserfs_sb.s_hash_function (name, len);
+
+    // take bits from 7-th to 30-th including both bounds
+    res = GET_HASH_VALUE(res);
+    if (res == 0)
+	// needed to have no names before "." and ".." those have hash
+	// value == 0 and generation conters 1 and 2 accordingly
+	res = 128;
+    return res + MAX_GENERATION_NUMBER;
+}
+
+
+//
+// a portion of this function, particularly the VFS interface portion,
+// was derived from minix or ext2's analog and evolved as the
+// prototype did. You should be able to tell which portion by looking
+// at the ext2 code and comparing. It's subfunctions contain no code
+// used as a template unless they are so labeled.
+//
+static int reiserfs_match (struct reiserfs_dir_entry * de, 
+			   const char * name, int namelen)
+{
+    int retval = NAME_NOT_FOUND;
+
+    if ((namelen == de->de_namelen) &&
+	!memcmp(de->de_name, name, de->de_namelen))
+	retval = (de_visible (de->de_deh + de->de_entry_num) ? NAME_FOUND : NAME_FOUND_INVISIBLE);
+
+    return retval;
+}
+
+
+/* de's de_bh, de_ih, de_deh, de_item_num, de_entry_num are set already */
+
+				/* used when hash collisions exist */
+
+
+static int linear_search_in_dir_item (struct cpu_key * key, struct reiserfs_dir_entry * de,
+				      const char * name, int namelen)
+{
+    struct reiserfs_de_head * deh = de->de_deh;
+    int retval;
+    int i;
+
+    i = de->de_entry_num;
+
+    if (i == I_ENTRY_COUNT (de->de_ih) ||
+	GET_HASH_VALUE (deh_offset (deh + i)) != GET_HASH_VALUE (cpu_key_k_offset (key))) {
+	i --;
+    }
+
+#ifdef CONFIG_REISERFS_CHECK
+    if (de->de_deh != B_I_DEH (de->de_bh, de->de_ih))
+	reiserfs_panic (0, "vs-7010: linear_search_in_dir_item: array of entry headers not found");
+#endif /* CONFIG_REISERFS_CHECK */
+
+    deh += i;
+
+    for (; i >= 0; i --, deh --) {
+	if (GET_HASH_VALUE (deh_offset (deh)) !=
+	    GET_HASH_VALUE (cpu_key_k_offset (key))) {
+	    // hash value does not match, no need to check whole name
+	    return NAME_NOT_FOUND;
+	}
+   
+	/* mark, that this generation number is used */
+	if (de->de_gen_number_bit_string)
+	    set_bit (GET_GENERATION_NUMBER (deh_offset (deh)), de->de_gen_number_bit_string);
+
+	// calculate pointer to name and namelen
+	de->de_entry_num = i;
+	set_de_name_and_namelen (de);
+
+	if ((retval = reiserfs_match (de, name, namelen)) != NAME_NOT_FOUND) {
+	    // de's de_name, de_namelen, de_recordlen are set. Fill the rest:
+
+	    // key of pointed object
+	    set_de_object_key (de);
+
+	    store_de_entry_key (de);
+
+	    // retval can be NAME_FOUND or NAME_FOUND_INVISIBLE
+	    return retval;
+	}
+    }
+
+    if (GET_GENERATION_NUMBER (le_ih_k_offset (de->de_ih)) == 0)
+	/* we have reached left most entry in the node. In common we
+           have to go to the left neighbor, but if generation counter
+           is 0 already, we know for sure, that there is no name with
+           the same hash value */
+	// FIXME: this work correctly only because hash value can not
+	// be 0. Btw, in case of Yura's hash it is probably possible,
+	// so, this is a bug
+	return NAME_NOT_FOUND;
+
+#ifdef CONFIG_REISERFS_CHECK
+    if (de->de_item_num)
+	reiserfs_panic (0, "vs-7015: linear_search_in_dir_item: "
+			"two diritems of the same directory in one node?");
+#endif /* CONFIG_REISERFS_CHECK */
+
+    return GOTO_PREVIOUS_ITEM;
+}
+
+
+//
+// a portion of this function, particularly the VFS interface portion,
+// was derived from minix or ext2's analog and evolved as the
+// prototype did. You should be able to tell which portion by looking
+// at the ext2 code and comparing. It's subfunctions contain no code
+// used as a template unless they are so labeled.
+//
+// may return NAME_FOUND, NAME_FOUND_INVISIBLE, NAME_NOT_FOUND
+// FIXME: should add something like IOERROR
+static int reiserfs_find_entry (struct inode * dir, const char * name, int namelen, 
+				struct path * path_to_entry, struct reiserfs_dir_entry * de)
+{
+    struct cpu_key key_to_search;
+    int retval;
+
+
+    if (namelen > REISERFS_MAX_NAME_LEN (dir->i_sb->s_blocksize))
+	return NAME_NOT_FOUND;
+
+    /* we will search for this key in the tree */
+    make_cpu_key (&key_to_search, dir, 
+		  get_third_component (dir->i_sb, name, namelen), TYPE_DIRENTRY, 3);
+
+    while (1) {
+	retval = search_by_entry_key (dir->i_sb, &key_to_search, path_to_entry, de);
+	if (retval == IO_ERROR)
+	    // FIXME: still has to be dealt with
+
+				/* I want you to conform to our error
+                                   printing standard.  How many times
+                                   do I have to ask? -Hans */
+
+	    BUG ();
+
+	/* compare names for all entries having given hash value */
+	retval = linear_search_in_dir_item (&key_to_search, de, name, namelen);
+	if (retval != GOTO_PREVIOUS_ITEM) {
+	    /* there is no need to scan directory anymore. Given entry found or does not exist */
+	    path_to_entry->pos_in_item = de->de_entry_num;
+	    return retval;
+	}
+
+	/* there is left neighboring item of this directory and given entry can be there */
+	set_cpu_key_k_offset (&key_to_search, le_ih_k_offset (de->de_ih) - 1);
+	pathrelse (path_to_entry);
+
+    } /* while (1) */
+}
+
+
+//
+// a portion of this function, particularly the VFS interface portion,
+// was derived from minix or ext2's analog and evolved as the
+// prototype did. You should be able to tell which portion by looking
+// at the ext2 code and comparing. It's subfunctions contain no code
+// used as a template unless they are so labeled.
+//
+struct dentry * reiserfs_lookup (struct inode * dir, struct dentry * dentry)
+{
+    int retval;
+    struct inode * inode = 0;
+    struct reiserfs_dir_entry de;
+    INITIALIZE_PATH (path_to_entry);
+
+    reiserfs_check_lock_depth("lookup") ;
+
+    if (dentry->d_name.len > REISERFS_MAX_NAME_LEN (dir->i_sb->s_blocksize))
+	return ERR_PTR(-ENAMETOOLONG);
+
+    de.de_gen_number_bit_string = 0;
+    retval = reiserfs_find_entry (dir, dentry->d_name.name, dentry->d_name.len, &path_to_entry, &de);
+    pathrelse (&path_to_entry);
+    if (retval == NAME_FOUND) {
+	inode = reiserfs_iget (dir->i_sb, (struct cpu_key *)&(de.de_dir_id));
+	if (!inode) {
+	    return ERR_PTR(-EACCES);
+        }
+    }
+
+    d_add(dentry, inode);
+    return NULL;
+}
+
+
+//
+// a portion of this function, particularly the VFS interface portion,
+// was derived from minix or ext2's analog and evolved as the
+// prototype did. You should be able to tell which portion by looking
+// at the ext2 code and comparing. It's subfunctions contain no code
+// used as a template unless they are so labeled.
+//
+
+/* add entry to the directory (entry can be hidden). 
+
+insert definition of when hidden directories are used here -Hans
+
+ Does not mark dir   inode dirty, do it after successesfull call to it */
+
+static int reiserfs_add_entry (struct reiserfs_transaction_handle *th, struct inode * dir,
+                               const char * name, int namelen, struct inode * inode,
+			       int visible)
+{
+    struct cpu_key entry_key;
+    struct reiserfs_de_head * deh;
+    INITIALIZE_PATH (path);
+    struct reiserfs_dir_entry de;
+    int bit_string [MAX_GENERATION_NUMBER / (sizeof(int) * 8) + 1];
+    int gen_number;
+    char small_buf[32+DEH_SIZE] ; /* 48 bytes now and we avoid kmalloc
+                                     if we create file with short name */
+    char * buffer;
+    int buflen, paste_size;
+    int retval;
+
+
+    /* cannot allow items to be added into a busy deleted directory */
+    if (!namelen)
+	return -EINVAL;
+
+    if (namelen > REISERFS_MAX_NAME_LEN (dir->i_sb->s_blocksize))
+	return -ENAMETOOLONG;
+
+    /* each entry has unique key. compose it */
+    make_cpu_key (&entry_key, dir, 
+		  get_third_component (dir->i_sb, name, namelen), TYPE_DIRENTRY, 3);
+
+    /* get memory for composing the entry */
+    buflen = DEH_SIZE + ROUND_UP (namelen);
+    if (buflen > sizeof (small_buf)) {
+	buffer = reiserfs_kmalloc (buflen, GFP_BUFFER, dir->i_sb);
+	if (buffer == 0)
+	    return -ENOMEM;
+    } else
+	buffer = small_buf;
+
+    paste_size = (old_format_only (dir->i_sb)) ? (DEH_SIZE + namelen) : buflen;
+
+    /* fill buffer : directory entry head, name[, dir objectid | , stat data | ,stat data, dir objectid ] */
+    deh = (struct reiserfs_de_head *)buffer;
+    deh->deh_location = 0;
+    deh->deh_offset = cpu_to_le32 (cpu_key_k_offset (&entry_key));
+    deh->deh_state = 0;
+    /* put key (ino analog) to de */
+    deh->deh_dir_id = INODE_PKEY (inode)->k_dir_id;
+    deh->deh_objectid = INODE_PKEY (inode)->k_objectid;
+
+    /* copy name */
+    memcpy ((char *)(deh + 1), name, namelen);
+    /* padd by 0s to the 4 byte boundary */
+    padd_item ((char *)(deh + 1), ROUND_UP (namelen), namelen);
+
+    /* entry is ready to be pasted into tree, set 'visibility' and 'stat data in entry' attributes */
+    mark_de_without_sd (deh);
+    visible ? mark_de_visible (deh) : mark_de_hidden (deh);
+
+    /* find the proper place for the new entry */
+    memset (bit_string, 0, sizeof (bit_string));
+    de.de_gen_number_bit_string = (char *)bit_string;
+    if (reiserfs_find_entry (dir, name, namelen, &path, &de) == NAME_FOUND) {
+	if (buffer != small_buf)
+	    reiserfs_kfree (buffer, buflen, dir->i_sb);
+	pathrelse (&path);
+	return -EEXIST;
+    }
+
+    if (find_first_nonzero_bit (bit_string, MAX_GENERATION_NUMBER + 1) < MAX_GENERATION_NUMBER + 1) {
+	/* there are few names with given hash value */
+	gen_number = find_first_zero_bit (bit_string, MAX_GENERATION_NUMBER + 1);
+	if (gen_number > MAX_GENERATION_NUMBER) {
+	    /* there is no free generation number */
+	    reiserfs_warning ("reiserfs_add_entry: Congratulations! we have got hash function screwed up\n");
+	    if (buffer != small_buf)
+		reiserfs_kfree (buffer, buflen, dir->i_sb);
+	    pathrelse (&path);
+	    return -EHASHCOLLISION;//EBADSLT
+	}
+	/* adjust offset of directory enrty */
+	deh->deh_offset = cpu_to_le32 (SET_GENERATION_NUMBER (deh_offset (deh), gen_number));
+	set_cpu_key_k_offset (&entry_key, le32_to_cpu (deh->deh_offset));
+
+	/* find place for new entry */
+	if (search_by_entry_key (dir->i_sb, &entry_key, &path, &de) == NAME_FOUND) {
+	    reiserfs_warning ("vs-7032: reiserfs_add_entry: "
+			      "entry with this key (%k) already exists", &entry_key);
+	    if (buffer != small_buf)
+		reiserfs_kfree (buffer, buflen, dir->i_sb);
+	    pathrelse (&path);
+	    return -EHASHCOLLISION;
+	}
+    } else {
+	deh->deh_offset = cpu_to_le32 (SET_GENERATION_NUMBER (le32_to_cpu (deh->deh_offset), 0));
+	set_cpu_key_k_offset (&entry_key, le32_to_cpu (deh->deh_offset));
+    }
+  
+    /* perform the insertion of the entry that we have prepared */
+    retval = reiserfs_paste_into_item (th, &path, &entry_key, buffer, paste_size);
+    if (buffer != small_buf)
+	reiserfs_kfree (buffer, buflen, dir->i_sb);
+    if (retval) {
+	reiserfs_check_path(&path) ;
+	return retval;
+    }
+
+    dir->i_size += paste_size;
+    dir->i_blocks = ((dir->i_size + 511) >> 9);
+    dir->i_mtime = dir->i_ctime = CURRENT_TIME;
+    if (!S_ISDIR (inode->i_mode) && visible)
+	// reiserfs_mkdir or reiserfs_rename will do that by itself
+	reiserfs_update_sd (th, dir);
+
+    reiserfs_check_path(&path) ;
+    return 0;
+}
+
+
+//
+// a portion of this function, particularly the VFS interface portion,
+// was derived from minix or ext2's analog and evolved as the
+// prototype did. You should be able to tell which portion by looking
+// at the ext2 code and comparing. It's subfunctions contain no code
+// used as a template unless they are so labeled.
+//
+int reiserfs_create (struct inode * dir, struct dentry *dentry, int mode)
+{
+    int retval;
+    struct inode * inode;
+    int windex ;
+    int jbegin_count = JOURNAL_PER_BALANCE_CNT * 2 ;
+    struct reiserfs_transaction_handle th ;
+
+
+    inode = get_empty_inode() ;
+    if (!inode) {
+	return -ENOMEM ;
+    }
+    journal_begin(&th, dir->i_sb, jbegin_count) ;
+    th.t_caller = "create" ;
+    windex = push_journal_writer("reiserfs_create") ;
+    inode = reiserfs_new_inode (&th, dir, mode, 0, 0/*i_size*/, dentry, inode, &retval);
+    if (!inode) {
+	pop_journal_writer(windex) ;
+	journal_end(&th, dir->i_sb, jbegin_count) ;
+	return retval;
+    }
+	
+    inode->i_op = &reiserfs_file_inode_operations;
+    inode->i_fop = &reiserfs_file_operations;
+    inode->i_mapping->a_ops = &reiserfs_address_space_operations ;
+
+    retval = reiserfs_add_entry (&th, dir, dentry->d_name.name, dentry->d_name.len, 
+				inode, 1/*visible*/);
+    if (retval) {
+	inode->i_nlink--;
+	reiserfs_update_sd (&th, inode);
+	pop_journal_writer(windex) ;
+	// FIXME: should we put iput here and have stat data deleted
+	// in the same transactioin
+	journal_end(&th, dir->i_sb, jbegin_count) ;
+	iput (inode);
+	return retval;
+    }
+
+    d_instantiate(dentry, inode);
+    pop_journal_writer(windex) ;
+    journal_end(&th, dir->i_sb, jbegin_count) ;
+    return 0;
+}
+
+
+//
+// a portion of this function, particularly the VFS interface portion,
+// was derived from minix or ext2's analog and evolved as the
+// prototype did. You should be able to tell which portion by looking
+// at the ext2 code and comparing. It's subfunctions contain no code
+// used as a template unless they are so labeled.
+//
+int reiserfs_mknod (struct inode * dir, struct dentry *dentry, int mode, int rdev)
+{
+    int retval;
+    struct inode * inode;
+    int windex ;
+    struct reiserfs_transaction_handle th ;
+    int jbegin_count = JOURNAL_PER_BALANCE_CNT * 3; 
+
+    inode = get_empty_inode() ;
+    if (!inode) {
+	return -ENOMEM ;
+    }
+    journal_begin(&th, dir->i_sb, jbegin_count) ;
+    windex = push_journal_writer("reiserfs_mknod") ;
+
+    inode = reiserfs_new_inode (&th, dir, mode, 0, 0/*i_size*/, dentry, inode, &retval);
+    if (!inode) {
+	pop_journal_writer(windex) ;
+	journal_end(&th, dir->i_sb, jbegin_count) ;
+	return retval;
+    }
+
+    init_special_inode(inode, mode, rdev) ;
+
+    //FIXME: needed for block and char devices only
+    reiserfs_update_sd (&th, inode);
+
+    retval = reiserfs_add_entry (&th, dir, dentry->d_name.name, dentry->d_name.len, 
+				 inode, 1/*visible*/);
+    if (retval) {
+	inode->i_nlink--;
+	reiserfs_update_sd (&th, inode);
+	pop_journal_writer(windex) ;
+	journal_end(&th, dir->i_sb, jbegin_count) ;
+	iput (inode);
+	return retval;
+    }
+
+    d_instantiate(dentry, inode);
+    pop_journal_writer(windex) ;
+    journal_end(&th, dir->i_sb, jbegin_count) ;
+    return 0;
+}
+
+
+//
+// a portion of this function, particularly the VFS interface portion,
+// was derived from minix or ext2's analog and evolved as the
+// prototype did. You should be able to tell which portion by looking
+// at the ext2 code and comparing. It's subfunctions contain no code
+// used as a template unless they are so labeled.
+//
+int reiserfs_mkdir (struct inode * dir, struct dentry *dentry, int mode)
+{
+    int retval;
+    struct inode * inode;
+    int windex ;
+    struct reiserfs_transaction_handle th ;
+    int jbegin_count = JOURNAL_PER_BALANCE_CNT * 3; 
+
+    inode = get_empty_inode() ;
+    if (!inode) {
+	return -ENOMEM ;
+    }
+    journal_begin(&th, dir->i_sb, jbegin_count) ;
+    windex = push_journal_writer("reiserfs_mkdir") ;
+
+    /* inc the link count now, so another writer doesn't overflow it while
+    ** we sleep later on.
+    */
+    INC_DIR_INODE_NLINK(dir)
+
+    mode = S_IFDIR | mode;
+    inode = reiserfs_new_inode (&th, dir, mode, 0/*symlink*/,
+				old_format_only (dir->i_sb) ? EMPTY_DIR_SIZE_V1 : EMPTY_DIR_SIZE,
+				dentry, inode, &retval);
+    if (!inode) {
+	pop_journal_writer(windex) ;
+	dir->i_nlink-- ;
+	journal_end(&th, dir->i_sb, jbegin_count) ;
+	return retval;
+    }
+
+    inode->i_op = &reiserfs_dir_inode_operations;
+    inode->i_fop = &reiserfs_dir_operations;
+
+    // note, _this_ add_entry will not update dir's stat data
+    retval = reiserfs_add_entry (&th, dir, dentry->d_name.name, dentry->d_name.len, 
+				inode, 1/*visible*/);
+    if (retval) {
+	inode->i_nlink = 0;
+	DEC_DIR_INODE_NLINK(dir);
+	reiserfs_update_sd (&th, inode);
+	pop_journal_writer(windex) ;
+	journal_end(&th, dir->i_sb, jbegin_count) ;
+	iput (inode);
+	return retval;
+    }
+
+    // the above add_entry did not update dir's stat data
+    reiserfs_update_sd (&th, dir);
+
+    d_instantiate(dentry, inode);
+    pop_journal_writer(windex) ;
+    journal_end(&th, dir->i_sb, jbegin_count) ;
+    return 0;
+}
+
+static inline int reiserfs_empty_dir(struct inode *inode) {
+    /* we can cheat because an old format dir cannot have
+    ** EMPTY_DIR_SIZE, and a new format dir cannot have
+    ** EMPTY_DIR_SIZE_V1.  So, if the inode is either size, 
+    ** regardless of disk format version, the directory is empty.
+    */
+    if (inode->i_size != EMPTY_DIR_SIZE &&
+        inode->i_size != EMPTY_DIR_SIZE_V1) {
+        return 0 ;
+    }
+    return 1 ;
+}
+
+
+//
+// a portion of this function, particularly the VFS interface portion,
+// was derived from minix or ext2's analog and evolved as the
+// prototype did. You should be able to tell which portion by looking
+// at the ext2 code and comparing. It's subfunctions contain no code
+// used as a template unless they are so labeled.
+//
+int reiserfs_rmdir (struct inode * dir, struct dentry *dentry)
+{
+    int retval;
+    struct inode * inode;
+    int windex ;
+    struct reiserfs_transaction_handle th ;
+    int jbegin_count = JOURNAL_PER_BALANCE_CNT * 3; 
+    INITIALIZE_PATH (path);
+    struct reiserfs_dir_entry de;
+
+
+    journal_begin(&th, dir->i_sb, jbegin_count) ;
+    windex = push_journal_writer("reiserfs_rmdir") ;
+
+    de.de_gen_number_bit_string = 0;
+    if (reiserfs_find_entry (dir, dentry->d_name.name, dentry->d_name.len, &path, &de) == NAME_NOT_FOUND) {
+	retval = -ENOENT;
+	goto end_rmdir;
+    }
+    inode = dentry->d_inode;
+
+    if (de.de_objectid != inode->i_ino) {
+	// FIXME: compare key of an object and a key found in the
+	// entry
+	retval = -EIO;
+	goto end_rmdir;
+    }
+    if (!reiserfs_empty_dir(inode)) {
+	retval = -ENOTEMPTY;
+	goto end_rmdir;
+    }
+
+    /* cut entry from dir directory */
+    retval = reiserfs_cut_from_item (&th, &path, &(de.de_entry_key), dir, 
+                                     NULL, /* page */ 
+				     0/*new file size - not used here*/);
+    if (retval < 0)
+	goto end_rmdir;
+
+    if ( inode->i_nlink != 2 && inode->i_nlink != 1 )
+	printk ("reiserfs_rmdir: empty directory has nlink != 2 (%d)\n", inode->i_nlink);
+
+    inode->i_nlink = 0;
+    inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME;
+    reiserfs_update_sd (&th, inode);
+
+    DEC_DIR_INODE_NLINK(dir)
+    dir->i_size -= (DEH_SIZE + de.de_entrylen);
+    dir->i_blocks = ((dir->i_size + 511) >> 9);
+    reiserfs_update_sd (&th, dir);
+
+    pop_journal_writer(windex) ;
+    journal_end(&th, dir->i_sb, jbegin_count) ;
+    reiserfs_check_path(&path) ;
+    return 0;
+	
+ end_rmdir:
+    /* we must release path, because we did not call
+       reiserfs_cut_from_item, or reiserfs_cut_from_item does not
+       release path if operation was not complete */
+    pathrelse (&path);
+    pop_journal_writer(windex) ;
+    journal_end(&th, dir->i_sb, jbegin_count) ;
+    return retval;	
+}
+
+
+//
+// a portion of this function, particularly the VFS interface portion,
+// was derived from minix or ext2's analog and evolved as the
+// prototype did. You should be able to tell which portion by looking
+// at the ext2 code and comparing. It's subfunctions contain no code
+// used as a template unless they are so labeled.
+//
+int reiserfs_unlink (struct inode * dir, struct dentry *dentry)
+{
+    int retval;
+    struct inode * inode;
+    struct reiserfs_dir_entry de;
+    INITIALIZE_PATH (path);
+    int windex ;
+    struct reiserfs_transaction_handle th ;
+    int jbegin_count = JOURNAL_PER_BALANCE_CNT * 3; 
+
+    journal_begin(&th, dir->i_sb, jbegin_count) ;
+    windex = push_journal_writer("reiserfs_unlink") ;
+	
+    de.de_gen_number_bit_string = 0;
+    if (reiserfs_find_entry (dir, dentry->d_name.name, dentry->d_name.len, &path, &de) == NAME_NOT_FOUND) {
+	retval = -ENOENT;
+	goto end_unlink;
+    }
+    inode = dentry->d_inode;
+
+    if (de.de_objectid != inode->i_ino) {
+	// FIXME: compare key of an object and a key found in the
+	// entry
+	retval = -EIO;
+	goto end_unlink;
+    }
+  
+    if (!inode->i_nlink) {
+	printk("reiserfs_unlink: deleting nonexistent file (%s:%lu), %d\n",
+	       kdevname(inode->i_dev), inode->i_ino, inode->i_nlink);
+	inode->i_nlink = 1;
+    }
+
+    retval = reiserfs_cut_from_item (&th, &path, &(de.de_entry_key), dir, NULL, 0);
+    if (retval < 0)
+	goto end_unlink;
+
+    inode->i_nlink--;
+    inode->i_ctime = CURRENT_TIME;
+    reiserfs_update_sd (&th, inode);
+
+    dir->i_size -= (de.de_entrylen + DEH_SIZE);
+    dir->i_blocks = ((dir->i_size + 511) >> 9);
+    dir->i_ctime = dir->i_mtime = CURRENT_TIME;
+    reiserfs_update_sd (&th, dir);
+
+    pop_journal_writer(windex) ;
+    journal_end(&th, dir->i_sb, jbegin_count) ;
+    reiserfs_check_path(&path) ;
+    return 0;
+
+ end_unlink:
+    pathrelse (&path);
+    pop_journal_writer(windex) ;
+    journal_end(&th, dir->i_sb, jbegin_count) ;
+    reiserfs_check_path(&path) ;
+    return retval;
+}
+
+
+//
+// a portion of this function, particularly the VFS interface portion,
+// was derived from minix or ext2's analog and evolved as the
+// prototype did. You should be able to tell which portion by looking
+// at the ext2 code and comparing. It's subfunctions contain no code
+// used as a template unless they are so labeled.
+//
+int reiserfs_symlink (struct inode * dir, struct dentry * dentry, const char * symname)
+{
+    int retval;
+    struct inode * inode;
+    char * name;
+    int item_len;
+    int windex ;
+    struct reiserfs_transaction_handle th ;
+    int jbegin_count = JOURNAL_PER_BALANCE_CNT * 3; 
+
+
+    inode = get_empty_inode() ;
+    if (!inode) {
+	return -ENOMEM ;
+    }
+
+    item_len = ROUND_UP (strlen (symname));
+    if (item_len > MAX_ITEM_LEN (dir->i_sb->s_blocksize)) {
+	iput(inode) ;
+	return -ENAMETOOLONG;
+    }
+  
+    name = kmalloc (item_len, GFP_BUFFER);
+    if (!name) {
+	iput(inode) ;
+	return -ENOMEM;
+    }
+    memcpy (name, symname, strlen (symname));
+    padd_item (name, item_len, strlen (symname));
+
+    journal_begin(&th, dir->i_sb, jbegin_count) ;
+    windex = push_journal_writer("reiserfs_symlink") ;
+
+    inode = reiserfs_new_inode (&th, dir, S_IFLNK | S_IRWXUGO, name, strlen (symname), dentry,
+				inode, &retval);
+    kfree (name);
+    if (inode == 0) { /* reiserfs_new_inode iputs for us */
+	pop_journal_writer(windex) ;
+	journal_end(&th, dir->i_sb, jbegin_count) ;
+	return retval;
+    }
+
+    inode->i_op = &page_symlink_inode_operations;
+    inode->i_mapping->a_ops = &reiserfs_address_space_operations;
+
+    // must be sure this inode is written with this transaction
+    //
+    //reiserfs_update_sd (&th, inode, READ_BLOCKS);
+
+    retval = reiserfs_add_entry (&th, dir, dentry->d_name.name, dentry->d_name.len, 
+				 inode, 1/*visible*/);
+    if (retval) {
+	inode->i_nlink--;
+	reiserfs_update_sd (&th, inode);
+	pop_journal_writer(windex) ;
+	journal_end(&th, dir->i_sb, jbegin_count) ;
+	iput (inode);
+	return retval;
+    }
+
+    d_instantiate(dentry, inode);
+    pop_journal_writer(windex) ;
+    journal_end(&th, dir->i_sb, jbegin_count) ;
+    return 0;
+}
+
+
+//
+// a portion of this function, particularly the VFS interface portion,
+// was derived from minix or ext2's analog and evolved as the
+// prototype did. You should be able to tell which portion by looking
+// at the ext2 code and comparing. It's subfunctions contain no code
+// used as a template unless they are so labeled.
+//
+int reiserfs_link (struct dentry * old_dentry, struct inode * dir, struct dentry * dentry)
+{
+    int retval;
+    struct inode *inode = old_dentry->d_inode;
+    int windex ;
+    struct reiserfs_transaction_handle th ;
+    int jbegin_count = JOURNAL_PER_BALANCE_CNT * 3; 
+
+
+    if (S_ISDIR(inode->i_mode))
+	return -EPERM;
+  
+    if (inode->i_nlink >= REISERFS_LINK_MAX) {
+	//FIXME: sd_nlink is 32 bit for new files
+	return -EMLINK;
+    }
+
+    journal_begin(&th, dir->i_sb, jbegin_count) ;
+    windex = push_journal_writer("reiserfs_link") ;
+
+    /* create new entry */
+    retval = reiserfs_add_entry (&th, dir, dentry->d_name.name, dentry->d_name.len,
+				 inode, 1/*visible*/);
+    if (retval) {
+	pop_journal_writer(windex) ;
+	journal_end(&th, dir->i_sb, jbegin_count) ;
+	return retval;
+    }
+
+    inode->i_nlink++;
+    inode->i_ctime = CURRENT_TIME;
+    reiserfs_update_sd (&th, inode);
+
+    atomic_inc(&inode->i_count) ;
+    d_instantiate(dentry, inode);
+    pop_journal_writer(windex) ;
+    journal_end(&th, dir->i_sb, jbegin_count) ;
+    return 0;
+}
+
+
+// de contains information pointing to an entry which 
+static int de_still_valid (const char * name, int len, struct reiserfs_dir_entry * de)
+{
+    struct reiserfs_dir_entry tmp = *de;
+    
+    // recalculate pointer to name and name length
+    set_de_name_and_namelen (&tmp);
+    // FIXME: could check more
+    if (tmp.de_namelen != len || memcmp (name, de->de_name, len))
+	return 0;
+    return 1;
+}
+
+
+static int entry_points_to_object (const char * name, int len, struct reiserfs_dir_entry * de, struct inode * inode)
+{
+    if (!de_still_valid (name, len, de))
+	return 0;
+
+    if (inode) {
+	if (!de_visible (de->de_deh + de->de_entry_num))
+	    reiserfs_panic (0, "vs-7042: entry_points_to_object: entry must be visible");
+	return (de->de_objectid == inode->i_ino) ? 1 : 0;
+    }
+
+    /* this must be added hidden entry */
+    if (de_visible (de->de_deh + de->de_entry_num))
+	reiserfs_panic (0, "vs-7043: entry_points_to_object: entry must be visible");
+
+    return 1;
+}
+
+
+/* sets key of objectid the entry has to point to */
+static void set_ino_in_dir_entry (struct reiserfs_dir_entry * de, struct key * key)
+{
+    de->de_deh[de->de_entry_num].deh_dir_id = key->k_dir_id;
+    de->de_deh[de->de_entry_num].deh_objectid = key->k_objectid;
+}
+
+
+//
+// a portion of this function, particularly the VFS interface portion,
+// was derived from minix or ext2's analog and evolved as the
+// prototype did. You should be able to tell which portion by looking
+// at the ext2 code and comparing. It's subfunctions contain no code
+// used as a template unless they are so labeled.
+//
+
+/* 
+ * process, that is going to call fix_nodes/do_balance must hold only
+ * one path. If it holds 2 or more, it can get into endless waiting in
+ * get_empty_nodes or its clones 
+ */
+int reiserfs_rename (struct inode * old_dir, struct dentry *old_dentry,
+		     struct inode * new_dir, struct dentry *new_dentry)
+{
+    int retval;
+    INITIALIZE_PATH (old_entry_path);
+    INITIALIZE_PATH (new_entry_path);
+    INITIALIZE_PATH (dot_dot_entry_path);
+    struct item_head new_entry_ih, old_entry_ih ;
+    struct reiserfs_dir_entry old_de, new_de, dot_dot_de;
+    struct inode * old_inode, * new_inode;
+    int windex ;
+    struct reiserfs_transaction_handle th ;
+    int jbegin_count = JOURNAL_PER_BALANCE_CNT * 3; 
+
+
+    old_inode = old_dentry->d_inode;
+    new_inode = new_dentry->d_inode;
+
+    // make sure, that oldname still exists and points to an object we
+    // are going to rename
+    old_de.de_gen_number_bit_string = 0;
+    retval = reiserfs_find_entry (old_dir, old_dentry->d_name.name, old_dentry->d_name.len,
+				  &old_entry_path, &old_de);
+    pathrelse (&old_entry_path);
+    if (retval != NAME_FOUND || old_de.de_objectid != old_inode->i_ino) {
+	// FIXME: IO error is possible here
+	return -ENOENT;
+    }
+
+    if (S_ISDIR(old_inode->i_mode)) {
+	// make sure, that directory being renamed has correct ".." 
+	// and that its new parent directory has not too many links
+	// already
+
+	if (new_inode) {
+	    if (!reiserfs_empty_dir(new_inode)) {
+		return -ENOTEMPTY;
+	    }
+	}
+	
+	/* directory is renamed, its parent directory will be changed, 
+	** so find ".." entry 
+	*/
+	dot_dot_de.de_gen_number_bit_string = 0;
+	retval = reiserfs_find_entry (old_inode, "..", 2, &dot_dot_entry_path, &dot_dot_de);
+	pathrelse (&dot_dot_entry_path);
+	if (retval != NAME_FOUND)
+	    return -EIO;
+
+	/* inode number of .. must equal old_dir->i_ino */
+	if (dot_dot_de.de_objectid != old_dir->i_ino)
+	    return -EIO;
+    }
+
+    journal_begin(&th, old_dir->i_sb, jbegin_count) ;
+    windex = push_journal_writer("reiserfs_rename") ;
+
+    /* add new entry (or find the existing one) */
+    retval = reiserfs_add_entry (&th, new_dir, new_dentry->d_name.name, new_dentry->d_name.len, 
+				 old_inode, 0);
+    if (retval == -EEXIST) {
+	// FIXME: is it possible, that new_inode == 0 here? If yes, it
+	// is not clear how does ext2 handle that
+	if (!new_inode) {
+	    printk ("reiserfs_rename: new entry is found, new inode == 0\n");
+	    BUG ();
+	}
+    } else if (retval) {
+	pop_journal_writer(windex) ;
+	journal_end(&th, old_dir->i_sb, jbegin_count) ;
+	return retval;
+    }
+
+
+    while (1) {
+	// look for old name using corresponding entry key (found by reiserfs_find_entry)
+	if (search_by_entry_key (new_dir->i_sb, &old_de.de_entry_key, &old_entry_path, &old_de) != NAME_FOUND)
+	    BUG ();
+
+	copy_item_head(&old_entry_ih, get_ih(&old_entry_path)) ;
+
+	// look for new name by reiserfs_find_entry
+	new_de.de_gen_number_bit_string = 0;
+	retval = reiserfs_find_entry (new_dir, new_dentry->d_name.name, new_dentry->d_name.len, 
+				      &new_entry_path, &new_de);
+	if (retval != NAME_FOUND_INVISIBLE && retval != NAME_FOUND)
+	    BUG ();
+
+	copy_item_head(&new_entry_ih, get_ih(&new_entry_path)) ;
+
+	reiserfs_prepare_for_journal(old_inode->i_sb, new_de.de_bh, 1) ;
+
+	if (S_ISDIR(old_inode->i_mode)) {
+	    if (search_by_entry_key (new_dir->i_sb, &dot_dot_de.de_entry_key, &dot_dot_entry_path, &dot_dot_de) != NAME_FOUND)
+		BUG ();
+	    // node containing ".." gets into transaction
+	    reiserfs_prepare_for_journal(old_inode->i_sb, dot_dot_de.de_bh, 1) ;
+	}
+				/* we should check seals here, not do
+                                   this stuff, yes? Then, having
+                                   gathered everything into RAM we
+                                   should lock the buffers, yes?  -Hans */
+				/* probably.  our rename needs to hold more 
+				** than one path at once.  The seals would 
+				** have to be written to deal with multi-path 
+				** issues -chris
+				*/
+	/* sanity checking before doing the rename - avoid races many
+	** of the above checks could have scheduled.  We have to be
+	** sure our items haven't been shifted by another process.
+	*/
+	if (!entry_points_to_object(new_dentry->d_name.name, 
+	                            new_dentry->d_name.len,
+				    &new_de, new_inode) ||
+	    item_moved(&new_entry_ih, &new_entry_path) ||
+	    item_moved(&old_entry_ih, &old_entry_path) || 
+	    !entry_points_to_object (old_dentry->d_name.name, 
+	                             old_dentry->d_name.len,
+				     &old_de, old_inode)) {
+	    reiserfs_restore_prepared_buffer (old_inode->i_sb, new_de.de_bh);
+	    if (S_ISDIR(old_inode->i_mode))
+		reiserfs_restore_prepared_buffer (old_inode->i_sb, dot_dot_de.de_bh);
+#if 0
+	    // FIXME: do we need this? shouldn't we simply continue?
+	    run_task_queue(&tq_disk);
+	    current->policy |= SCHED_YIELD;
+	    /*current->counter = 0;*/
+	    schedule();
+#endif
+	    continue;
+	}
+
+#ifdef CONFIG_REISERFS_CHECK
+	if (S_ISDIR(old_inode->i_mode) && 
+	    (!entry_points_to_object ("..", 2, &dot_dot_de, old_dir) || 
+	     !reiserfs_buffer_prepared(dot_dot_de.de_bh))) {
+	    // this should be not changed
+	    BUG ();
+	}
+#endif	
+
+	break;
+    }
+
+    /* ok, all the changes can be done in one fell swoop when we
+       have claimed all the buffers needed.*/
+    
+    mark_de_visible (new_de.de_deh + new_de.de_entry_num);
+    set_ino_in_dir_entry (&new_de, INODE_PKEY (old_inode));
+    journal_mark_dirty (&th, old_dir->i_sb, new_de.de_bh);
+
+    mark_de_hidden (old_de.de_deh + old_de.de_entry_num);
+    old_dir->i_ctime = old_dir->i_mtime = CURRENT_TIME;
+    new_dir->i_ctime = new_dir->i_mtime = CURRENT_TIME;
+
+    if (new_inode) {
+	// adjust link number of the victim
+	if (S_ISDIR(new_inode->i_mode)) {
+	  DEC_DIR_INODE_NLINK(new_inode)
+	} else {
+	  new_inode->i_nlink--;
+	}
+	new_inode->i_ctime = CURRENT_TIME;
+    }
+
+    if (S_ISDIR(old_inode->i_mode)) {
+      //if (dot_dot_de.de_bh) {
+	// adjust ".." of renamed directory
+	set_ino_in_dir_entry (&dot_dot_de, INODE_PKEY (new_dir));
+	journal_mark_dirty (&th, new_dir->i_sb, dot_dot_de.de_bh);
+
+	DEC_DIR_INODE_NLINK(old_dir)
+	if (new_inode) {
+	    if (S_ISDIR(new_inode->i_mode)) {
+		DEC_DIR_INODE_NLINK(new_inode)
+	    } else {
+	        new_inode->i_nlink--;
+	    }
+	} else {
+	    INC_DIR_INODE_NLINK(new_dir)
+	}
+    }
+
+    // looks like in 2.3.99pre3 brelse is atomic. so we can use pathrelse
+    pathrelse (&new_entry_path);
+    pathrelse (&dot_dot_entry_path);
+
+    // FIXME: this reiserfs_cut_from_item's return value may screw up
+    // anybody, but it will panic if will not be able to find the
+    // entry. This needs one more clean up
+    if (reiserfs_cut_from_item (&th, &old_entry_path, &(old_de.de_entry_key), old_dir, NULL, 0) < 0)
+	reiserfs_warning ("vs-: reiserfs_rename: coudl not cut old name. Fsck later?\n");
+
+    old_dir->i_size -= DEH_SIZE + old_de.de_entrylen;
+    old_dir->i_blocks = ((old_dir->i_size + 511) >> 9);
+
+    reiserfs_update_sd (&th, old_dir);
+    reiserfs_update_sd (&th, new_dir);
+    if (new_inode)
+	reiserfs_update_sd (&th, new_inode);
+
+    pop_journal_writer(windex) ;
+    journal_end(&th, old_dir->i_sb, jbegin_count) ;
+    return 0;
+}
+

FUNET's LINUX-ADM group, linux-adm@nic.funet.fi
TCL-scripts by Sam Shen (who was at: slshen@lbl.gov)