From 7344e72a912d555414c50dbc9d8ffa296419f016 Mon Sep 17 00:00:00 2001 From: dumbbell Date: Tue, 24 May 2005 12:24:45 +0000 Subject: [PATCH] Import of ReiserFS filesystem support (currently limited to read-only on i386). Source code is under the GNU GPL license. Approved by: mux (mentor) --- sys/gnu/reiserfs/README | 163 ++++ sys/gnu/reiserfs/reiserfs_fs.h | 1298 ++++++++++++++++++++++++++ sys/gnu/reiserfs/reiserfs_fs_i.h | 90 ++ sys/gnu/reiserfs/reiserfs_fs_sb.h | 143 +++ sys/gnu/reiserfs/reiserfs_hashes.c | 217 +++++ sys/gnu/reiserfs/reiserfs_inode.c | 926 ++++++++++++++++++ sys/gnu/reiserfs/reiserfs_item_ops.c | 158 ++++ sys/gnu/reiserfs/reiserfs_mount.h | 47 + sys/gnu/reiserfs/reiserfs_namei.c | 699 ++++++++++++++ sys/gnu/reiserfs/reiserfs_prints.c | 307 ++++++ sys/gnu/reiserfs/reiserfs_stree.c | 760 +++++++++++++++ sys/gnu/reiserfs/reiserfs_vfsops.c | 1186 +++++++++++++++++++++++ sys/gnu/reiserfs/reiserfs_vnops.c | 353 +++++++ 13 files changed, 6347 insertions(+) create mode 100644 sys/gnu/reiserfs/README create mode 100644 sys/gnu/reiserfs/reiserfs_fs.h create mode 100644 sys/gnu/reiserfs/reiserfs_fs_i.h create mode 100644 sys/gnu/reiserfs/reiserfs_fs_sb.h create mode 100644 sys/gnu/reiserfs/reiserfs_hashes.c create mode 100644 sys/gnu/reiserfs/reiserfs_inode.c create mode 100644 sys/gnu/reiserfs/reiserfs_item_ops.c create mode 100644 sys/gnu/reiserfs/reiserfs_mount.h create mode 100644 sys/gnu/reiserfs/reiserfs_namei.c create mode 100644 sys/gnu/reiserfs/reiserfs_prints.c create mode 100644 sys/gnu/reiserfs/reiserfs_stree.c create mode 100644 sys/gnu/reiserfs/reiserfs_vfsops.c create mode 100644 sys/gnu/reiserfs/reiserfs_vnops.c diff --git a/sys/gnu/reiserfs/README b/sys/gnu/reiserfs/README new file mode 100644 index 000000000000..f3917c1be08a --- /dev/null +++ b/sys/gnu/reiserfs/README @@ -0,0 +1,163 @@ +$FreeBSD$ + +[LICENSING] + +ReiserFS is hereby licensed under the GNU General +Public License version 2. 
+ +Source code files that contain the phrase "licensing governed by +reiserfs/README" are "governed files" throughout this file. Governed +files are licensed under the GPL. The portions of them owned by Hans +Reiser, or authorized to be licensed by him, have been in the past, +and likely will be in the future, licensed to other parties under +other licenses. If you add your code to governed files, and don't +want it to be owned by Hans Reiser, put your copyright label on that +code so the poor blight and his customers can keep things straight. +All portions of governed files not labeled otherwise are owned by Hans +Reiser, and by adding your code to it, widely distributing it to +others or sending us a patch, and leaving the sentence in stating that +licensing is governed by the statement in this file, you accept this. +It will be a kindness if you identify whether Hans Reiser is allowed +to license code labeled as owned by you on your behalf other than +under the GPL, because he wants to know if it is okay to do so and put +a check in the mail to you (for non-trivial improvements) when he +makes his next sale. He makes no guarantees as to the amount if any, +though he feels motivated to motivate contributors, and you can surely +discuss this with him before or after contributing. You have the +right to decline to allow him to license your code contribution other +than under the GPL. + +Further licensing options are available for commercial and/or other +interests directly from Hans Reiser: hans@reiser.to. If you interpret +the GPL as not allowing those additional licensing options, you read +it wrongly, and Richard Stallman agrees with me, when carefully read +you can see that those restrictions on additional terms do not apply +to the owner of the copyright, and my interpretation of this shall +govern for this license. 
+ +Finally, nothing in this license shall be interpreted to allow you to +fail to fairly credit me, or to remove my credits, without my +permission, unless you are an end user not redistributing to others. +If you have doubts about how to properly do that, or about what is +fair, ask. (Last I spoke with him Richard was contemplating how best +to address the fair crediting issue in the next GPL version.) + +[END LICENSING] + +Reiserfs is a file system based on balanced tree algorithms, which is +described at http://devlinux.com/namesys. + +Stop reading here. Go there, then return. + +Send bug reports to yura@namesys.botik.ru. + +mkreiserfs and other utilities are in reiserfs/utils, or wherever your +Linux provider put them. There is some disagreement about how useful +it is for users to get their fsck and mkreiserfs out of sync with the +version of reiserfs that is in their kernel, with many important +distributors wanting them out of sync.:-) Please try to remember to +recompile and reinstall fsck and mkreiserfs with every update of +reiserfs, this is a common source of confusion. Note that some of the +utilities cannot be compiled without accessing the balancing code +which is in the kernel code, and relocating the utilities may require +you to specify where that code can be found. + +Yes, if you update your reiserfs kernel module you do have to +recompile your kernel, most of the time. The errors you get will be +quite cryptic if you forget to do so. + +Real users, as opposed to folks who want to hack and then understand +what went wrong, will want REISERFS_CHECK off. + +Hideous Commercial Pitch: Spread your development costs across other OS +vendors. Select from the best in the world, not the best in your +building, by buying from third party OS component suppliers. Leverage +the software component development power of the internet.
Be the most +aggressive in taking advantage of the commercial possibilities of +decentralized internet development, and add value through your branded +integration that you sell as an operating system. Let your competitors +be the ones to compete against the entire internet by themselves. Be +hip, get with the new economic trend, before your competitors do. Send +email to hans@reiser.to. + +To understand the code, after reading the website, start reading the +code by reading reiserfs_fs.h first. + +Hans Reiser was the project initiator, primary architect, source of all +funding for the first 5.5 years, and one of the programmers. He owns +the copyright. + +Vladimir Saveljev was one of the programmers, and he worked long hours +writing the cleanest code. He always made the effort to be the best he +could be, and to make his code the best that it could be. What resulted +was quite remarkable. I don't think that money can ever motivate someone +to work the way he did, he is one of the most selfless men I know. + +Yura helps with benchmarking, coding hashes, and block pre-allocation +code. + +Anatoly Pinchuk is a former member of our team who worked closely with +Vladimir throughout the project's development. He wrote a quite +substantial portion of the total code. He realized that there was a +space problem with packing tails of files for files larger than a node +that start on a node aligned boundary (there are reasons to want to node +align files), and he invented and implemented indirect items and +unformatted nodes as the solution. + +Konstantin Shvachko, with the help of the Russian version of a VC, +tried to put me in a position where I was forced into giving control +of the project to him. (Fortunately, as the person paying the money +for all salaries from my dayjob I owned all copyrights, and you can't +really force takeovers of sole proprietorships.) 
This was something +curious, because he never really understood the value of our project, +why we should do what we do, or why innovation was possible in +general, but he was sure that he ought to be controlling it. Every +innovation had to be forced past him while he was with us. He added +two years to the time required to complete reiserfs, and was a net +loss for me. Mikhail Gilula was a brilliant innovator who also left +in a destructive way that erased the value of his contributions, and +that he was shown much generosity just makes it more painful. + +Grigory Zaigralin was an extremely effective system administrator for +our group. + +Igor Krasheninnikov was wonderful at hardware procurement, repair, and +network installation. + +Jeremy Fitzhardinge wrote the teahash.c code, and he gives credit to a +textbook he got the algorithm from in the code. Note that his analysis +of how we could use the hashing code in making 32 bit NFS cookies work +was probably more important than the actual algorithm. Colin Plumb also +contributed to it. + +Chris Mason dived right into our code, and in just a few months produced +the journaling code that dramatically increased the value of ReiserFS. +He is just an amazing programmer. + +Igor Zagorovsky is writing much of the new item handler and extent code +for our next major release. + +Alexander Zarochentcev (sometimes known as zam, or sasha), wrote the +resizer, and is hard at work on implementing allocate on flush. SGI +implemented allocate on flush before us for XFS, and generously took +the time to convince me we should do it also. They are great people, +and a great company. + +Yuri Shevchuk and Nikita Danilov are doing squid cache optimization. + +Vitaly Fertman is doing fsck. + +Jeff Mahoney, of SuSE, contributed a few cleanup fixes, most notably +the endian safe patches which allow ReiserFS to run on any platform +supported by the Linux kernel. 
+ +SuSE, IntegratedLinux.com, Ecila, MP3.com, bigstorage.com, and the +Alpha PC Company made it possible for me to not have a day job +anymore, and to dramatically increase our staffing. Ecila funded +hypertext feature development, MP3.com funded journaling, SuSE funded +core development, IntegratedLinux.com funded squid web cache +appliances, bigstorage.com funded HSM, and the alpha PC company funded +the alpha port. Many of these tasks were helped by sponsors other +than the ones just named. SuSE has helped in much more than just +funding.... + diff --git a/sys/gnu/reiserfs/reiserfs_fs.h b/sys/gnu/reiserfs/reiserfs_fs.h new file mode 100644 index 000000000000..e33c04eda0b5 --- /dev/null +++ b/sys/gnu/reiserfs/reiserfs_fs.h @@ -0,0 +1,1298 @@ +/*- + * Copyright 2000 Hans Reiser + * See README for licensing and copyright details + * + * Ported to FreeBSD by Jean-Sébastien Pédron + * + * $FreeBSD$ + */ + +#ifndef _GNU_REISERFS_REISERFS_FS_H +#define _GNU_REISERFS_REISERFS_FS_H + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include +#include +#include +//#include + +#include +#include + +#include +#include + +#ifdef __i386__ +# include +#else +# include +#endif + +#include +#include +#include + +/* n must be power of 2 */ +#define _ROUND_UP(x, n) (((x) + (n) - 1u) & ~((n) - 1u)) + +/* To be ok for alpha and others we have to align structures to 8 byte + * boundary. 
*/ +#define ROUND_UP(x) _ROUND_UP(x, 8LL) + +/* ------------------------------------------------------------------- + * Global variables + * -------------------------------------------------------------------*/ + +extern struct vop_vector reiserfs_vnodeops; +extern struct vop_vector reiserfs_specops; + +/* ------------------------------------------------------------------- + * Super block + * -------------------------------------------------------------------*/ + +#define REISERFS_BSIZE 1024 + +/* ReiserFS leaves the first 64k unused, so that partition labels have + * enough space. If someone wants to write a fancy bootloader that needs + * more than 64k, let us know, and this will be increased in size. + * This number must be larger than the largest block size on any + * platform, or code will break. -Hans */ +#define REISERFS_DISK_OFFSET 64 +#define REISERFS_DISK_OFFSET_IN_BYTES \ + ((REISERFS_DISK_OFFSET) * (REISERFS_BSIZE)) + +/* The spot for the super in versions 3.5 - 3.5.10 (inclusive) */ +#define REISERFS_OLD_DISK_OFFSET 8 +#define REISERFS_OLD_DISK_OFFSET_IN_BYTES \ + ((REISERFS_OLD_DISK_OFFSET) * (REISERFS_BSIZE)) + +/* + * Structure of a super block on disk, a version of which in RAM is + * often accessed as REISERFS_SB(s)->s_rs. The version in RAM is part of + * a larger structure containing fields never written to disk.
+ */ + +#define UNSET_HASH 0 /* read_super will guess about, what hash names + in directories were sorted with */ +#define TEA_HASH 1 +#define YURA_HASH 2 +#define R5_HASH 3 +#define DEFAULT_HASH R5_HASH + +struct journal_params { + uint32_t jp_journal_1st_block; /* Where does journal start + from on its device */ + uint32_t jp_journal_dev; /* Journal device st_rdev */ + uint32_t jp_journal_size; /* Size of the journal */ + uint32_t jp_journal_trans_max; /* Max number of blocks in + a transaction */ + uint32_t jp_journal_magic; /* Random value made on + fs creation (this was + sb_journal_block_count) */ + uint32_t jp_journal_max_batch; /* Max number of blocks to + batch into a + transaction */ + uint32_t jp_journal_max_commit_age; /* In seconds, how old can + an async commit be */ + uint32_t jp_journal_max_trans_age; /* In seconds, how old a + transaction be */ +}; + +struct reiserfs_super_block_v1 { + uint32_t s_block_count; /* Blocks count */ + uint32_t s_free_blocks; /* Free blocks count */ + uint32_t s_root_block; /* Root block number */ + + struct journal_params s_journal; + + uint16_t s_blocksize; + uint16_t s_oid_maxsize; + uint16_t s_oid_cursize; + uint16_t s_umount_state; + + char s_magic[10]; + + uint16_t s_fs_state; + uint32_t s_hash_function_code; + uint16_t s_tree_height; + uint16_t s_bmap_nr; + uint16_t s_version; + uint16_t s_reserved_for_journal; +} __packed; + +#define SB_SIZE_V1 (sizeof(struct reiserfs_super_block_v1)) + +struct reiserfs_super_block { + struct reiserfs_super_block_v1 s_v1; + uint32_t s_inode_generation; + uint32_t s_flags; + unsigned char s_uuid[16]; + unsigned char s_label[16]; + char s_unused[88]; +} __packed; + +#define SB_SIZE (sizeof(struct reiserfs_super_block)) + +#define REISERFS_VERSION_1 0 +#define REISERFS_VERSION_2 2 + +#define REISERFS_SB(sbi) (sbi) +#define SB_DISK_SUPER_BLOCK(sbi) (REISERFS_SB(sbi)->s_rs) +#define SB_V1_DISK_SUPER_BLOCK(sbi) (&(SB_DISK_SUPER_BLOCK(sbi)->s_v1)) + +#define SB_BLOCKSIZE(sbi) \ + 
le32toh((SB_V1_DISK_SUPER_BLOCK(sbi)->s_blocksize)) +#define SB_BLOCK_COUNT(sbi) \ + le32toh((SB_V1_DISK_SUPER_BLOCK(sbi)->s_block_count)) +#define SB_FREE_BLOCKS(s) \ + le32toh((SB_V1_DISK_SUPER_BLOCK(sbi)->s_free_blocks)) + +#define SB_REISERFS_MAGIC(sbi) \ + (SB_V1_DISK_SUPER_BLOCK(sbi)->s_magic) + +#define SB_ROOT_BLOCK(sbi) \ + le32toh((SB_V1_DISK_SUPER_BLOCK(sbi)->s_root_block)) + +#define SB_TREE_HEIGHT(sbi) \ + le16toh((SB_V1_DISK_SUPER_BLOCK(sbi)->s_tree_height)) + +#define SB_REISERFS_STATE(sbi) \ + le16toh((SB_V1_DISK_SUPER_BLOCK(sbi)->s_umount_state)) + +#define SB_VERSION(sbi) le16toh((SB_V1_DISK_SUPER_BLOCK(sbi)->s_version)) +#define SB_BMAP_NR(sbi) le16toh((SB_V1_DISK_SUPER_BLOCK(sbi)->s_bmap_nr)) + +#define REISERFS_SUPER_MAGIC_STRING "ReIsErFs" +#define REISER2FS_SUPER_MAGIC_STRING "ReIsEr2Fs" +#define REISER2FS_JR_SUPER_MAGIC_STRING "ReIsEr3Fs" + +extern const char reiserfs_3_5_magic_string[]; +extern const char reiserfs_3_6_magic_string[]; +extern const char reiserfs_jr_magic_string[]; + +int is_reiserfs_3_5(struct reiserfs_super_block *rs); +int is_reiserfs_3_6(struct reiserfs_super_block *rs); +int is_reiserfs_jr(struct reiserfs_super_block *rs); + +/* ReiserFS internal error code (used by search_by_key and fix_nodes) */ +#define IO_ERROR -2 + +typedef uint32_t b_blocknr_t; +typedef uint32_t unp_t; + +struct unfm_nodeinfo { + unp_t unfm_nodenum; + unsigned short unfm_freespace; +}; + +/* There are two formats of keys: 3.5 and 3.6 */ +#define KEY_FORMAT_3_5 0 +#define KEY_FORMAT_3_6 1 + +/* There are two stat datas */ +#define STAT_DATA_V1 0 +#define STAT_DATA_V2 1 + +#define REISERFS_I(ip) (ip) + +#define get_inode_item_key_version(ip) \ + ((REISERFS_I(ip)->i_flags & i_item_key_version_mask) ? 
\ + KEY_FORMAT_3_6 : KEY_FORMAT_3_5) + +#define set_inode_item_key_version(ip, version) ({ \ + if ((version) == KEY_FORMAT_3_6) \ + REISERFS_I(ip)->i_flags |= i_item_key_version_mask; \ + else \ + REISERFS_I(ip)->i_flags &= ~i_item_key_version_mask; \ +}) + +#define get_inode_sd_version(ip) \ + ((REISERFS_I(ip)->i_flags & i_stat_data_version_mask) ? \ + STAT_DATA_V2 : STAT_DATA_V1) + +#define set_inode_sd_version(ip, version) ({ \ + if((version) == STAT_DATA_V2) \ + REISERFS_I(ip)->i_flags |= i_stat_data_version_mask; \ + else \ + REISERFS_I(ip)->i_flags &= ~i_stat_data_version_mask; \ +}) /* sets or clears the stat-data version bit on the ip argument itself */ + +/* Values for s_umount_state field */ +#define REISERFS_VALID_FS 1 +#define REISERFS_ERROR_FS 2 + +/* There are 5 item types currently */ +#define TYPE_STAT_DATA 0 +#define TYPE_INDIRECT 1 +#define TYPE_DIRECT 2 +#define TYPE_DIRENTRY 3 +#define TYPE_MAXTYPE 3 +#define TYPE_ANY 15 + +/* ------------------------------------------------------------------- + * Key & item head + * -------------------------------------------------------------------*/ + +struct offset_v1 { + uint32_t k_offset; + uint32_t k_uniqueness; +} __packed; + +struct offset_v2 { +#if BYTE_ORDER == LITTLE_ENDIAN + /* little endian version */ + uint64_t k_offset:60; + uint64_t k_type:4; +#else + /* big endian version */ + uint64_t k_type:4; + uint64_t k_offset:60; +#endif +} __packed; + +#if (BYTE_ORDER == BIG_ENDIAN) +typedef union { + struct offset_v2 offset_v2; + uint64_t linear; +} __packed offset_v2_esafe_overlay; + +static inline uint16_t +offset_v2_k_type(const struct offset_v2 *v2) +{ + + offset_v2_esafe_overlay tmp = *(const offset_v2_esafe_overlay *)v2; + tmp.linear = le64toh(tmp.linear); + return ((tmp.offset_v2.k_type <= TYPE_MAXTYPE) ?
+ tmp.offset_v2.k_type : TYPE_ANY); +} + +static inline void +set_offset_v2_k_type(struct offset_v2 *v2, int type) +{ + + offset_v2_esafe_overlay *tmp = (offset_v2_esafe_overlay *)v2; + tmp->linear = le64toh(tmp->linear); + tmp->offset_v2.k_type = type; + tmp->linear = htole64(tmp->linear); +} + +static inline off_t +offset_v2_k_offset(const struct offset_v2 *v2) +{ + + offset_v2_esafe_overlay tmp = *(const offset_v2_esafe_overlay *)v2; + tmp.linear = le64toh(tmp.linear); + return (tmp.offset_v2.k_offset); +} + +static inline void +set_offset_v2_k_offset(struct offset_v2 *v2, off_t offset) +{ + + offset_v2_esafe_overlay *tmp = (offset_v2_esafe_overlay *)v2; + tmp->linear = le64toh(tmp->linear); + tmp->offset_v2.k_offset = offset; + tmp->linear = htole64(tmp->linear); +} +#else /* BYTE_ORDER != BIG_ENDIAN */ +#define offset_v2_k_type(v2) ((v2)->k_type) +#define set_offset_v2_k_type(v2, val) (offset_v2_k_type(v2) = (val)) +#define offset_v2_k_offset(v2) ((v2)->k_offset) +#define set_offset_v2_k_offset(v2, val) (offset_v2_k_offset(v2) = (val)) +#endif /* BYTE_ORDER == BIG_ENDIAN */ + +/* + * Key of an item determines its location in the S+tree, and + * is composed of 4 components + */ +struct key { + uint32_t k_dir_id; /* Packing locality: by default parent + directory object id */ + uint32_t k_objectid; /* Object identifier */ + union { + struct offset_v1 k_offset_v1; + struct offset_v2 k_offset_v2; + } __packed u; +} __packed; + +struct cpu_key { + struct key on_disk_key; + int version; + int key_length; /* 3 in all cases but direct2indirect + and indirect2direct conversion */ +}; + +/* + * Our function for comparing keys can compare keys of different + * lengths. It takes as a parameter the length of the keys it is to + * compare. These defines are used in determining what is to be passed + * to it as that parameter. 
+ */ +#define REISERFS_FULL_KEY_LEN 4 +#define REISERFS_SHORT_KEY_LEN 2 + +#define KEY_SIZE (sizeof(struct key)) +#define SHORT_KEY_SIZE (sizeof(uint32_t) + sizeof(uint32_t)) + +/* Return values for search_by_key and clones */ +#define ITEM_FOUND 1 +#define ITEM_NOT_FOUND 0 +#define ENTRY_FOUND 1 +#define ENTRY_NOT_FOUND 0 +#define DIRECTORY_NOT_FOUND -1 +#define REGULAR_FILE_FOUND -2 +#define DIRECTORY_FOUND -3 +#define BYTE_FOUND 1 +#define BYTE_NOT_FOUND 0 +#define FILE_NOT_FOUND -1 + +#define POSITION_FOUND 1 +#define POSITION_NOT_FOUND 0 + +/* Return values for reiserfs_find_entry and search_by_entry_key */ +#define NAME_FOUND 1 +#define NAME_NOT_FOUND 0 +#define GOTO_PREVIOUS_ITEM 2 +#define NAME_FOUND_INVISIBLE 3 + +/* + * Everything in the filesystem is stored as a set of items. The item + * head contains the key of the item, its free space (for indirect + * items) and specifies the location of the item itself within the + * block. + */ +struct item_head { + /* + * Everything in the tree is found by searching for it based on + * its key. + */ + struct key ih_key; + union { + /* + * The free space in the last unformatted node of an + * indirect item if this is an indirect item. This + * equals 0xFFFF iff this is a direct item or stat data + * item. Note that the key, not this field, is used to + * determine the item type, and thus which field this + * union contains. + */ + uint16_t ih_free_space_reserved; + + /* + * If this is a directory item, this field equals the number of + * directory entries in the directory item. + */ + uint16_t ih_entry_count; + } __packed u; + uint16_t ih_item_len; /* Total size of the item body */ + uint16_t ih_item_location; /* An offset to the item body within + the block */ + uint16_t ih_version; /* 0 for all old items, 2 for new + ones. 
Highest bit is set by fsck + temporary, cleaned after all + done */ +} __packed; + +/* Size of item header */ +#define IH_SIZE (sizeof(struct item_head)) + +#define ih_free_space(ih) le16toh((ih)->u.ih_free_space_reserved) +#define ih_version(ih) le16toh((ih)->ih_version) +#define ih_entry_count(ih) le16toh((ih)->u.ih_entry_count) +#define ih_location(ih) le16toh((ih)->ih_item_location) +#define ih_item_len(ih) le16toh((ih)->ih_item_len) + +/* + * These operate on indirect items, where you've got an array of ints at + * a possibly unaligned location. These are a noop on IA32. + * + * p is the array of uint32_t, i is the index into the array, v is the + * value to store there. + */ +#define get_unaligned(ptr) \ + ({ __typeof__(*(ptr)) __tmp; \ + memcpy(&__tmp, (ptr), sizeof(*(ptr))); __tmp; }) + +#define put_unaligned(val, ptr) \ + ({ __typeof__(*(ptr)) __tmp = (val); \ + memcpy((ptr), &__tmp, sizeof(*(ptr))); \ + (void)0; }) + +#define get_block_num(p, i) le32toh(get_unaligned((p) + (i))) +#define put_block_num(p, i, v) put_unaligned(htole32(v), (p) + (i)) + +/* In old version uniqueness field shows key type */ +#define V1_SD_UNIQUENESS 0 +#define V1_INDIRECT_UNIQUENESS 0xfffffffe +#define V1_DIRECT_UNIQUENESS 0xffffffff +#define V1_DIRENTRY_UNIQUENESS 500 +#define V1_ANY_UNIQUENESS 555 + +/* Here are conversion routines */ +static inline int uniqueness2type(uint32_t uniqueness); +static inline uint32_t type2uniqueness(int type); + +static inline int +uniqueness2type(uint32_t uniqueness) +{ + + switch ((int)uniqueness) { + case V1_SD_UNIQUENESS: + return (TYPE_STAT_DATA); + case V1_INDIRECT_UNIQUENESS: + return (TYPE_INDIRECT); + case V1_DIRECT_UNIQUENESS: + return (TYPE_DIRECT); + case V1_DIRENTRY_UNIQUENESS: + return (TYPE_DIRENTRY); + default: + log(LOG_NOTICE, "reiserfs: unknown uniqueness (%u)\n", + uniqueness); + case V1_ANY_UNIQUENESS: + return (TYPE_ANY); + } +} + +static inline uint32_t +type2uniqueness(int type) +{ + + switch (type) { + case 
TYPE_STAT_DATA: + return (V1_SD_UNIQUENESS); + case TYPE_INDIRECT: + return (V1_INDIRECT_UNIQUENESS); + case TYPE_DIRECT: + return (V1_DIRECT_UNIQUENESS); + case TYPE_DIRENTRY: + return (V1_DIRENTRY_UNIQUENESS); + default: + log(LOG_NOTICE, "reiserfs: unknown type (%u)\n", type); + case TYPE_ANY: + return (V1_ANY_UNIQUENESS); + } +} + +/* + * Key is pointer to on disk key which is stored in le, result is cpu, + * there is no way to get version of object from key, so, provide + * version to these defines. + */ +static inline off_t +le_key_k_offset(int version, const struct key *key) +{ + + return ((version == KEY_FORMAT_3_5) ? + le32toh(key->u.k_offset_v1.k_offset) : + offset_v2_k_offset(&(key->u.k_offset_v2))); +} + +static inline off_t +le_ih_k_offset(const struct item_head *ih) +{ + + return (le_key_k_offset(ih_version(ih), &(ih->ih_key))); +} + +static inline off_t +le_key_k_type(int version, const struct key *key) +{ + + return ((version == KEY_FORMAT_3_5) ? + uniqueness2type(le32toh(key->u.k_offset_v1.k_uniqueness)) : + offset_v2_k_type(&(key->u.k_offset_v2))); +} + +static inline off_t +le_ih_k_type(const struct item_head *ih) +{ + return (le_key_k_type(ih_version(ih), &(ih->ih_key))); +} + +static inline void +set_le_key_k_offset(int version, struct key *key, off_t offset) +{ + + (version == KEY_FORMAT_3_5) ? + (key->u.k_offset_v1.k_offset = htole32(offset)) : + (set_offset_v2_k_offset(&(key->u.k_offset_v2), offset)); +} + +static inline void +set_le_ih_k_offset(struct item_head *ih, off_t offset) +{ + + set_le_key_k_offset(ih_version(ih), &(ih->ih_key), offset); +} + +static inline void +set_le_key_k_type(int version, struct key *key, int type) +{ + + (version == KEY_FORMAT_3_5) ? 
+ (key->u.k_offset_v1.k_uniqueness = + htole32(type2uniqueness(type))) : + (set_offset_v2_k_type(&(key->u.k_offset_v2), type)); +} + +static inline void +set_le_ih_k_type(struct item_head *ih, int type) +{ + + set_le_key_k_type(ih_version(ih), &(ih->ih_key), type); +} + +#define is_direntry_le_key(version, key) \ + (le_key_k_type(version, key) == TYPE_DIRENTRY) +#define is_direct_le_key(version, key) \ + (le_key_k_type(version, key) == TYPE_DIRECT) +#define is_indirect_le_key(version, key) \ + (le_key_k_type(version, key) == TYPE_INDIRECT) +#define is_statdata_le_key(version, key) \ + (le_key_k_type(version, key) == TYPE_STAT_DATA) + +/* Item header has version. */ +#define is_direntry_le_ih(ih) \ + is_direntry_le_key(ih_version(ih), &((ih)->ih_key)) +#define is_direct_le_ih(ih) \ + is_direct_le_key(ih_version(ih), &((ih)->ih_key)) +#define is_indirect_le_ih(ih) \ + is_indirect_le_key(ih_version(ih), &((ih)->ih_key)) +#define is_statdata_le_ih(ih) \ + is_statdata_le_key(ih_version(ih), &((ih)->ih_key)) + +static inline void +set_cpu_key_k_offset(struct cpu_key *key, off_t offset) +{ + + (key->version == KEY_FORMAT_3_5) ? + (key->on_disk_key.u.k_offset_v1.k_offset = offset) : + (key->on_disk_key.u.k_offset_v2.k_offset = offset); +} + +static inline void +set_cpu_key_k_type(struct cpu_key *key, int type) +{ + + (key->version == KEY_FORMAT_3_5) ? 
+ (key->on_disk_key.u.k_offset_v1.k_uniqueness = + type2uniqueness(type)): + (key->on_disk_key.u.k_offset_v2.k_type = type); +} + +#define is_direntry_cpu_key(key) (cpu_key_k_type (key) == TYPE_DIRENTRY) +#define is_direct_cpu_key(key) (cpu_key_k_type (key) == TYPE_DIRECT) +#define is_indirect_cpu_key(key) (cpu_key_k_type (key) == TYPE_INDIRECT) +#define is_statdata_cpu_key(key) (cpu_key_k_type (key) == TYPE_STAT_DATA) + +/* Maximal length of item */ +#define MAX_ITEM_LEN(block_size) (block_size - BLKH_SIZE - IH_SIZE) +#define MIN_ITEM_LEN 1 + +/* Object identifier for root dir */ +#define REISERFS_ROOT_OBJECTID 2 +#define REISERFS_ROOT_PARENT_OBJECTID 1 + +/* key is pointer to cpu key, result is cpu */ +static inline off_t +cpu_key_k_offset(const struct cpu_key *key) +{ + + return ((key->version == KEY_FORMAT_3_5) ? + key->on_disk_key.u.k_offset_v1.k_offset : + key->on_disk_key.u.k_offset_v2.k_offset); +} + +static inline off_t +cpu_key_k_type(const struct cpu_key *key) +{ + + return ((key->version == KEY_FORMAT_3_5) ? + uniqueness2type(key->on_disk_key.u.k_offset_v1.k_uniqueness) : + key->on_disk_key.u.k_offset_v2.k_type); +} + +/* + * Header of a disk block. More precisely, header of a formatted leaf + * or internal node, and not the header of an unformatted node. + */ +struct block_head { + uint16_t blk_level; /* Level of a block in the + tree. */ + uint16_t blk_nr_item; /* Number of keys/items in a + block. */ + uint16_t blk_free_space; /* Block free space in bytes. */ + uint16_t blk_reserved; /* Dump this in v4/planA */ + struct key blk_right_delim_key; /* Kept only for compatibility */ +}; + +#define BLKH_SIZE (sizeof(struct block_head)) +#define blkh_level(p_blkh) (le16toh((p_blkh)->blk_level)) +#define blkh_nr_item(p_blkh) (le16toh((p_blkh)->blk_nr_item)) +#define blkh_free_space(p_blkh) (le16toh((p_blkh)->blk_free_space)) + +#define FREE_LEVEL 0 /* When node gets removed from the tree its + blk_level is set to FREE_LEVEL. 
It is then + used to see whether the node is still in the + tree */ + +/* Values for blk_level field of the struct block_head */ +#define DISK_LEAF_NODE_LEVEL 1 /* Leaf node level.*/ + +/* + * Given the buffer head of a formatted node, resolve to the block head + * of that node. + */ +#define B_BLK_HEAD(p_s_bp) ((struct block_head *)((p_s_bp)->b_data)) +#define B_NR_ITEMS(p_s_bp) (blkh_nr_item(B_BLK_HEAD(p_s_bp))) +#define B_LEVEL(p_s_bp) (blkh_level(B_BLK_HEAD(p_s_bp))) +#define B_FREE_SPACE(p_s_bp) (blkh_free_space(B_BLK_HEAD(p_s_bp))) + +/* ------------------------------------------------------------------- + * Stat data + * -------------------------------------------------------------------*/ + +/* + * Old stat data is 32 bytes long. We are going to distinguish new one + * by different size. + */ +struct stat_data_v1 { + uint16_t sd_mode; /* File type, permissions */ + uint16_t sd_nlink; /* Number of hard links */ + uint16_t sd_uid; /* Owner */ + uint16_t sd_gid; /* Group */ + uint32_t sd_size; /* File size */ + uint32_t sd_atime; /* Time of last access */ + uint32_t sd_mtime; /* Time file was last modified */ + uint32_t sd_ctime; /* Time inode (stat data) was last changed + (except changes to sd_atime and + sd_mtime) */ + union { + uint32_t sd_rdev; + uint32_t sd_blocks; /* Number of blocks file uses */ + } __packed u; + uint32_t sd_first_direct_byte; /* First byte of file which is + stored in a direct item: + except that if it equals 1 + it is a symlink and if it + equals ~(uint32_t)0 there + is no direct item. The + existence of this field + really grates on me. Let's + replace it with a macro based + on sd_size and our tail + suppression policy. Someday. 
+ -Hans */ +} __packed; + +#define SD_V1_SIZE (sizeof(struct stat_data_v1)) +#define stat_data_v1(ih) (ih_version (ih) == KEY_FORMAT_3_5) +#define sd_v1_mode(sdp) (le16toh((sdp)->sd_mode)) +#define set_sd_v1_mode(sdp, v) ((sdp)->sd_mode = htole16(v)) +#define sd_v1_nlink(sdp) (le16toh((sdp)->sd_nlink)) +#define set_sd_v1_nlink(sdp, v) ((sdp)->sd_nlink = htole16(v)) +#define sd_v1_uid(sdp) (le16toh((sdp)->sd_uid)) +#define set_sd_v1_uid(sdp, v) ((sdp)->sd_uid = htole16(v)) +#define sd_v1_gid(sdp) (le16toh((sdp)->sd_gid)) +#define set_sd_v1_gid(sdp, v) ((sdp)->sd_gid = htole16(v)) +#define sd_v1_size(sdp) (le32toh((sdp)->sd_size)) +#define set_sd_v1_size(sdp, v) ((sdp)->sd_size = htole32(v)) +#define sd_v1_atime(sdp) (le32toh((sdp)->sd_atime)) +#define set_sd_v1_atime(sdp, v) ((sdp)->sd_atime = htole32(v)) +#define sd_v1_mtime(sdp) (le32toh((sdp)->sd_mtime)) +#define set_sd_v1_mtime(sdp, v) ((sdp)->sd_mtime = htole32(v)) +#define sd_v1_ctime(sdp) (le32toh((sdp)->sd_ctime)) +#define set_sd_v1_ctime(sdp, v) ((sdp)->sd_ctime = htole32(v)) +#define sd_v1_rdev(sdp) (le32toh((sdp)->u.sd_rdev)) +#define set_sd_v1_rdev(sdp, v) ((sdp)->u.sd_rdev = htole32(v)) +#define sd_v1_blocks(sdp) (le32toh((sdp)->u.sd_blocks)) +#define set_sd_v1_blocks(sdp, v) ((sdp)->u.sd_blocks = htole32(v)) +#define sd_v1_first_direct_byte(sdp) \ + (le32toh((sdp)->sd_first_direct_byte)) +#define set_sd_v1_first_direct_byte(sdp, v) \ + ((sdp)->sd_first_direct_byte = htole32(v)) + +/* + * We want common flags to have the same values as in ext2, + * so chattr(1) will work without problems + */ +#include +#define REISERFS_IMMUTABLE_FL EXT2_IMMUTABLE_FL +#define REISERFS_APPEND_FL EXT2_APPEND_FL +#define REISERFS_SYNC_FL EXT2_SYNC_FL +#define REISERFS_NOATIME_FL EXT2_NOATIME_FL +#define REISERFS_NODUMP_FL EXT2_NODUMP_FL +#define REISERFS_SECRM_FL EXT2_SECRM_FL +#define REISERFS_UNRM_FL EXT2_UNRM_FL +#define REISERFS_COMPR_FL EXT2_COMPR_FL +#define REISERFS_NOTAIL_FL EXT2_NOTAIL_FL + +/* + * Stat Data on 
disk (reiserfs version of UFS disk inode minus the + * address blocks) + */ +struct stat_data { + uint16_t sd_mode; /* File type, permissions */ + uint16_t sd_attrs; /* Persistent inode flags */ + uint32_t sd_nlink; /* Number of hard links */ + uint64_t sd_size; /* File size */ + uint32_t sd_uid; /* Owner */ + uint32_t sd_gid; /* Group */ + uint32_t sd_atime; /* Time of last access */ + uint32_t sd_mtime; /* Time file was last modified */ + uint32_t sd_ctime; /* Time inode (stat data) was last changed + (except changes to sd_atime and + sd_mtime) */ + uint32_t sd_blocks; + union { + uint32_t sd_rdev; + uint32_t sd_generation; + //uint32_t sd_first_direct_byte; + /* + * First byte of file which is stored in a + * direct item: except that if it equals 1 + * it is a symlink and if it equals + * ~(uint32_t)0 there is no direct item. The + * existence of this field really grates + * on me. Let's replace it with a macro + * based on sd_size and our tail + * suppression policy? + */ + } __packed u; +} __packed; + +/* This is 44 bytes long */ +#define SD_SIZE (sizeof(struct stat_data)) +#define SD_V2_SIZE SD_SIZE +#define stat_data_v2(ih) (ih_version (ih) == KEY_FORMAT_3_6) +#define sd_v2_mode(sdp) (le16toh((sdp)->sd_mode)) +#define set_sd_v2_mode(sdp, v) ((sdp)->sd_mode = htole16(v)) +/* sd_reserved */ +/* set_sd_reserved */ +#define sd_v2_nlink(sdp) (le32toh((sdp)->sd_nlink)) +#define set_sd_v2_nlink(sdp, v) ((sdp)->sd_nlink = htole32(v)) +#define sd_v2_size(sdp) (le64toh((sdp)->sd_size)) +#define set_sd_v2_size(sdp, v) ((sdp)->sd_size = htole64(v)) +#define sd_v2_uid(sdp) (le32toh((sdp)->sd_uid)) +#define set_sd_v2_uid(sdp, v) ((sdp)->sd_uid = htole32(v)) +#define sd_v2_gid(sdp) (le32toh((sdp)->sd_gid)) +#define set_sd_v2_gid(sdp, v) ((sdp)->sd_gid = htole32(v)) +#define sd_v2_atime(sdp) (le32toh((sdp)->sd_atime)) +#define set_sd_v2_atime(sdp, v) ((sdp)->sd_atime = htole32(v)) +#define sd_v2_mtime(sdp) (le32toh((sdp)->sd_mtime)) +#define set_sd_v2_mtime(sdp, v)
((sdp)->sd_mtime = htole32(v)) +#define sd_v2_ctime(sdp) (le32toh((sdp)->sd_ctime)) +#define set_sd_v2_ctime(sdp, v) ((sdp)->sd_ctime = htole32(v)) +#define sd_v2_blocks(sdp) (le32toh((sdp)->sd_blocks)) +#define set_sd_v2_blocks(sdp, v) ((sdp)->sd_blocks = htole32(v)) +#define sd_v2_rdev(sdp) (le32toh((sdp)->u.sd_rdev)) +#define set_sd_v2_rdev(sdp, v) ((sdp)->u.sd_rdev = htole32(v)) +#define sd_v2_generation(sdp) (le32toh((sdp)->u.sd_generation)) +#define set_sd_v2_generation(sdp, v) ((sdp)->u.sd_generation = htole32(v)) +#define sd_v2_attrs(sdp) (le16toh((sdp)->sd_attrs)) +#define set_sd_v2_attrs(sdp, v) ((sdp)->sd_attrs = htole16(v)) + +/* ------------------------------------------------------------------- + * Directory structure + * -------------------------------------------------------------------*/ + +#define SD_OFFSET 0 +#define SD_UNIQUENESS 0 +#define DOT_OFFSET 1 +#define DOT_DOT_OFFSET 2 +#define DIRENTRY_UNIQUENESS 500 + +#define FIRST_ITEM_OFFSET 1 + +struct reiserfs_de_head { + uint32_t deh_offset; /* Third component of the directory + entry key */ + uint32_t deh_dir_id; /* Objectid of the parent directory of + the object, that is referenced by + directory entry */ + uint32_t deh_objectid; /* Objectid of the object, that is + referenced by directory entry */ + uint16_t deh_location; /* Offset of name in the whole item */ + uint16_t deh_state; /* Whether 1) entry contains stat data + (for future), and 2) whether entry + is hidden (unlinked) */ +} __packed; + +#define DEH_SIZE sizeof(struct reiserfs_de_head) +#define deh_offset(p_deh) (le32toh((p_deh)->deh_offset)) +#define deh_dir_id(p_deh) (le32toh((p_deh)->deh_dir_id)) +#define deh_objectid(p_deh) (le32toh((p_deh)->deh_objectid)) +#define deh_location(p_deh) (le16toh((p_deh)->deh_location)) +#define deh_state(p_deh) (le16toh((p_deh)->deh_state)) + +#define put_deh_offset(p_deh, v) ((p_deh)->deh_offset = htole32((v))) +#define put_deh_dir_id(p_deh, v) ((p_deh)->deh_dir_id = htole32((v))) +#define 
put_deh_objectid(p_deh, v) ((p_deh)->deh_objectid = htole32((v))) +#define put_deh_location(p_deh, v) ((p_deh)->deh_location = htole16((v))) +#define put_deh_state(p_deh, v) ((p_deh)->deh_state = htole16((v))) + +/* Empty directory contains two entries "." and ".." and their headers */ +#define EMPTY_DIR_SIZE \ + (DEH_SIZE * 2 + ROUND_UP(strlen(".")) + ROUND_UP(strlen(".."))) + +/* Old format directories have this size when empty */ +#define EMPTY_DIR_SIZE_V1 (DEH_SIZE * 2 + 3) + +#define DEH_Statdata 0 /* Not used now */ +#define DEH_Visible 2 + +/* 64 bit systems (and the S/390) need to be aligned explicitly -jdm */ +#if BITS_PER_LONG == 64 || defined(__sparc64__) +#define ADDR_UNALIGNED_BITS (3) +#endif + +#ifdef ADDR_UNALIGNED_BITS +#define aligned_address(addr) \ + ((void *)((long)(addr) & ~((1UL << ADDR_UNALIGNED_BITS) - 1))) +#define unaligned_offset(addr) \ + (((int)((long)(addr) & ((1 << ADDR_UNALIGNED_BITS) - 1))) << 3) + +#define set_bit_unaligned(nr, addr) \ + set_bit((nr) + unaligned_offset(addr), aligned_address(addr)) +#define clear_bit_unaligned(nr, addr) \ + clear_bit((nr) + unaligned_offset(addr), aligned_address(addr)) +#define test_bit_unaligned(nr, addr) \ + test_bit((nr) + unaligned_offset(addr), aligned_address(addr)) +#else /* !defined ADDR_UNALIGNED_BITS */ +#define set_bit_unaligned(nr, addr) set_bit(nr, addr) +#define clear_bit_unaligned(nr, addr) clear_bit(nr, addr) +#define test_bit_unaligned(nr, addr) test_bit(nr, addr) +#endif /* defined ADDR_UNALIGNED_BITS */ + +#define mark_de_with_sd(deh) \ + set_bit_unaligned(DEH_Statdata, &((deh)->deh_state)) +#define mark_de_without_sd(deh) \ + clear_bit_unaligned(DEH_Statdata, &((deh)->deh_state)) +#define mark_de_visible(deh) \ + set_bit_unaligned (DEH_Visible, &((deh)->deh_state)) +#define mark_de_hidden(deh) \ + clear_bit_unaligned (DEH_Visible, &((deh)->deh_state)) + +#define de_with_sd(deh) \ + test_bit_unaligned(DEH_Statdata, &((deh)->deh_state)) +#define de_visible(deh) \ + 
test_bit_unaligned(DEH_Visible, &((deh)->deh_state)) +#define de_hidden(deh) \ + !test_bit_unaligned(DEH_Visible, &((deh)->deh_state)) + +/* Two entries per block (at least) */ +#define REISERFS_MAX_NAME(block_size) 255 + +/* + * This structure is used for operations on directory entries. It is not + * a disk structure. When reiserfs_find_entry or search_by_entry_key + * find directory entry, they return filled reiserfs_dir_entry structure + */ +struct reiserfs_dir_entry { + struct buf *de_bp; + int de_item_num; + struct item_head *de_ih; + int de_entry_num; + struct reiserfs_de_head *de_deh; + int de_entrylen; + int de_namelen; + char *de_name; + char *de_gen_number_bit_string; + + uint32_t de_dir_id; + uint32_t de_objectid; + + struct cpu_key de_entry_key; +}; + +/* Pointer to file name, stored in entry */ +#define B_I_DEH_ENTRY_FILE_NAME(bp, ih, deh) \ + (B_I_PITEM(bp, ih) + deh_location(deh)) + +/* Length of name */ +#define I_DEH_N_ENTRY_FILE_NAME_LENGTH(ih, deh, entry_num) \ + (I_DEH_N_ENTRY_LENGTH(ih, deh, entry_num) - \ + (de_with_sd(deh) ? SD_SIZE : 0)) + +/* Hash value occupies bits from 7 up to 30 */ +#define GET_HASH_VALUE(offset) ((offset) & 0x7fffff80LL) + +/* Generation number occupies 7 bits starting from 0 up to 6 */ +#define GET_GENERATION_NUMBER(offset) ((offset) & 0x7fLL) +#define MAX_GENERATION_NUMBER 127 + +/* Get item body */ +#define B_I_PITEM(bp, ih) ((bp)->b_data + ih_location(ih)) +#define B_I_DEH(bp, ih) ((struct reiserfs_de_head *)(B_I_PITEM(bp, ih))) + +/* + * Length of the directory entry in directory item. This define + * calculates length of i-th directory entry using directory entry + * locations from dir entry head. When it calculates length of 0-th + * directory entry, it uses length of whole item in place of entry + * location of the non-existent following entry in the calculation. See + * picture above. 
+ */ +static inline int +entry_length (const struct buf *bp, const struct item_head *ih, + int pos_in_item) +{ + struct reiserfs_de_head *deh; + + deh = B_I_DEH(bp, ih) + pos_in_item; + if (pos_in_item) + return (deh_location(deh - 1) - deh_location(deh)); + + return (ih_item_len(ih) - deh_location(deh)); +} + +/* + * Number of entries in the directory item, depends on ENTRY_COUNT + * being at the start of directory dynamic data. + */ +#define I_ENTRY_COUNT(ih) (ih_entry_count((ih))) + +/* ------------------------------------------------------------------- + * Disk child + * -------------------------------------------------------------------*/ + +/* + * Disk child pointer: The pointer from an internal node of the tree + * to a node that is on disk. + */ +struct disk_child { + uint32_t dc_block_number; /* Disk child's block number. */ + uint16_t dc_size; /* Disk child's used space. */ + uint16_t dc_reserved; +}; + +#define DC_SIZE (sizeof(struct disk_child)) +#define dc_block_number(dc_p) (le32toh((dc_p)->dc_block_number)) +#define dc_size(dc_p) (le16toh((dc_p)->dc_size)) +#define put_dc_block_number(dc_p, val) \ + do { (dc_p)->dc_block_number = htole32(val); } while (0) +#define put_dc_size(dc_p, val) \ + do { (dc_p)->dc_size = htole16(val); } while (0) + +/* Get disk child by buffer header and position in the tree node. */ +#define B_N_CHILD(p_s_bp, n_pos) \ + ((struct disk_child *)((p_s_bp)->b_data + BLKH_SIZE + \ + B_NR_ITEMS(p_s_bp) * KEY_SIZE + \ + DC_SIZE * (n_pos))) + +/* Get disk child number by buffer header and position in the tree node. 
*/ +#define B_N_CHILD_NUM(p_s_bp, n_pos) \ + (dc_block_number(B_N_CHILD(p_s_bp, n_pos))) +#define PUT_B_N_CHILD_NUM(p_s_bp, n_pos, val) \ + (put_dc_block_number(B_N_CHILD(p_s_bp, n_pos), val)) + +/* ------------------------------------------------------------------- + * Path structures and defines + * -------------------------------------------------------------------*/ + +struct path_element { + struct buf *pe_buffer; /* Pointer to the buffer at the path in + the tree. */ + int pe_position; /* Position in the tree node which is + placed in the buffer above. */ +}; + +#define MAX_HEIGHT 5 /* Maximal height of a tree. Don't + change this without changing + JOURNAL_PER_BALANCE_CNT */ +#define EXTENDED_MAX_HEIGHT 7 /* Must be equals MAX_HEIGHT + + FIRST_PATH_ELEMENT_OFFSET */ +#define FIRST_PATH_ELEMENT_OFFSET 2 /* Must be equal to at least 2. */ +#define ILLEGAL_PATH_ELEMENT_OFFSET 1 /* Must be equal to + FIRST_PATH_ELEMENT_OFFSET - 1 */ +#define MAX_FEB_SIZE 6 /* This MUST be MAX_HEIGHT + 1. + See about FEB below */ + +struct path { + /* Length of the array below. */ + int path_length; + /* Array of the path element */ + struct path_element path_elements[EXTENDED_MAX_HEIGHT]; + int pos_in_item; +}; + +#define pos_in_item(path) ((path)->pos_in_item) + +#define INITIALIZE_PATH(var) \ + struct path var = { ILLEGAL_PATH_ELEMENT_OFFSET, } + +/* Get path element by path and path position. */ +#define PATH_OFFSET_PELEMENT(p_s_path, n_offset) \ + ((p_s_path)->path_elements + (n_offset)) + +/* Get buffer header at the path by path and path position. */ +#define PATH_OFFSET_PBUFFER(p_s_path, n_offset) \ + (PATH_OFFSET_PELEMENT(p_s_path, n_offset)->pe_buffer) + +/* Get position in the element at the path by path and path position. 
*/ +#define PATH_OFFSET_POSITION(p_s_path, n_offset) \ + (PATH_OFFSET_PELEMENT(p_s_path, n_offset)->pe_position) + +#define PATH_PLAST_BUFFER(p_s_path) \ + (PATH_OFFSET_PBUFFER((p_s_path), (p_s_path)->path_length)) + +#define PATH_LAST_POSITION(p_s_path) \ + (PATH_OFFSET_POSITION((p_s_path), (p_s_path)->path_length)) + +#define PATH_PITEM_HEAD(p_s_path) \ + B_N_PITEM_HEAD(PATH_PLAST_BUFFER(p_s_path), PATH_LAST_POSITION(p_s_path)) + +#define get_last_bp(path) PATH_PLAST_BUFFER(path) +#define get_ih(path) PATH_PITEM_HEAD(path) + +/* ------------------------------------------------------------------- + * Misc. + * -------------------------------------------------------------------*/ + +/* Size of pointer to the unformatted node. */ +#define UNFM_P_SIZE (sizeof(unp_t)) +#define UNFM_P_SHIFT 2 + +/* The in-core inode key is stored in little-endian form */ +#define INODE_PKEY(ip) ((struct key *)(REISERFS_I(ip)->i_key)) + +#define MAX_UL_INT 0xffffffff +#define MAX_INT 0x7fffffff +#define MAX_US_INT 0xffff + +/* The purpose is to detect overflow of an unsigned short */ +#define REISERFS_LINK_MAX (MAX_US_INT - 1000) + +#define fs_generation(sbi) (REISERFS_SB(sbi)->s_generation_counter) +#define get_generation(sbi) (fs_generation(sbi)) + +#define __fs_changed(gen, sbi) ((gen) != get_generation (sbi)) +/*#define fs_changed(gen, sbi) ({ cond_resched(); \ + __fs_changed(gen, sbi); })*/ +#define fs_changed(gen, sbi) (__fs_changed(gen, sbi)) + +/* ------------------------------------------------------------------- + * Fixate node + * -------------------------------------------------------------------*/ + +/* + * To make any changes in the tree we always first find node, that + * contains item to be changed/deleted or place to insert a new item. + * We call this node S. To do balancing we need to decide what we will + * shift to left/right neighbor, or to a new node, where new item will + * be etc. To make this analysis simpler we build virtual node.
Virtual + * node is an array of items, that will replace items of node S. (For + * instance if we are going to delete an item, virtual node does not + * contain it). Virtual node keeps information about item sizes and + * types, mergeability of first and last items, sizes of all entries in + * directory item. We use this array of items when calculating what we + * can shift to neighbors and how many nodes we have to have if we do + * not any shiftings, if we shift to left/right neighbor or to both. + */ +struct virtual_item { + int vi_index; /* Index in the array of item + operations */ + unsigned short vi_type; /* Left/right mergeability */ + unsigned short vi_item_len; /* Length of item that it will + have after balancing */ + struct item_head *vi_ih; + const char *vi_item; /* Body of item (old or new) */ + const void *vi_new_data; /* 0 always but paste mode */ + void *vi_uarea; /* Item specific area */ +}; + +struct virtual_node { + char *vn_free_ptr; /* This is a pointer to the free space + in the buffer */ + unsigned short vn_nr_item; /* Number of items in virtual node */ + short vn_size; /* Size of node , that node would have + if it has unlimited size and no + balancing is performed */ + short vn_mode; /* Mode of balancing (paste, insert, + delete, cut) */ + short vn_affected_item_num; + short vn_pos_in_item; + struct item_head *vn_ins_ih; /* Item header of inserted item, 0 for + other modes */ + const void *vn_data; + struct virtual_item *vn_vi; /* Array of items (including a new one, + excluding item to be deleted) */ +}; + +/* Used by directory items when creating virtual nodes */ +struct direntry_uarea { + int flags; + uint16_t entry_count; + uint16_t entry_sizes[1]; +} __packed; + +/* ------------------------------------------------------------------- + * Tree balance + * -------------------------------------------------------------------*/ + +struct reiserfs_iget_args { + uint32_t objectid; + uint32_t dirid; +}; + +struct item_operations { + int 
(*bytes_number)(struct item_head * ih, int block_size); + void (*decrement_key)(struct cpu_key *); + int (*is_left_mergeable)(struct key * ih, unsigned long bsize); + void (*print_item)(struct item_head *, char * item); + void (*check_item)(struct item_head *, char * item); + + int (*create_vi)(struct virtual_node * vn, + struct virtual_item * vi, int is_affected, int insert_size); + int (*check_left)(struct virtual_item * vi, int free, + int start_skip, int end_skip); + int (*check_right)(struct virtual_item * vi, int free); + int (*part_size)(struct virtual_item * vi, int from, int to); + int (*unit_num)(struct virtual_item * vi); + void (*print_vi)(struct virtual_item * vi); +}; + +extern struct item_operations *item_ops[TYPE_ANY + 1]; + +#define op_bytes_number(ih, bsize) \ + item_ops[le_ih_k_type(ih)]->bytes_number(ih, bsize) + +#define COMP_KEYS comp_keys +#define COMP_SHORT_KEYS comp_short_keys + +/* Get the item header */ +#define B_N_PITEM_HEAD(bp, item_num) \ + ((struct item_head *)((bp)->b_data + BLKH_SIZE) + (item_num)) + +/* Get key */ +#define B_N_PDELIM_KEY(bp, item_num) \ + ((struct key *)((bp)->b_data + BLKH_SIZE) + (item_num)) + +/* ------------------------------------------------------------------- + * Function declarations + * -------------------------------------------------------------------*/ + +/* reiserfs_stree.c */ +int B_IS_IN_TREE(const struct buf *p_s_bp); + +extern void copy_item_head(struct item_head * p_v_to, + const struct item_head * p_v_from); + +extern int comp_keys(const struct key *le_key, + const struct cpu_key *cpu_key); +extern int comp_short_keys(const struct key *le_key, + const struct cpu_key *cpu_key); + +extern int comp_le_keys(const struct key *, const struct key *); + +static inline int +le_key_version(const struct key *key) +{ + int type; + + type = offset_v2_k_type(&(key->u.k_offset_v2)); + if (type != TYPE_DIRECT && type != TYPE_INDIRECT && + type != TYPE_DIRENTRY) + return (KEY_FORMAT_3_5); + + return 
(KEY_FORMAT_3_6); +} + +static inline void +copy_key(struct key *to, const struct key *from) +{ + + memcpy(to, from, KEY_SIZE); +} + +const struct key *get_lkey(const struct path *p_s_chk_path, + const struct reiserfs_sb_info *p_s_sbi); +const struct key *get_rkey(const struct path *p_s_chk_path, + const struct reiserfs_sb_info *p_s_sbi); +inline int bin_search(const void * p_v_key, const void * p_v_base, + int p_n_num, int p_n_width, int * p_n_pos); + +void pathrelse(struct path *p_s_search_path); +int reiserfs_check_path(struct path *p); + +int search_by_key(struct reiserfs_sb_info *p_s_sbi, + const struct cpu_key *p_s_key, + struct path *p_s_search_path, + int n_stop_level); +#define search_item(sbi, key, path) \ + search_by_key(sbi, key, path, DISK_LEAF_NODE_LEVEL) +int search_for_position_by_key(struct reiserfs_sb_info *p_s_sbi, + const struct cpu_key *p_s_cpu_key, + struct path *p_s_search_path); +void decrement_counters_in_path(struct path *p_s_search_path); + +/* reiserfs_inode.c */ +vop_read_t reiserfs_read; +vop_inactive_t reiserfs_inactive; +vop_reclaim_t reiserfs_reclaim; + +int reiserfs_get_block(struct reiserfs_node *ip, long block, + off_t offset, struct uio *uio); + +void make_cpu_key(struct cpu_key *cpu_key, struct reiserfs_node *ip, + off_t offset, int type, int key_length); + +void reiserfs_read_locked_inode(struct reiserfs_node *ip, + struct reiserfs_iget_args *args); +int reiserfs_iget(struct mount *mp, const struct cpu_key *key, + struct vnode **vpp, struct thread *td); + +void sd_attrs_to_i_attrs(uint16_t sd_attrs, struct reiserfs_node *ip); +void i_attrs_to_sd_attrs(struct reiserfs_node *ip, uint16_t *sd_attrs); + +/* reiserfs_namei.c */ +vop_readdir_t reiserfs_readdir; +vop_cachedlookup_t reiserfs_lookup; + +void set_de_name_and_namelen(struct reiserfs_dir_entry * de); +int search_by_entry_key(struct reiserfs_sb_info *sbi, + const struct cpu_key *key, struct path *path, + struct reiserfs_dir_entry *de); + +/* reiserfs_prints.c */ +char 
*reiserfs_hashname(int code); +void reiserfs_dump_buffer(caddr_t buf, off_t len); + +#if defined(REISERFS_DEBUG) +#define reiserfs_log(lvl, fmt, ...) \ + log(lvl, "ReiserFS/%s: " fmt, __func__, ## __VA_ARGS__) +#elif defined (REISERFS_DEBUG_CONS) +#define reiserfs_log(lvl, fmt, ...) \ + printf("%s:%d: " fmt, __func__, __LINE__, ## __VA_ARGS__) +#else +#define reiserfs_log(lvl, fmt, ...) +#endif + +#define reiserfs_log_0(lvl, fmt, ...) \ + printf("%s:%d: " fmt, __func__, __LINE__, ## __VA_ARGS__) + +/* reiserfs_hashes.c */ +uint32_t keyed_hash(const signed char *msg, int len); +uint32_t yura_hash(const signed char *msg, int len); +uint32_t r5_hash(const signed char *msg, int len); + +#define reiserfs_test_le_bit test_bit + +#endif /* !defined _GNU_REISERFS_REISERFS_FS_H */ diff --git a/sys/gnu/reiserfs/reiserfs_fs_i.h b/sys/gnu/reiserfs/reiserfs_fs_i.h new file mode 100644 index 000000000000..a9aa7e53f2d7 --- /dev/null +++ b/sys/gnu/reiserfs/reiserfs_fs_i.h @@ -0,0 +1,90 @@ +/*- + * Copyright 2000 Hans Reiser + * See README for licensing and copyright details + * + * Ported to FreeBSD by Jean-Sébastien Pédron + * + * $FreeBSD$ + */ + +#ifndef _GNU_REISERFS_REISERFS_FS_I_H +#define _GNU_REISERFS_REISERFS_FS_I_H + +#include + +/* Bitmasks for i_flags field in reiserfs-specific part of inode */ +typedef enum { + /* + * This says what format of key do all items (but stat data) of + * an object have. If this is set, that format is 3.6 otherwise + * - 3.5 + */ + i_item_key_version_mask = 0x0001, + /* If this is unset, object has 3.5 stat data, otherwise, it has + * 3.6 stat data with 64bit size, 32bit nlink etc. */ + i_stat_data_version_mask = 0x0002, + /* File might need tail packing on close */ + i_pack_on_close_mask = 0x0004, + /* Don't pack tail of file */ + i_nopack_mask = 0x0008, + /* If those is set, "safe link" was created for this file during + * truncate or unlink. Safe link is used to avoid leakage of disk + * space on crash with some files open, but unlinked. 
*/ + i_link_saved_unlink_mask = 0x0010, + i_link_saved_truncate_mask = 0x0020, + i_priv_object = 0x0080, + i_has_xattr_dir = 0x0100, +} reiserfs_inode_flags; + +struct reiserfs_node { + struct vnode *i_vnode; + struct vnode *i_devvp; + struct cdev *i_dev; + ino_t i_number; + + ino_t i_ino; + + struct reiserfs_sb_info *i_reiserfs; + + uint32_t i_flag; /* Flags, see below */ + uint32_t i_key[4]; /* Key is still 4 32 bit + integers */ + uint32_t i_flags; /* Transient inode flags that + are never stored on disk. + Bitmasks for this field + are defined above. */ + uint32_t i_first_direct_byte; /* Offset of first byte stored + in direct item. */ + uint32_t i_attrs; /* Copy of persistent inode + flags read from sd_attrs. */ + + uint16_t i_mode; /* IFMT, permissions. */ + uint16_t i_nlink; /* File link count. */ + uint64_t i_size; /* File byte count. */ + uint32_t i_bytes; + uid_t i_uid; /* File owner. */ + gid_t i_gid; /* File group. */ + struct timespec i_atime; /* Last access time. */ + struct timespec i_mtime; /* Last modified time. */ + struct timespec i_ctime; /* Last inode change time. */ + + uint32_t i_blocks; + uint32_t i_generation; +}; + +#define VTOI(vp) ((struct reiserfs_node *)(vp)->v_data) +#define ITOV(ip) ((ip)->i_vnode) + +/* These flags are kept in i_flag. 
*/ +#define IN_HASHED 0x0020 /* Inode is on hash list */ + +/* This overlays the fid structure (see mount.h) */ +struct rfid { + uint16_t rfid_len; /* Length of structure */ + uint16_t rfid_pad; /* Force 32-bit alignment */ + ino_t rfid_dirid; /* File key */ + ino_t rfid_objectid; + uint32_t rfid_gen; /* Generation number */ +}; + +#endif /* !defined _GNU_REISERFS_REISERFS_FS_I_H */ diff --git a/sys/gnu/reiserfs/reiserfs_fs_sb.h b/sys/gnu/reiserfs/reiserfs_fs_sb.h new file mode 100644 index 000000000000..184e01c85957 --- /dev/null +++ b/sys/gnu/reiserfs/reiserfs_fs_sb.h @@ -0,0 +1,143 @@ +/* + * Copyright 2000 Hans Reiser + * See README for licensing and copyright details + * + * Ported to FreeBSD by Jean-Sébastien Pédron + * + * $FreeBSD$ + */ + +#ifndef _GNU_REISERFS_REISERFS_FS_SB_H +#define _GNU_REISERFS_REISERFS_FS_SB_H + +typedef uint32_t (*hashf_t)(const signed char *, int); + +#define sb_block_count(sbp) (le32toh((sbp)->s_v1.s_block_count)) +#define set_sb_block_count(sbp,v) ((sbp)->s_v1.s_block_count = htole32(v)) +#define sb_free_blocks(sbp) (le32toh((sbp)->s_v1.s_free_blocks)) +#define set_sb_free_blocks(sbp,v) ((sbp)->s_v1.s_free_blocks = htole32(v)) +#define sb_root_block(sbp) (le32toh((sbp)->s_v1.s_root_block)) + +/* Bitmaps */ +struct reiserfs_bitmap_info { + uint16_t first_zero_hint; + uint16_t free_count; + //struct buf *bp; /* The actual bitmap */ + caddr_t bp_data; /* The actual bitmap */ +}; + +/* ReiserFS union of in-core super block data */ +struct reiserfs_sb_info { + struct reiserfs_super_block *s_rs; + struct reiserfs_bitmap_info *s_ap_bitmap; + struct vnode *s_devvp; + + unsigned short s_mount_state; + + hashf_t s_hash_function; /* Pointer to function which + is used to sort names in + directory. Set on mount */ + unsigned long s_mount_opt; /* ReiserFS's mount options + are set here */ + int s_generation_counter; /* Increased by one every + time the tree gets + re-balanced */ + unsigned long s_properties; /* File system properties. 
+ Currently holds on-disk + FS format */ + uint16_t s_blocksize; + uint16_t s_blocksize_bits; + char s_rd_only; /* Is it read-only ? */ + int s_is_unlinked_ok; +}; + +#define sb_version(sbi) (le16toh((sbi)->s_v1.s_version)) +#define set_sb_version(sbi, v) ((sbi)->s_v1.s_version = htole16(v)) + +#define sb_blocksize(sbi) (le16toh((sbi)->s_v1.s_blocksize)) +#define set_sb_blocksize(sbi, v) ((sbi)->s_v1.s_blocksize = htole16(v)) + +#define sb_hash_function_code(sbi) \ + (le32toh((sbi)->s_v1.s_hash_function_code)) +#define set_sb_hash_function_code(sbi, v) \ + ((sbi)->s_v1.s_hash_function_code = htole32(v)) + +#define sb_bmap_nr(sbi) (le16toh((sbi)->s_v1.s_bmap_nr)) +#define set_sb_bmap_nr(sbi, v) ((sbi)->s_v1.s_bmap_nr = htole16(v)) + +/* Definitions of reiserfs on-disk properties: */ +#define REISERFS_3_5 0 +#define REISERFS_3_6 1 + +enum reiserfs_mount_options { + /* Mount options */ + REISERFS_LARGETAIL, /* Large tails will be created in a session */ + REISERFS_SMALLTAIL, /* Small (for files less than block size) tails + will be created in a session */ + REPLAYONLY, /* Replay journal and return 0. Used by fsck */ + REISERFS_CONVERT, /* -o conv: causes conversion of old format super + block to the new format. If not specified - + old partition will be dealt with in a manner + of 3.5.x */ + + /* + * -o hash={tea, rupasov, r5, detect} is meant for properly mounting + * reiserfs disks from 3.5.19 or earlier. 99% of the time, this option + * is not required. If the normal autodetection code can't determine + * which hash to use (because both hashes had the same value for a + * file) use this option to force a specific hash. It won't allow you + * to override the existing hash on the FS, so if you have a tea hash + * disk, and mount with -o hash=rupasov, the mount will fail.
+ */ + FORCE_TEA_HASH, /* try to force tea hash on mount */ + FORCE_RUPASOV_HASH, /* try to force rupasov hash on mount */ + FORCE_R5_HASH, /* try to force r5 hash on mount */ + FORCE_HASH_DETECT, /* try to detect hash function on mount */ + + REISERFS_DATA_LOG, + REISERFS_DATA_ORDERED, + REISERFS_DATA_WRITEBACK, + + /* + * used for testing experimental features, makes benchmarking new + * features with and without more convenient, should never be used by + * users in any code shipped to users (ideally) + */ + + REISERFS_NO_BORDER, + REISERFS_NO_UNHASHED_RELOCATION, + REISERFS_HASHED_RELOCATION, + REISERFS_ATTRS, + REISERFS_XATTRS, + REISERFS_XATTRS_USER, + REISERFS_POSIXACL, + + REISERFS_TEST1, + REISERFS_TEST2, + REISERFS_TEST3, + REISERFS_TEST4, +}; + +#define reiserfs_r5_hash(sbi) \ + (REISERFS_SB(sbi)->s_mount_opt & (1 << FORCE_R5_HASH)) +#define reiserfs_rupasov_hash(sbi) \ + (REISERFS_SB(sbi)->s_mount_opt & (1 << FORCE_RUPASOV_HASH)) +#define reiserfs_tea_hash(sbi) \ + (REISERFS_SB(sbi)->s_mount_opt & (1 << FORCE_TEA_HASH)) +#define reiserfs_hash_detect(sbi) \ + (REISERFS_SB(sbi)->s_mount_opt & (1 << FORCE_HASH_DETECT)) + +#define reiserfs_attrs(sbi) \ + (REISERFS_SB(sbi)->s_mount_opt & (1 << REISERFS_ATTRS)) + +#define reiserfs_data_log(sbi) \ + (REISERFS_SB(sbi)->s_mount_opt & (1 << REISERFS_DATA_LOG)) +#define reiserfs_data_ordered(sbi) \ + (REISERFS_SB(sbi)->s_mount_opt & (1 << REISERFS_DATA_ORDERED)) +#define reiserfs_data_writeback(sbi) \ + (REISERFS_SB(sbi)->s_mount_opt & (1 << REISERFS_DATA_WRITEBACK)) + +#define SB_BUFFER_WITH_SB(sbi) (REISERFS_SB(sbi)->s_sbh) +#define SB_AP_BITMAP(sbi) (REISERFS_SB(sbi)->s_ap_bitmap) + +#endif /* !defined _GNU_REISERFS_REISERFS_FS_SB_H */ diff --git a/sys/gnu/reiserfs/reiserfs_hashes.c b/sys/gnu/reiserfs/reiserfs_hashes.c new file mode 100644 index 000000000000..03420c8687d1 --- /dev/null +++ b/sys/gnu/reiserfs/reiserfs_hashes.c @@ -0,0 +1,217 @@ +/*- + * Copyright 2000 Hans Reiser + * See README for licensing
and copyright details + * + * Ported to FreeBSD by Jean-Sébastien Pédron + * + * $FreeBSD$ + */ + +#include + +/* + * Keyed 32-bit hash function using TEA in a Davis-Meyer function + * H0 = Key + * Hi = E Mi(Hi-1) + Hi-1 + * + * (see Applied Cryptography, 2nd edition, p448). + * + * Jeremy Fitzhardinge 1998 + * + * Jeremy has agreed to the contents of README. -Hans + * Yura's function is added (04/07/2000) + */ + +/* + * keyed_hash + * yura_hash + * r5_hash + */ + +#define DELTA 0x9E3779B9 +#define FULLROUNDS 10 /* 32 is overkill, 16 is strong crypto */ +#define PARTROUNDS 6 /* 6 gets complete mixing */ + +/* a, b, c, d - data; h0, h1 - accumulated hash */ +#define TEACORE(rounds) \ + do { \ + int n; \ + uint32_t b0, b1; \ + uint32_t sum; \ + \ + n = rounds; \ + sum = 0; \ + b0 = h0; \ + b1 = h1; \ + \ + do { \ + sum += DELTA; \ + b0 += ((b1 << 4) + a) ^ (b1+sum) ^ ((b1 >> 5) + b); \ + b1 += ((b0 << 4) + c) ^ (b0+sum) ^ ((b0 >> 5) + d); \ + } while (--n); \ + \ + h0 += b0; \ + h1 += b1; \ + } while (0) + +uint32_t +keyed_hash(const signed char *msg, int len) +{ + uint32_t k[] = { 0x9464a485, 0x542e1a94, 0x3e846bff, 0xb75bcfc3 }; + + uint32_t h0, h1; + uint32_t a, b, c, d; + uint32_t pad; + int i; + + h0 = k[0]; + h1 = k[1]; + + pad = (uint32_t)len | ((uint32_t)len << 8); + pad |= pad << 16; + + while(len >= 16) { + a = (uint32_t)msg[ 0] | + (uint32_t)msg[ 1] << 8 | + (uint32_t)msg[ 2] << 16 | + (uint32_t)msg[ 3] << 24; + b = (uint32_t)msg[ 4] | + (uint32_t)msg[ 5] << 8 | + (uint32_t)msg[ 6] << 16 | + (uint32_t)msg[ 7] << 24; + c = (uint32_t)msg[ 8] | + (uint32_t)msg[ 9] << 8 | + (uint32_t)msg[10] << 16 | + (uint32_t)msg[11] << 24; + d = (uint32_t)msg[12] | + (uint32_t)msg[13] << 8 | + (uint32_t)msg[14] << 16 | + (uint32_t)msg[15] << 24; + + TEACORE(PARTROUNDS); + + len -= 16; + msg += 16; + } + + if (len >= 12) { + a = (uint32_t)msg[ 0] | + (uint32_t)msg[ 1] << 8 | + (uint32_t)msg[ 2] << 16 | + (uint32_t)msg[ 3] << 24; + b = (uint32_t)msg[ 4] | + (uint32_t)msg[ 5] 
<< 8 | + (uint32_t)msg[ 6] << 16 | + (uint32_t)msg[ 7] << 24; + c = (uint32_t)msg[ 8] | + (uint32_t)msg[ 9] << 8 | + (uint32_t)msg[10] << 16 | + (uint32_t)msg[11] << 24; + + d = pad; + for(i = 12; i < len; i++) { + d <<= 8; + d |= msg[i]; + } + } else if (len >= 8) { + a = (uint32_t)msg[ 0] | + (uint32_t)msg[ 1] << 8 | + (uint32_t)msg[ 2] << 16 | + (uint32_t)msg[ 3] << 24; + b = (uint32_t)msg[ 4] | + (uint32_t)msg[ 5] << 8 | + (uint32_t)msg[ 6] << 16 | + (uint32_t)msg[ 7] << 24; + + c = d = pad; + for(i = 8; i < len; i++) { + c <<= 8; + c |= msg[i]; + } + } else if (len >= 4) { + a = (uint32_t)msg[ 0] | + (uint32_t)msg[ 1] << 8 | + (uint32_t)msg[ 2] << 16 | + (uint32_t)msg[ 3] << 24; + + b = c = d = pad; + for(i = 4; i < len; i++) { + b <<= 8; + b |= msg[i]; + } + } else { + a = b = c = d = pad; + for(i = 0; i < len; i++) { + a <<= 8; + a |= msg[i]; + } + } + + TEACORE(FULLROUNDS); + + /* return 0; */ + return (h0 ^ h1); +} + +/* + * What follows in this file is copyright 2000 by Hans Reiser, and the + * licensing of what follows is governed by README + * */ +uint32_t +yura_hash(const signed char *msg, int len) +{ + int i; + int j, pow; + uint32_t a, c; + + for (pow = 1, i = 1; i < len; i++) + pow = pow * 10; + + if (len == 1) + a = msg[0] - 48; + else + a = (msg[0] - 48) * pow; + + for (i = 1; i < len; i++) { + c = msg[i] - 48; + for (pow = 1, j = i; j < len - 1; j++) + pow = pow * 10; + a = a + c * pow; + } + + for (; i < 40; i++) { + c = '0' - 48; + for (pow = 1, j = i; j < len - 1; j++) + pow = pow * 10; + a = a + c * pow; + } + + for (; i < 256; i++) { + c = i; + for (pow = 1, j = i; j < len - 1; j++) + pow = pow * 10; + a = a + c * pow; + } + + a = a << 7; + return (a); +} + +uint32_t +r5_hash(const signed char *msg, int len) +{ + uint32_t a; + const signed char *start; + + a = 0; + start = msg; + + while (*msg && msg < start + len) { + a += *msg << 4; + a += *msg >> 4; + a *= 11; + msg++; + } + + return (a); +} diff --git a/sys/gnu/reiserfs/reiserfs_inode.c 
b/sys/gnu/reiserfs/reiserfs_inode.c
new file mode 100644
index 000000000000..950bb02dfebc
--- /dev/null
+++ b/sys/gnu/reiserfs/reiserfs_inode.c
@@ -0,0 +1,978 @@
+/*-
+ * Copyright 2000 Hans Reiser
+ * See README for licensing and copyright details
+ *
+ * Ported to FreeBSD by Jean-Sébastien Pédron
+ *
+ * $FreeBSD$
+ */
+
+#include <gnu/reiserfs/reiserfs_fs.h>
+
+static b_strategy_t reiserfs_bufstrategy;
+
+/*
+ * Buffer operations for ReiserFS vnodes.
+ * We punt on VOP_BMAP, so we need to do strategy on the file's vnode
+ * rather than the underlying device's.
+ */
+static struct buf_ops reiserfs_vnbufops = {
+	.bop_name = "ReiserFS",
+	.bop_strategy = reiserfs_bufstrategy,
+};
+
+/* Default io size defined in super.c */
+extern int reiserfs_default_io_size;
+void inode_set_bytes(struct reiserfs_node *ip, off_t bytes);
+
+/* Args for the create parameter of reiserfs_get_block */
+#define GET_BLOCK_NO_CREATE 0 /* Don't create new blocks or convert
+				 tails */
+#define GET_BLOCK_CREATE 1 /* Add anything you need to find block */
+#define GET_BLOCK_NO_HOLE 2 /* Return ENOENT for file holes */
+#define GET_BLOCK_READ_DIRECT 4 /* Read the tail if indirect item not
+				   found */
+#define GET_BLOCK_NO_ISEM 8 /* i_sem is not held, don't preallocate */
+#define GET_BLOCK_NO_DANGLE 16 /* Don't leave any transactions running */
+
+/* -------------------------------------------------------------------
+ * vnode operations
+ * -------------------------------------------------------------------*/
+
+/*
+ * VOP_READ: copy file data to the caller one logical block at a time
+ * until the request is satisfied or the end of file is reached.
+ */
+int
+reiserfs_read(struct vop_read_args *ap)
+{
+	struct uio *uio;
+	struct vnode *vp;
+	struct reiserfs_node *ip;
+	struct reiserfs_sb_info *sbi;
+
+	int error;
+	long size;
+	daddr_t lbn;
+	off_t bytesinfile, offset, prev_offset;
+
+	uio = ap->a_uio;
+	vp = ap->a_vp;
+	ip = VTOI(vp);
+	sbi = ip->i_reiserfs;
+
+	size = sbi->s_blocksize;
+
+	for (error = 0; uio->uio_resid > 0;) {
+		if ((bytesinfile = ip->i_size - uio->uio_offset) <= 0)
+			break;
+
+		/* Compute the logical block number and its offset */
+		lbn = uio->uio_offset / size;
+		offset = uio->uio_offset % size;
+		reiserfs_log(LOG_DEBUG, "logical block number: %ju\n",
+		    (intmax_t)lbn);
+		reiserfs_log(LOG_DEBUG, "block offset: %ju\n",
+		    (intmax_t)offset);
+
+		/* Read file blocks */
+		reiserfs_log(LOG_DEBUG, "reiserfs_get_block(%ju)\n",
+		    (intmax_t)lbn);
+		prev_offset = uio->uio_offset;
+		if ((error = reiserfs_get_block(ip, lbn, offset, uio)) != 0) {
+			reiserfs_log(LOG_DEBUG,
+			    "reiserfs_get_block returned the error %d\n",
+			    error);
+			break;
+		}
+
+		/*
+		 * reiserfs_get_block() may succeed without consuming
+		 * anything (no item found); stop instead of spinning.
+		 */
+		if (uio->uio_offset == prev_offset)
+			break;
+	}
+
+	return (error);
+}
+
+/*
+ * Buffer strategy routine: forward the buffer to VOP_STRATEGY() on the
+ * vnode owning the buffer object.
+ */
+static void
+reiserfs_bufstrategy(struct bufobj *bo, struct buf *bp)
+{
+	struct vnode *vp;
+	int rc;
+
+	vp = bo->bo_private;
+	KASSERT(bo == &vp->v_bufobj, ("BO/VP mismatch: vp %p bo %p != %p",
+	    vp, &vp->v_bufobj, bo));
+	rc = VOP_STRATEGY(vp, bp);
+	KASSERT(rc == 0, ("ReiserFS VOP_STRATEGY failed: bp=%p, "
+	    "vp=%p, rc=%d", bp, vp, rc));
+}
+
+/*
+ * VOP_INACTIVE: recycle the vnode right away when the inode carries no
+ * mode (i_mode == 0).
+ */
+int
+reiserfs_inactive(struct vop_inactive_args *ap)
+{
+	int error;
+	struct vnode *vp;
+	struct thread *td;
+	struct reiserfs_node *ip;
+
+	error = 0;
+	vp = ap->a_vp;
+	td = ap->a_td;
+	ip = VTOI(vp);
+
+	reiserfs_log(LOG_DEBUG, "deactivating inode used %d times\n",
+	    vp->v_usecount);
+	if (prtactive && vrefcnt(vp) != 0)
+		vprint("ReiserFS/inactive: pushing active", vp);
+
+#if 0
+	/* Ignore inodes related to stale file handles. */
+	if (ip->i_mode == 0)
+		goto out;
+
+out:
+#endif
+
+	/*
+	 * If we are done with the inode, reclaim it so that it can be reused
+	 * immediately.
+	 */
+	if (ip->i_mode == 0) {
+		reiserfs_log(LOG_DEBUG, "recyling\n");
+		vrecycle(vp, td);
+	}
+
+	return (error);
+}
+
+/*
+ * VOP_RECLAIM: detach the inode from the vnode, drop the device vnode
+ * reference and free the in-core inode.
+ */
+int
+reiserfs_reclaim(struct vop_reclaim_args *ap)
+{
+	struct reiserfs_node *ip;
+	struct vnode *vp;
+
+	vp = ap->a_vp;
+
+	reiserfs_log(LOG_DEBUG, "reclaiming inode used %d times\n",
+	    vp->v_usecount);
+	if (prtactive && vrefcnt(vp) != 0)
+		vprint("ReiserFS/reclaim: pushing active", vp);
+	ip = VTOI(vp);
+
+	/* XXX Update this node (write to the disk) */
+
+	/* Remove the inode from its hash chain. */
+	vfs_hash_remove(vp);
+
+	/* Purge old data structures associated with the inode. */
+	if (ip->i_devvp) {
+		reiserfs_log(LOG_DEBUG, "releasing device (0x%p)\n",
+		    ip->i_devvp);
+		vrele(ip->i_devvp);
+		ip->i_devvp = NULL;
+	}
+
+	reiserfs_log(LOG_DEBUG, "free private data\n");
+	FREE(vp->v_data, M_REISERFSNODE);
+	vp->v_data = NULL;
+	vnode_destroy_vobject(vp);
+
+	return (0);
+}
+
+/* -------------------------------------------------------------------
+ * Functions from linux/fs/reiserfs/inode.c
+ * -------------------------------------------------------------------*/
+
+/* Fill in a cpu_key from its individual components. */
+static void
+_make_cpu_key(struct cpu_key *key, int version,
+    uint32_t dirid, uint32_t objectid, off_t offset, int type, int length)
+{
+
+	key->version = version;
+
+	key->on_disk_key.k_dir_id = dirid;
+	key->on_disk_key.k_objectid = objectid;
+	set_cpu_key_k_offset(key, offset);
+	set_cpu_key_k_type(key, type);
+	key->key_length = length;
+}
+
+/*
+ * Take base of inode_key (it comes from inode always) (dirid, objectid)
+ * and version from an inode, set offset and type of key
+ */
+void
+make_cpu_key(struct cpu_key *key, struct reiserfs_node *ip, off_t offset,
+    int type, int length)
+{
+
+	_make_cpu_key(key, get_inode_item_key_version(ip),
+	    le32toh(INODE_PKEY(ip)->k_dir_id),
+	    le32toh(INODE_PKEY(ip)->k_objectid),
+	    offset, type, length);
+}
+
+/*
+ * Copy the part of logical block `block' starting at `offset' to `uio'.
+ * Data referenced by an indirect item is read from the device; holes are
+ * skipped; a tail stored in direct item(s) is gathered in a temporary
+ * buffer first.  Returns 0 or an errno value.
+ */
+int
+reiserfs_get_block(struct reiserfs_node *ip, long block, off_t offset,
+    struct uio *uio)
+{
+	caddr_t blk = NULL, p;
+	struct cpu_key key;
+	/* unsigned long offset; */
+	INITIALIZE_PATH(path);
+	struct buf *bp, *blk_bp;
+	struct item_head *ih;
+	struct reiserfs_sb_info *sbi;
+	int blocknr, chars, done = 0, ret = 0, args = 0;
+
+	sbi = ip->i_reiserfs;
+
+	/* Prepare the key to look for the 'block'-th block of file */
+	reiserfs_log(LOG_DEBUG, "prepare cpu key\n");
+	make_cpu_key(&key, ip, (off_t)block * sbi->s_blocksize + 1, TYPE_ANY, 3);
+
+	/* research: */
+	reiserfs_log(LOG_DEBUG, "search for position\n");
+	if (search_for_position_by_key(sbi, &key, &path) != POSITION_FOUND) {
+		reiserfs_log(LOG_DEBUG, "position not found\n");
+		pathrelse(&path);
+#if 0
+		if (blk)
+			kunmap(bh_result->b_page);
+#endif
+		/*
+		 * We do not return ENOENT if there is a hole but page is
+		 * uptodate, because it means that there is some MMAPED data
+		 * associated with it that is yet to be written to disk.
+		 */
+		if ((args & GET_BLOCK_NO_HOLE)/* &&
+		    !PageUptodate(bh_result->b_page)*/)
+			return (ENOENT);
+		return (0);
+	}
+	reiserfs_log(LOG_DEBUG, "position found\n");
+
+	bp = get_last_bp(&path);
+	ih = get_ih(&path);
+
+	if (is_indirect_le_ih(ih)) {
+		off_t xfersize;
+		uint32_t *ind_item = (uint32_t *)B_I_PITEM(bp, ih);
+
+		reiserfs_log(LOG_DEBUG, "item is INDIRECT\n");
+
+		blocknr = get_block_num(ind_item, path.pos_in_item);
+		reiserfs_log(LOG_DEBUG, "block number: %d "
+		    "(ind_item=%p, pos_in_item=%u)\n",
+		    blocknr, ind_item, path.pos_in_item);
+
+		xfersize = MIN(sbi->s_blocksize - offset,
+		    ip->i_size - uio->uio_offset);
+		xfersize = MIN(xfersize, uio->uio_resid);
+
+		if (blocknr) {
+			blk_bp = NULL;
+			ret = bread(sbi->s_devvp,
+			    blocknr * btodb(sbi->s_blocksize),
+			    sbi->s_blocksize, NOCRED, &blk_bp);
+			if (ret != 0) {
+				/* Don't touch the buffer after a failed read */
+				if (blk_bp != NULL)
+					brelse(blk_bp);
+				pathrelse(&path);
+				return (ret);
+			}
+			reiserfs_log(LOG_DEBUG, "xfersize: %ju\n",
+			    (intmax_t)xfersize);
+			ret = uiomove(blk_bp->b_data + offset, xfersize, uio);
+			brelse(blk_bp);
+		} else {
+			/*
+			 * We do not return ENOENT if there is a hole but
+			 * page is uptodate, because it means that there
+			 * is some MMAPED data associated with it that
+			 * is yet to be written to disk.
+			 */
+			if ((args & GET_BLOCK_NO_HOLE)/* &&
+			    !PageUptodate(bh_result->b_page)*/)
+				ret = (ENOENT);
+
+			/* Skip this hole */
+			uio->uio_resid -= xfersize;
+			uio->uio_offset += xfersize;
+		}
+
+		pathrelse(&path);
+		return (ret);
+	}
+
+	reiserfs_log(LOG_DEBUG, "item should be DIRECT\n");
+
+#if 0
+	/* Requested data are in direct item(s) */
+	if (!(args & GET_BLOCK_READ_DIRECT)) {
+		/*
+		 * We are called by bmap. FIXME: we can not map block of
+		 * file when it is stored in direct item(s)
+		 */
+		pathrelse(&path);
+#if 0
+		if (blk)
+			kunmap(bh_result->b_page);
+#endif
+		return (ENOENT);
+	}
+#endif
+
+#if 0
+	/*
+	 * If we've got a direct item, and the buffer or page was uptodate, we
+	 * don't want to pull data off disk again. Skip to the end, where we
+	 * map the buffer and return
+	 */
+	if (buffer_uptodate(bh_result)) {
+		goto finished;
+	} else
+		/*
+		 * grab_tail_page can trigger calls to reiserfs_get_block
+		 * on up to date pages without any buffers. If the page
+		 * is up to date, we don't want read old data off disk.
+		 * Set the up to date bit on the buffer instead and jump
+		 * to the end
+		 */
+		if (!bh_result->b_page || PageUptodate(bh_result->b_page)) {
+			set_buffer_uptodate(bh_result);
+			goto finished;
+		}
+#endif
+
+#if 0
+	/* Read file tail into part of page */
+	offset = (cpu_key_k_offset(&key) - 1) & (PAGE_CACHE_SIZE - 1);
+	fs_gen = get_generation(ip->i_reiserfs);
+	copy_item_head(&tmp_ih, ih);
+#endif
+
+#if 0
+	/*
+	 * We only want to kmap if we are reading the tail into the page. this
+	 * is not the common case, so we don't kmap until we are sure we need
+	 * to. But, this means the item might move if kmap schedules
+	 */
+	if (!blk) {
+		blk = (char *)kmap(bh_result->b_page);
+		if (fs_changed (fs_gen, sbi) && item_moved(&tmp_ih, &path))
+			goto research;
+	}
+	blk += offset;
+	memset(blk, 0, sbi->s_blocksize);
+#endif
+	if (!blk) {
+		reiserfs_log(LOG_DEBUG, "allocating buffer\n");
+		blk = malloc(ip->i_size, M_REISERFSNODE, M_WAITOK | M_ZERO);
+		if (!blk)
+			return (ENOMEM);
+	}
+	/* p += offset; */
+
+	p = blk;
+	do {
+		if (!is_direct_le_ih(ih)) {
+			reiserfs_log(LOG_ERR, "BUG\n");
+			pathrelse(&path);
+			free(blk, M_REISERFSNODE);
+			return (ENOENT); /* XXX Wrong error code */
+		}
+
+		/*
+		 * Make sure we don't read more bytes than actually exist
+		 * in the file. This can happen in odd cases where i_size
+		 * isn't correct, and when direct item padding results in
+		 * a few extra bytes at the end of the direct item
+		 */
+		if ((le_ih_k_offset(ih) + path.pos_in_item) > ip->i_size)
+			break;
+
+		if ((le_ih_k_offset(ih) - 1 + ih_item_len(ih)) > ip->i_size) {
+			chars = ip->i_size - (le_ih_k_offset(ih) - 1) -
+			    path.pos_in_item;
+			done = 1;
+		} else {
+			chars = ih_item_len(ih) - path.pos_in_item;
+		}
+		reiserfs_log(LOG_DEBUG, "copying %d bytes\n", chars);
+		memcpy(p, B_I_PITEM(bp, ih) + path.pos_in_item, chars);
+		if (done) {
+			reiserfs_log(LOG_DEBUG, "copy done\n");
+			break;
+		}
+
+		p += chars;
+
+		if (PATH_LAST_POSITION(&path) != (B_NR_ITEMS(bp) - 1))
+			/*
+			 * We done, if read direct item is not the last
+			 * item of node
+			 * FIXME: we could try to check right delimiting
+			 * key to see whether direct item continues in
+			 * the right neighbor or rely on i_size
+			 */
+			break;
+
+		/* Update key to look for the next piece */
+		set_cpu_key_k_offset(&key, cpu_key_k_offset(&key) + chars);
+		if (search_for_position_by_key(sbi, &key, &path) !=
+		    POSITION_FOUND)
+			/*
+			 * We read something from tail, even if now we got
+			 * IO_ERROR
+			 */
+			break;
+
+		bp = get_last_bp(&path);
+		ih = get_ih(&path);
+	} while (1);
+
+	/* finished: */
+	pathrelse(&path);
+	/*
+	 * This buffer has valid data, but isn't valid for io. mapping it to
+	 * block #0 tells the rest of reiserfs it just has a tail in it
+	 */
+	ret = uiomove(blk, ip->i_size, uio);
+	free(blk, M_REISERFSNODE);
+	return (ret);
+}
+
+/*
+ * Compute real number of used bytes by file
+ * Following three functions can go away when we'll have enough space in
+ * stat item
+ */
+static int
+real_space_diff(struct reiserfs_node *ip, int sd_size)
+{
+	int bytes;
+	off_t blocksize = ip->i_reiserfs->s_blocksize;
+
+	if (S_ISLNK(ip->i_mode) || S_ISDIR(ip->i_mode))
+		return (sd_size);
+
+	/* End of file is also in full block with indirect reference, so round
+	 * up to the next block.
+	 *
+	 * There is just no way to know if the tail is actually packed on the
+	 * file, so we have to assume it isn't. When we pack the tail, we add
+	 * 4 bytes to pretend there really is an unformatted node pointer. */
+	bytes = ((ip->i_size + (blocksize - 1)) >>
+	    ip->i_reiserfs->s_blocksize_bits) * UNFM_P_SIZE + sd_size;
+
+	return (bytes);
+}
+
+/*
+ * Bytes really used by the object: i_size for symlinks and directories,
+ * `blocks' 512-byte units otherwise, plus the real_space_diff() overhead.
+ */
+static inline off_t
+to_real_used_space(struct reiserfs_node *ip, unsigned long blocks, int sd_size)
+{
+
+	if (S_ISLNK(ip->i_mode) || S_ISDIR(ip->i_mode)) {
+		return ip->i_size + (off_t)(real_space_diff(ip, sd_size));
+	}
+
+	return ((off_t)real_space_diff(ip, sd_size)) + (((off_t)blocks) << 9);
+}
+
+/*
+ * Store a byte count as a number of 512-byte blocks (i_blocks) plus the
+ * remaining bytes (i_bytes).
+ */
+void
+inode_set_bytes(struct reiserfs_node *ip, off_t bytes)
+{
+
+	ip->i_blocks = bytes >> 9;
+	ip->i_bytes = bytes & 511;
+}
+
+/*
+ * Called by read_locked_inode: fill the in-core inode from the stat data
+ * item (v1 or v2) located by `path', then release the path.
+ */
+static void
+init_inode(struct reiserfs_node *ip, struct path *path)
+{
+	struct buf *bp;
+	struct item_head *ih;
+	uint32_t rdev;
+
+	bp = PATH_PLAST_BUFFER(path);
+	ih = PATH_PITEM_HEAD(path);
+
+	reiserfs_log(LOG_DEBUG, "copy the key (objectid=%d, dirid=%d)\n",
+	    ih->ih_key.k_objectid, ih->ih_key.k_dir_id);
+	copy_key(INODE_PKEY(ip), &(ih->ih_key));
+	/* ip->i_blksize = reiserfs_default_io_size; */
+
+	reiserfs_log(LOG_DEBUG, "reset some inode structure members\n");
+	REISERFS_I(ip)->i_flags = 0;
+#if 0
+	REISERFS_I(ip)->i_prealloc_block = 0;
+	REISERFS_I(ip)->i_prealloc_count = 0;
+	REISERFS_I(ip)->i_trans_id = 0;
+	REISERFS_I(ip)->i_jl = NULL;
+	REISERFS_I(ip)->i_acl_access = NULL;
+	REISERFS_I(ip)->i_acl_default = NULL;
+#endif
+
+	if (stat_data_v1(ih)) {
+		reiserfs_log(LOG_DEBUG, "reiserfs/init_inode: stat data v1\n");
+		struct stat_data_v1 *sd;
+		unsigned long blocks;
+
+		sd = (struct stat_data_v1 *)B_I_PITEM(bp, ih);
+
+		reiserfs_log(LOG_DEBUG,
+		    "reiserfs/init_inode: filling more members\n");
+		set_inode_item_key_version(ip, KEY_FORMAT_3_5);
+		set_inode_sd_version(ip, STAT_DATA_V1);
+		ip->i_mode = sd_v1_mode(sd);
+		ip->i_nlink = sd_v1_nlink(sd);
+		ip->i_uid = sd_v1_uid(sd);
+		ip->i_gid = sd_v1_gid(sd);
+		ip->i_size = sd_v1_size(sd);
+		ip->i_atime.tv_sec = sd_v1_atime(sd);
+		ip->i_mtime.tv_sec = sd_v1_mtime(sd);
+		ip->i_ctime.tv_sec = sd_v1_ctime(sd);
+		ip->i_atime.tv_nsec = 0;
+		ip->i_ctime.tv_nsec = 0;
+		ip->i_mtime.tv_nsec = 0;
+
+		reiserfs_log(LOG_DEBUG, " mode = %08x\n", ip->i_mode);
+		reiserfs_log(LOG_DEBUG, " nlink = %d\n", ip->i_nlink);
+		reiserfs_log(LOG_DEBUG, " owner = %d:%d\n", ip->i_uid,
+		    ip->i_gid);
+		reiserfs_log(LOG_DEBUG, " size = %ju\n",
+		    (intmax_t)ip->i_size);
+		reiserfs_log(LOG_DEBUG, " atime = %jd\n",
+		    (intmax_t)ip->i_atime.tv_sec);
+		reiserfs_log(LOG_DEBUG, " mtime = %jd\n",
+		    (intmax_t)ip->i_mtime.tv_sec);
+		reiserfs_log(LOG_DEBUG, " ctime = %jd\n",
+		    (intmax_t)ip->i_ctime.tv_sec);
+
+		ip->i_blocks = sd_v1_blocks(sd);
+		ip->i_generation = le32toh(INODE_PKEY(ip)->k_dir_id);
+		blocks = (ip->i_size + 511) >> 9;
+		blocks = _ROUND_UP(blocks, ip->i_reiserfs->s_blocksize >> 9);
+		if (ip->i_blocks > blocks) {
+			/*
+			 * There was a bug in <= 3.5.23 when i_blocks could
+			 * take negative values. Starting from 3.5.17 this
+			 * value could even be stored in stat data. For such
+			 * files we set i_blocks based on file size. Just 2
+			 * notes: this can be wrong for sparse files. On-disk
+			 * value will be only updated if file's inode will
+			 * ever change.
+			 */
+			ip->i_blocks = blocks;
+		}
+
+		rdev = sd_v1_rdev(sd);
+		REISERFS_I(ip)->i_first_direct_byte =
+		    sd_v1_first_direct_byte(sd);
+
+		/*
+		 * An early bug in the quota code can give us an odd number
+		 * for the block count. This is incorrect, fix it here.
+		 */
+		if (ip->i_blocks & 1) {
+			ip->i_blocks++ ;
+		}
+		inode_set_bytes(ip, to_real_used_space(ip, ip->i_blocks,
+		    SD_V1_SIZE));
+
+		/*
+		 * nopack is initially zero for v1 objects. For v2 objects,
+		 * nopack is initialised from sd_attrs
+		 */
+		REISERFS_I(ip)->i_flags &= ~i_nopack_mask;
+		reiserfs_log(LOG_DEBUG, "...done\n");
+	} else {
+		reiserfs_log(LOG_DEBUG, "stat data v2\n");
+		/*
+		 * New stat data found, but object may have old items
+		 * (directories and symlinks)
+		 */
+		struct stat_data *sd = (struct stat_data *)B_I_PITEM(bp, ih);
+
+		reiserfs_log(LOG_DEBUG, "filling more members\n");
+		ip->i_mode = sd_v2_mode(sd);
+		ip->i_nlink = sd_v2_nlink(sd);
+		ip->i_uid = sd_v2_uid(sd);
+		ip->i_size = sd_v2_size(sd);
+		ip->i_gid = sd_v2_gid(sd);
+		ip->i_mtime.tv_sec = sd_v2_mtime(sd);
+		ip->i_atime.tv_sec = sd_v2_atime(sd);
+		ip->i_ctime.tv_sec = sd_v2_ctime(sd);
+		ip->i_ctime.tv_nsec = 0;
+		ip->i_mtime.tv_nsec = 0;
+		ip->i_atime.tv_nsec = 0;
+
+		reiserfs_log(LOG_DEBUG, " mode = %08x\n", ip->i_mode);
+		reiserfs_log(LOG_DEBUG, " nlink = %d\n", ip->i_nlink);
+		reiserfs_log(LOG_DEBUG, " owner = %d:%d\n", ip->i_uid,
+		    ip->i_gid);
+		reiserfs_log(LOG_DEBUG, " size = %ju\n",
+		    (intmax_t)ip->i_size);
+		reiserfs_log(LOG_DEBUG, " atime = %jd\n",
+		    (intmax_t)ip->i_atime.tv_sec);
+		reiserfs_log(LOG_DEBUG, " mtime = %jd\n",
+		    (intmax_t)ip->i_mtime.tv_sec);
+		reiserfs_log(LOG_DEBUG, " ctime = %jd\n",
+		    (intmax_t)ip->i_ctime.tv_sec);
+
+		ip->i_blocks = sd_v2_blocks(sd);
+		rdev = sd_v2_rdev(sd);
+		reiserfs_log(LOG_DEBUG, " blocks = %u\n", ip->i_blocks);
+
+		if (S_ISCHR(ip->i_mode) || S_ISBLK(ip->i_mode))
+			ip->i_generation = le32toh(INODE_PKEY(ip)->k_dir_id);
+		else
+			ip->i_generation = sd_v2_generation(sd);
+
+		if (S_ISDIR(ip->i_mode) || S_ISLNK(ip->i_mode))
+			set_inode_item_key_version(ip, KEY_FORMAT_3_5);
+		else
+			set_inode_item_key_version(ip, KEY_FORMAT_3_6);
+
+		REISERFS_I(ip)->i_first_direct_byte = 0;
+		set_inode_sd_version(ip, STAT_DATA_V2);
+		inode_set_bytes(ip, to_real_used_space(ip, ip->i_blocks,
+		    SD_V2_SIZE));
+
+		/*
+		 * Read persistent inode attributes from sd and initialise
+		 * generic inode flags from them
+		 */
+		REISERFS_I(ip)->i_attrs = sd_v2_attrs(sd);
+		sd_attrs_to_i_attrs(sd_v2_attrs(sd), ip);
+		reiserfs_log(LOG_DEBUG, "...done\n");
+	}
+
+	pathrelse(path);
+	if (S_ISREG(ip->i_mode)) {
+		reiserfs_log(LOG_DEBUG, "this inode is a regular file\n");
+		//ip->i_op = &reiserfs_file_ip_operations;
+		//ip->i_fop = &reiserfs_file_operations;
+		//ip->i_mapping->a_ops = &reiserfs_address_space_operations ;
+	} else if (S_ISDIR(ip->i_mode)) {
+		reiserfs_log(LOG_DEBUG, "this inode is a directory\n");
+		//ip->i_op = &reiserfs_dir_ip_operations;
+		//ip->i_fop = &reiserfs_dir_operations;
+	} else if (S_ISLNK(ip->i_mode)) {
+		reiserfs_log(LOG_DEBUG, "this inode is a symlink\n");
+		//ip->i_op = &reiserfs_symlink_ip_operations;
+		//ip->i_mapping->a_ops = &reiserfs_address_space_operations;
+	} else {
+		reiserfs_log(LOG_DEBUG, "this inode is something unknown in "
+		    "this universe\n");
+		ip->i_blocks = 0;
+		//ip->i_op = &reiserfs_special_ip_operations;
+		//init_special_ip(ip, ip->i_mode, new_decode_dev(rdev));
+	}
+}
+
+/*
+ * reiserfs_read_locked_inode is called to read the inode off disk, and
+ * it does a make_bad_inode when things go wrong. But, we need to make
+ * sure and clear the key in the private portion of the inode, otherwise
+ * a corresponding iput might try to delete whatever object the inode
+ * last represented.
+ */
+static void
+reiserfs_make_bad_inode(struct reiserfs_node *ip) {
+
+	memset(INODE_PKEY(ip), 0, KEY_SIZE);
+	//make_bad_inode(inode);
+}
+/* Load the stat data of `ip'; the inode is made bad on failure. */
+void
+reiserfs_read_locked_inode(struct reiserfs_node *ip,
+    struct reiserfs_iget_args *args)
+{
+	INITIALIZE_PATH(path_to_sd);
+	struct cpu_key key;
+	unsigned long dirino;
+	int retval;
+
+	dirino = args->dirid;
+
+	/*
+	 * Set version 1, version 2 could be used too, because stat data
+	 * key is the same in both versions
+	 */
+	key.version = KEY_FORMAT_3_5;
+	key.on_disk_key.k_dir_id = dirino;
+	key.on_disk_key.k_objectid = ip->i_number;
+	key.on_disk_key.u.k_offset_v1.k_offset = SD_OFFSET;
+	key.on_disk_key.u.k_offset_v1.k_uniqueness = SD_UNIQUENESS;
+
+	/* Look for the object's stat data */
+	retval = search_item(ip->i_reiserfs, &key, &path_to_sd);
+	if (retval == IO_ERROR) {
+		reiserfs_log(LOG_ERR,
+		    "I/O failure occurred trying to find stat "
+		    "data %u/%u\n",
+		    key.on_disk_key.k_dir_id, key.on_disk_key.k_objectid);
+		reiserfs_make_bad_inode(ip);
+		return;
+	}
+	if (retval != ITEM_FOUND) {
+		/*
+		 * A stale NFS handle can trigger this without it being
+		 * an error
+		 */
+		reiserfs_log(LOG_ERR,
+		    "item not found (objectid=%u, dirid=%u)\n",
+		    key.on_disk_key.k_objectid, key.on_disk_key.k_dir_id);
+		pathrelse(&path_to_sd);
+		reiserfs_make_bad_inode(ip);
+		ip->i_nlink = 0;
+		return;
+	}
+
+	init_inode(ip, &path_to_sd);
+
+	/*
+	 * It is possible that knfsd is trying to access inode of a file
+	 * that is being removed from the disk by some other thread. As
+	 * we update sd on unlink all that is required is to check for
+	 * nlink here. This bug was first found by Sizif when debugging
+	 * SquidNG/Butterfly, forgotten, and found again after Philippe
+	 * Gramoulle reproduced it.
+	 *
+	 * More logical fix would require changes in fs/inode.c:iput() to
+	 * remove inode from hash-table _after_ fs cleaned disk stuff up and
+	 * in iget() to return NULL if I_FREEING inode is found in hash-table.
+	 */
+	/*
+	 * Currently there is one place where it's ok to meet inode with
+	 * nlink == 0: processing of open-unlinked and half-truncated files
+	 * during mount (fs/reiserfs/super.c:finish_unfinished()).
+	 */
+	if((ip->i_nlink == 0) &&
+	    !REISERFS_SB(ip->i_reiserfs)->s_is_unlinked_ok ) {
+		reiserfs_log(LOG_WARNING, "dead inode read from disk. This is "
+		    "likely to be race with knfsd. Ignore\n");
+		reiserfs_make_bad_inode(ip);
+	}
+
+	/* init_inode() must have released the path by now */
+	reiserfs_check_path(&path_to_sd);
+}
+/* Get the vnode for `key', from the vfs hash or by reading the inode. */
+int
+reiserfs_iget(
+    struct mount *mp, const struct cpu_key *key,
+    struct vnode **vpp, struct thread *td)
+{
+	int error, flags;
+	struct cdev *dev;
+	struct vnode *vp;
+	struct reiserfs_node *ip;
+	struct reiserfs_mount *rmp;
+
+	struct reiserfs_iget_args args;
+
+	//restart:
+	/* Check if the inode cache contains it */
+	// XXX LK_EXCLUSIVE ?
+	flags = LK_EXCLUSIVE;
+	error = vfs_hash_get(mp, key->on_disk_key.k_objectid, flags,
+	    td, vpp, NULL, NULL);
+	if (error || *vpp != NULL)
+		return (error);
+
+	rmp = VFSTOREISERFS(mp);
+	dev = rmp->rm_dev;
+
+	/*
+	 * If this MALLOC() is performed after the getnewvnode() it might
+	 * block, leaving a vnode with a NULL v_data to be found by
+	 * reiserfs_sync() if a sync happens to fire right then, which
+	 * will cause a panic because reiserfs_sync() blindly dereferences
+	 * vp->v_data (as well it should).
+	 */
+	reiserfs_log(LOG_DEBUG, "malloc(struct reiserfs_node)\n");
+	ip = malloc(sizeof(struct reiserfs_node), M_REISERFSNODE,
+	    M_WAITOK | M_ZERO);
+
+	/* Allocate a new vnode/inode. */
+	reiserfs_log(LOG_DEBUG, "getnewvnode\n");
+	if ((error =
+	    getnewvnode("reiserfs", mp, &reiserfs_vnodeops, &vp)) != 0) {
+		*vpp = NULL;
+		free(ip, M_REISERFSNODE);
+		reiserfs_log(LOG_DEBUG, "getnewvnode FAILED\n");
+		return (error);
+	}
+
+	args.dirid = key->on_disk_key.k_dir_id;
+	args.objectid = key->on_disk_key.k_objectid;
+
+	reiserfs_log(LOG_DEBUG, "filling *ip\n");
+	vp->v_data = ip;
+	ip->i_vnode = vp;
+	ip->i_dev = dev;
+	ip->i_number = args.objectid;
+	ip->i_ino = args.dirid;
+	ip->i_reiserfs = rmp->rm_reiserfs;
+
+	vp->v_bufobj.bo_ops = &reiserfs_vnbufops;
+	vp->v_bufobj.bo_private = vp;
+
+	/* If this is the root node, set the VV_ROOT flag */
+	if (ip->i_number == REISERFS_ROOT_OBJECTID &&
+	    ip->i_ino == REISERFS_ROOT_PARENT_OBJECTID)
+		vp->v_vflag |= VV_ROOT;
+
+#if 0
+	if (VOP_LOCK(vp, LK_EXCLUSIVE, td) != 0)
+		panic("reiserfs/iget: unexpected lock failure");
+
+	/*
+	 * Exclusively lock the vnode before adding to hash. Note, that we
+	 * must not release nor downgrade the lock (despite flags argument
+	 * says) till it is fully initialized.
+	 */
+	lockmgr(vp->v_vnlock, LK_EXCLUSIVE, (struct mtx *)0, td);
+#endif
+
+	error = vfs_hash_insert(vp, key->on_disk_key.k_objectid, flags,
+	    td, vpp, NULL, NULL);
+	if (error || *vpp != NULL)
+		return (error);
+
+	/* Read the inode */
+	reiserfs_log(LOG_DEBUG, "call reiserfs_read_locked_inode ("
+	    "objectid=%d,dirid=%d)\n", args.objectid, args.dirid);
+	reiserfs_read_locked_inode(ip, &args);
+
+	ip->i_devvp = rmp->rm_devvp;
+	VREF(ip->i_devvp);
+
+	switch(vp->v_type = IFTOVT(ip->i_mode)) {
+	case VBLK:
+		reiserfs_log(LOG_DEBUG, "vnode type VBLK\n");
+		vp->v_op = &reiserfs_specops;
+		break;
+#if 0
+	case VCHR:
+		reiserfs_log(LOG_DEBUG, "vnode type VCHR\n");
+		vp->v_op = &reiserfs_specops;
+		vp = addaliasu(vp, ip->i_rdev);
+		ip->i_vnode = vp;
+		break;
+	case VFIFO:
+		reiserfs_log(LOG_DEBUG, "vnode type VFIFO\n");
+		vp->v_op = reiserfs_fifoop_p;
+		break;
+#endif
+	default:
+		break;
+	}
+
+	*vpp = vp;
+	return (0);
+}
+/* Translate on-disk attribute bits into in-core inode flags. */
+void
+sd_attrs_to_i_attrs(uint16_t sd_attrs, struct reiserfs_node *ip)
+{
+
+	if (reiserfs_attrs(ip->i_reiserfs)) {
+#if 0
+		if (sd_attrs & REISERFS_SYNC_FL)
+			ip->i_flags |= S_SYNC;
+		else
+			ip->i_flags &= ~S_SYNC;
+#endif
+		if (sd_attrs & REISERFS_IMMUTABLE_FL)
+			ip->i_flags |= IMMUTABLE;
+		else
+			ip->i_flags &= ~IMMUTABLE;
+		if (sd_attrs & REISERFS_APPEND_FL)
+			ip->i_flags |= APPEND;
+		else
+			ip->i_flags &= ~APPEND;
+#if 0
+		if (sd_attrs & REISERFS_NOATIME_FL)
+			ip->i_flags |= S_NOATIME;
+		else
+			ip->i_flags &= ~S_NOATIME;
+		if (sd_attrs & REISERFS_NOTAIL_FL)
+			REISERFS_I(ip)->i_flags |= i_nopack_mask;
+		else
+			REISERFS_I(ip)->i_flags &= ~i_nopack_mask;
+#endif
+	}
+}
+/* Translate in-core inode flags back into on-disk attribute bits. */
+void
+i_attrs_to_sd_attrs(struct reiserfs_node *ip, uint16_t *sd_attrs)
+{
+
+	if (reiserfs_attrs(ip->i_reiserfs)) {
+#if 0
+		if (ip->i_flags & S_SYNC)
+			*sd_attrs |= REISERFS_SYNC_FL;
+		else
+			*sd_attrs &= ~REISERFS_SYNC_FL;
+#endif
+		if (ip->i_flags & IMMUTABLE)
+			*sd_attrs |= REISERFS_IMMUTABLE_FL;
+		else
+			*sd_attrs &= ~REISERFS_IMMUTABLE_FL;
+		if (ip->i_flags & APPEND)
+			*sd_attrs |= REISERFS_APPEND_FL;
+		else
+			*sd_attrs &= ~REISERFS_APPEND_FL;
+#if 0
+		if (ip->i_flags & S_NOATIME)
+			*sd_attrs |= REISERFS_NOATIME_FL;
+		else
+			*sd_attrs &= ~REISERFS_NOATIME_FL;
+		if (REISERFS_I(ip)->i_flags & i_nopack_mask)
+			*sd_attrs |= REISERFS_NOTAIL_FL;
+		else
+			*sd_attrs &= ~REISERFS_NOTAIL_FL;
+#endif
+	}
+}
diff --git a/sys/gnu/reiserfs/reiserfs_item_ops.c b/sys/gnu/reiserfs/reiserfs_item_ops.c
new file mode 100644
index 000000000000..18a122d92aeb
--- /dev/null
+++ b/sys/gnu/reiserfs/reiserfs_item_ops.c
@@ -0,0 +1,158 @@
+/*-
+ * Copyright 2000 Hans Reiser
+ * See README for licensing and copyright details
+ *
+ * Ported to FreeBSD by Jean-Sébastien Pédron
+ *
+ * $FreeBSD$
+ */
+
+#include <gnu/reiserfs/reiserfs_fs.h>
+
+/* -------------------------------------------------------------------
+ * Stat data functions
+ * -------------------------------------------------------------------*/
+
+static int
+sd_bytes_number(struct item_head *ih, int block_size)
+{
+
+	return (0);
+}
+
+struct item_operations stat_data_ops = {
+	.bytes_number = sd_bytes_number,
+	//.decrement_key = sd_decrement_key,
+	//.is_left_mergeable = sd_is_left_mergeable,
+	//.print_item = sd_print_item,
+	//.check_item = sd_check_item,
+
+	//.create_vi = sd_create_vi,
+	//.check_left = sd_check_left,
+	//.check_right = sd_check_right,
+	//.part_size = sd_part_size,
+	//.unit_num = sd_unit_num,
+	//.print_vi = sd_print_vi
+};
+
+/* -------------------------------------------------------------------
+ * Direct item functions
+ * -------------------------------------------------------------------*/
+
+static int
+direct_bytes_number(struct item_head *ih, int block_size)
+{
+
+	return (ih_item_len(ih));
+}
+
+struct item_operations direct_ops = {
+	.bytes_number = direct_bytes_number,
+	//.decrement_key = direct_decrement_key,
+	//.is_left_mergeable = direct_is_left_mergeable,
+	//.print_item = direct_print_item,
+	//.check_item = direct_check_item,
+
+	//.create_vi = direct_create_vi,
+	//.check_left = direct_check_left,
+	//.check_right = direct_check_right,
+	//.part_size = direct_part_size,
+	//.unit_num = direct_unit_num,
+	//.print_vi = direct_print_vi
+};
+
+/* -------------------------------------------------------------------
+ * Indirect item functions
+ * -------------------------------------------------------------------*/
+
+static int
+indirect_bytes_number(struct item_head *ih, int block_size)
+{
+
+	return (ih_item_len(ih) / UNFM_P_SIZE * block_size);
+}
+
+struct item_operations indirect_ops = {
+	.bytes_number = indirect_bytes_number,
+	//.decrement_key = indirect_decrement_key,
+	//.is_left_mergeable = indirect_is_left_mergeable,
+	//.print_item = indirect_print_item,
+	//.check_item = indirect_check_item,
+
+	//.create_vi = indirect_create_vi,
+	//.check_left = indirect_check_left,
+	//.check_right = indirect_check_right,
+	//.part_size = indirect_part_size,
+	//.unit_num = indirect_unit_num,
+	//.print_vi = indirect_print_vi
+};
+
+/* -------------------------------------------------------------------
+ * Direntry functions
+ * -------------------------------------------------------------------*/
+
+static int
+direntry_bytes_number(struct item_head *ih, int block_size)
+{
+
+	reiserfs_log(LOG_WARNING, "bytes number is asked for direntry\n");
+	return (0);
+}
+
+struct item_operations direntry_ops = {
+	.bytes_number = direntry_bytes_number,
+	//.decrement_key = direntry_decrement_key,
+	//.is_left_mergeable = direntry_is_left_mergeable,
+	//.print_item = direntry_print_item,
+	//.check_item = direntry_check_item,
+
+	//.create_vi = direntry_create_vi,
+	//.check_left = direntry_check_left,
+	//.check_right = direntry_check_right,
+	//.part_size = direntry_part_size,
+	//.unit_num = direntry_unit_num,
+	//.print_vi = direntry_print_vi
+};
+
+/* -------------------------------------------------------------------
+ * Error catching functions to catch errors caused by incorrect item
+ * types.
+ * -------------------------------------------------------------------*/
+
+static int
+errcatch_bytes_number(struct item_head *ih, int block_size)
+{
+
+	reiserfs_log(LOG_WARNING, "invalid item type observed, run fsck ASAP\n");
+	return (0);
+}
+
+struct item_operations errcatch_ops = {
+	.bytes_number = errcatch_bytes_number,
+	//errcatch_decrement_key,
+	//errcatch_is_left_mergeable,
+	//errcatch_print_item,
+	//errcatch_check_item,
+
+	//errcatch_create_vi,
+	//errcatch_check_left,
+	//errcatch_check_right,
+	//errcatch_part_size,
+	//errcatch_unit_num,
+	//errcatch_print_vi
+};
+
+#if !(TYPE_STAT_DATA == 0 && TYPE_INDIRECT == 1 && \
+    TYPE_DIRECT == 2 && TYPE_DIRENTRY == 3)
+#error
+#endif
+
+struct item_operations *item_ops[TYPE_ANY + 1] = {
+	&stat_data_ops,
+	&indirect_ops,
+	&direct_ops,
+	&direntry_ops,
+	NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+	&errcatch_ops /* This is to catch errors with invalid type (15th
+			 entry for TYPE_ANY) */
+};
diff --git a/sys/gnu/reiserfs/reiserfs_mount.h b/sys/gnu/reiserfs/reiserfs_mount.h
new file mode 100644
index 000000000000..0db218be938e
--- /dev/null
+++ b/sys/gnu/reiserfs/reiserfs_mount.h
@@ -0,0 +1,47 @@
+/*
+ * Copyright 2000 Hans Reiser
+ * See README for licensing and copyright details
+ *
+ * Ported to FreeBSD by Jean-Sébastien Pédron
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _GNU_REISERFS_REISERFS_MOUNT_H
+#define _GNU_REISERFS_REISERFS_MOUNT_H
+
+#define REISERFS_FOR_FREEBSD_VERSION "0.1.6"
+
+#if defined(_KERNEL)
+
+#ifdef MALLOC_DECLARE
+MALLOC_DECLARE(M_REISERFSMNT);
+MALLOC_DECLARE(M_REISERFSPATH);
+MALLOC_DECLARE(M_REISERFSNODE);
+MALLOC_DECLARE(M_REISERFSCOOKIES);
+#endif
+
+/* This structure describes the ReiserFS specific mount structure data. */
+struct reiserfs_mount {
+	struct mount *rm_mountp;
+	struct cdev *rm_dev;
+	struct vnode *rm_devvp;
+
+	struct reiserfs_sb_info *rm_reiserfs;
+
+	struct g_consumer *rm_cp;
+	struct bufobj *rm_bo;
+};
+
+/* Convert mount ptr to reiserfs_mount ptr.
*/
+#define VFSTOREISERFS(mp) ((struct reiserfs_mount *)((mp)->mnt_data))
+
+#endif /* defined(_KERNEL) */
+
+/* Arguments to mount ReiserFS filesystems. */
+struct reiserfs_args {
+	char *fspec; /* block special device holding the fs to mount */
+	struct export_args export; /* network export information */
+};
+
+#endif /* !defined _GNU_REISERFS_REISERFS_MOUNT_H */
diff --git a/sys/gnu/reiserfs/reiserfs_namei.c b/sys/gnu/reiserfs/reiserfs_namei.c
new file mode 100644
index 000000000000..77c0d459d640
--- /dev/null
+++ b/sys/gnu/reiserfs/reiserfs_namei.c
@@ -0,0 +1,699 @@
+/*-
+ * Copyright 2000 Hans Reiser
+ * See README for licensing and copyright details
+ *
+ * Ported to FreeBSD by Jean-Sébastien Pédron
+ *
+ * $FreeBSD$
+ */
+
+#include
+
+static int	reiserfs_find_entry(struct reiserfs_node *dp,
+		    const char *name, int namelen,
+		    struct path * path_to_entry, struct reiserfs_dir_entry *de);
+
+MALLOC_DEFINE(M_REISERFSCOOKIES, "ReiserFS cookies",
+    "ReiserFS VOP_READDIR cookies");
+
+/* -------------------------------------------------------------------
+ * Lookup functions
+ * -------------------------------------------------------------------*/
+
+/*
+ * Look up the name described by ap->a_cnp in the directory ap->a_dvp.
+ *
+ * On success the vnode of the entry is stored in *ap->a_vpp and 0 is
+ * returned. Errors:
+ *   ENAMETOOLONG  the name is longer than REISERFS_MAX_NAME allows for
+ *                 this filesystem's block size;
+ *   EIO           the directory search reported IO_ERROR;
+ *   ENOENT        the search ended with anything else than NAME_FOUND;
+ *   other         whatever reiserfs_iget() returned.
+ */
+int
+reiserfs_lookup(struct vop_cachedlookup_args *ap)
+{
+	int error, retval;
+	struct vnode *vdp = ap->a_dvp;
+	struct vnode **vpp = ap->a_vpp;
+	struct componentname *cnp = ap->a_cnp;
+
+	int flags = cnp->cn_flags;
+	struct thread *td = cnp->cn_thread;
+
+	struct vnode *vp;
+	struct vnode *pdp; /* Saved dp during symlink work */
+	struct reiserfs_node *dp;
+	struct reiserfs_dir_entry de;
+	INITIALIZE_PATH(path_to_entry);
+
+	/*
+	 * Debug logging only: the component is NUL-terminated in place so
+	 * that it can be printed with %s, then the overwritten byte is
+	 * put back.
+	 */
+	char c = cnp->cn_nameptr[cnp->cn_namelen];
+	cnp->cn_nameptr[cnp->cn_namelen] = '\0';
+	reiserfs_log(LOG_DEBUG, "looking for `%s', %ld (%s)\n",
+	    cnp->cn_nameptr, cnp->cn_namelen, cnp->cn_pnbuf);
+	cnp->cn_nameptr[cnp->cn_namelen] = c;
+
+	vp = NULL;
+	dp = VTOI(vdp);
+
+	if (REISERFS_MAX_NAME(dp->i_reiserfs->s_blocksize) < cnp->cn_namelen)
+		return (ENAMETOOLONG);
+
+	reiserfs_log(LOG_DEBUG, "searching entry\n");
+	de.de_gen_number_bit_string = 0;
+	retval = reiserfs_find_entry(dp, cnp->cn_nameptr, cnp->cn_namelen,
+	    &path_to_entry, &de);
+	/* Only the values copied into de are used from here on. */
+	pathrelse(&path_to_entry);
+
+	if (retval == NAME_FOUND) {
+		reiserfs_log(LOG_DEBUG, "found\n");
+	} else {
+		reiserfs_log(LOG_DEBUG, "not found\n");
+	}
+
+	if (retval == NAME_FOUND) {
+#if 0
+		/* Hide the .reiserfs_priv directory */
+		if (reiserfs_xattrs(dp->i_reiserfs) &&
+		    !old_format_only(dp->i_reiserfs) &&
+		    REISERFS_SB(dp->i_reiserfs)->priv_root &&
+		    REISERFS_SB(dp->i_reiserfs)->priv_root->d_inode &&
+		    de.de_objectid == le32toh(INODE_PKEY(REISERFS_SB(
+		    dp->i_reiserfs)->priv_root->d_inode)->k_objectid)) {
+			return (EACCES);
+		}
+#endif
+
+		/*
+		 * NOTE(review): the iget calls below pass the address of
+		 * de.de_dir_id as a cpu_key. This looks like it relies on
+		 * de_dir_id and de_objectid being laid out like the start
+		 * of a cpu_key -- confirm against struct
+		 * reiserfs_dir_entry.
+		 */
+		reiserfs_log(LOG_DEBUG, "reading vnode\n");
+		pdp = vdp;
+		if (flags & ISDOTDOT) {
+			/*
+			 * The directory is unlocked while the vnode of
+			 * ".." is fetched and locked again afterwards,
+			 * whether or not the fetch succeeded.
+			 */
+			VOP_UNLOCK(pdp, 0, td);
+			error = reiserfs_iget(vdp->v_mount,
+			    (struct cpu_key *)&(de.de_dir_id), &vp, td);
+			vn_lock(pdp, LK_EXCLUSIVE | LK_RETRY, td);
+			if (error != 0)
+				return (error);
+			*vpp = vp;
+		} else if (de.de_objectid == dp->i_number &&
+		    de.de_dir_id == dp->i_ino) {
+			VREF(vdp); /* We want ourself, ie "." */
+			*vpp = vdp;
+		} else {
+			if ((error = reiserfs_iget(vdp->v_mount,
+			    (struct cpu_key *)&(de.de_dir_id), &vp, td)) != 0)
+				return (error);
+			*vpp = vp;
+		}
+
+		/*
+		 * Propagate the priv_object flag so we know we're in the
+		 * priv tree
+		 */
+		/*if (is_reiserfs_priv_object(dir))
+		    REISERFS_I(inode)->i_flags |= i_priv_object;*/
+	} else {
+		if (retval == IO_ERROR) {
+			reiserfs_log(LOG_DEBUG, "IO error\n");
+			return (EIO);
+		}
+
+		return (ENOENT);
+	}
+
+	/* Insert name into cache if appropriate.
*/
+	if (cnp->cn_flags & MAKEENTRY)
+		cache_enter(vdp, *vpp, cnp);
+
+	reiserfs_log(LOG_DEBUG, "done\n");
+	return (0);
+}
+
+/* Defined as a const object in reiserfs_stree.c. */
+extern const struct key MIN_KEY;
+
+/*
+ * Read entries of the directory ap->a_vp into ap->a_uio.
+ *
+ * Scanning starts at the entry offset uio->uio_offset (DOT_OFFSET when
+ * it is 0) and stops when the directory ends, when the next record does
+ * not fit in the caller's buffer or when the cookie array is full. On
+ * return uio->uio_offset is the offset to resume from.
+ *
+ * *ap->a_eofflag (if requested) is 1 only when the end of the directory
+ * was really reached and 0 otherwise. When both ap->a_ncookies and
+ * ap->a_cookies are given, one cookie (the entry offset) is recorded per
+ * returned entry in an array allocated from M_REISERFSCOOKIES; the
+ * array is handed to the caller on success and freed here on error.
+ *
+ * Returns 0, EIO on a search error or an entry with an empty name, or
+ * the error reported by uiomove().
+ */
+int
+reiserfs_readdir(struct vop_readdir_args /* {
+	struct vnode *a_vp;
+	struct uio *a_uio;
+	struct ucred *a_cred;
+	int *a_eofflag;
+	int *a_ncookies;
+	u_long **a_cookies;
+	} */*ap)
+{
+	int error = 0;
+	struct dirent dstdp;
+	struct uio *uio = ap->a_uio;
+
+	off_t next_pos;
+	struct buf *bp;
+	struct item_head *ih;
+	struct cpu_key pos_key;
+	const struct key *rkey;
+	struct reiserfs_node *ip;
+	struct reiserfs_dir_entry de;
+	INITIALIZE_PATH(path_to_entry);
+	int entry_num, item_num, search_res;
+
+	/* The NFS part */
+	int ncookies = 0;
+	int maxcookies = 0;	/* Capacity of the cookies array */
+	u_long *cookies = NULL;
+
+	/* Never leave the caller's flag undefined. */
+	if (ap->a_eofflag)
+		*ap->a_eofflag = 0;
+
+	/*
+	 * Form key for search the next directory entry using f_pos field of
+	 * file structure
+	 */
+	ip = VTOI(ap->a_vp);
+	make_cpu_key(&pos_key,
+	    ip, uio->uio_offset ? uio->uio_offset : DOT_OFFSET,
+	    TYPE_DIRENTRY, 3);
+	next_pos = cpu_key_k_offset(&pos_key);
+
+	reiserfs_log(LOG_DEBUG, "listing entries for "
+	    "(objectid=%d, dirid=%d)\n",
+	    pos_key.on_disk_key.k_objectid, pos_key.on_disk_key.k_dir_id);
+	reiserfs_log(LOG_DEBUG, "uio_offset = %jd, uio_resid = %d\n",
+	    (intmax_t)uio->uio_offset, uio->uio_resid);
+
+	if (ap->a_ncookies && ap->a_cookies) {
+		/*
+		 * uio_resid / 16 is only a guess of how many records fit
+		 * (short names produce smaller records), so the capacity
+		 * is remembered and enforced in the copy loop. The "+ 1"
+		 * guarantees that at least one entry can be returned.
+		 */
+		maxcookies = uio->uio_resid / 16 + 1;
+		cookies = (u_long *)malloc(maxcookies * sizeof(u_long),
+		    M_REISERFSCOOKIES, M_WAITOK);
+	}
+
+	while (1) {
+		//research:
+		/*
+		 * Search the directory item, containing entry with
+		 * specified key
+		 */
+		reiserfs_log(LOG_DEBUG, "search directory to read\n");
+		search_res = search_by_entry_key(ip->i_reiserfs, &pos_key,
+		    &path_to_entry, &de);
+		if (search_res == IO_ERROR) {
+			error = EIO;
+			goto out;
+		}
+
+		entry_num = de.de_entry_num;
+		item_num = de.de_item_num;
+		bp = de.de_bp;
+		ih = de.de_ih;
+
+		if (search_res == POSITION_FOUND ||
+		    entry_num < I_ENTRY_COUNT(ih)) {
+			/*
+			 * Go through all entries in the directory item
+			 * beginning from the entry, that has been found.
+			 */
+			struct reiserfs_de_head *deh = B_I_DEH(bp, ih) +
+			    entry_num;
+
+			reiserfs_log(LOG_DEBUG,
+			    "walking through directory entries\n");
+			for (; entry_num < I_ENTRY_COUNT(ih);
+			    entry_num++, deh++) {
+				int d_namlen;
+				char *d_name;
+				off_t d_off;
+				ino_t d_ino;
+
+				if (!de_visible(deh)) {
+					/* It is hidden entry */
+					continue;
+				}
+
+				d_namlen = entry_length(bp, ih, entry_num);
+				d_name = B_I_DEH_ENTRY_FILE_NAME(bp, ih, deh);
+				/* Do not peek before the name if it is empty */
+				if (d_namlen > 0 && !d_name[d_namlen - 1])
+					d_namlen = strlen(d_name);
+				reiserfs_log(LOG_DEBUG, " - `%s' (len=%d)\n",
+				    d_name, d_namlen);
+
+				if (d_namlen <= 0) {
+					/*
+					 * An entry without a name cannot be
+					 * valid: stop right here instead of
+					 * scanning on with the error set.
+					 */
+					error = EIO;
+					goto end;
+				}
+
+				if (d_namlen > REISERFS_MAX_NAME(
+				    ip->i_reiserfs->s_blocksize) ||
+				    d_namlen >= (int)sizeof(dstdp.d_name)) {
+					/*
+					 * Too big to send back to VFS, or
+					 * too big for struct dirent
+					 */
+					continue;
+				}
+
+#if 0
+				/* Ignore the .reiserfs_priv entry */
+				if (reiserfs_xattrs(ip->i_reiserfs) &&
+				    !old_format_only(ip->i_reiserfs) &&
+				    filp->f_dentry == ip->i_reiserfs->s_root &&
+				    REISERFS_SB(ip->i_reiserfs)->priv_root &&
+				    REISERFS_SB(ip->i_reiserfs)->priv_root->d_inode &&
+				    deh_objectid(deh) ==
+				    le32toh(INODE_PKEY(REISERFS_SB(
+				    ip->i_reiserfs)->priv_root->d_inode)->k_objectid)) {
+					continue;
+				}
+#endif
+
+				d_off = deh_offset(deh);
+				d_ino = deh_objectid(deh);
+				uio->uio_offset = d_off;
+
+				/* Copy to user land */
+				dstdp.d_fileno = d_ino;
+				dstdp.d_type = DT_UNKNOWN;
+				dstdp.d_namlen = d_namlen;
+				dstdp.d_reclen = GENERIC_DIRSIZ(&dstdp);
+				bcopy(d_name, dstdp.d_name, dstdp.d_namlen);
+				bzero(dstdp.d_name + dstdp.d_namlen,
+				    dstdp.d_reclen -
+				    offsetof(struct dirent, d_name) -
+				    dstdp.d_namlen);
+
+				if (dstdp.d_reclen > uio->uio_resid ||
+				    (cookies != NULL &&
+				    ncookies >= maxcookies)) {
+					/*
+					 * No room left for this entry. The
+					 * directory is not finished, so
+					 * leave without touching a_eofflag;
+					 * next_pos still designates this
+					 * entry for the next call.
+					 */
+					goto end;
+				}
+
+				reiserfs_log(LOG_DEBUG, " copying to user land\n");
+				error = uiomove(&dstdp, dstdp.d_reclen, uio);
+				if (error)
+					goto end;
+				if (cookies != NULL) {
+					cookies[ncookies] = d_off;
+					ncookies++;
+				}
+
+				next_pos = deh_offset(deh) + 1;
+			}
+			reiserfs_log(LOG_DEBUG, "...done\n");
+		}
+
+		reiserfs_log(LOG_DEBUG, "checking item num (%d == %d ?)\n",
+		    item_num, B_NR_ITEMS(bp) - 1);
+		if (item_num != B_NR_ITEMS(bp) - 1) {
+			/* End of directory has been reached */
+			reiserfs_log(LOG_DEBUG, "end reached\n");
+			if (ap->a_eofflag)
+				*ap->a_eofflag = 1;
+			goto end;
+		}
+
+		/*
+		 * Item we went through is last item of node. Using right
+		 * delimiting key check is it directory end
+		 */
+		reiserfs_log(LOG_DEBUG, "get right key\n");
+		rkey = get_rkey(&path_to_entry, ip->i_reiserfs);
+		reiserfs_log(LOG_DEBUG, "right key = (objectid=%d, dirid=%d)\n",
+		    rkey->k_objectid, rkey->k_dir_id);
+
+		reiserfs_log(LOG_DEBUG, "compare it to MIN_KEY\n");
+		reiserfs_log(LOG_DEBUG, "MIN KEY = (objectid=%d, dirid=%d)\n",
+		    MIN_KEY.k_objectid, MIN_KEY.k_dir_id);
+		if (comp_le_keys(rkey, &MIN_KEY) == 0) {
+			/* Set pos_key to key, that is the smallest and greater
+			 * that key of the last entry in the item */
+			reiserfs_log(LOG_DEBUG, "continuing on the right\n");
+			set_cpu_key_k_offset(&pos_key, next_pos);
+			continue;
+		}
+
+		reiserfs_log(LOG_DEBUG, "compare it to pos_key\n");
+		reiserfs_log(LOG_DEBUG, "pos key = (objectid=%d, dirid=%d)\n",
+		    pos_key.on_disk_key.k_objectid,
+		    pos_key.on_disk_key.k_dir_id);
+		if (COMP_SHORT_KEYS(rkey, &pos_key)) {
+			/* End of directory has been reached */
+			reiserfs_log(LOG_DEBUG, "end reached (right)\n");
+			if (ap->a_eofflag)
+				*ap->a_eofflag = 1;
+			goto end;
+		}
+
+		/* Directory continues in the right neighboring block */
+		reiserfs_log(LOG_DEBUG, "continuing with a new offset\n");
+		set_cpu_key_k_offset(&pos_key,
+		    le_key_k_offset(KEY_FORMAT_3_5, rkey));
+		reiserfs_log(LOG_DEBUG,
+		    "new pos key = (objectid=%d, dirid=%d)\n",
+		    pos_key.on_disk_key.k_objectid,
+		    pos_key.on_disk_key.k_dir_id);
+	}
+
+end:
+	uio->uio_offset = next_pos;
+	pathrelse(&path_to_entry);
+	reiserfs_check_path(&path_to_entry);
+out:
+	if (error && cookies != NULL) {
+		free(cookies, M_REISERFSCOOKIES);
+	} else if (ap->a_ncookies != NULL && ap->a_cookies != NULL) {
+		*ap->a_ncookies = ncookies;
+		*ap->a_cookies = cookies;
+	}
+	return (error);
+}
+
+/* -------------------------------------------------------------------
+ * Functions from linux/fs/reiserfs/namei.c
+ * -------------------------------------------------------------------*/
+
+
+/*
+ * Directory item contains array of entry headers. This performs binary
+ * search through that array.
+ *
+ * Returns NAME_FOUND with de->de_entry_num set to the entry whose
+ * offset equals off, or NAME_NOT_FOUND with de->de_entry_num set to the
+ * position where such an entry would have to be inserted.
+ */
+static int
+bin_search_in_dir_item(struct reiserfs_dir_entry *de, off_t off)
+{
+	struct item_head *ih = de->de_ih;
+	struct reiserfs_de_head *deh = de->de_deh;
+	int rbound, lbound, j;
+
+	lbound = 0;
+	rbound = I_ENTRY_COUNT(ih) - 1;
+
+	for (j = (rbound + lbound) / 2; lbound <= rbound;
+	    j = (rbound + lbound) / 2) {
+		if (off < deh_offset(deh + j)) {
+			rbound = j - 1;
+			continue;
+		}
+		if (off > deh_offset(deh + j)) {
+			lbound = j + 1;
+			continue;
+		}
+
+		/* This is not name found, but matched third key component */
+		de->de_entry_num = j;
+		return (NAME_FOUND);
+	}
+
+	de->de_entry_num = lbound;
+	return (NAME_NOT_FOUND);
+}
+
+/*
+ * Make de describe the item the path ends at: its buffer, its item
+ * head, the start of its array of entry headers and its position in
+ * the node.
+ */
+static inline void
+set_de_item_location(struct reiserfs_dir_entry *de, struct path *path)
+{
+
+	de->de_bp = get_last_bp(path);
+	de->de_ih = get_ih(path);
+	de->de_deh = B_I_DEH(de->de_bp, de->de_ih);
+	de->de_item_num = PATH_LAST_POSITION(path);
+}
+
+/*
+ * de_bh, de_ih, de_deh (points to first element of array), de_item_num
+ * is set
+ */
+inline void
+set_de_name_and_namelen(struct reiserfs_dir_entry *de)
+{
+	struct reiserfs_de_head *deh = de->de_deh + de->de_entry_num;
+
+	if (de->de_entry_num >= ih_entry_count(de->de_ih)) {
+		reiserfs_log(LOG_DEBUG, "BUG\n");
+		return;
+	}
+
+	de->de_entrylen = entry_length(de->de_bp, de->de_ih, de->de_entry_num);
+	de->de_namelen = de->de_entrylen - (de_with_sd(deh) ?
SD_SIZE : 0);
+	de->de_name = B_I_PITEM(de->de_bp, de->de_ih) + deh_location(deh);
+	if (de->de_name[de->de_namelen - 1] == 0)
+		de->de_namelen = strlen(de->de_name);
+}
+
+/*
+ * Record in de which object the current entry points to, i.e. copy
+ * the dir_id and objectid found in its entry header. An entry number
+ * past the end of the item is logged and ignored.
+ */
+static inline void
+set_de_object_key(struct reiserfs_dir_entry *de)
+{
+	struct reiserfs_de_head *entry;
+
+	if (de->de_entry_num >= ih_entry_count(de->de_ih)) {
+		reiserfs_log(LOG_DEBUG, "BUG\n");
+		return;
+	}
+
+	entry = &de->de_deh[de->de_entry_num];
+	de->de_dir_id = deh_dir_id(entry);
+	de->de_objectid = deh_objectid(entry);
+}
+
+/*
+ * Build the key of the current entry in de->de_entry_key: the short
+ * key comes from the item head (converted from little endian), the
+ * offset from the entry header and the type is TYPE_DIRENTRY. An entry
+ * number past the end of the item is logged and ignored.
+ */
+static inline void
+store_de_entry_key(struct reiserfs_dir_entry *de)
+{
+	struct reiserfs_de_head *entry;
+
+	if (de->de_entry_num >= ih_entry_count(de->de_ih)) {
+		reiserfs_log(LOG_DEBUG, "BUG\n");
+		return;
+	}
+
+	entry = &de->de_deh[de->de_entry_num];
+
+	/* Store key of the found entry */
+	de->de_entry_key.version = KEY_FORMAT_3_5;
+	de->de_entry_key.on_disk_key.k_dir_id =
+	    le32toh(de->de_ih->ih_key.k_dir_id);
+	de->de_entry_key.on_disk_key.k_objectid =
+	    le32toh(de->de_ih->ih_key.k_objectid);
+	set_cpu_key_k_offset(&(de->de_entry_key), deh_offset(entry));
+	set_cpu_key_k_type(&(de->de_entry_key), TYPE_DIRENTRY);
+}
+
+/*
+ * We assign a key to each directory item, and place multiple entries in
+ * a single directory item. A directory item has a key equal to the key
+ * of the first directory entry in it.
+ *
+ * This function first calls search_by_key, then, if item whose first
+ * entry matches is not found it looks for the entry inside directory
+ * item found by search_by_key.
Fills the path to the entry, and to the
+ * entry position in the item
+ */
+int
+search_by_entry_key(struct reiserfs_sb_info *sbi,
+    const struct cpu_key *key, struct path *path,
+    struct reiserfs_dir_entry *de)
+{
+	int retval;
+
+	reiserfs_log(LOG_DEBUG, "searching in (objectid=%d,dirid=%d)\n",
+	    key->on_disk_key.k_objectid, key->on_disk_key.k_dir_id);
+	retval = search_item(sbi, key, path);
+	switch (retval) {
+	case ITEM_NOT_FOUND:
+		if (!PATH_LAST_POSITION(path)) {
+			reiserfs_log(LOG_DEBUG,
+			    "search_by_key returned item position == 0");
+			pathrelse(path);
+			return (IO_ERROR);
+		}
+		/* Step back to the item that precedes the reported position */
+		PATH_LAST_POSITION(path)--;
+		reiserfs_log(LOG_DEBUG, "search_by_key did not found it\n");
+		break;
+	case ITEM_FOUND:
+		reiserfs_log(LOG_DEBUG, "search_by_key found it\n");
+		break;
+	case IO_ERROR:
+		/* Note: the path is returned as is in this case */
+		return (retval);
+	default:
+		pathrelse(path);
+		reiserfs_log(LOG_DEBUG, "no path to here");
+		return (IO_ERROR);
+	}
+
+	reiserfs_log(LOG_DEBUG, "set item location\n");
+	set_de_item_location(de, path);
+
+	/*
+	 * Binary search in directory item by third component of the
+	 * key. Sets de->de_entry_num of de
+	 */
+	reiserfs_log(LOG_DEBUG, "bin_search_in_dir_item\n");
+	retval = bin_search_in_dir_item(de, cpu_key_k_offset(key));
+	path->pos_in_item = de->de_entry_num;
+	if (retval != NAME_NOT_FOUND) {
+		/*
+		 * Ugly, but rename needs de_bp, de_deh, de_name, de_namelen,
+		 * de_objectid set
+		 */
+		set_de_name_and_namelen(de);
+		set_de_object_key(de);
+		reiserfs_log(LOG_DEBUG, "set (objectid=%d,dirid=%d)\n",
+		    de->de_objectid, de->de_dir_id);
+	}
+
+	return (retval);
+}
+
+/*
+ * Compute the offset (third key component) under which `name' has to
+ * be searched: DOT_OFFSET for an empty name and for ".", DOT_DOT_OFFSET
+ * for "..", otherwise the hash value of the name (128 when that value
+ * is 0) plus MAX_GENERATION_NUMBER.
+ */
+static uint32_t
+get_third_component(struct reiserfs_sb_info *sbi, const char *name, int len)
+{
+	uint32_t res;
+
+	if (!len || (len == 1 && name[0] == '.'))
+		return (DOT_OFFSET);
+
+	if (len == 2 && name[0] == '.' && name[1] == '.')
+		return (DOT_DOT_OFFSET);
+
+	res = REISERFS_SB(sbi)->s_hash_function(name, len);
+
+	/* Take bits from 7-th to 30-th including both bounds */
+	res = GET_HASH_VALUE(res);
+	if (res == 0)
+		/*
+		 * Needed to have no names before "." and ".." those have hash
+		 * value == 0 and generation counters 1 and 2 accordingly
+		 */
+		res = 128;
+
+	return (res + MAX_GENERATION_NUMBER);
+}
+
+/*
+ * Compare the name currently described by de with name/namelen.
+ * Returns NAME_NOT_FOUND unless both length and bytes are equal; on a
+ * match returns NAME_FOUND or NAME_FOUND_INVISIBLE depending on what
+ * de_visible() says about the entry.
+ */
+static int
+reiserfs_match(struct reiserfs_dir_entry *de, const char *name, int namelen)
+{
+	int retval = NAME_NOT_FOUND;
+
+	if ((namelen == de->de_namelen) &&
+	    !memcmp(de->de_name, name, de->de_namelen))
+		retval = (de_visible(de->de_deh + de->de_entry_num) ?
+		    NAME_FOUND : NAME_FOUND_INVISIBLE);
+
+	return (retval);
+}
+
+/*
+ * de's de_bh, de_ih, de_deh, de_item_num, de_entry_num are set already
+ * Used when hash collisions exist
+ *
+ * Walks the entries of the item backwards, starting at de_entry_num,
+ * for as long as their hash value equals the one of key. Returns
+ * NAME_FOUND or NAME_FOUND_INVISIBLE when an entry carries the name,
+ * NAME_NOT_FOUND when the name cannot exist, and GOTO_PREVIOUS_ITEM
+ * when the search has to go on in the item on the left.
+ */
+static int
+linear_search_in_dir_item(struct cpu_key *key, struct reiserfs_dir_entry *de,
+    const char *name, int namelen)
+{
+	int i;
+	int retval;
+	struct reiserfs_de_head * deh = de->de_deh;
+
+	i = de->de_entry_num;
+
+	/*
+	 * Start one entry earlier when de_entry_num is past the last
+	 * entry or designates an entry with another hash value.
+	 */
+	if (i == I_ENTRY_COUNT(de->de_ih) ||
+	    GET_HASH_VALUE(deh_offset(deh + i)) !=
+	    GET_HASH_VALUE(cpu_key_k_offset(key))) {
+		i--;
+	}
+
+	/*RFALSE( de->de_deh != B_I_DEH (de->de_bh, de->de_ih),
+	    "vs-7010: array of entry headers not found");*/
+
+	deh += i;
+
+	for (; i >= 0; i--, deh--) {
+		if (GET_HASH_VALUE(deh_offset(deh)) !=
+		    GET_HASH_VALUE(cpu_key_k_offset(key))) {
+			/*
+			 * Hash value does not match, no need to check
+			 * whole name
+			 */
+			reiserfs_log(LOG_DEBUG, "name `%s' not found\n", name);
+			return (NAME_NOT_FOUND);
+		}
+
+		/* Mark that this generation number is used */
+		if (de->de_gen_number_bit_string)
+			set_bit(GET_GENERATION_NUMBER(deh_offset(deh)),
+			    (unsigned long *)de->de_gen_number_bit_string);
+
+		/* Calculate pointer to name and namelen */
+		de->de_entry_num = i;
+		set_de_name_and_namelen(de);
+
+		if ((retval = reiserfs_match(de, name, namelen)) !=
+		    NAME_NOT_FOUND) {
+			/*
+			 * de's de_name, de_namelen, de_recordlen are set.
+			 * Fill the rest:
+			 */
+			/* key of pointed object */
+			set_de_object_key(de);
+			store_de_entry_key(de);
+
+			/* retval can be NAME_FOUND or NAME_FOUND_INVISIBLE */
+			reiserfs_log(LOG_DEBUG,
+			    "reiserfs_match answered `%d'\n",
+			    retval);
+			return (retval);
+		}
+	}
+
+	if (GET_GENERATION_NUMBER(le_ih_k_offset(de->de_ih)) == 0)
+		/*
+		 * We have reached left most entry in the node. In common
+		 * we have to go to the left neighbor, but if generation
+		 * counter is 0 already, we know for sure, that there is
+		 * no name with the same hash value
+		 */
+		/* FIXME: this work correctly only because hash value can
+		 * not be 0. Btw, in case of Yura's hash it is probably
+		 * possible, so, this is a bug
+		 */
+		return (NAME_NOT_FOUND);
+
+	/*RFALSE(de->de_item_num,
+	    "vs-7015: two diritems of the same directory in one node?");*/
+
+	return (GOTO_PREVIOUS_ITEM);
+}
+
+/*
+ * Search directory dp for name/namelen and describe the entry in de.
+ *
+ * May return NAME_FOUND, NAME_FOUND_INVISIBLE, NAME_NOT_FOUND, or
+ * IO_ERROR when search_by_entry_key() fails. The path is left held
+ * when an entry search result is returned; the caller releases it.
+ */
+static int
+reiserfs_find_entry(struct reiserfs_node *dp, const char *name, int namelen,
+    struct path * path_to_entry, struct reiserfs_dir_entry *de)
+{
+	struct cpu_key key_to_search;
+	int retval;
+
+	if (namelen > REISERFS_MAX_NAME(dp->i_reiserfs->s_blocksize))
+		return NAME_NOT_FOUND;
+
+	/* We will search for this key in the tree */
+	make_cpu_key(&key_to_search, dp,
+	    get_third_component(dp->i_reiserfs, name, namelen),
+	    TYPE_DIRENTRY, 3);
+
+	while (1) {
+		reiserfs_log(LOG_DEBUG, "search by entry key\n");
+		retval = search_by_entry_key(dp->i_reiserfs, &key_to_search,
+		    path_to_entry, de);
+		if (retval == IO_ERROR) {
+			reiserfs_log(LOG_DEBUG, "IO error in %s\n",
+			    __FUNCTION__);
+			return IO_ERROR;
+		}
+
+		/* Compare names for all entries having given hash value */
+		reiserfs_log(LOG_DEBUG, "linear search for `%s'\n", name);
+		retval = linear_search_in_dir_item(&key_to_search, de,
+		    name, namelen);
+		if (retval != GOTO_PREVIOUS_ITEM) {
+			/*
+			 * There is no need to scan directory anymore.
+			 * Given entry found or does not exist
+			 */
+			reiserfs_log(LOG_DEBUG, "linear search returned "
+			    "(objectid=%d,dirid=%d)\n",
+			    de->de_objectid, de->de_dir_id);
+			path_to_entry->pos_in_item = de->de_entry_num;
+			return retval;
+		}
+
+		/*
+		 * There is left neighboring item of this directory and
+		 * given entry can be there
+		 */
+		set_cpu_key_k_offset(&key_to_search,
+		    le_ih_k_offset(de->de_ih) - 1);
+		pathrelse(path_to_entry);
+	} /* while (1) */
+}
diff --git a/sys/gnu/reiserfs/reiserfs_prints.c b/sys/gnu/reiserfs/reiserfs_prints.c
new file mode 100644
index 000000000000..54fe28dffb95
--- /dev/null
+++ b/sys/gnu/reiserfs/reiserfs_prints.c
@@ -0,0 +1,307 @@
+/*-
+ * Copyright 2000 Hans Reiser
+ * See README for licensing and copyright details
+ *
+ * Ported to FreeBSD by Jean-Sébastien Pédron
+ *
+ * $FreeBSD$
+ */
+
+#include
+
+#if 0
+static char error_buf[1024];
+static char fmt_buf[1024];
+static char off_buf[80];
+
+static char *
+reiserfs_cpu_offset(struct cpu_key *key)
+{
+
+	if (cpu_key_k_type(key) == TYPE_DIRENTRY)
+		sprintf(off_buf, "%Lu(%Lu)",
+		    (unsigned long long)GET_HASH_VALUE(cpu_key_k_offset(key)),
+		    (unsigned long long)GET_GENERATION_NUMBER(
+		    cpu_key_k_offset(key)));
+	else
+		sprintf(off_buf, "0x%Lx",
+		    (unsigned long long)cpu_key_k_offset(key));
+
+	return (off_buf);
+}
+
+static char *
+le_offset(struct key *key)
+{
+	int version;
+
+	version = le_key_version(key);
+	if (le_key_k_type(version, key) == TYPE_DIRENTRY)
+		sprintf(off_buf, "%Lu(%Lu)",
+		    (unsigned long long)GET_HASH_VALUE(
+		    le_key_k_offset(version, key)),
+		    (unsigned long long)GET_GENERATION_NUMBER(
+		    le_key_k_offset(version, key)));
+	else
+		sprintf(off_buf, "0x%Lx",
+		    (unsigned long long)le_key_k_offset(version, key));
+
+	return (off_buf);
+}
+
+static char *
+cpu_type(struct cpu_key *key)
+{
+
+	if (cpu_key_k_type(key) == TYPE_STAT_DATA)
+		return ("SD");
+	if (cpu_key_k_type(key) == TYPE_DIRENTRY)
+		return ("DIR");
+	if (cpu_key_k_type(key) ==
TYPE_DIRECT) + return ("DIRECT"); + if (cpu_key_k_type(key) == TYPE_INDIRECT) + return ("IND"); + + return ("UNKNOWN"); +} + +static char * +le_type(struct key *key) +{ + int version; + + version = le_key_version(key); + + if (le_key_k_type(version, key) == TYPE_STAT_DATA) + return ("SD"); + if (le_key_k_type(version, key) == TYPE_DIRENTRY) + return ("DIR"); + if (le_key_k_type(version, key) == TYPE_DIRECT) + return ("DIRECT"); + if (le_key_k_type(version, key) == TYPE_INDIRECT) + return ("IND"); + + return ("UNKNOWN"); +} + +/* %k */ +static void +sprintf_le_key(char *buf, struct key *key) +{ + + if (key) + sprintf(buf, "[%d %d %s %s]", le32toh(key->k_dir_id), + le32toh(key->k_objectid), le_offset(key), le_type(key)); + else + sprintf(buf, "[NULL]"); +} + +/* %K */ +static void +sprintf_cpu_key(char *buf, struct cpu_key *key) +{ + + if (key) + sprintf(buf, "[%d %d %s %s]", key->on_disk_key.k_dir_id, + key->on_disk_key.k_objectid, reiserfs_cpu_offset (key), + cpu_type (key)); + else + sprintf(buf, "[NULL]"); +} + +static void sprintf_de_head(char *buf, struct reiserfs_de_head *deh) +{ + + if (deh) + sprintf(buf, + "[offset=%d dir_id=%d objectid=%d location=%d state=%04x]", + deh_offset(deh), deh_dir_id(deh), + deh_objectid(deh), deh_location(deh), deh_state(deh)); + else + sprintf(buf, "[NULL]"); +} + +static void +sprintf_item_head(char *buf, struct item_head *ih) +{ + + if (ih) { + strcpy(buf, (ih_version(ih) == KEY_FORMAT_3_6) ? + "*3.6* " : "*3.5*"); + sprintf_le_key(buf + strlen(buf), &(ih->ih_key)); + sprintf(buf + strlen(buf), ", item_len %d, item_location %d, " + "free_space(entry_count) %d", + ih_item_len(ih), ih_location(ih), ih_free_space(ih)); + } else + sprintf(buf, "[NULL]"); +} + +static void +sprintf_direntry(char *buf, struct reiserfs_dir_entry *de) +{ + char name[20]; + + memcpy(name, de->de_name, de->de_namelen > 19 ? 19 : de->de_namelen); + name [de->de_namelen > 19 ? 
19 : de->de_namelen] = 0; + sprintf(buf, "\"%s\" ==> [%d %d]", + name, de->de_dir_id, de->de_objectid); +} + +static void +sprintf_block_head(char *buf, struct buf *bp) +{ + + sprintf(buf, "level=%d, nr_items=%d, free_space=%d rdkey ", + B_LEVEL(bp), B_NR_ITEMS(bp), B_FREE_SPACE(bp)); +} + +static void +sprintf_disk_child(char *buf, struct disk_child *dc) +{ + + sprintf (buf, "[dc_number=%d, dc_size=%u]", + dc_block_number(dc), dc_size(dc)); +} + +static char * +is_there_reiserfs_struct (char *fmt, int *what, int *skip) +{ + char *k; + + k = fmt; + *skip = 0; + + while ((k = strchr(k, '%')) != NULL) { + if (k[1] == 'k' || k[1] == 'K' || k[1] == 'h' || k[1] == 't' || + k[1] == 'z' || k[1] == 'b' || k[1] == 'y' || k[1] == 'a' ) { + *what = k[1]; + break; + } + (*skip)++; + k++; + } + + return (k); +} + +static void +prepare_error_buf(const char *fmt, va_list args) +{ + char *fmt1, *k, *p; + int i, j, what, skip; + + fmt1 = fmt_buf; + p = error_buf; + strcpy (fmt1, fmt); + + while ((k = is_there_reiserfs_struct(fmt1, &what, &skip)) != NULL) { + *k = 0; + + p += vsprintf (p, fmt1, args); + + for (i = 0; i < skip; i ++) + j = va_arg(args, int); + + switch (what) { + case 'k': + sprintf_le_key(p, va_arg(args, struct key *)); + break; + case 'K': + sprintf_cpu_key(p, va_arg(args, struct cpu_key *)); + break; + case 'h': + sprintf_item_head(p, va_arg(args, struct item_head *)); + break; + case 't': + sprintf_direntry(p, + va_arg(args, struct reiserfs_dir_entry *)); + break; + case 'y': + sprintf_disk_child(p, + va_arg(args, struct disk_child *)); + break; + case 'z': + sprintf_block_head(p, + va_arg(args, struct buffer_head *)); + break; + case 'a': + sprintf_de_head(p, + va_arg(args, struct reiserfs_de_head *)); + break; + } + + p += strlen(p); + fmt1 = k + 2; + } + + vsprintf(p, fmt1, args); +} + +/* + * In addition to usual conversion specifiers this accepts reiserfs + * specific conversion specifiers: + * %k to print little endian key, + * %K to print cpu key, + * %h 
to print item_head,
+ * %t to print directory entry,
+ * %z to print block head (arg must be struct buf *)
+ */
+
+#define do_reiserfs_warning(fmt) \
+{ \
+	va_list args; \
+	va_start(args, fmt); \
+	prepare_error_buf(fmt, args); \
+	va_end(args); \
+}
+
+void
+__reiserfs_log(int level, const char * fmt, ...)
+{
+
+	do_reiserfs_warning(fmt);
+	log(level, "ReiserFS/%s: %s\n", __FUNCTION__, error_buf);
+}
+
+#endif
+
+/*
+ * Return the printable name of the hash identified by code: "rupasov"
+ * for YURA_HASH, "tea" for TEA_HASH, "r5" for R5_HASH and "unknown" for
+ * any other value. The returned string is a literal and must not be
+ * modified or freed.
+ */
+char *
+reiserfs_hashname(int code)
+{
+
+	if (code == YURA_HASH)
+		return ("rupasov");
+	if (code == TEA_HASH)
+		return ("tea");
+	if (code == R5_HASH)
+		return ("r5");
+
+	return ("unknown");
+}
+
+/*
+ * Log a hexadecimal and ASCII dump of the len bytes at buf, 16 bytes
+ * per row: the row offset, eight groups of two bytes, then the same
+ * bytes as characters ('.' replaces the non printable ones).
+ *
+ * Only bytes below len are read: a trailing single byte (odd len) is
+ * printed alone and missing groups are padded with blanks so that the
+ * ASCII column stays aligned.
+ */
+void
+reiserfs_dump_buffer(caddr_t buf, off_t len)
+{
+	off_t i;	/* Same type as len, so it cannot overflow first */
+	int j;
+
+	log(LOG_DEBUG, "reiserfs: dumping a buffer of %jd bytes\n",
+	    (intmax_t)len);
+	for (i = 0; i < len; i += 16) {
+		log(LOG_DEBUG, "%08jx: ", (uintmax_t)i);
+		for (j = 0; j < 16; j += 2) {
+			if (i + j >= len)
+				/* No byte left: pad a whole group */
+				log(LOG_DEBUG, "     ");
+			else if (i + j + 1 >= len)
+				/* Last byte of an odd-sized buffer */
+				log(LOG_DEBUG, "%02x   ",
+				    buf[i + j] & 0xff);
+			else
+				log(LOG_DEBUG, "%02x%02x ",
+				    buf[i + j] & 0xff,
+				    buf[i + j + 1] & 0xff);
+		}
+		for (j = 0; j < 16; ++j) {
+			if (i + j >= len)
+				break;
+			log(LOG_DEBUG, "%c",
+			    isprint((unsigned char)buf[i + j]) ?
+			    buf[i + j] : '.');
+		}
+		log(LOG_DEBUG, "\n");
+	}
+}
diff --git a/sys/gnu/reiserfs/reiserfs_stree.c b/sys/gnu/reiserfs/reiserfs_stree.c
new file mode 100644
index 000000000000..cf89f1226dfa
--- /dev/null
+++ b/sys/gnu/reiserfs/reiserfs_stree.c
@@ -0,0 +1,760 @@
+/*-
+ * Copyright 2000 Hans Reiser
+ * See README for licensing and copyright details
+ *
+ * Ported to FreeBSD by Jean-Sébastien Pédron
+ *
+ * $FreeBSD$
+ */
+
+#include
+
+/* Minimal possible key. It is never in the tree. */
+const struct key MIN_KEY = {
+	0,
+	0,
+	{ {0, 0}, }
+};
+
+/* Maximal possible key. It is never in the tree. */
+const struct key MAX_KEY = {
+	0xffffffff,
+	0xffffffff,
+	{ {0xffffffff, 0xffffffff }, }
+};
+
+/* Does the buffer contain a disk block which is in the tree.
*/
+inline int
+B_IS_IN_TREE(const struct buf *p_s_bp)
+{
+
+	return (FREE_LEVEL != B_LEVEL(p_s_bp));
+}
+
+/* Copy an item head as is, i.e. it stays in little-endian form */
+inline void
+copy_item_head(struct item_head *p_v_to, const struct item_head *p_v_from)
+{
+
+	memcpy(p_v_to, p_v_from, IH_SIZE);
+}
+
+/*
+ * Compare the short part (the first REISERFS_SHORT_KEY_LEN 32-bit
+ * words) of an on-disk key with the one of a cpu key. le_key is stored
+ * in little-endian form, cpu_key in host order. Keys of items of the
+ * same object compare equal.
+ * Returns: -1 if le_key < cpu_key, 0 if they are equal, 1 otherwise
+ */
+/*inline*/ int
+comp_short_keys(const struct key *le_key, const struct cpu_key *cpu_key)
+{
+	const uint32_t *disk_words = (const uint32_t *)le_key;
+	const uint32_t *cpu_words =
+	    (const uint32_t *)&cpu_key->on_disk_key;
+	int i;
+
+	for (i = 0; i < REISERFS_SHORT_KEY_LEN; i++) {
+		uint32_t disk_word = le32toh(disk_words[i]);
+
+		if (disk_word != cpu_words[i])
+			return (disk_word < cpu_words[i] ? -1 : 1);
+	}
+
+	return (0);
+}
+
+/*
+ * Compare an on-disk (little-endian) key with a cpu key: first the
+ * short keys, then the offsets and, unless the cpu key is only 3
+ * components long, the types.
+ * Returns: -1 if le_key < cpu_key, 0 if they are equal, 1 otherwise
+ */
+/*inline*/ int
+comp_keys(const struct key *le_key, const struct cpu_key *cpu_key)
+{
+	int short_cmp;
+	int version;
+
+	short_cmp = comp_short_keys(le_key, cpu_key);
+	if (short_cmp != 0)
+		return (short_cmp);
+
+	version = le_key_version(le_key);
+	if (le_key_k_offset(version, le_key) != cpu_key_k_offset(cpu_key))
+		return (le_key_k_offset(version, le_key) <
+		    cpu_key_k_offset(cpu_key) ? -1 : 1);
+
+	if (cpu_key->key_length == 3)
+		return (0);
+
+	/* This part is needed only when tail conversion is in progress */
+	if (le_key_k_type(version, le_key) != cpu_key_k_type(cpu_key))
+		return (le_key_k_type(version, le_key) <
+		    cpu_key_k_type(cpu_key) ? -1 : 1);
+
+	return (0);
+}
+
+/* Release all buffers in the path.
*/
+void
+pathrelse(struct path *p_s_search_path)
+{
+	struct buf *bp;
+	int n_path_offset = p_s_search_path->path_length;
+
+	/*
+	 * Walk from the last element back to the first one; both the data
+	 * area and the buffer itself are freed to M_REISERFSPATH.
+	 */
+	while (n_path_offset > ILLEGAL_PATH_ELEMENT_OFFSET) {
+		bp = PATH_OFFSET_PBUFFER(p_s_search_path, n_path_offset--);
+		free(bp->b_data, M_REISERFSPATH);
+		free(bp, M_REISERFSPATH);
+	}
+
+	/* Mark the path empty, which makes a second call a no-op */
+	p_s_search_path->path_length = ILLEGAL_PATH_ELEMENT_OFFSET;
+}
+
+/*
+ * This does not say which one is bigger, it only returns a nonzero
+ * value (the result of memcmp) if keys are not equal, 0 otherwise
+ */
+inline int
+comp_le_keys(const struct key *k1, const struct key *k2)
+{
+
+	return (memcmp(k1, k2, sizeof(struct key)));
+}
+
+/*
+ * Binary search toolkit function. Search for an item in the array by
+ * the item key.
+ * Returns: ITEM_FOUND if found, ITEM_NOT_FOUND if not found;
+ *          *p_n_pos = number of the searched element if found, else the
+ *          number of the first element that is larger than p_v_key.
+ */
+/*
+ * For those not familiar with binary search: n_lbound is the leftmost
+ * item that it could be, n_rbound the rightmost item that it could be.
+ * We examine the item halfway between n_lbound and n_rbound, and that
+ * tells us either that we can increase n_lbound, or decrease n_rbound,
+ * or that we have found it, or if n_lbound > n_rbound that there are
+ * no possible items, and we have not found it. With each examination we
+ * cut the number of possible items it could be by one more than half
+ * rounded down, or we find it.
+ */
+inline int
+bin_search(const void *p_v_key, /* Key to search for. */
+    const void *p_v_base,        /* First item in the array. */
+    int p_n_num,                 /* Number of items in the array. */
+    int p_n_width,               /* Item size in the array searched. Lest the
+				    reader be confused, note that this is crafted
+				    as a general function, and when it is applied
+				    specifically to the array of item headers in
+				    a node, p_n_width is actually the item header
+				    size not the item size. */
+    int *p_n_pos)                /* Number of the searched for element. */
+{
+	int n_rbound, n_lbound, n_j;
+
+	for (n_j = ((n_rbound = p_n_num - 1) + (n_lbound = 0)) / 2;
+	    n_lbound <= n_rbound; n_j = (n_rbound + n_lbound) / 2) {
+		/* Each element is expected to start with an on-disk key */
+		switch (COMP_KEYS((const struct key *)
+		    ((const char *)p_v_base + n_j * p_n_width),
+		    (const struct cpu_key *)p_v_key)) {
+		case -1:
+			n_lbound = n_j + 1;
+			continue;
+		case 1:
+			n_rbound = n_j - 1;
+			continue;
+		case 0:
+			*p_n_pos = n_j;
+			return (ITEM_FOUND); /* Key found in the array. */
+		}
+	}
+
+	/*
+	 * bin_search did not find given key, it returns position of key,
+	 * that is minimal and greater than the given one.
+	 */
+	*p_n_pos = n_lbound;
+	return (ITEM_NOT_FOUND);
+}
+
+/*
+ * Get delimiting key of the buffer by looking for it in the buffers in
+ * the path, starting from the bottom of the path, and going upwards. We
+ * must check the path's validity at each step. If the key is not in the
+ * path, there is no delimiting key in the tree (buffer is first or last
+ * buffer in tree), and in this case we return a special key, either
+ * MIN_KEY or MAX_KEY.
+ *
+ * For the left key, MAX_KEY is what comes back whenever one of the
+ * validity checks fails and MIN_KEY when the path starts at the root
+ * block and no parent holds a key on the left.
+ */
+inline const struct key *
+get_lkey(const struct path *p_s_chk_path,
+    const struct reiserfs_sb_info *p_s_sbi)
+{
+	struct buf *p_s_parent;
+	int n_position, n_path_offset = p_s_chk_path->path_length;
+
+	/* While not higher in path than first element. */
+	while (n_path_offset-- > FIRST_PATH_ELEMENT_OFFSET) {
+		/* Parent at the path is not in the tree now. */
+		if (!B_IS_IN_TREE(p_s_parent =
+		    PATH_OFFSET_PBUFFER(p_s_chk_path, n_path_offset)))
+			return (&MAX_KEY);
+
+		/* Check whether position in the parent is correct. */
+		if ((n_position = PATH_OFFSET_POSITION(p_s_chk_path,
+		    n_path_offset)) > B_NR_ITEMS(p_s_parent))
+			return (&MAX_KEY);
+
+		/*
+		 * Check whether parent at the path really points to
+		 * the child. b_blkno is divided by btodb(blocksize) to
+		 * compare it with the child number kept in the parent.
+		 */
+		if (B_N_CHILD_NUM(p_s_parent, n_position) !=
+		    (PATH_OFFSET_PBUFFER(p_s_chk_path,
+		    n_path_offset + 1)->b_blkno
+		    / btodb(p_s_sbi->s_blocksize)))
+			return (&MAX_KEY);
+
+		/*
+		 * Return delimiting key if position in the parent is not
+		 * equal to zero.
+		 */
+		if (n_position)
+			return (B_N_PDELIM_KEY(p_s_parent, n_position - 1));
+	}
+
+	/* Return MIN_KEY if we are in the root of the buffer tree. */
+	if ((PATH_OFFSET_PBUFFER(p_s_chk_path,
+	    FIRST_PATH_ELEMENT_OFFSET)->b_blkno
+	    / btodb(p_s_sbi->s_blocksize)) == SB_ROOT_BLOCK(p_s_sbi))
+		return (&MIN_KEY);
+
+	return (&MAX_KEY);
+}
+
+/*
+ * Get delimiting key of the buffer at the path and its right neighbor.
+ * Mirror image of get_lkey(): MIN_KEY is returned whenever one of the
+ * validity checks fails and MAX_KEY when the path starts at the root
+ * block and no parent holds a key on the right.
+ */
+inline const struct key *
+get_rkey(const struct path *p_s_chk_path,
+    const struct reiserfs_sb_info *p_s_sbi)
+{
+	struct buf *p_s_parent;
+	int n_position, n_path_offset = p_s_chk_path->path_length;
+
+	while (n_path_offset-- > FIRST_PATH_ELEMENT_OFFSET) {
+		/* Parent at the path is not in the tree now. */
+		if (!B_IS_IN_TREE(p_s_parent =
+		    PATH_OFFSET_PBUFFER(p_s_chk_path, n_path_offset)))
+			return (&MIN_KEY);
+
+		/* Check whether position in the parent is correct. */
+		if ((n_position = PATH_OFFSET_POSITION(p_s_chk_path,
+		    n_path_offset)) >
+		    B_NR_ITEMS(p_s_parent))
+			return (&MIN_KEY);
+
+		/*
+		 * Check whether parent at the path really points to the
+		 * child.
+		 */
+		if (B_N_CHILD_NUM(p_s_parent, n_position) !=
+		    (PATH_OFFSET_PBUFFER(p_s_chk_path,
+		    n_path_offset + 1)->b_blkno
+		    / btodb(p_s_sbi->s_blocksize)))
+			return (&MIN_KEY);
+
+		/*
+		 * Return delimiting key if position in the parent is not
+		 * the last one.
+		 */
+		if (n_position != B_NR_ITEMS(p_s_parent))
+			return (B_N_PDELIM_KEY(p_s_parent, n_position));
+	}
+
+	/* Return MAX_KEY if we are in the root of the buffer tree. */
+	if ((PATH_OFFSET_PBUFFER(p_s_chk_path,
+	    FIRST_PATH_ELEMENT_OFFSET)->b_blkno
+	    / btodb(p_s_sbi->s_blocksize)) == SB_ROOT_BLOCK(p_s_sbi))
+		return (&MAX_KEY);
+
+	return (&MIN_KEY);
+}
+
+/*
+ * Debugging aid: log a warning if the path still holds buffers, i.e.
+ * if it has not been passed to pathrelse(). Always returns 0.
+ */
+int
+reiserfs_check_path(struct path *p)
+{
+
+	if (p->path_length != ILLEGAL_PATH_ELEMENT_OFFSET)
+		reiserfs_log(LOG_WARNING, "path not properly relsed\n");
+	return (0);
+}
+
+/*
+ * Check whether a key is contained in the tree rooted from a buffer at
+ * a path. This works by looking at the left and right delimiting keys
+ * for the buffer in the last path_element in the path. These delimiting
+ * keys are stored at least one level above that buffer in the tree.
+ * If the buffer is the first or last node in the tree order then one
+ * of the delimiting keys may be absent, and in this case get_lkey and
+ * get_rkey return a special key which is MIN_KEY or MAX_KEY.
+ *
+ * Returns 1 when left key <= p_s_key < right key, 0 otherwise.
+ */
+static inline int
+key_in_buffer(
+    struct path *p_s_chk_path,        /* Path which should be checked. */
+    const struct cpu_key *p_s_key,    /* Key which should be checked. */
+    struct reiserfs_sb_info *p_s_sbi) /* Super block pointer. */
+{
+
+	if (COMP_KEYS(get_lkey(p_s_chk_path, p_s_sbi), p_s_key) == 1)
+		/* left delimiting key is bigger than the key we look for */
+		return (0);
+
+	if (COMP_KEYS(get_rkey(p_s_chk_path, p_s_sbi), p_s_key) != 1)
+		/* p_s_key must be less than right delimiting key */
+		return (0);
+
+	return (1);
+}
+
+#if 0
+/* XXX There does not seem to be a reference counter in struct buf */
+inline void
+decrement_bcount(struct buf *p_s_bp)
+{
+
+	if (p_s_bp) {
+		if (atomic_read(&(p_s_bp->b_count))) {
+			put_bh(p_s_bp);
+			return;
+		}
+	}
+}
+#endif
+
+/* Decrement b_count field of the all buffers in the path.
*/
+void
+decrement_counters_in_path(struct path *p_s_search_path)
+{
+
+	/* The whole job is delegated to pathrelse(); the rest is disabled. */
+	pathrelse(p_s_search_path);
+#if 0
+	int n_path_offset = p_s_search_path->path_length;
+
+	while (n_path_offset > ILLEGAL_PATH_ELEMENT_OFFSET) {
+		struct buf *bp;
+
+		bp = PATH_OFFSET_PBUFFER(p_s_search_path, n_path_offset--);
+		decrement_bcount(bp);
+	}
+
+	p_s_search_path->path_length = ILLEGAL_PATH_ELEMENT_OFFSET;
+#endif
+}
+
+/*
+ * Returns 1 if buf looks like a leaf node, 0 otherwise.
+ *
+ * buf:       raw block contents, starting with the block head.
+ * blocksize: size of the block in bytes.
+ * bp:        buffer the data came from (not used by the checks).
+ *
+ * The checks are, in order: the level stored in the block head, the
+ * number of items, the free space accounting, and then for every item
+ * head its type, location, length and contiguity with the previous
+ * item.  A warning is logged for the first check that fails.
+ */
+static int
+is_leaf(char *buf, int blocksize, struct buf *bp)
+{
+	struct item_head *ih;
+	struct block_head *blkh;
+	int used_space, prev_location, i, nr;
+
+	blkh = (struct block_head *)buf;
+	if (blkh_level(blkh) != DISK_LEAF_NODE_LEVEL) {
+		reiserfs_log(LOG_WARNING, "this should be caught earlier\n");
+		return (0);
+	}
+
+	nr = blkh_nr_item(blkh);
+	if (nr < 1 || nr >
+	    ((blocksize - BLKH_SIZE) / (IH_SIZE + MIN_ITEM_LEN))) {
+		/* Item number is too big or too small */
+		reiserfs_log(LOG_WARNING, "nr_item seems wrong\n");
+		return (0);
+	}
+
+	/* Use the last item head to work out how much space is in use. */
+	ih = (struct item_head *)(buf + BLKH_SIZE) + nr - 1;
+	used_space = BLKH_SIZE + IH_SIZE * nr + (blocksize - ih_location(ih));
+	if (used_space != blocksize - blkh_free_space(blkh)) {
+		/*
+		 * Free space does not match the calculated amount of
+		 * used space
+		 */
+		reiserfs_log(LOG_WARNING, "free space seems wrong\n");
+		return (0);
+	}
+
+	/* FIXME: it is_leaf will hit performance too much - we may have
+	 * return 1 here */
+
+	/* Check tables of item heads */
+	ih = (struct item_head *)(buf + BLKH_SIZE);
+	prev_location = blocksize;
+	for (i = 0; i < nr; i++, ih++) {
+		if (le_ih_k_type(ih) == TYPE_ANY) {
+			reiserfs_log(LOG_WARNING,
+			    "wrong item type for item\n");
+			return (0);
+		}
+		if (ih_location(ih) >= blocksize ||
+		    ih_location(ih) < IH_SIZE * nr) {
+			reiserfs_log(LOG_WARNING,
+			    "item location seems wrong\n");
+			return (0);
+		}
+		if (ih_item_len(ih) < 1 ||
+		    ih_item_len(ih) > MAX_ITEM_LEN(blocksize)) {
+			reiserfs_log(LOG_WARNING, "item length seems wrong\n");
+			return (0);
+		}
+		if (prev_location - ih_location(ih) !=
ih_item_len(ih)) {
+			reiserfs_log(LOG_WARNING,
+			    "item location seems wrong (second one)\n");
+			return (0);
+		}
+		prev_location = ih_location(ih);
+	}
+
+	/* One may imagine much more checks */
+	return 1;
+}
+
+/*
+ * Returns 1 if buf looks like an internal node, 0 otherwise.
+ *
+ * buf:       raw block contents, starting with the block head.
+ * blocksize: size of the block in bytes.
+ * bp:        buffer the data came from (not used by the checks).
+ *
+ * Three things are verified: the level stored in the block head, the
+ * number of keys, and that the free space recorded in the block head
+ * matches the space taken by nr keys and nr + 1 child pointers.
+ */
+static int
+is_internal(char *buf, int blocksize, struct buf *bp)
+{
+	int nr, used_space;
+	struct block_head *blkh;
+
+	blkh = (struct block_head *)buf;
+	nr = blkh_level(blkh);	/* nr temporarily holds the node level */
+	if (nr <= DISK_LEAF_NODE_LEVEL || nr > MAX_HEIGHT) {
+		/* This level is not possible for internal nodes */
+		reiserfs_log(LOG_WARNING, "this should be caught earlier\n");
+		return (0);
+	}
+
+	nr = blkh_nr_item(blkh);	/* from here on, nr is the key count */
+	if (nr > (blocksize - BLKH_SIZE - DC_SIZE) / (KEY_SIZE + DC_SIZE)) {
+		/*
+		 * For internal which is not root we might check min
+		 * number of keys
+		 */
+		reiserfs_log(LOG_WARNING, "number of key seems wrong\n");
+		return (0);
+	}
+
+	used_space = BLKH_SIZE + KEY_SIZE * nr + DC_SIZE * (nr + 1);
+	if (used_space != blocksize - blkh_free_space(blkh)) {
+		reiserfs_log(LOG_WARNING,
+		    "is_internal: free space seems wrong\n");
+		return (0);
+	}
+
+	/* One may imagine much more checks */
+	return (1);
+}
+
+/*
+ * Make sure that bp contains formatted node of reiserfs tree of
+ * 'level'-th level.
+ *
+ * Returns 0 (after logging a warning) when the level stored in the
+ * buffer differs from 'level'; otherwise returns the verdict of
+ * is_leaf() for DISK_LEAF_NODE_LEVEL or of is_internal() for any other
+ * level.  b_bcount is passed to them as the block size.
+ */
+static int
+is_tree_node(struct buf *bp, int level)
+{
+	if (B_LEVEL(bp) != level) {
+		reiserfs_log(LOG_WARNING,
+		    "node level (%d) doesn't match to the "
+		    "expected one (%d)\n", B_LEVEL (bp), level);
+		return (0);
+	}
+
+	if (level == DISK_LEAF_NODE_LEVEL)
+		return (is_leaf(bp->b_data, bp->b_bcount, bp));
+
+	return (is_internal(bp->b_data, bp->b_bcount, bp));
+}
+/*
+ * Search the tree for p_s_key, starting at the root block and going
+ * down one level per loop iteration until n_stop_level is reached.
+ * Every node read on the way is appended to p_s_search_path.
+ *
+ * Returns the result of bin_search() on the node at the stop level, or
+ * IO_ERROR when a block cannot be read, memory cannot be obtained, or
+ * a node does not look like a tree node of the expected level.
+ */
+int
+search_by_key(struct reiserfs_sb_info *p_s_sbi,
+    const struct cpu_key * p_s_key, /* Key to search. */
+    struct path * p_s_search_path, /* This structure was allocated and
+				       initialized by the calling function.
+				       It is filled up by this function. */
+    int n_stop_level) /* How far down the tree to search.
To
+			 stop at leaf level - set to
+			 DISK_LEAF_NODE_LEVEL */
+{
+	int error;	/* status returned by bread() */
+	int n_node_level, n_retval;
+	int n_block_number, expected_level, fs_gen;
+	struct path_element *p_s_last_element;
+	struct buf *p_s_bp, *tmp_bp;	/* private copy / buffer from bread() */
+
+	/*
+	 * As we add each node to a path we increase its count. This means that
+	 * we must be careful to release all nodes in a path before we either
+	 * discard the path struct or re-use the path struct, as we do here.
+	 */
+	decrement_counters_in_path(p_s_search_path);
+
+	/*
+	 * With each iteration of this loop we search through the items in the
+	 * current node, and calculate the next current node(next path element)
+	 * for the next iteration of this loop...
+	 */
+	n_block_number = SB_ROOT_BLOCK(p_s_sbi);
+	expected_level = -1;	/* -1: not yet derived from the tree height */
+
+	reiserfs_log(LOG_DEBUG, "root block: #%d\n", n_block_number);
+
+	while (1) {
+		/*
+		 * Prep path to have another element added to it: the path
+		 * length is incremented first and the element at the new
+		 * length becomes the "last element".
+		 */
+		reiserfs_log(LOG_DEBUG, "path element #%d\n",
+		    p_s_search_path->path_length);
+		p_s_last_element = PATH_OFFSET_PELEMENT(p_s_search_path,
+		    ++p_s_search_path->path_length);
+		/* Remember the generation to detect changes further down. */
+		fs_gen = get_generation(p_s_sbi);
+
+		/*
+		 * Read the next tree node, and set the last element in the
+		 * path to have a pointer to it.
+ */ + reiserfs_log(LOG_DEBUG, "reading block #%d\n", + n_block_number); + if ((error = bread(p_s_sbi->s_devvp, + n_block_number * btodb(p_s_sbi->s_blocksize), + p_s_sbi->s_blocksize, NOCRED, &tmp_bp)) != 0) { + reiserfs_log(LOG_DEBUG, "error reading block\n"); + p_s_search_path->path_length--; + pathrelse(p_s_search_path); + return (IO_ERROR); + } + reiserfs_log(LOG_DEBUG, "blkno = %ju, lblkno = %ju\n", + (intmax_t)tmp_bp->b_blkno, (intmax_t)tmp_bp->b_lblkno); + + /* + * As i didn't found a way to handle the lock correctly, + * i copy the data into a fake buffer + */ + reiserfs_log(LOG_DEBUG, "allocating p_s_bp\n"); + p_s_bp = malloc(sizeof *p_s_bp, M_REISERFSPATH, M_WAITOK); + if (!p_s_bp) { + reiserfs_log(LOG_DEBUG, "error allocating memory\n"); + p_s_search_path->path_length--; + pathrelse(p_s_search_path); + brelse(tmp_bp); + return (IO_ERROR); + } + reiserfs_log(LOG_DEBUG, "copying struct buf\n"); + bcopy(tmp_bp, p_s_bp, sizeof(struct buf)); + + reiserfs_log(LOG_DEBUG, "allocating p_s_bp->b_data\n"); + p_s_bp->b_data = malloc(p_s_sbi->s_blocksize, + M_REISERFSPATH, M_WAITOK); + if (!p_s_bp->b_data) { + reiserfs_log(LOG_DEBUG, "error allocating memory\n"); + p_s_search_path->path_length--; + pathrelse(p_s_search_path); + free(p_s_bp, M_REISERFSPATH); + brelse(tmp_bp); + return (IO_ERROR); + } + reiserfs_log(LOG_DEBUG, "copying buffer data\n"); + bcopy(tmp_bp->b_data, p_s_bp->b_data, p_s_sbi->s_blocksize); + brelse(tmp_bp); + tmp_bp = NULL; + + reiserfs_log(LOG_DEBUG, "...done\n"); + p_s_last_element->pe_buffer = p_s_bp; + + if (expected_level == -1) + expected_level = SB_TREE_HEIGHT(p_s_sbi); + expected_level--; + reiserfs_log(LOG_DEBUG, "expected level: %d (%d)\n", + expected_level, SB_TREE_HEIGHT(p_s_sbi)); + + /* XXX */ + /* + * It is possible that schedule occurred. We must check + * whether the key to search is still in the tree rooted + * from the current buffer. If not then repeat search + * from the root. 
+ */ + if (fs_changed(fs_gen, p_s_sbi) && + (!B_IS_IN_TREE(p_s_bp) || + B_LEVEL(p_s_bp) != expected_level || + !key_in_buffer(p_s_search_path, p_s_key, p_s_sbi))) { + reiserfs_log(LOG_DEBUG, + "the key isn't in the tree anymore\n"); + decrement_counters_in_path(p_s_search_path); + + /* + * Get the root block number so that we can repeat + * the search starting from the root. + */ + n_block_number = SB_ROOT_BLOCK(p_s_sbi); + expected_level = -1; + + /* Repeat search from the root */ + continue; + } + + /* + * Make sure, that the node contents look like a node of + * certain level + */ + if (!is_tree_node(p_s_bp, expected_level)) { + reiserfs_log(LOG_WARNING, + "invalid format found in block %ju. Fsck?", + (intmax_t)p_s_bp->b_blkno); + pathrelse (p_s_search_path); + return (IO_ERROR); + } + + /* Ok, we have acquired next formatted node in the tree */ + n_node_level = B_LEVEL(p_s_bp); + reiserfs_log(LOG_DEBUG, "block info:\n"); + reiserfs_log(LOG_DEBUG, " node level: %d\n", + n_node_level); + reiserfs_log(LOG_DEBUG, " nb of items: %d\n", + B_NR_ITEMS(p_s_bp)); + reiserfs_log(LOG_DEBUG, " free space: %d bytes\n", + B_FREE_SPACE(p_s_bp)); + reiserfs_log(LOG_DEBUG, "bin_search with :\n" + " p_s_key = (objectid=%d, dirid=%d)\n" + " B_NR_ITEMS(p_s_bp) = %d\n" + " p_s_last_element->pe_position = %d (path_length = %d)\n", + p_s_key->on_disk_key.k_objectid, + p_s_key->on_disk_key.k_dir_id, + B_NR_ITEMS(p_s_bp), + p_s_last_element->pe_position, + p_s_search_path->path_length); + n_retval = bin_search(p_s_key, B_N_PITEM_HEAD(p_s_bp, 0), + B_NR_ITEMS(p_s_bp), + (n_node_level == DISK_LEAF_NODE_LEVEL) ? IH_SIZE : KEY_SIZE, + &(p_s_last_element->pe_position)); + reiserfs_log(LOG_DEBUG, "bin_search result: %d\n", + n_retval); + if (n_node_level == n_stop_level) { + reiserfs_log(LOG_DEBUG, "stop level reached (%s)\n", + n_retval == ITEM_FOUND ? 
"found" : "not found"); + return (n_retval); + } + + /* We are not in the stop level */ + if (n_retval == ITEM_FOUND) + /* + * Item has been found, so we choose the pointer + * which is to the right of the found one + */ + p_s_last_element->pe_position++; + + /* + * If item was not found we choose the position which is + * to the left of the found item. This requires no code, + * bin_search did it already. + */ + + /* + * So we have chosen a position in the current node which + * is an internal node. Now we calculate child block number + * by position in the node. + */ + n_block_number = B_N_CHILD_NUM(p_s_bp, + p_s_last_element->pe_position); + } + + reiserfs_log(LOG_DEBUG, "done\n"); + return (0); +} + +/* + * Form the path to an item and position in this item which contains + * file byte defined by p_s_key. If there is no such item corresponding + * to the key, we point the path to the item with maximal key less than + * p_s_key, and *p_n_pos_in_item is set to one past the last entry/byte + * in the item. If searching for entry in a directory item, and it is + * not found, *p_n_pos_in_item is set to one entry more than the entry + * with maximal key which is less than the sought key. + * + * Note that if there is no entry in this same node which is one more, + * then we point to an imaginary entry. For direct items, the position + * is in units of bytes, for indirect items the position is in units + * of blocknr entries, for directory items the position is in units of + * directory entries. + */ + +/* The function is NOT SCHEDULE-SAFE! */ +int +search_for_position_by_key(struct reiserfs_sb_info *p_s_sbi, + const struct cpu_key *p_cpu_key, /* Key to search (cpu variable) */ + struct path *p_s_search_path) /* Filled up by this function. */ +{ + int retval, n_blk_size; + off_t item_offset, offset; + struct item_head *p_le_ih; /* Pointer to on-disk structure */ + struct reiserfs_dir_entry de; + + /* If searching for directory entry. 
*/
+	if (is_direntry_cpu_key(p_cpu_key))
+		return (search_by_entry_key(p_s_sbi, p_cpu_key,
+		    p_s_search_path, &de));
+
+	/* If not searching for directory entry. */
+
+	/*
+	 * If item is found, the position inside it is zero.  An I/O error
+	 * from the search is passed straight back to the caller.
+	 */
+	retval = search_item(p_s_sbi, p_cpu_key, p_s_search_path);
+	if (retval == IO_ERROR)
+		return (retval);
+	if (retval == ITEM_FOUND) {
+		if (ih_item_len(B_N_PITEM_HEAD(
+		    PATH_PLAST_BUFFER(p_s_search_path),
+		    PATH_LAST_POSITION(p_s_search_path))) == 0) {
+			/* Only reported; the search still succeeds. */
+			reiserfs_log(LOG_WARNING, "item length equals zero\n");
+		}
+
+		pos_in_item(p_s_search_path) = 0;
+		return (POSITION_FOUND);
+	}
+
+	/*
+	 * NOTE(review): a zero position is only reported here; the
+	 * pre-decrement just below still runs in that case -- confirm
+	 * that this cannot index before the first item head.
+	 */
+	if (PATH_LAST_POSITION(p_s_search_path) == 0) {
+		reiserfs_log(LOG_WARNING, "position equals zero\n");
+	}
+
+	/* Item is not found. Set path to the previous item. */
+	p_le_ih = B_N_PITEM_HEAD(PATH_PLAST_BUFFER(p_s_search_path),
+	    --PATH_LAST_POSITION(p_s_search_path));
+	n_blk_size = p_s_sbi->s_blocksize;
+
+	/* A non-zero short-key comparison is reported as "file not found". */
+	if (comp_short_keys(&(p_le_ih->ih_key), p_cpu_key)) {
+		return (FILE_NOT_FOUND);
+	}
+
+	item_offset = le_ih_k_offset(p_le_ih);
+	offset = cpu_key_k_offset(p_cpu_key);
+
+	/*
+	 * Needed byte is contained in the item pointed to by the path.
+	 * For an indirect item the byte distance is divided by the block
+	 * size.
+	 */
+	if (item_offset <= offset &&
+	    item_offset + op_bytes_number(p_le_ih, n_blk_size) > offset) {
+		pos_in_item(p_s_search_path) = offset - item_offset;
+		if (is_indirect_le_ih(p_le_ih)) {
+			pos_in_item(p_s_search_path) /= n_blk_size;
+		}
+		return (POSITION_FOUND);
+	}
+
+	/* Needed byte is not contained in the item pointed to by the
+	 * path. Set pos_in_item out of the item.
*/ + if (is_indirect_le_ih(p_le_ih)) + pos_in_item(p_s_search_path) = + ih_item_len(p_le_ih) / UNFM_P_SIZE; + else + pos_in_item(p_s_search_path) = + ih_item_len(p_le_ih); + + return (POSITION_NOT_FOUND); +} diff --git a/sys/gnu/reiserfs/reiserfs_vfsops.c b/sys/gnu/reiserfs/reiserfs_vfsops.c new file mode 100644 index 000000000000..40675fcd9947 --- /dev/null +++ b/sys/gnu/reiserfs/reiserfs_vfsops.c @@ -0,0 +1,1186 @@ +/*- + * Copyright 2000 Hans Reiser + * See README for licensing and copyright details + * + * Ported to FreeBSD by Jean-Sébastien Pédron + * + * $FreeBSD$ + */ + +#include + +const char reiserfs_3_5_magic_string[] = REISERFS_SUPER_MAGIC_STRING; +const char reiserfs_3_6_magic_string[] = REISER2FS_SUPER_MAGIC_STRING; +const char reiserfs_jr_magic_string[] = REISER2FS_JR_SUPER_MAGIC_STRING; + +/* + * Default recommended I/O size is 128k. There might be broken + * applications that are confused by this. Use nolargeio mount option to + * get usual i/o size = PAGE_SIZE. + */ +int reiserfs_default_io_size = 128 * 1024; + +static vfs_cmount_t reiserfs_cmount; +static vfs_fhtovp_t reiserfs_fhtovp; +static vfs_mount_t reiserfs_mount; +static vfs_root_t reiserfs_root; +static vfs_statfs_t reiserfs_statfs; +static vfs_unmount_t reiserfs_unmount; +static vfs_vptofh_t reiserfs_vptofh; + +static int reiserfs_mountfs(struct vnode *devvp, struct mount *mp, + struct thread *td); +static void load_bitmap_info_data(struct reiserfs_sb_info *sbi, + struct reiserfs_bitmap_info *bi); +static int read_bitmaps(struct reiserfs_mount *rmp); +static int read_old_bitmaps(struct reiserfs_mount *rmp); +static int read_super_block(struct reiserfs_mount *rmp, int offset); +static hashf_t hash_function(struct reiserfs_mount *rmp); + +static int get_root_node(struct reiserfs_mount *rmp, + struct reiserfs_node **root); +uint32_t find_hash_out(struct reiserfs_mount *rmp); + +MALLOC_DEFINE(M_REISERFSMNT, "ReiserFS mount", "ReiserFS mount structure"); +MALLOC_DEFINE(M_REISERFSPATH, 
"ReiserFS path", "ReiserFS path structure"); +MALLOC_DEFINE(M_REISERFSNODE, "ReiserFS node", "ReiserFS vnode private part"); + +/* ------------------------------------------------------------------- + * VFS operations + * -------------------------------------------------------------------*/ + +static int +reiserfs_cmount(struct mntarg *ma, void *data, int flags, struct thread *td) +{ + struct reiserfs_args args; + int error; + + error = copyin(data, &args, sizeof(args)); + if (error) + return (error); + + ma = mount_argsu(ma, "from", args.fspec, MAXPATHLEN); + ma = mount_arg(ma, "export", &args.export, sizeof args.export); + + error = kernel_mount(ma, flags); + + return (error); +} + +/* + * Mount system call + */ +static int +reiserfs_mount(struct mount *mp, struct thread *td) +{ + size_t size; + int error, len; + mode_t accessmode; + char *path, *fspec; + struct vnode *devvp; + struct vfsoptlist *opts; + struct export_args *export; + struct reiserfs_mount *rmp; + struct reiserfs_sb_info *sbi; + struct nameidata nd, *ndp = &nd; + + if (!(mp->mnt_flag & MNT_RDONLY)) + return EROFS; + + /* Get the new options passed to mount */ + opts = mp->mnt_optnew; + + /* `fspath' contains the mount point (eg. /mnt/linux); REQUIRED */ + vfs_getopt(opts, "fspath", (void **)&path, NULL); + reiserfs_log(LOG_INFO, "mount point is `%s'\n", path); + + /* `from' contains the device name (eg. /dev/ad0s1); REQUIRED */ + fspec = NULL; + error = vfs_getopt(opts, "from", (void **)&fspec, &len); + if (!error && fspec[len - 1] != '\0') + return (EINVAL); + reiserfs_log(LOG_INFO, "device is `%s'\n", fspec); + + /* Handle MNT_UPDATE (mp->mnt_flag) */ + if (mp->mnt_flag & MNT_UPDATE) { + /* For now, only NFS export is supported. 
*/
+		error = vfs_getopt(opts, "export", (void **)&export, &len);
+		if (error == 0 && len == sizeof(*export) && export->ex_flags)
+			return (vfs_export(mp, export));
+	}
+
+	/* Not an update, or updating the name: look up the name
+	 * and verify that it refers to a sensible disk device. */
+	if (fspec == NULL)
+		return (EINVAL);
+
+	NDINIT(ndp, LOOKUP, FOLLOW, UIO_SYSSPACE, fspec, td);
+	if ((error = namei(ndp)) != 0)
+		return (error);
+	NDFREE(ndp, NDF_ONLY_PNBUF);
+	devvp = ndp->ni_vp;
+
+	if (!vn_isdisk(devvp, &error)) {
+		vrele(devvp);
+		return (error);
+	}
+
+	/* If mount by non-root, then verify that user has necessary
+	 * permissions on the device.  Read access is always required;
+	 * write access is added when the mount is not read-only. */
+	if (suser(td)) {
+		accessmode = VREAD;
+		if ((mp->mnt_flag & MNT_RDONLY) == 0)
+			accessmode |= VWRITE;
+		vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, td);
+		if ((error = VOP_ACCESS(devvp,
+		    accessmode, td->td_ucred, td)) != 0) {
+			vput(devvp);
+			return (error);
+		}
+		VOP_UNLOCK(devvp, 0, td);
+	}
+
+	if ((mp->mnt_flag & MNT_UPDATE) == 0) {
+		error = reiserfs_mountfs(devvp, mp, td);
+	} else {
+		/* TODO Handle MNT_UPDATE */
+		error = (EOPNOTSUPP);
+	}
+
+	/* On failure, drop the reference obtained through namei(). */
+	if (error) {
+		vrele(devvp);
+		return (error);
+	}
+
+	rmp = VFSTOREISERFS(mp);
+	sbi = rmp->rm_reiserfs;
+
+	/*
+	 * Note that this copystr() is ok because of a check at the start
+	 * of reiserfs_mount().
+	 */
+	reiserfs_log(LOG_DEBUG, "prepare statfs data\n");
+	(void)copystr(fspec, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, &size);
+	/* Zero the remainder of the name field after the copied string. */
+	bzero(mp->mnt_stat.f_mntfromname + size, MNAMELEN - size);
+	(void)reiserfs_statfs(mp, &mp->mnt_stat, td);
+
+	reiserfs_log(LOG_DEBUG, "done\n");
+	return (0);
+}
+
+/*
+ * Unmount system call
+ *
+ * Flushes the vnodes of the mount (forcibly when MNT_FORCE is given),
+ * frees the bitmap copies and the super block copy, closes the GEOM
+ * consumer, releases the device vnode and finally frees the per-mount
+ * structures.  Returns the vflush() error if flushing fails.
+ */
+static int
+reiserfs_unmount(struct mount *mp, int mntflags, struct thread *td)
+{
+	int error, flags = 0;
+	struct reiserfs_mount *rmp;
+	struct reiserfs_sb_info *sbi;
+
+	reiserfs_log(LOG_DEBUG, "get private data\n");
+	rmp = VFSTOREISERFS(mp);
+	sbi = rmp->rm_reiserfs;
+
+	/* Flags handling */
+	reiserfs_log(LOG_DEBUG, "handle mntflags\n");
+	if (mntflags & MNT_FORCE)
+		flags |= FORCECLOSE;
+
+	/* Flush files -> vflush */
+	reiserfs_log(LOG_DEBUG, "flush vnodes\n");
+	if ((error = vflush(mp, 0, flags, td)))
+		return (error);
+
+	/* XXX Super block update */
+
+	if (sbi) {
+		if (SB_AP_BITMAP(sbi)) {
+			int i;
+			reiserfs_log(LOG_DEBUG,
+			    "release bitmap buffers (total: %d)\n",
+			    SB_BMAP_NR(sbi));
+			for (i = 0; i < SB_BMAP_NR(sbi); i++) {
+				if (SB_AP_BITMAP(sbi)[i].bp_data) {
+					free(SB_AP_BITMAP(sbi)[i].bp_data,
+					    M_REISERFSMNT);
+					SB_AP_BITMAP(sbi)[i].bp_data = NULL;
+				}
+			}
+
+			reiserfs_log(LOG_DEBUG, "free bitmaps structure\n");
+			free(SB_AP_BITMAP(sbi), M_REISERFSMNT);
+			SB_AP_BITMAP(sbi) = NULL;
+		}
+
+		if (sbi->s_rs) {
+			reiserfs_log(LOG_DEBUG, "free super block data\n");
+			free(sbi->s_rs, M_REISERFSMNT);
+			sbi->s_rs = NULL;
+		}
+	}
+
+	reiserfs_log(LOG_DEBUG, "close device\n");
+#if defined(si_mountpoint)
+	rmp->rm_devvp->v_rdev->si_mountpoint = NULL;
+#endif
+
+	DROP_GIANT();
+	g_topology_lock();
+	g_wither_geom_close(rmp->rm_cp->geom, ENXIO);
+	g_topology_unlock();
+	PICKUP_GIANT();
+	vrele(rmp->rm_devvp);
+
+	if (sbi) {
+		reiserfs_log(LOG_DEBUG, "free sbi\n");
+		free(sbi, M_REISERFSMNT);
+		sbi = rmp->rm_reiserfs = NULL;
+	}
+	if (rmp) {	/* rmp was already dereferenced above */
+		reiserfs_log(LOG_DEBUG, "free rmp\n");
+		free(rmp, M_REISERFSMNT);
+		rmp = NULL;
+	}
+
+
mp->mnt_data = (qaddr_t)0;
+	mp->mnt_flag &= ~MNT_LOCAL;
+
+	reiserfs_log(LOG_DEBUG, "done\n");
+	return (error);
+}
+
+/*
+ * Return the root of a filesystem.
+ *
+ * A key is built from REISERFS_ROOT_PARENT_OBJECTID and
+ * REISERFS_ROOT_OBJECTID and passed to reiserfs_iget().  *vpp is only
+ * written when that call succeeds; its error is returned otherwise.
+ * NOTE(review): only the two id fields of the key are filled in here --
+ * confirm that reiserfs_iget() does not read the other fields.
+ */
+static int
+reiserfs_root(struct mount *mp, int flags, struct vnode **vpp,
+    struct thread *td)
+{
+	int error;
+	struct vnode *vp;
+	struct cpu_key rootkey;
+
+	rootkey.on_disk_key.k_dir_id = REISERFS_ROOT_PARENT_OBJECTID;
+	rootkey.on_disk_key.k_objectid = REISERFS_ROOT_OBJECTID;
+
+	error = reiserfs_iget(mp, &rootkey, &vp, td);
+
+	if (error == 0)
+		*vpp = vp;
+	return (error);
+}
+
+/*
+ * The statfs syscall
+ *
+ * Fills *sbp from the in-memory super block copy: the block size is
+ * also reported as the I/O size, the block count excludes the bitmap
+ * blocks and one more block, and the file counters are left at zero.
+ * When sbp is not the mount's own statfs structure, the type and the
+ * two mount names are copied over from it.  Always returns 0.
+ */
+static int
+reiserfs_statfs(struct mount *mp, struct statfs *sbp, struct thread *td)
+{
+	struct reiserfs_mount *rmp;
+	struct reiserfs_sb_info *sbi;
+	struct reiserfs_super_block *rs;
+
+	reiserfs_log(LOG_DEBUG, "get private data\n");
+	rmp = VFSTOREISERFS(mp);
+	sbi = rmp->rm_reiserfs;
+	rs = sbi->s_rs;
+
+	reiserfs_log(LOG_DEBUG, "fill statfs structure\n");
+	sbp->f_bsize = sbi->s_blocksize;
+	sbp->f_iosize = sbp->f_bsize;
+	sbp->f_blocks = sb_block_count(rs) - sb_bmap_nr(rs) - 1;
+	sbp->f_bfree = sb_free_blocks(rs);
+	sbp->f_bavail = sbp->f_bfree;
+	sbp->f_files = 0;
+	sbp->f_ffree = 0;
+	reiserfs_log(LOG_DEBUG, " block size = %ju\n",
+	    (intmax_t)sbp->f_bsize);
+	reiserfs_log(LOG_DEBUG, " IO size = %ju\n",
+	    (intmax_t)sbp->f_iosize);
+	reiserfs_log(LOG_DEBUG, " block count = %ju\n",
+	    (intmax_t)sbp->f_blocks);
+	reiserfs_log(LOG_DEBUG, " free blocks = %ju\n",
+	    (intmax_t)sbp->f_bfree);
+	reiserfs_log(LOG_DEBUG, " avail blocks = %ju\n",
+	    (intmax_t)sbp->f_bavail);
+	reiserfs_log(LOG_DEBUG, "...done\n");
+
+	if (sbp != &mp->mnt_stat) {
+		reiserfs_log(LOG_DEBUG, "copying monut point info\n");
+		sbp->f_type = mp->mnt_vfc->vfc_typenum;
+		bcopy((caddr_t)mp->mnt_stat.f_mntonname,
+		    (caddr_t)&sbp->f_mntonname[0], MNAMELEN);
+		bcopy((caddr_t)mp->mnt_stat.f_mntfromname,
+		    (caddr_t)&sbp->f_mntfromname[0], MNAMELEN);
+		reiserfs_log(LOG_DEBUG, " mount from: %s\n",
+		    sbp->f_mntfromname);
+
reiserfs_log(LOG_DEBUG, " mount on: %s\n",
+		    sbp->f_mntonname);
+		reiserfs_log(LOG_DEBUG, "...done\n");
+	}
+
+	return (0);
+}
+
+/*
+ * File handle to vnode
+ *
+ * Have to be really careful about stale file handles:
+ * - check that the inode key is valid
+ * - call reiserfs_iget() to get the inode
+ * - check for an unallocated inode (i_mode == 0) and for a generation
+ *   number that differs from the one stored in the handle
+ *
+ * Returns ESTALE for a rejected handle, the reiserfs_iget() error if
+ * the inode cannot be read, and 0 with *vpp set on success.  *vpp is
+ * set to NULLVP on the failure paths that follow the key check.
+ */
+static int
+reiserfs_fhtovp(struct mount *mp, struct fid *fhp, struct vnode **vpp)
+{
+	int error;
+	struct rfid *rfhp;
+	struct vnode *nvp;
+	struct cpu_key key;
+	struct reiserfs_node *ip;
+	struct reiserfs_sb_info *sbi;	/* assigned below but not used */
+	struct thread *td = curthread;
+
+	rfhp = (struct rfid *)fhp;
+	sbi = VFSTOREISERFS(mp)->rm_reiserfs;
+
+	/*
+	 * Check that the key is valid.
+	 * NOTE(review): the handle is rejected only when BOTH ids are
+	 * below the root values; confirm whether `||' was intended.
+	 */
+	if (rfhp->rfid_dirid < REISERFS_ROOT_PARENT_OBJECTID &&
+	    rfhp->rfid_objectid < REISERFS_ROOT_OBJECTID)
+		return (ESTALE);
+
+	reiserfs_log(LOG_DEBUG,
+	    "file handle key is (dirid=%d, objectid=%d)\n",
+	    rfhp->rfid_dirid, rfhp->rfid_objectid);
+	key.on_disk_key.k_dir_id = rfhp->rfid_dirid;
+	key.on_disk_key.k_objectid = rfhp->rfid_objectid;
+
+	reiserfs_log(LOG_DEBUG, "read this inode\n");
+	error = reiserfs_iget(mp, &key, &nvp, td);
+	if (error) {
+		*vpp = NULLVP;
+		return (error);
+	}
+
+	reiserfs_log(LOG_DEBUG, "check validity\n");
+	ip = VTOI(nvp);
+	if (ip->i_mode == 0 || ip->i_generation != rfhp->rfid_gen) {
+		vput(nvp);
+		*vpp = NULLVP;
+		return (ESTALE);
+	}
+
+	reiserfs_log(LOG_DEBUG, "return it\n");
+	*vpp = nvp;
+	return (0);
+}
+
+/*
+ * Vnode pointer to File handle
+ *
+ * The handle receives its own length, i_ino as the directory id,
+ * i_number as the object id and the inode generation.
+ */
+static int
+reiserfs_vptofh(struct vnode *vp, struct fid *fhp)
+{
+	struct rfid *rfhp;
+	struct reiserfs_node *ip;
+
+	ip = VTOI(vp);
+	reiserfs_log(LOG_DEBUG,
+	    "fill *fhp with inode (dirid=%d, objectid=%d)\n",
+	    ip->i_ino, ip->i_number);
+
+	rfhp = (struct rfid *)fhp;
+	rfhp->rfid_len = sizeof(struct rfid);
+	rfhp->rfid_dirid = ip->i_ino;
+	rfhp->rfid_objectid
= ip->i_number; + rfhp->rfid_gen = ip->i_generation; + + reiserfs_log(LOG_DEBUG, "return it\n"); + return (0); +} + +/* ------------------------------------------------------------------- + * Functions for the journal + * -------------------------------------------------------------------*/ + +int +is_reiserfs_3_5(struct reiserfs_super_block *rs) +{ + + return (!strncmp(rs->s_v1.s_magic, reiserfs_3_5_magic_string, + strlen(reiserfs_3_5_magic_string))); +} + +int +is_reiserfs_3_6(struct reiserfs_super_block *rs) +{ + + return (!strncmp(rs->s_v1.s_magic, reiserfs_3_6_magic_string, + strlen(reiserfs_3_6_magic_string))); +} + +int +is_reiserfs_jr(struct reiserfs_super_block *rs) +{ + + return (!strncmp(rs->s_v1.s_magic, reiserfs_jr_magic_string, + strlen(reiserfs_jr_magic_string))); +} + +static int +is_any_reiserfs_magic_string(struct reiserfs_super_block *rs) +{ + + return ((is_reiserfs_3_5(rs) || is_reiserfs_3_6(rs) || + is_reiserfs_jr(rs))); +} + +/* ------------------------------------------------------------------- + * Internal functions + * -------------------------------------------------------------------*/ + +/* + * Common code for mount and mountroot + */ +static int +reiserfs_mountfs(struct vnode *devvp, struct mount *mp, struct thread *td) +{ + int error, old_format = 0; + struct reiserfs_mount *rmp; + struct reiserfs_sb_info *sbi; + struct reiserfs_super_block *rs; + struct cdev *dev = devvp->v_rdev; + +#if (__FreeBSD_version >= 600000) + struct g_consumer *cp; + struct bufobj *bo; +#endif + + //ronly = (mp->mnt_flag & MNT_RDONLY) != 0; + +#if (__FreeBSD_version < 600000) + /* + * Disallow multiple mounts of the same device. + * Disallow mounting of a device that is currently in use + * (except for root, which might share swap device for miniroot). + * Flush out any old buffers remaining from a previous use. 
+	 */
+	if ((error = vfs_mountedon(devvp)) != 0)
+		return (error);
+	if (vcount(devvp) > 1)
+		return (EBUSY);
+
+	vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, td);
+	error = vinvalbuf(devvp, V_SAVE, td->td_ucred, td, 0, 0);
+	if (error) {
+		VOP_UNLOCK(devvp, 0, td);
+		return (error);
+	}
+
+	/*
+	 * Open the device in read-only, 'cause we don't support write
+	 * for now
+	 */
+	error = VOP_OPEN(devvp, FREAD, FSCRED, td, -1);
+	VOP_UNLOCK(devvp, 0, td);
+	if (error)
+		return (error);
+#else
+	DROP_GIANT();
+	g_topology_lock();
+	error = g_vfs_open(devvp, &cp, "reiserfs", /* read-only */ 0);
+	g_topology_unlock();
+	PICKUP_GIANT();
+	VOP_UNLOCK(devvp, 0, td);
+	if (error)
+		return (error);
+
+	/* Attach the GEOM consumer to the buffer object of the device vnode. */
+	bo = &devvp->v_bufobj;
+	bo->bo_private = cp;
+	bo->bo_ops = g_vfs_bufops;
+#endif
+
+	/* Take the device's maximum I/O size if it has one, capped at MAXPHYS. */
+	if (devvp->v_rdev->si_iosize_max != 0)
+		mp->mnt_iosize_max = devvp->v_rdev->si_iosize_max;
+	if (mp->mnt_iosize_max > MAXPHYS)
+		mp->mnt_iosize_max = MAXPHYS;
+
+	rmp = NULL;
+	sbi = NULL;
+
+	/*
+	 * rmp contains any information about this specific mount.  Both
+	 * structures are allocated zeroed, which the cleanup code at the
+	 * "out" label relies on when it tests their members.
+	 */
+	rmp = malloc(sizeof *rmp, M_REISERFSMNT, M_WAITOK | M_ZERO);
+	if (!rmp) {
+		error = (ENOMEM);
+		goto out;
+	}
+	sbi = malloc(sizeof *sbi, M_REISERFSMNT, M_WAITOK | M_ZERO);
+	if (!sbi) {
+		error = (ENOMEM);
+		goto out;
+	}
+	rmp->rm_reiserfs = sbi;
+	rmp->rm_mountp = mp;
+	rmp->rm_devvp = devvp;
+	rmp->rm_dev = dev;
+#if (__FreeBSD_version >= 600000)
+	rmp->rm_bo = &devvp->v_bufobj;
+	rmp->rm_cp = cp;
+#endif
+
+	/* Set default values for options: non-aggressive tails */
+	REISERFS_SB(sbi)->s_mount_opt = (1 << REISERFS_SMALLTAIL);
+	REISERFS_SB(sbi)->s_rd_only = 1;
+	REISERFS_SB(sbi)->s_devvp = devvp;
+
+	/*
+	 * Read the super block: the old location is tried first, then
+	 * the current one.  old_format records which of the two matched.
+	 */
+	if ((error = read_super_block(rmp, REISERFS_OLD_DISK_OFFSET)) == 0) {
+		/* The read process succeeded, it's an old format */
+		old_format = 1;
+	} else if ((error = read_super_block(rmp, REISERFS_DISK_OFFSET)) != 0) {
+		reiserfs_log(LOG_ERR, "can not find a ReiserFS filesystem\n");
+		goto out;
+	}
+
+	rs =
SB_DISK_SUPER_BLOCK(sbi);
+
+	/*
+	 * Let's do basic sanity check to verify that underlying device is
+	 * not smaller than the filesystem. If the check fails then abort and
+	 * scream, because bad stuff will happen otherwise.
+	 * (The check itself is still the Linux code and is compiled out.)
+	 */
+#if 0
+	if (s->s_bdev && s->s_bdev->bd_inode &&
+	    i_size_read(s->s_bdev->bd_inode) <
+	    sb_block_count(rs) * sb_blocksize(rs)) {
+		reiserfs_log(LOG_ERR,
+		    "reiserfs: filesystem cannot be mounted because it is "
+		    "bigger than the device.\n");
+		reiserfs_log(LOG_ERR, "reiserfs: you may need to run fsck "
+		    "rr may be you forgot to reboot after fdisk when it "
+		    "told you to.\n");
+		goto out;
+	}
+#endif
+
+	/*
+	 * XXX This is from the original Linux code, but why assign two
+	 * values to the same variable?  Only the second one survives.
+	 */
+	sbi->s_mount_state = SB_REISERFS_STATE(sbi);
+	sbi->s_mount_state = REISERFS_VALID_FS;
+
+	/* The bitmap reader depends on which super block location matched. */
+	if ((error = (old_format ?
+	    read_old_bitmaps(rmp) : read_bitmaps(rmp)))) {
+		reiserfs_log(LOG_ERR, "unable to read bitmap\n");
+		goto out;
+	}
+
+	/* Make data=ordered the default */
+	if (!reiserfs_data_log(sbi) && !reiserfs_data_ordered(sbi) &&
+	    !reiserfs_data_writeback(sbi)) {
+		REISERFS_SB(sbi)->s_mount_opt |= (1 << REISERFS_DATA_ORDERED);
+	}
+
+	/* Report the data mode that is in effect. */
+	if (reiserfs_data_log(sbi)) {
+		reiserfs_log(LOG_INFO, "using journaled data mode\n");
+	} else if (reiserfs_data_ordered(sbi)) {
+		reiserfs_log(LOG_INFO, "using ordered data mode\n");
+	} else {
+		reiserfs_log(LOG_INFO, "using writeback data mode\n");
+	}
+
+	/* TODO Not yet supported */
+#if 0
+	if(journal_init(sbi, jdev_name, old_format, commit_max_age)) {
+		reiserfs_log(LOG_ERR, "unable to initialize journal space\n");
+		goto out;
+	} else {
+		jinit_done = 1 ; /* once this is set, journal_release must
+				    be called if we error out of the mount */
+	}
+
+	if (reread_meta_blocks(sbi)) {
+		reiserfs_log(LOG_ERR,
+		    "unable to reread meta blocks after journal init\n");
+		goto out;
+	}
+#endif
+
+	/* Define and initialize hash function */
+	sbi->s_hash_function = hash_function(rmp);
+
+	if
(sbi->s_hash_function == NULL) { + reiserfs_log(LOG_ERR, "couldn't determined hash function\n"); + error = (EINVAL); + goto out; + } + + if (is_reiserfs_3_5(rs) || + (is_reiserfs_jr(rs) && SB_VERSION(sbi) == REISERFS_VERSION_1)) + bit_set(&(sbi->s_properties), REISERFS_3_5); + else + bit_set(&(sbi->s_properties), REISERFS_3_6); + + mp->mnt_data = (qaddr_t)rmp; + mp->mnt_stat.f_fsid.val[0] = dev2udev(dev); + mp->mnt_stat.f_fsid.val[1] = mp->mnt_vfc->vfc_typenum; + mp->mnt_flag |= MNT_LOCAL; +#if defined(si_mountpoint) + devvp->v_rdev->si_mountpoint = mp; +#endif + + return (0); + +out: + reiserfs_log(LOG_INFO, "*** error during mount ***\n"); + if (sbi) { + if (SB_AP_BITMAP(sbi)) { + int i; + for (i = 0; i < SB_BMAP_NR(sbi); i++) { + if (!SB_AP_BITMAP(sbi)[i].bp_data) + break; + free(SB_AP_BITMAP(sbi)[i].bp_data, M_REISERFSMNT); + } + free(SB_AP_BITMAP(sbi), M_REISERFSMNT); + } + + if (sbi->s_rs) { + free(sbi->s_rs, M_REISERFSMNT); + sbi->s_rs = NULL; + } + } + +#if (__FreeBSD_version < 600000) + (void)VOP_CLOSE(devvp, FREAD, NOCRED, td); +#else + if (cp != NULL) { + DROP_GIANT(); + g_topology_lock(); + g_wither_geom_close(cp->geom, ENXIO); + g_topology_unlock(); + PICKUP_GIANT(); + } +#endif + + if (sbi) + free(sbi, M_REISERFSMNT); + if (rmp) + free(rmp, M_REISERFSMNT); + return (error); +} + +/* + * Read the super block + */ +static int +read_super_block(struct reiserfs_mount *rmp, int offset) +{ + struct buf *bp; + int error, bits; + struct reiserfs_super_block *rs; + struct reiserfs_sb_info *sbi; + uint16_t fs_blocksize; + + if (offset == REISERFS_OLD_DISK_OFFSET) { + reiserfs_log(LOG_DEBUG, + "reiserfs/super: read old format super block\n"); + } else { + reiserfs_log(LOG_DEBUG, + "reiserfs/super: read new format super block\n"); + } + + /* Read the super block */ + if ((error = bread(rmp->rm_devvp, offset * btodb(REISERFS_BSIZE), + REISERFS_BSIZE, NOCRED, &bp)) != 0) { + reiserfs_log(LOG_ERR, "can't read device\n"); + return (error); + } + + /* Get it from the 
buffer data */ + rs = (struct reiserfs_super_block *)bp->b_data; + if (!is_any_reiserfs_magic_string(rs)) { + brelse(bp); + return (EINVAL); + } + + fs_blocksize = sb_blocksize(rs); + brelse(bp); + bp = NULL; + + if (fs_blocksize <= 0) { + reiserfs_log(LOG_ERR, "unexpected null block size"); + return (EINVAL); + } + + /* Read the super block (for double check) + * We can't read the same blkno with a different size: it causes + * panic() if INVARIANTS is set. So we keep REISERFS_BSIZE */ + if ((error = bread(rmp->rm_devvp, + offset * REISERFS_BSIZE / fs_blocksize * btodb(fs_blocksize), + REISERFS_BSIZE, NOCRED, &bp)) != 0) { + reiserfs_log(LOG_ERR, "can't reread the super block\n"); + return (error); + } + + rs = (struct reiserfs_super_block *)bp->b_data; + if (sb_blocksize(rs) != fs_blocksize) { + reiserfs_log(LOG_ERR, "unexpected block size " + "(found=%u, expected=%u)\n", + sb_blocksize(rs), fs_blocksize); + brelse(bp); + return (EINVAL); + } + + reiserfs_log(LOG_DEBUG, "magic: `%s'\n", rs->s_v1.s_magic); + reiserfs_log(LOG_DEBUG, "label: `%s'\n", rs->s_label); + reiserfs_log(LOG_DEBUG, "block size: %6d\n", sb_blocksize(rs)); + reiserfs_log(LOG_DEBUG, "block count: %6u\n", + rs->s_v1.s_block_count); + reiserfs_log(LOG_DEBUG, "bitmaps number: %6u\n", + rs->s_v1.s_bmap_nr); + + if (rs->s_v1.s_root_block == -1) { + log(LOG_ERR, + "reiserfs: Unfinished reiserfsck --rebuild-tree run " + "detected. Please\n" + "run reiserfsck --rebuild-tree and wait for a " + "completion. 
If that\n" + "fails, get newer reiserfsprogs package"); + brelse(bp); + return (EINVAL); + } + + sbi = rmp->rm_reiserfs; + sbi->s_blocksize = fs_blocksize; + + for (bits = 9, fs_blocksize >>= 9; fs_blocksize >>= 1; bits++) + ; + sbi->s_blocksize_bits = bits; + + /* Copy the buffer and release it */ + sbi->s_rs = malloc(sizeof *rs, M_REISERFSMNT, M_WAITOK | M_ZERO); + if (!sbi->s_rs) { + reiserfs_log(LOG_ERR, "can not read the super block\n"); + brelse(bp); + return (ENOMEM); + } + bcopy(rs, sbi->s_rs, sizeof(struct reiserfs_super_block)); + brelse(bp); + + if (is_reiserfs_jr(rs)) { + if (sb_version(rs) == REISERFS_VERSION_2) + reiserfs_log(LOG_INFO, "found reiserfs format \"3.6\"" + " with non-standard journal"); + else if (sb_version(rs) == REISERFS_VERSION_1) + reiserfs_log(LOG_INFO, "found reiserfs format \"3.5\"" + " with non-standard journal"); + else { + reiserfs_log(LOG_ERR, "found unknown " + "format \"%u\" of reiserfs with non-standard magic", + sb_version(rs)); + return (EINVAL); + } + } else { + /* + * s_version of standard format may contain incorrect + * information, so we just look at the magic string + */ + reiserfs_log(LOG_INFO, + "found reiserfs format \"%s\" with standard journal\n", + is_reiserfs_3_5(rs) ? "3.5" : "3.6"); + } + + return (0); +} + +/* + * load_bitmap_info_data - Sets up the reiserfs_bitmap_info structure + * from disk. + * @sbi - superblock info for this filesystem + * @bi - the bitmap info to be loaded. Requires that bi->bp is valid. + * + * This routine counts how many free bits there are, finding the first + * zero as a side effect. Could also be implemented as a loop of + * test_bit() calls, or a loop of find_first_zero_bit() calls. This + * implementation is similar to find_first_zero_bit(), but doesn't + * return after it finds the first bit. Should only be called on fs + * mount, but should be fairly efficient anyways. 
+ * + * bi->first_zero_hint is considered unset if it == 0, since the bitmap + * itself will invariably occupt block 0 represented in the bitmap. The + * only exception to this is when free_count also == 0, since there will + * be no free blocks at all. + */ +static void +load_bitmap_info_data(struct reiserfs_sb_info *sbi, + struct reiserfs_bitmap_info *bi) +{ + unsigned long *cur; + + cur = (unsigned long *)bi->bp_data; + while ((char *)cur < (bi->bp_data + sbi->s_blocksize)) { + /* + * No need to scan if all 0's or all 1's. + * Since we're only counting 0's, we can simply ignore + * all 1's + */ + if (*cur == 0) { + if (bi->first_zero_hint == 0) { + bi->first_zero_hint = + ((char *)cur - bi->bp_data) << 3; + } + bi->free_count += sizeof(unsigned long) * 8; + } else if (*cur != ~0L) { + int b; + + for (b = 0; b < sizeof(unsigned long) * 8; b++) { + if (!reiserfs_test_le_bit(b, cur)) { + bi->free_count++; + if (bi->first_zero_hint == 0) + bi->first_zero_hint = + (((char *)cur - + bi->bp_data) << 3) + b; + } + } + } + cur++; + } +} + +/* + * Read the bitmaps + */ +static int +read_bitmaps(struct reiserfs_mount *rmp) +{ + int i, bmap_nr; + struct buf *bp = NULL; + struct reiserfs_sb_info *sbi = rmp->rm_reiserfs; + + /* Allocate memory for the table of bitmaps */ + SB_AP_BITMAP(sbi) = + malloc(sizeof(struct reiserfs_bitmap_info) * SB_BMAP_NR(sbi), + M_REISERFSMNT, M_WAITOK | M_ZERO); + if (!SB_AP_BITMAP(sbi)) + return (ENOMEM); + + /* Read all the bitmaps */ + for (i = 0, + bmap_nr = (REISERFS_DISK_OFFSET_IN_BYTES / sbi->s_blocksize + 1) * + btodb(sbi->s_blocksize); + i < SB_BMAP_NR(sbi); i++, bmap_nr = sbi->s_blocksize * 8 * i) { + SB_AP_BITMAP(sbi)[i].bp_data = malloc(sbi->s_blocksize, + M_REISERFSMNT, M_WAITOK | M_ZERO); + if (!SB_AP_BITMAP(sbi)[i].bp_data) + return (ENOMEM); + bread(rmp->rm_devvp, bmap_nr, sbi->s_blocksize, NOCRED, &bp); + bcopy(bp->b_data, SB_AP_BITMAP(sbi)[i].bp_data, + sbi->s_blocksize); + brelse(bp); + bp = NULL; + + /*if 
(!buffer_uptodate(SB_AP_BITMAP(s)[i].bh)) + ll_rw_block(READ, 1, &SB_AP_BITMAP(s)[i].bh);*/ + } + + for (i = 0; i < SB_BMAP_NR(sbi); i++) { + /*if (!buffer_uptodate(SB_AP_BITMAP(s)[i].bh)) { + reiserfs_warning(s,"sh-2029: reiserfs read_bitmaps: " + "bitmap block (#%lu) reading failed", + SB_AP_BITMAP(s)[i].bh->b_blocknr); + for (i = 0; i < SB_BMAP_NR(s); i++) + brelse(SB_AP_BITMAP(s)[i].bh); + vfree(SB_AP_BITMAP(s)); + SB_AP_BITMAP(s) = NULL; + return 1; + }*/ + load_bitmap_info_data(sbi, SB_AP_BITMAP(sbi) + i); + reiserfs_log(LOG_DEBUG, + "%d free blocks (starting at block %ld)\n", + SB_AP_BITMAP(sbi)[i].free_count, + (long)SB_AP_BITMAP(sbi)[i].first_zero_hint); + } + + return (0); +} + +// TODO Not supported +static int +read_old_bitmaps(struct reiserfs_mount *rmp) +{ + + return (EOPNOTSUPP); +#if 0 + int i; + struct reiserfs_sb_info *sbi = rmp->rm_reiserfs; + struct reiserfs_super_block *rs = SB_DISK_SUPER_BLOCK(sbi); + + /* First of bitmap blocks */ + int bmp1 = (REISERFS_OLD_DISK_OFFSET / sbi->s_blocksize) * + btodb(sbi->s_blocksize); + + /* Read true bitmap */ + SB_AP_BITMAP(sbi) = + malloc(sizeof (struct reiserfs_buffer_info *) * sb_bmap_nr(rs), + M_REISERFSMNT, M_WAITOK | M_ZERO); + if (!SB_AP_BITMAP(sbi)) + return 1; + + for (i = 0; i < sb_bmap_nr(rs); i ++) { + SB_AP_BITMAP(sbi)[i].bp = getblk(rmp->rm_devvp, + (bmp1 + i) * btodb(sbi->s_blocksize), sbi->s_blocksize, 0, 0, 0); + if (!SB_AP_BITMAP(sbi)[i].bp) + return 1; + load_bitmap_info_data(sbi, SB_AP_BITMAP(sbi) + i); + } + + return 0; +#endif +} + +/* ------------------------------------------------------------------- + * Hash detection stuff + * -------------------------------------------------------------------*/ + +static int +get_root_node(struct reiserfs_mount *rmp, struct reiserfs_node **root) +{ + struct reiserfs_node *ip; + struct reiserfs_iget_args args; + + /* Allocate the node structure */ + reiserfs_log(LOG_DEBUG, "malloc(struct reiserfs_node)\n"); + MALLOC(ip, struct reiserfs_node *, 
sizeof(struct reiserfs_node), + M_REISERFSNODE, M_WAITOK | M_ZERO); + + /* Fill the structure */ + reiserfs_log(LOG_DEBUG, "filling *ip\n"); + ip->i_dev = rmp->rm_dev; + ip->i_number = REISERFS_ROOT_OBJECTID; + ip->i_ino = REISERFS_ROOT_PARENT_OBJECTID; + ip->i_reiserfs = rmp->rm_reiserfs; + + /* Read the inode */ + args.objectid = ip->i_number; + args.dirid = ip->i_ino; + reiserfs_log(LOG_DEBUG, "call reiserfs_read_locked_inode(" + "objectid=%d,dirid=%d)\n", args.objectid, args.dirid); + reiserfs_read_locked_inode(ip, &args); + + ip->i_devvp = rmp->rm_devvp; + //XXX VREF(ip->i_devvp); Is it necessary ? + + *root = ip; + return (0); +} + +/* + * If root directory is empty - we set default - Yura's - hash and warn + * about it. + * FIXME: we look for only one name in a directory. If tea and yura both + * have the same value - we ask user to send report to the mailing list + */ +uint32_t find_hash_out(struct reiserfs_mount *rmp) +{ + int retval; + struct cpu_key key; + INITIALIZE_PATH(path); + struct reiserfs_node *ip; + struct reiserfs_sb_info *sbi; + struct reiserfs_dir_entry de; + uint32_t hash = DEFAULT_HASH; + + get_root_node(rmp, &ip); + if (!ip) + return (UNSET_HASH); + + sbi = rmp->rm_reiserfs; + + do { + uint32_t teahash, r5hash, yurahash; + + reiserfs_log(LOG_DEBUG, "make_cpu_key\n"); + make_cpu_key(&key, ip, ~0, TYPE_DIRENTRY, 3); + reiserfs_log(LOG_DEBUG, "search_by_entry_key for " + "key(objectid=%d,dirid=%d)\n", + key.on_disk_key.k_objectid, key.on_disk_key.k_dir_id); + retval = search_by_entry_key(sbi, &key, &path, &de); + if (retval == IO_ERROR) { + pathrelse(&path); + return (UNSET_HASH); + } + if (retval == NAME_NOT_FOUND) + de.de_entry_num--; + + reiserfs_log(LOG_DEBUG, "name found\n"); + + set_de_name_and_namelen(&de); + + if (deh_offset(&(de.de_deh[de.de_entry_num])) == DOT_DOT_OFFSET) { + /* Allow override in this case */ + if (reiserfs_rupasov_hash(sbi)) { + hash = YURA_HASH; + } + reiserfs_log(LOG_DEBUG, + "FS seems to be empty, autodetect " + 
"is using the default hash"); + break; + } + + r5hash = GET_HASH_VALUE(r5_hash(de.de_name, de.de_namelen)); + teahash = GET_HASH_VALUE(keyed_hash(de.de_name, + de.de_namelen)); + yurahash = GET_HASH_VALUE(yura_hash(de.de_name, de.de_namelen)); + if (((teahash == r5hash) && + (GET_HASH_VALUE( + deh_offset(&(de.de_deh[de.de_entry_num]))) == r5hash)) || + ((teahash == yurahash) && + (yurahash == + GET_HASH_VALUE( + deh_offset(&(de.de_deh[de.de_entry_num]))))) || + ((r5hash == yurahash) && + (yurahash == + GET_HASH_VALUE( + deh_offset(&(de.de_deh[de.de_entry_num])))))) { + reiserfs_log(LOG_ERR, + "unable to automatically detect hash " + "function. Please mount with -o " + "hash={tea,rupasov,r5}"); + hash = UNSET_HASH; + break; + } + + if (GET_HASH_VALUE( + deh_offset(&(de.de_deh[de.de_entry_num]))) == yurahash) { + reiserfs_log(LOG_DEBUG, "detected YURA hash\n"); + hash = YURA_HASH; + } else if (GET_HASH_VALUE( + deh_offset(&(de.de_deh[de.de_entry_num]))) == teahash) { + reiserfs_log(LOG_DEBUG, "detected TEA hash\n"); + hash = TEA_HASH; + } else if (GET_HASH_VALUE( + deh_offset(&(de.de_deh[de.de_entry_num]))) == r5hash) { + reiserfs_log(LOG_DEBUG, "detected R5 hash\n"); + hash = R5_HASH; + } else { + reiserfs_log(LOG_WARNING, "unrecognised hash function"); + hash = UNSET_HASH; + } + } while (0); + + pathrelse(&path); + return (hash); +} + +/* Finds out which hash names are sorted with */ +static int +what_hash(struct reiserfs_mount *rmp) +{ + uint32_t code; + struct reiserfs_sb_info *sbi = rmp->rm_reiserfs; + + find_hash_out(rmp); + code = sb_hash_function_code(SB_DISK_SUPER_BLOCK(sbi)); + + /* + * reiserfs_hash_detect() == true if any of the hash mount options + * were used. 
We must check them to make sure the user isn't using a + * bad hash value + */ + if (code == UNSET_HASH || reiserfs_hash_detect(sbi)) + code = find_hash_out(rmp); + + if (code != UNSET_HASH && reiserfs_hash_detect(sbi)) { + /* + * Detection has found the hash, and we must check against + * the mount options + */ + if (reiserfs_rupasov_hash(sbi) && code != YURA_HASH) { + reiserfs_log(LOG_ERR, "error, %s hash detected, " + "unable to force rupasov hash", + reiserfs_hashname(code)); + code = UNSET_HASH; + } else if (reiserfs_tea_hash(sbi) && code != TEA_HASH) { + reiserfs_log(LOG_ERR, "error, %s hash detected, " + "unable to force tea hash", + reiserfs_hashname(code)); + code = UNSET_HASH; + } else if (reiserfs_r5_hash(sbi) && code != R5_HASH) { + reiserfs_log(LOG_ERR, "error, %s hash detected, " + "unable to force r5 hash", + reiserfs_hashname(code)); + code = UNSET_HASH; + } + } else { + /* + * Find_hash_out was not called or could not determine + * the hash + */ + if (reiserfs_rupasov_hash(sbi)) { + code = YURA_HASH; + } else if (reiserfs_tea_hash(sbi)) { + code = TEA_HASH; + } else if (reiserfs_r5_hash(sbi)) { + code = R5_HASH; + } + } + + /* TODO Not supported yet */ +#if 0 + /* If we are mounted RW, and we have a new valid hash code, update + * the super */ + if (code != UNSET_HASH && + !(s->s_flags & MS_RDONLY) && + code != sb_hash_function_code(SB_DISK_SUPER_BLOCK(s))) { + set_sb_hash_function_code(SB_DISK_SUPER_BLOCK(s), code); + } +#endif + + return (code); +} + +/* Return pointer to appropriate function */ +static hashf_t +hash_function(struct reiserfs_mount *rmp) +{ + + switch (what_hash(rmp)) { + case TEA_HASH: + reiserfs_log(LOG_INFO, "using tea hash to sort names\n"); + return (keyed_hash); + case YURA_HASH: + reiserfs_log(LOG_INFO, "using rupasov hash to sort names\n"); + return (yura_hash); + case R5_HASH: + reiserfs_log(LOG_INFO, "using r5 hash to sort names\n"); + return (r5_hash); + } + + return (NULL); +} + +/* 
------------------------------------------------------------------- + * VFS registration + * -------------------------------------------------------------------*/ + +static struct vfsops reiser_vfsops = { + .vfs_cmount = reiserfs_cmount, + .vfs_mount = reiserfs_mount, + .vfs_unmount = reiserfs_unmount, + //.vfs_checkexp = reiserfs_checkexp, + //.vfs_extattrctl = reiserfs_extattrctl, + .vfs_fhtovp = reiserfs_fhtovp, + //.vfs_quotactl = reiserfs_quotactl, + .vfs_root = reiserfs_root, + //.vfs_start = reiserfs_start, + .vfs_statfs = reiserfs_statfs, + //.vfs_sync = reiserfs_sync, + //.vfs_vget = reiserfs_vget, + .vfs_vptofh = reiserfs_vptofh, +}; + +VFS_SET(reiser_vfsops, reiserfs, VFCF_READONLY); diff --git a/sys/gnu/reiserfs/reiserfs_vnops.c b/sys/gnu/reiserfs/reiserfs_vnops.c new file mode 100644 index 000000000000..6e771d49b668 --- /dev/null +++ b/sys/gnu/reiserfs/reiserfs_vnops.c @@ -0,0 +1,353 @@ +/*- + * Copyright 2000 Hans Reiser + * See README for licensing and copyright details + * + * Ported to FreeBSD by Jean-Sébastien Pédron + * + * $FreeBSD$ + */ + +#include + +static vop_access_t reiserfs_access; +static vop_bmap_t reiserfs_bmap; +static vop_getattr_t reiserfs_getattr; +static vop_open_t reiserfs_open; +static vop_pathconf_t reiserfs_pathconf; +static vop_readlink_t reiserfs_readlink; +static vop_strategy_t reiserfs_strategy; + +/* Global vfs data structures for ReiserFS */ +struct vop_vector reiserfs_vnodeops = { + .vop_default = &default_vnodeops, + + .vop_access = reiserfs_access, + .vop_bmap = reiserfs_bmap, + .vop_cachedlookup = reiserfs_lookup, + .vop_getattr = reiserfs_getattr, + .vop_inactive = reiserfs_inactive, + .vop_lookup = vfs_cache_lookup, + .vop_open = reiserfs_open, + .vop_reclaim = reiserfs_reclaim, + .vop_read = reiserfs_read, + .vop_readdir = reiserfs_readdir, + .vop_readlink = reiserfs_readlink, + .vop_pathconf = reiserfs_pathconf, + .vop_strategy = reiserfs_strategy, +}; + +struct vop_vector reiserfs_specops = { + .vop_default = 
&default_vnodeops, + + .vop_access = reiserfs_access, + .vop_getattr = reiserfs_getattr, + .vop_inactive = reiserfs_inactive, + .vop_reclaim = reiserfs_reclaim, +}; + +/* ------------------------------------------------------------------- + * vnode operations + * -------------------------------------------------------------------*/ + +static int +reiserfs_access(struct vop_access_args *ap) +{ + int error; + struct vnode *vp = ap->a_vp; + struct reiserfs_node *ip = VTOI(vp); + mode_t mode = ap->a_mode; + + /* + * Disallow write attempts on read-only file systems; unless the file + * is a socket, fifo, or a block or character device resident on the + * file system. + */ + if (mode & VWRITE) { + switch (vp->v_type) { + case VDIR: + case VLNK: + case VREG: + if (vp->v_mount->mnt_flag & MNT_RDONLY) { + reiserfs_log(LOG_DEBUG, + "no write access (read-only fs)\n"); + return (EROFS); + } + break; + default: + break; + } + } + + /* If immutable bit set, nobody gets to write it. */ + if ((mode & VWRITE) && (ip->i_flags & (IMMUTABLE | SF_SNAPSHOT))) { + reiserfs_log(LOG_DEBUG, "no write access (immutable)\n"); + return (EPERM); + } + + error = vaccess(vp->v_type, ip->i_mode, ip->i_uid, ip->i_gid, + ap->a_mode, ap->a_cred, NULL); + return (error); +} + +static int +reiserfs_getattr(struct vop_getattr_args *ap) +{ + struct vnode *vp = ap->a_vp; + struct vattr *vap = ap->a_vap; + struct reiserfs_node *ip = VTOI(vp); + + vap->va_fsid = dev2udev(ip->i_dev); + vap->va_fileid = ip->i_number; + vap->va_mode = ip->i_mode & ~S_IFMT; + vap->va_nlink = ip->i_nlink; + vap->va_uid = ip->i_uid; + vap->va_gid = ip->i_gid; + //XXX vap->va_rdev = ip->i_rdev; + vap->va_size = ip->i_size; + vap->va_atime = ip->i_atime; + vap->va_mtime = ip->i_mtime; + vap->va_ctime = ip->i_ctime; + vap->va_flags = ip->i_flags; + vap->va_gen = ip->i_generation; + vap->va_blocksize = vp->v_mount->mnt_stat.f_iosize; + vap->va_bytes = dbtob((u_quad_t)ip->i_blocks); + vap->va_type = vp->v_type; + //XXX 
vap->va_filerev = ip->i_modrev; + + return (0); +} + +/* Return POSIX pathconf information applicable to ReiserFS filesystems */ +static int +reiserfs_pathconf(struct vop_pathconf_args *ap) +{ + switch (ap->a_name) { + case _PC_LINK_MAX: + *ap->a_retval = REISERFS_LINK_MAX; + return (0); + case _PC_NAME_MAX: + *ap->a_retval = + REISERFS_MAX_NAME(VTOI(ap->a_vp)->i_reiserfs->s_blocksize); + return (0); + case _PC_PATH_MAX: + *ap->a_retval = PATH_MAX; + return (0); + case _PC_PIPE_BUF: + *ap->a_retval = PIPE_BUF; + return (0); + case _PC_CHOWN_RESTRICTED: + *ap->a_retval = 1; + return (0); + case _PC_NO_TRUNC: + *ap->a_retval = 1; + return (0); + default: + return (EINVAL); + } +} + +static int +reiserfs_open(struct vop_open_args *ap) +{ + /* Files marked append-only must be opened for appending. */ + if ((VTOI(ap->a_vp)->i_flags & APPEND) && + (ap->a_mode & (FWRITE | O_APPEND)) == FWRITE) + return (EPERM); + + vnode_create_vobject(ap->a_vp, VTOI(ap->a_vp)->i_size, ap->a_td); + + return (0); +} + +/* Return target name of a symbolic link */ +static int +reiserfs_readlink(struct vop_readlink_args *ap) +{ + struct vnode *vp = ap->a_vp; + + reiserfs_log(LOG_DEBUG, "redirect to VOP_READ()\n"); + return (VOP_READ(vp, ap->a_uio, 0, ap->a_cred)); +} + +/* Bmap converts the logical block number of a file to its physical + * block number on the disk. 
*/ +static int +reiserfs_bmap(ap) + struct vop_bmap_args /* { + struct vnode *a_vp; + daddr_t a_bn; + struct bufobj **a_bop; + daddr_t *a_bnp; + int *a_runp; + int *a_runb; + } */ *ap; +{ + daddr_t blkno; + struct buf *bp; + struct cpu_key key; + struct item_head *ih; + + struct vnode *vp = ap->a_vp; + struct reiserfs_node *ip = VTOI(vp); + struct reiserfs_sb_info *sbi = ip->i_reiserfs; + INITIALIZE_PATH(path); + + /* Prepare the key to look for the 'block'-th block of file + * (XXX we suppose that statfs.f_iosize == sbi->s_blocksize) */ + make_cpu_key(&key, ip, (off_t)ap->a_bn * sbi->s_blocksize + 1, + TYPE_ANY, 3); + + /* Search item */ + if (search_for_position_by_key(sbi, &key, &path) != POSITION_FOUND) { + reiserfs_log(LOG_DEBUG, "position not found\n"); + pathrelse(&path); + return (ENOENT); + } + + bp = get_last_bp(&path); + ih = get_ih(&path); + + if (is_indirect_le_ih(ih)) { + /* Indirect item can be read by the underlying layer, instead of + * VOP_STRATEGY. */ + int i; + uint32_t *ind_item = (uint32_t *)B_I_PITEM(bp, ih); + reiserfs_log(LOG_DEBUG, "found an INDIRECT item\n"); + blkno = get_block_num(ind_item, path.pos_in_item); + + /* Read-ahead */ + if (ap->a_runb) { + uint32_t count = 0; + for (i = path.pos_in_item - 1; i >= 0; --i) { + if ((blkno - get_block_num(ind_item, i)) != + count + 1) + break; + ++count; + } + + /* + * This count isn't expressed in DEV_BSIZE base but + * in fs' own block base + * (see sys/vm/vnode_pager.c:vnode_pager_addr()) + */ + *ap->a_runb = count; + reiserfs_log(LOG_DEBUG, + " read-ahead: %d blocks before\n", *ap->a_runb); + } + if (ap->a_runp) { + uint32_t count = 0; + /* + * ih is an uint32_t array, that's why we use + * its length (in bytes) divided by 4 to know + * the number of items + */ + for (i = path.pos_in_item + 1; + i < ih_item_len(ih) / 4; ++i) { + if ((get_block_num(ind_item, i) - blkno) != + count + 1) + break; + ++count; + } + + /* + * This count isn't expressed in DEV_BSIZE base but + * in fs' own block 
base + * (see sys/vm/vnode_pager.c:vnode_pager_addr()) */ + *ap->a_runp = count; + reiserfs_log(LOG_DEBUG, + " read-ahead: %d blocks after\n", *ap->a_runp); + } + + /* Indirect items can be read using the device VOP_STRATEGY */ + if (ap->a_bop) + *ap->a_bop = &VTOI(ap->a_vp)->i_devvp->v_bufobj; + + /* Convert the block number into DEV_BSIZE base */ + blkno *= btodb(sbi->s_blocksize); + } else { + /* + * Direct item are not DEV_BSIZE aligned, VOP_STRATEGY will + * have to handle this case specifically + */ + reiserfs_log(LOG_DEBUG, "found a DIRECT item\n"); + blkno = ap->a_bn; + + if (ap->a_runp) + *ap->a_runp = 0; + if (ap->a_runb) + *ap->a_runb = 0; + + /* Direct item must be read by reiserfs_strategy */ + if (ap->a_bop) + *ap->a_bop = &vp->v_bufobj; + } + + if (ap->a_bnp) + *ap->a_bnp = blkno; + + pathrelse(&path); + + if (ap->a_bnp) { + reiserfs_log(LOG_DEBUG, "logical block: %ju (%ju)," + " physical block: %ju (%ju)\n", + (intmax_t)ap->a_bn, + (intmax_t)(ap->a_bn / btodb(sbi->s_blocksize)), + (intmax_t)*ap->a_bnp, + (intmax_t)(*ap->a_bnp / btodb(sbi->s_blocksize))); + } + + return (0); +} + +/* Does simply the same as reiserfs_read. It's called when reiserfs_bmap find + * an direct item. 
*/ +static int +reiserfs_strategy(struct vop_strategy_args /* { + struct vnode *a_vp; + struct buf *a_bp; + } */ *ap) +{ + int error; + struct uio auio; + struct iovec aiov; + struct reiserfs_node *ip; + struct buf *bp = ap->a_bp; + struct vnode *vp = ap->a_vp; + + reiserfs_log(LOG_DEBUG, "logical block: %ju," + " physical block: %ju\n", (intmax_t)bp->b_lblkno, + (intmax_t)bp->b_blkno); + + ip = VTOI(vp); + + if (bp->b_iocmd == BIO_READ) { + /* Prepare the uio structure */ + reiserfs_log(LOG_DEBUG, "prepare uio structure\n"); + aiov.iov_base = bp->b_data; + aiov.iov_len = MIN(bp->b_bcount, ip->i_size); + reiserfs_log(LOG_DEBUG, " vector length: %ju\n", + (intmax_t)aiov.iov_len); + + auio.uio_iov = &aiov; + auio.uio_iovcnt = 1; + auio.uio_offset = 0; + auio.uio_rw = UIO_READ; + auio.uio_segflg = UIO_SYSSPACE; + auio.uio_td = curthread; + auio.uio_resid = bp->b_bcount; + reiserfs_log(LOG_DEBUG, " buffer length: %u\n", + auio.uio_resid); + + reiserfs_log(LOG_DEBUG, "reading block #%ju\n", + (intmax_t)bp->b_blkno); + error = reiserfs_get_block(ip, bp->b_blkno, 0, &auio); + } else { + /* No write support yet */ + error = (EOPNOTSUPP); + bp->b_error = error; + bp->b_ioflags |= BIO_ERROR; + } + + bufdone(bp); + return (error); +}