Btrfs: Add a write ahead tree log to optimize synchronous operations

File syncs and directory syncs are optimized by copying their
items into a special (copy-on-write) log tree.  There is one log tree per
subvolume and the btrfs super block points to a tree of log tree roots.

After a crash, items are copied out of the log tree and back into the
subvolume.  See tree-log.c for all the details.

Signed-off-by: Chris Mason <chris.mason@oracle.com>
This commit is contained in:
Chris Mason
2008-09-05 16:13:11 -04:00
parent a1b32a5932
commit e02119d5a7
17 changed files with 3408 additions and 205 deletions

View File

@@ -41,6 +41,7 @@
#include "async-thread.h"
#include "locking.h"
#include "ref-cache.h"
#include "tree-log.h"
#if 0
static int check_tree_block(struct btrfs_root *root, struct extent_buffer *buf)
@@ -694,6 +695,18 @@ struct extent_buffer *btrfs_find_create_tree_block(struct btrfs_root *root,
}
int btrfs_write_tree_block(struct extent_buffer *buf)
{
return btrfs_fdatawrite_range(buf->first_page->mapping, buf->start,
buf->start + buf->len - 1, WB_SYNC_NONE);
}
int btrfs_wait_tree_block_writeback(struct extent_buffer *buf)
{
return btrfs_wait_on_page_writeback_range(buf->first_page->mapping,
buf->start, buf->start + buf->len -1);
}
struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 bytenr,
u32 blocksize, u64 parent_transid)
{
@@ -732,15 +745,6 @@ int clean_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root,
return 0;
}
int wait_on_tree_block_writeback(struct btrfs_root *root,
struct extent_buffer *buf)
{
struct inode *btree_inode = root->fs_info->btree_inode;
wait_on_extent_buffer_writeback(&BTRFS_I(btree_inode)->io_tree,
buf);
return 0;
}
static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize,
u32 stripesize, struct btrfs_root *root,
struct btrfs_fs_info *fs_info,
@@ -771,6 +775,7 @@ static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize,
spin_lock_init(&root->node_lock);
spin_lock_init(&root->list_lock);
mutex_init(&root->objectid_mutex);
mutex_init(&root->log_mutex);
btrfs_leaf_ref_tree_init(&root->ref_tree_struct);
root->ref_tree = &root->ref_tree_struct;
@@ -809,11 +814,74 @@ static int find_and_setup_root(struct btrfs_root *tree_root,
return 0;
}
struct btrfs_root *btrfs_read_fs_root_no_radix(struct btrfs_fs_info *fs_info,
struct btrfs_key *location)
int btrfs_free_log_root_tree(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info)
{
struct extent_buffer *eb;
int ret;
if (!fs_info->log_root_tree)
return 0;
eb = fs_info->log_root_tree->node;
WARN_ON(btrfs_header_level(eb) != 0);
WARN_ON(btrfs_header_nritems(eb) != 0);
ret = btrfs_free_extent(trans, fs_info->tree_root,
eb->start, eb->len,
BTRFS_TREE_LOG_OBJECTID, 0, 0, 0, 1);
BUG_ON(ret);
free_extent_buffer(eb);
kfree(fs_info->log_root_tree);
fs_info->log_root_tree = NULL;
return 0;
}
int btrfs_init_log_root_tree(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info)
{
struct btrfs_root *root;
struct btrfs_root *tree_root = fs_info->tree_root;
root = kzalloc(sizeof(*root), GFP_NOFS);
if (!root)
return -ENOMEM;
__setup_root(tree_root->nodesize, tree_root->leafsize,
tree_root->sectorsize, tree_root->stripesize,
root, fs_info, BTRFS_TREE_LOG_OBJECTID);
root->root_key.objectid = BTRFS_TREE_LOG_OBJECTID;
root->root_key.type = BTRFS_ROOT_ITEM_KEY;
root->root_key.offset = BTRFS_TREE_LOG_OBJECTID;
root->ref_cows = 0;
root->node = btrfs_alloc_free_block(trans, root, root->leafsize,
BTRFS_TREE_LOG_OBJECTID,
0, 0, 0, 0, 0);
btrfs_set_header_nritems(root->node, 0);
btrfs_set_header_level(root->node, 0);
btrfs_set_header_bytenr(root->node, root->node->start);
btrfs_set_header_generation(root->node, trans->transid);
btrfs_set_header_owner(root->node, BTRFS_TREE_LOG_OBJECTID);
write_extent_buffer(root->node, root->fs_info->fsid,
(unsigned long)btrfs_header_fsid(root->node),
BTRFS_FSID_SIZE);
btrfs_mark_buffer_dirty(root->node);
btrfs_tree_unlock(root->node);
fs_info->log_root_tree = root;
return 0;
}
struct btrfs_root *btrfs_read_fs_root_no_radix(struct btrfs_root *tree_root,
struct btrfs_key *location)
{
struct btrfs_root *root;
struct btrfs_fs_info *fs_info = tree_root->fs_info;
struct btrfs_path *path;
struct extent_buffer *l;
u64 highest_inode;
@@ -863,11 +931,13 @@ out:
blocksize, 0);
BUG_ON(!root->node);
insert:
root->ref_cows = 1;
ret = btrfs_find_highest_inode(root, &highest_inode);
if (ret == 0) {
root->highest_inode = highest_inode;
root->last_inode_alloc = highest_inode;
if (location->objectid != BTRFS_TREE_LOG_OBJECTID) {
root->ref_cows = 1;
ret = btrfs_find_highest_inode(root, &highest_inode);
if (ret == 0) {
root->highest_inode = highest_inode;
root->last_inode_alloc = highest_inode;
}
}
return root;
}
@@ -907,7 +977,7 @@ struct btrfs_root *btrfs_read_fs_root_no_name(struct btrfs_fs_info *fs_info,
if (root)
return root;
root = btrfs_read_fs_root_no_radix(fs_info, location);
root = btrfs_read_fs_root_no_radix(fs_info->tree_root, location);
if (IS_ERR(root))
return root;
ret = radix_tree_insert(&fs_info->fs_roots_radix,
@@ -1250,16 +1320,18 @@ struct btrfs_root *open_ctree(struct super_block *sb,
u32 blocksize;
u32 stripesize;
struct buffer_head *bh;
struct btrfs_root *extent_root = kmalloc(sizeof(struct btrfs_root),
struct btrfs_root *extent_root = kzalloc(sizeof(struct btrfs_root),
GFP_NOFS);
struct btrfs_root *tree_root = kmalloc(sizeof(struct btrfs_root),
struct btrfs_root *tree_root = kzalloc(sizeof(struct btrfs_root),
GFP_NOFS);
struct btrfs_fs_info *fs_info = kzalloc(sizeof(*fs_info),
GFP_NOFS);
struct btrfs_root *chunk_root = kmalloc(sizeof(struct btrfs_root),
struct btrfs_root *chunk_root = kzalloc(sizeof(struct btrfs_root),
GFP_NOFS);
struct btrfs_root *dev_root = kmalloc(sizeof(struct btrfs_root),
struct btrfs_root *dev_root = kzalloc(sizeof(struct btrfs_root),
GFP_NOFS);
struct btrfs_root *log_tree_root;
int ret;
int err = -EINVAL;
@@ -1343,6 +1415,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,
mapping_set_gfp_mask(fs_info->btree_inode->i_mapping, GFP_NOFS);
mutex_init(&fs_info->trans_mutex);
mutex_init(&fs_info->tree_log_mutex);
mutex_init(&fs_info->drop_mutex);
mutex_init(&fs_info->alloc_mutex);
mutex_init(&fs_info->chunk_mutex);
@@ -1352,6 +1425,10 @@ struct btrfs_root *open_ctree(struct super_block *sb,
init_waitqueue_head(&fs_info->transaction_throttle);
init_waitqueue_head(&fs_info->transaction_wait);
init_waitqueue_head(&fs_info->async_submit_wait);
init_waitqueue_head(&fs_info->tree_log_wait);
atomic_set(&fs_info->tree_log_commit, 0);
atomic_set(&fs_info->tree_log_writers, 0);
fs_info->tree_log_transid = 0;
#if 0
ret = add_hasher(fs_info, "crc32c");
@@ -1532,7 +1609,26 @@ struct btrfs_root *open_ctree(struct super_block *sb,
if (!fs_info->transaction_kthread)
goto fail_cleaner;
if (btrfs_super_log_root(disk_super) != 0) {
u32 blocksize;
u64 bytenr = btrfs_super_log_root(disk_super);
blocksize =
btrfs_level_size(tree_root,
btrfs_super_log_root_level(disk_super));
log_tree_root = kzalloc(sizeof(struct btrfs_root),
GFP_NOFS);
__setup_root(nodesize, leafsize, sectorsize, stripesize,
log_tree_root, fs_info, BTRFS_TREE_LOG_OBJECTID);
log_tree_root->node = read_tree_block(tree_root, bytenr,
blocksize, 0);
ret = btrfs_recover_log_trees(log_tree_root);
BUG_ON(ret);
}
fs_info->last_trans_committed = btrfs_super_generation(disk_super);
return tree_root;
fail_cleaner: