/* * btrfs_disk_key is in disk byte order. struct btrfs_key is always * in cpu native order. Otherwise they are identical and their sizes * should be the same (ie both packed) */ structbtrfs_disk_key { __le64 objectid; __u8 type; __le64 offset; } __attribute__ ((__packed__));
#ifdef __LITTLE_ENDIAN /* * Compare two keys, on little-endian the disk order is same as CPU order and * we can avoid the conversion. */ staticintcomp_keys(conststruct btrfs_disk_key *disk_key, conststruct btrfs_key *k2) { conststructbtrfs_key *k1 = (conststruct btrfs_key *)disk_key; return btrfs_comp_cpu_keys(k1, k2); } #endif
int __pure btrfs_comp_cpu_keys(conststruct btrfs_key *k1, conststruct btrfs_key *k2) { if (k1->objectid > k2->objectid) return1; if (k1->objectid < k2->objectid) return-1; if (k1->type > k2->type) return1; if (k1->type < k2->type) return-1; if (k1->offset > k2->offset) return1; if (k1->offset < k2->offset) return-1; return0; }
/* * Every tree block (leaf or node) starts with this header. */ structbtrfs_header { /* These first four must match the super block */ __u8 csum[BTRFS_CSUM_SIZE]; /* FS specific uuid */ __u8 fsid[BTRFS_FSID_SIZE]; /* Which block this node is supposed to live in */ __le64 bytenr; __le64 flags;
/* Allowed to be different from the super from here on down */ __u8 chunk_tree_uuid[BTRFS_UUID_SIZE]; __le64 generation; __le64 owner; __le32 nritems; __u8 level; } __attribute__ ((__packed__));
/* * A leaf is full of items. offset and size tell us where to find the item in * the leaf (relative to the start of the data area) */ structbtrfs_item { structbtrfs_disk_keykey; __le32 offset; __le32 size; } __attribute__ ((__packed__));
/* * Leaves have an item area and a data area: * [item0, item1....itemN] [free space] [dataN...data1, data0] * * The data is separate from the items to get the keys closer together during * searches. */ structbtrfs_leaf { structbtrfs_headerheader; structbtrfs_itemitems[]; } __attribute__ ((__packed__));
/* * All non-leaf blocks are nodes, they hold only keys and pointers to other * blocks. */ structbtrfs_key_ptr { structbtrfs_disk_keykey; __le64 blockptr; __le64 generation; } __attribute__ ((__packed__));
/* * in ram representation of the tree. extent_root is used for all allocations * and for the extent tree extent_root root. */ structbtrfs_root { structrb_noderb_node;
/* * btrfs_paths remember the path taken from the root down to the leaf. * level 0 is always the leaf, and nodes[1...BTRFS_MAX_LEVEL] will point * to any other levels that are present. * * The slots array records the index of the item or block pointer * used while walking the tree. */ structbtrfs_path { structextent_buffer *nodes[BTRFS_MAX_LEVEL]; int slots[BTRFS_MAX_LEVEL]; u8 locks[BTRFS_MAX_LEVEL]; u8 reada; u8 lowest_level;
/* * btrfs_search_slot - look for a key in a tree and perform necessary * modifications to preserve tree invariants. * * @trans: Handle of transaction, used when modifying the tree * @p: Holds all btree nodes along the search path * @root: The root node of the tree * @key: The key we are looking for * @ins_len: Indicates purpose of search: * >0 for inserts it's size of item inserted * <0 for deletions * 0 for plain searches, not modifying the tree * @cow: boolean should CoW operations be performed. * * If @key is found, 0 is returned and you can find the item in the leaf level * of the path (level 0) * * If @key isn't found, 1 is returned and the leaf level of the path (level 0) * points to the slot where it should be inserted */ intbtrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root *root, conststruct btrfs_key *key, struct btrfs_path *p, int ins_len, int cow)
/* * node level balancing, used to make sure nodes are in proper order for * item deletion. We balance from the top down, so we have to make sure * that a deletion won't leave a node completely empty later on. */ static noinline intbalance_level(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, int level) { /* ... 尝试从左/右兄弟节点移入 item,若节点为空则释放 ... */ if (btrfs_header_nritems(mid) == 1) { /* * we're not allowed to leave a node with one item in the * tree during a delete. A deletion from lower in the tree * could try to delete the only pointer in this node. * So, pull some keys from the left. */ wret = balance_node_right(trans, mid, left); } if (btrfs_header_nritems(mid) == 0) { btrfs_clear_buffer_dirty(trans, mid); btrfs_tree_unlock(mid); del_ptr(root, path, level + 1, pslot); root_sub_used(root, mid->len); btrfs_free_tree_block(trans, btrfs_root_id(root), mid, 0, 1); } /* ... */ }
/* Ensure we can see the FORCE_COW bit */ smp_mb__before_atomic();
/* * We do not need to cow a block if * 1) this block is not created or changed in this transaction; * 2) this block does not belong to TREE_RELOC tree; * 3) the root is not forced COW. * * What is forced COW: * when we create snapshot during committing the transaction, * after we've finished copying src root, we must COW the shared * block to ensure the metadata consistency. */ if (btrfs_header_generation(buf) == trans->transid && !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN) && !(root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID && btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC)) && !test_bit(BTRFS_ROOT_FORCE_COW, &root->state)) return0; return1; }
/* * does the dirty work in cow of a single block. The parent block (if * supplied) is updated to point to the new cow copy. The new buffer is marked * dirty and returned locked. */ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct extent_buffer *buf, struct extent_buffer *parent, int parent_slot, struct extent_buffer **cow_ret, u64 search_start, u64 empty_size, enum btrfs_lock_nesting nest) { /* ... */ level = btrfs_header_level(buf);
/* * check if the tree block can be shared by multiple trees */ intbtrfs_block_can_be_shared(struct btrfs_root *root, struct extent_buffer *buf) { /* * Tree blocks not in shareable trees and tree roots are never shared. * If a block was allocated after the last snapshot and the block was * not allocated by tree relocation, we know the block is not shared. */ if (test_bit(BTRFS_ROOT_SHAREABLE, &root->state) && buf != root->node && buf != root->commit_root && (btrfs_header_generation(buf) <= btrfs_root_last_snapshot(&root->root_item) || btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) return1; return0; }
No running transaction | V Transaction N [TRANS_STATE_RUNNING] | 新事务,所有写操作都可以加入 V Transaction N [TRANS_STATE_COMMIT_START] | 某个调用者触发了 commit,等待所有写者退出 V Transaction N [TRANS_STATE_COMMIT_DOING] | 执行重量级工作:运行 delayed refs,创建 pending snapshots,更新 qgroups V Transaction N [TRANS_STATE_UNBLOCKED] | 所有树已修改完毕,开始将脏页写回磁盘 | 此时 Transaction N+1 可以开始运行 V Transaction N [TRANS_STATE_SUPER_COMMITTED] | 超级块已写入(原子操作,此后文件系统一致) V Transaction N [TRANS_STATE_COMPLETED] 所有工作完成,事务结构体被释放
spin_lock(&fs_info->trans_lock); loop: /* The file system has been taken offline. No new transactions. */ if (BTRFS_FS_ERROR(fs_info)) { spin_unlock(&fs_info->trans_lock); return -EROFS; }