From 65b4643d3babeb203fa9571d12fd5e1bf74d27b0 Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Mon, 6 Apr 2009 19:01:23 -0700 Subject: nilfs2: add inode and other major structures This adds the following common structures of the NILFS2 file system. * nilfs_inode_info structure: gives on-memory inode. * nilfs_sb_info structure: keeps per-mount state and a special inode for the ifile. This structure is attached to the super_block structure. * the_nilfs structure: keeps shared state and locks among a read/write mount and snapshot mounts. This keeps special inodes for the sufile, cpfile, dat, and another dat inode used during GC (gcdat). This also has a hash table of dummy inodes to cache disk blocks during GC (gcinodes). * nilfs_transaction_info structure: keeps per task state while nilfs is writing logs or doing indivisible inode or namespace operations. This structure is used to identify context during log making and store nest level of the lock which ensures atomicity of file system operations. Signed-off-by: Koji Sato Signed-off-by: Ryusuke Konishi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/nilfs2/nilfs.h | 323 ++++++++++++++++++++++++++++++++++++++++++++++++++ fs/nilfs2/sb.h | 102 ++++++++++++++++ fs/nilfs2/the_nilfs.h | 290 ++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 715 insertions(+) create mode 100644 fs/nilfs2/nilfs.h create mode 100644 fs/nilfs2/sb.h create mode 100644 fs/nilfs2/the_nilfs.h (limited to 'fs/nilfs2') diff --git a/fs/nilfs2/nilfs.h b/fs/nilfs2/nilfs.h new file mode 100644 index 00000000000..17458ad4a80 --- /dev/null +++ b/fs/nilfs2/nilfs.h @@ -0,0 +1,323 @@ +/* + * nilfs.h - NILFS local header file. + * + * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + * Written by Koji Sato + * Ryusuke Konishi + */ + +#ifndef _NILFS_H +#define _NILFS_H + +#include +#include +#include +#include +#include +#include "the_nilfs.h" +#include "sb.h" +#include "bmap.h" +#include "bmap_union.h" + +/* + * NILFS filesystem version + */ +#define NILFS_VERSION "2.0.5" + +/* + * nilfs inode data in memory + */ +struct nilfs_inode_info { + __u32 i_flags; + unsigned long i_state; /* Dynamic state flags */ + struct nilfs_bmap *i_bmap; + union nilfs_bmap_union i_bmap_union; + __u64 i_xattr; /* sector_t ??? */ + __u32 i_dtime; + __u32 i_dir_start_lookup; + __u64 i_cno; /* check point number for GC inode */ + struct address_space i_btnode_cache; + struct list_head i_dirty; /* List for connecting dirty files */ + +#ifdef CONFIG_NILFS_XATTR + /* + * Extended attributes can be read independently of the main file + * data. Taking i_sem even when reading would cause contention + * between readers of EAs and writers of regular file data, so + * instead we synchronize on xattr_sem when reading or changing + * EAs. + */ + struct rw_semaphore xattr_sem; +#endif +#ifdef CONFIG_NILFS_POSIX_ACL + struct posix_acl *i_acl; + struct posix_acl *i_default_acl; +#endif + struct buffer_head *i_bh; /* i_bh contains a new or dirty + disk inode */ + struct inode vfs_inode; +}; + +static inline struct nilfs_inode_info *NILFS_I(const struct inode *inode) +{ + return container_of(inode, struct nilfs_inode_info, vfs_inode); +} + +static inline struct nilfs_inode_info * +NILFS_BMAP_I(const struct nilfs_bmap *bmap) +{ + return container_of((union nilfs_bmap_union *)bmap, + struct nilfs_inode_info, + i_bmap_union); +} + +static inline struct inode *NILFS_BTNC_I(struct address_space *btnc) +{ + struct nilfs_inode_info *ii = + container_of(btnc, struct nilfs_inode_info, i_btnode_cache); + return &ii->vfs_inode; +} + +static inline struct inode *NILFS_AS_I(struct address_space *mapping) +{ + return (mapping->host) ? : + container_of(mapping, struct inode, i_data); +} + +/* + * Dynamic state flags of NILFS on-memory inode (i_state) + */ +enum { + NILFS_I_NEW = 0, /* Inode is newly created */ + NILFS_I_DIRTY, /* The file is dirty */ + NILFS_I_QUEUED, /* inode is in dirty_files list */ + NILFS_I_BUSY, /* inode is grabbed by a segment + constructor */ + NILFS_I_COLLECTED, /* All dirty blocks are collected */ + NILFS_I_UPDATED, /* The file has been written back */ + NILFS_I_INODE_DIRTY, /* write_inode is requested */ + NILFS_I_BMAP, /* has bmap and btnode_cache */ + NILFS_I_GCINODE, /* inode for GC, on memory only */ + NILFS_I_GCDAT, /* shadow DAT, on memory only */ +}; + +/* + * Macros to check inode numbers + */ +#define NILFS_MDT_INO_BITS \ + ((unsigned int)(1 << NILFS_DAT_INO | 1 << NILFS_CPFILE_INO | \ + 1 << NILFS_SUFILE_INO | 1 << NILFS_IFILE_INO | \ + 1 << NILFS_ATIME_INO | 1 << NILFS_SKETCH_INO)) + +#define NILFS_SYS_INO_BITS \ + ((unsigned int)(1 << NILFS_ROOT_INO) | NILFS_MDT_INO_BITS) + +#define NILFS_FIRST_INO(sb) (NILFS_SB(sb)->s_nilfs->ns_first_ino) + +#define NILFS_MDT_INODE(sb, ino) \ + ((ino) < NILFS_FIRST_INO(sb) && (NILFS_MDT_INO_BITS & (1 << (ino)))) +#define NILFS_VALID_INODE(sb, ino) \ + ((ino) >= NILFS_FIRST_INO(sb) || (NILFS_SYS_INO_BITS & (1 << (ino)))) + +/** + * struct nilfs_transaction_info: context information for synchronization + * @ti_magic: Magic number + * @ti_save: Backup of journal_info field of task_struct + * @ti_flags: Flags + * @ti_count: Nest level + * @ti_garbage: List of inode to be put when releasing semaphore + */ +struct nilfs_transaction_info { + u32 ti_magic; + void *ti_save; + /* This should never used. If this happens, + one of other filesystems has a bug. */ + unsigned short ti_flags; + unsigned short ti_count; + struct list_head ti_garbage; +}; + +/* ti_magic */ +#define NILFS_TI_MAGIC 0xd9e392fb + +/* ti_flags */ +#define NILFS_TI_DYNAMIC_ALLOC 0x0001 /* Allocated from slab */ +#define NILFS_TI_SYNC 0x0002 /* Force to construct segment at the + end of transaction. */ +#define NILFS_TI_GC 0x0004 /* GC context */ +#define NILFS_TI_COMMIT 0x0008 /* Change happened or not */ +#define NILFS_TI_WRITER 0x0010 /* Constructor context */ + + +int nilfs_transaction_begin(struct super_block *, + struct nilfs_transaction_info *, int); +int nilfs_transaction_end(struct super_block *, int); + +static inline void nilfs_set_transaction_flag(unsigned int flag) +{ + struct nilfs_transaction_info *ti = current->journal_info; + + BUG_ON(!ti); + ti->ti_flags |= flag; +} + +static inline int nilfs_test_transaction_flag(unsigned int flag) +{ + struct nilfs_transaction_info *ti = current->journal_info; + + if (ti == NULL || ti->ti_magic != NILFS_TI_MAGIC) + return 0; + return !!(ti->ti_flags & flag); +} + +static inline int nilfs_doing_gc(void) +{ + return nilfs_test_transaction_flag(NILFS_TI_GC); +} + +static inline int nilfs_doing_construction(void) +{ + return nilfs_test_transaction_flag(NILFS_TI_WRITER); +} + +static inline struct inode *nilfs_dat_inode(const struct the_nilfs *nilfs) +{ + return nilfs_doing_gc() ? nilfs->ns_gc_dat : nilfs->ns_dat; +} + +/* + * function prototype + */ +#ifdef CONFIG_NILFS_POSIX_ACL +#error "NILFS: not yet supported POSIX ACL" +extern int nilfs_permission(struct inode *, int, struct nameidata *); +extern int nilfs_acl_chmod(struct inode *); +extern int nilfs_init_acl(struct inode *, struct inode *); +#else +#define nilfs_permission NULL + +static inline int nilfs_acl_chmod(struct inode *inode) +{ + return 0; +} + +static inline int nilfs_init_acl(struct inode *inode, struct inode *dir) +{ + inode->i_mode &= ~current_umask(); + return 0; +} +#endif + +#define NILFS_ATIME_DISABLE + +/* dir.c */ +extern int nilfs_add_link(struct dentry *, struct inode *); +extern ino_t nilfs_inode_by_name(struct inode *, struct dentry *); +extern int nilfs_make_empty(struct inode *, struct inode *); +extern struct nilfs_dir_entry * +nilfs_find_entry(struct inode *, struct dentry *, struct page **); +extern int nilfs_delete_entry(struct nilfs_dir_entry *, struct page *); +extern int nilfs_empty_dir(struct inode *); +extern struct nilfs_dir_entry *nilfs_dotdot(struct inode *, struct page **); +extern void nilfs_set_link(struct inode *, struct nilfs_dir_entry *, + struct page *, struct inode *); + +/* file.c */ +extern int nilfs_sync_file(struct file *, struct dentry *, int); + +/* ioctl.c */ +int nilfs_ioctl(struct inode *, struct file *, unsigned int, unsigned long); +long nilfs_compat_ioctl(struct file *, unsigned int, unsigned long); +int nilfs_ioctl_prepare_clean_segments(struct the_nilfs *, void __user *); + +/* inode.c */ +extern struct inode *nilfs_new_inode(struct inode *, int); +extern void nilfs_free_inode(struct inode *); +extern int nilfs_get_block(struct inode *, sector_t, struct buffer_head *, int); +extern void nilfs_set_inode_flags(struct inode *); +extern int nilfs_read_inode_common(struct inode *, struct nilfs_inode *); +extern void nilfs_write_inode_common(struct inode *, struct nilfs_inode *, int); +extern struct inode *nilfs_iget(struct super_block *, unsigned long); +extern void nilfs_update_inode(struct inode *, struct buffer_head *); +extern void nilfs_truncate(struct inode *); +extern void nilfs_delete_inode(struct inode *); +extern int nilfs_setattr(struct dentry *, struct iattr *); +extern int nilfs_load_inode_block(struct nilfs_sb_info *, struct inode *, + struct buffer_head **); +extern int nilfs_inode_dirty(struct inode *); +extern int nilfs_set_file_dirty(struct nilfs_sb_info *, struct inode *, + unsigned); +extern int nilfs_mark_inode_dirty(struct inode *); +extern void nilfs_dirty_inode(struct inode *); + +/* namei.c */ +extern struct dentry *nilfs_get_parent(struct dentry *); + +/* super.c */ +extern struct inode *nilfs_alloc_inode(struct super_block *); +extern void nilfs_destroy_inode(struct inode *); +extern void nilfs_error(struct super_block *, const char *, const char *, ...) + __attribute__ ((format (printf, 3, 4))); +extern void nilfs_warning(struct super_block *, const char *, const char *, ...) + __attribute__ ((format (printf, 3, 4))); +extern struct nilfs_super_block * +nilfs_load_super_block(struct super_block *, struct buffer_head **); +extern struct nilfs_super_block * +nilfs_reload_super_block(struct super_block *, struct buffer_head **, int); +extern int nilfs_store_magic_and_option(struct super_block *, + struct nilfs_super_block *, char *); +extern void nilfs_update_last_segment(struct nilfs_sb_info *, int); +extern int nilfs_commit_super(struct nilfs_sb_info *); +extern int nilfs_attach_checkpoint(struct nilfs_sb_info *, __u64); +extern void nilfs_detach_checkpoint(struct nilfs_sb_info *); + +/* gcinode.c */ +int nilfs_gccache_submit_read_data(struct inode *, sector_t, sector_t, __u64, + struct buffer_head **); +int nilfs_gccache_submit_read_node(struct inode *, sector_t, __u64, + struct buffer_head **); +int nilfs_gccache_wait_and_mark_dirty(struct buffer_head *); +int nilfs_init_gccache(struct the_nilfs *); +void nilfs_destroy_gccache(struct the_nilfs *); +void nilfs_clear_gcinode(struct inode *); +struct inode *nilfs_gc_iget(struct the_nilfs *, ino_t, __u64); +void nilfs_remove_all_gcinode(struct the_nilfs *); + +/* gcdat.c */ +int nilfs_init_gcdat_inode(struct the_nilfs *); +void nilfs_commit_gcdat_inode(struct the_nilfs *); +void nilfs_clear_gcdat_inode(struct the_nilfs *); + +/* + * Inodes and files operations + */ +extern struct file_operations nilfs_dir_operations; +extern struct inode_operations nilfs_file_inode_operations; +extern struct file_operations nilfs_file_operations; +extern struct address_space_operations nilfs_aops; +extern struct inode_operations nilfs_dir_inode_operations; +extern struct inode_operations nilfs_special_inode_operations; +extern struct inode_operations nilfs_symlink_inode_operations; + +/* + * filesystem type + */ +extern struct file_system_type nilfs_fs_type; + + +#endif /* _NILFS_H */ diff --git a/fs/nilfs2/sb.h b/fs/nilfs2/sb.h new file mode 100644 index 00000000000..adccd4fc654 --- /dev/null +++ b/fs/nilfs2/sb.h @@ -0,0 +1,102 @@ +/* + * sb.h - NILFS on-memory super block structure. + * + * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + * Written by Ryusuke Konishi + * + */ + +#ifndef _NILFS_SB +#define _NILFS_SB + +#include +#include + +/* + * Mount options + */ +struct nilfs_mount_options { + unsigned long mount_opt; + __u64 snapshot_cno; +}; + +struct the_nilfs; +struct nilfs_sc_info; + +/* + * NILFS super-block data in memory + */ +struct nilfs_sb_info { + /* Snapshot status */ + __u64 s_snapshot_cno; /* Checkpoint number */ + atomic_t s_inodes_count; + atomic_t s_blocks_count; /* Reserved (might be deleted) */ + + /* Mount options */ + unsigned long s_mount_opt; + uid_t s_resuid; + gid_t s_resgid; + + unsigned long s_interval; /* construction interval */ + unsigned long s_watermark; /* threshold of data amount + for the segment construction */ + + /* Fundamental members */ + struct super_block *s_super; /* reverse pointer to super_block */ + struct the_nilfs *s_nilfs; + struct list_head s_list; /* list head for nilfs->ns_supers */ + + /* Segment constructor */ + struct list_head s_dirty_files; /* dirty files list */ + struct nilfs_sc_info *s_sc_info; /* segment constructor info */ + spinlock_t s_inode_lock; /* Lock for the nilfs inode. + It covers s_dirty_files list */ + + /* Metadata files */ + struct inode *s_ifile; /* index file inode */ + + /* Inode allocator */ + spinlock_t s_next_gen_lock; + u32 s_next_generation; +}; + +static inline struct nilfs_sb_info *NILFS_SB(struct super_block *sb) +{ + return sb->s_fs_info; +} + +static inline struct nilfs_sc_info *NILFS_SC(struct nilfs_sb_info *sbi) +{ + return sbi->s_sc_info; +} + +/* + * Bit operations for the mount option + */ +#define nilfs_clear_opt(sbi, opt) \ + do { (sbi)->s_mount_opt &= ~NILFS_MOUNT_##opt; } while (0) +#define nilfs_set_opt(sbi, opt) \ + do { (sbi)->s_mount_opt |= NILFS_MOUNT_##opt; } while (0) +#define nilfs_test_opt(sbi, opt) ((sbi)->s_mount_opt & NILFS_MOUNT_##opt) +#define nilfs_write_opt(sbi, mask, opt) \ + do { (sbi)->s_mount_opt = \ + (((sbi)->s_mount_opt & ~NILFS_MOUNT_##mask) | \ + NILFS_MOUNT_##opt); \ + } while (0) + +#endif /* _NILFS_SB */ diff --git a/fs/nilfs2/the_nilfs.h b/fs/nilfs2/the_nilfs.h new file mode 100644 index 00000000000..dee8d83e054 --- /dev/null +++ b/fs/nilfs2/the_nilfs.h @@ -0,0 +1,290 @@ +/* + * the_nilfs.h - the_nilfs shared structure. + * + * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + * Written by Ryusuke Konishi + * + */ + +#ifndef _THE_NILFS_H +#define _THE_NILFS_H + +#include +#include +#include +#include +#include +#include "sb.h" + +/* the_nilfs struct */ +enum { + THE_NILFS_INIT = 0, /* Information from super_block is set */ + THE_NILFS_LOADED, /* Roll-back/roll-forward has done and + the latest checkpoint was loaded */ + THE_NILFS_DISCONTINUED, /* 'next' pointer chain has broken */ + THE_NILFS_COND_NONGC_WRITE, /* Condition to wake up cleanerd */ +}; + +/** + * struct the_nilfs - struct to supervise multiple nilfs mount points + * @ns_flags: flags + * @ns_count: reference count + * @ns_bdev: block device + * @ns_bdi: backing dev info + * @ns_writer: back pointer to writable nilfs_sb_info + * @ns_sem: semaphore for shared states + * @ns_writer_mutex: mutex protecting ns_writer attach/detach + * @ns_writer_refcount: number of referrers on ns_writer + * @ns_sbh: buffer head of the on-disk super block + * @ns_sbp: pointer to the super block data + * @ns_used_segments: list of full segments in volatile active state + * @ns_supers: list of nilfs super block structs + * @ns_seg_seq: segment sequence counter + * @ns_segnum: index number of the latest full segment. + * @ns_nextnum: index number of the full segment index to be used next + * @ns_pseg_offset: offset of next partial segment in the current full segment + * @ns_cno: next checkpoint number + * @ns_ctime: write time of the last segment + * @ns_nongc_ctime: write time of the last segment not for cleaner operation + * @ns_ndirtyblks: Number of dirty data blocks + * @ns_last_segment_lock: lock protecting fields for the latest segment + * @ns_last_pseg: start block number of the latest segment + * @ns_last_seq: sequence value of the latest segment + * @ns_last_cno: checkpoint number of the latest segment + * @ns_free_segments_count: counter of free segments + * @ns_segctor_sem: segment constructor semaphore + * @ns_dat: DAT file inode + * @ns_cpfile: checkpoint file inode + * @ns_sufile: segusage file inode + * @ns_gc_dat: shadow inode of the DAT file inode for GC + * @ns_gc_inodes: dummy inodes to keep live blocks + * @ns_gc_inodes_h: hash list to keep dummy inode holding live blocks + * @ns_cleanerd_wq: wait queue for cleanerd + * @ns_blocksize_bits: bit length of block size + * @ns_nsegments: number of segments in filesystem + * @ns_blocks_per_segment: number of blocks per segment + * @ns_r_segments_percentage: reserved segments percentage + * @ns_nrsvsegs: number of reserved segments + * @ns_first_data_block: block number of first data block + * @ns_inode_size: size of on-disk inode + * @ns_first_ino: first not-special inode number + * @ns_crc_seed: seed value of CRC32 calculation + */ +struct the_nilfs { + unsigned long ns_flags; + atomic_t ns_count; + + struct block_device *ns_bdev; + struct backing_dev_info *ns_bdi; + struct nilfs_sb_info *ns_writer; + struct rw_semaphore ns_sem; + struct mutex ns_writer_mutex; + atomic_t ns_writer_refcount; + + /* + * used for + * - loading the latest checkpoint exclusively. + * - allocating a new full segment. + * - protecting s_dirt in the super_block struct + * (see nilfs_write_super) and the following fields. + */ + struct buffer_head *ns_sbh; + struct nilfs_super_block *ns_sbp; + struct list_head ns_used_segments; + unsigned ns_mount_state; + struct list_head ns_supers; + + /* + * Following fields are dedicated to a writable FS-instance. + * Except for the period seeking checkpoint, code outside the segment + * constructor must lock a segment semaphore with transaction_begin() + * and transaction_end(), when accessing these fields. + * The writable FS-instance is sole during a lifetime of the_nilfs. + */ + u64 ns_seg_seq; + __u64 ns_segnum; + __u64 ns_nextnum; + unsigned long ns_pseg_offset; + __u64 ns_cno; + time_t ns_ctime; + time_t ns_nongc_ctime; + atomic_t ns_ndirtyblks; + + /* + * The following fields hold information on the latest partial segment + * written to disk with a super root. These fields are protected by + * ns_last_segment_lock. + */ + spinlock_t ns_last_segment_lock; + sector_t ns_last_pseg; + u64 ns_last_seq; + __u64 ns_last_cno; + unsigned long ns_free_segments_count; + + struct rw_semaphore ns_segctor_sem; + + /* + * Following fields are lock free except for the period before + * the_nilfs is initialized. + */ + struct inode *ns_dat; + struct inode *ns_cpfile; + struct inode *ns_sufile; + struct inode *ns_gc_dat; + + /* GC inode list and hash table head */ + struct list_head ns_gc_inodes; + struct hlist_head *ns_gc_inodes_h; + + /* cleanerd */ + wait_queue_head_t ns_cleanerd_wq; + + /* Disk layout information (static) */ + unsigned int ns_blocksize_bits; + unsigned long ns_nsegments; + unsigned long ns_blocks_per_segment; + unsigned long ns_r_segments_percentage; + unsigned long ns_nrsvsegs; + unsigned long ns_first_data_block; + int ns_inode_size; + int ns_first_ino; + u32 ns_crc_seed; +}; + +#define NILFS_GCINODE_HASH_BITS 8 +#define NILFS_GCINODE_HASH_SIZE (1<ns_flags); \ +} \ +static inline void clear_nilfs_##name(struct the_nilfs *nilfs) \ +{ \ + clear_bit(THE_NILFS_##bit, &(nilfs)->ns_flags); \ +} \ +static inline int nilfs_##name(struct the_nilfs *nilfs) \ +{ \ + return test_bit(THE_NILFS_##bit, &(nilfs)->ns_flags); \ +} + +THE_NILFS_FNS(INIT, init) +THE_NILFS_FNS(LOADED, loaded) +THE_NILFS_FNS(DISCONTINUED, discontinued) +THE_NILFS_FNS(COND_NONGC_WRITE, cond_nongc_write) + +void nilfs_set_last_segment(struct the_nilfs *, sector_t, u64, __u64); +struct the_nilfs *alloc_nilfs(struct block_device *); +void put_nilfs(struct the_nilfs *); +int init_nilfs(struct the_nilfs *, struct nilfs_sb_info *, char *); +int load_nilfs(struct the_nilfs *, struct nilfs_sb_info *); +int nilfs_count_free_blocks(struct the_nilfs *, sector_t *); +void nilfs_dispose_used_segments(struct the_nilfs *); +int nilfs_checkpoint_is_mounted(struct the_nilfs *, __u64, int); +int nilfs_near_disk_full(struct the_nilfs *); + + +static inline void get_nilfs(struct the_nilfs *nilfs) +{ + /* Caller must have at least one reference of the_nilfs. */ + atomic_inc(&nilfs->ns_count); +} + +static inline struct nilfs_sb_info *nilfs_get_writer(struct the_nilfs *nilfs) +{ + if (atomic_inc_and_test(&nilfs->ns_writer_refcount)) + mutex_lock(&nilfs->ns_writer_mutex); + return nilfs->ns_writer; +} + +static inline void nilfs_put_writer(struct the_nilfs *nilfs) +{ + if (atomic_add_negative(-1, &nilfs->ns_writer_refcount)) + mutex_unlock(&nilfs->ns_writer_mutex); +} + +static inline void +nilfs_attach_writer(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi) +{ + mutex_lock(&nilfs->ns_writer_mutex); + nilfs->ns_writer = sbi; + mutex_unlock(&nilfs->ns_writer_mutex); +} + +static inline void +nilfs_detach_writer(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi) +{ + mutex_lock(&nilfs->ns_writer_mutex); + if (sbi == nilfs->ns_writer) + nilfs->ns_writer = NULL; + mutex_unlock(&nilfs->ns_writer_mutex); +} + +static inline void +nilfs_get_segment_range(struct the_nilfs *nilfs, __u64 segnum, + sector_t *seg_start, sector_t *seg_end) +{ + *seg_start = (sector_t)nilfs->ns_blocks_per_segment * segnum; + *seg_end = *seg_start + nilfs->ns_blocks_per_segment - 1; + if (segnum == 0) + *seg_start = nilfs->ns_first_data_block; +} + +static inline sector_t +nilfs_get_segment_start_blocknr(struct the_nilfs *nilfs, __u64 segnum) +{ + return (segnum == 0) ? nilfs->ns_first_data_block : + (sector_t)nilfs->ns_blocks_per_segment * segnum; +} + +static inline __u64 +nilfs_get_segnum_of_block(struct the_nilfs *nilfs, sector_t blocknr) +{ + sector_t segnum = blocknr; + + sector_div(segnum, nilfs->ns_blocks_per_segment); + return segnum; +} + +static inline void +nilfs_terminate_segment(struct the_nilfs *nilfs, sector_t seg_start, + sector_t seg_end) +{ + /* terminate the current full segment (used in case of I/O-error) */ + nilfs->ns_pseg_offset = seg_end - seg_start + 1; +} + +static inline void nilfs_shift_to_next_segment(struct the_nilfs *nilfs) +{ + /* move forward with a full segment */ + nilfs->ns_segnum = nilfs->ns_nextnum; + nilfs->ns_pseg_offset = 0; + nilfs->ns_seg_seq++; +} + +static inline __u64 nilfs_last_cno(struct the_nilfs *nilfs) +{ + __u64 cno; + + spin_lock(&nilfs->ns_last_segment_lock); + cno = nilfs->ns_last_cno; + spin_unlock(&nilfs->ns_last_segment_lock); + return cno; +} + +#endif /* _THE_NILFS_H */ -- cgit v1.2.3 From bdb265eae08db578e7cf5739be16f389d495fc75 Mon Sep 17 00:00:00 2001 From: Koji Sato Date: Mon, 6 Apr 2009 19:01:23 -0700 Subject: nilfs2: integrated block mapping This adds structures and operations for the block mapping (bmap for short). NILFS2 uses direct mappings for short files or B-tree based mappings for longer files. Every on-disk data block is held with inodes and managed through this block mapping. The nilfs_bmap structure and a set of functions here provide this capability to the NILFS2 inode. [penberg@cs.helsinki.fi: remove a bunch of bmap wrapper macros] [akpm@linux-foundation.org: coding-style fixes] Signed-off-by: Koji Sato Signed-off-by: Ryusuke Konishi Signed-off-by: Pekka Enberg Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/nilfs2/bmap.c | 783 +++++++++++++++++++++++++++++++++++++++++++++++++ fs/nilfs2/bmap.h | 244 +++++++++++++++ fs/nilfs2/bmap_union.h | 42 +++ 3 files changed, 1069 insertions(+) create mode 100644 fs/nilfs2/bmap.c create mode 100644 fs/nilfs2/bmap.h create mode 100644 fs/nilfs2/bmap_union.h (limited to 'fs/nilfs2') diff --git a/fs/nilfs2/bmap.c b/fs/nilfs2/bmap.c new file mode 100644 index 00000000000..6fe72addc9c --- /dev/null +++ b/fs/nilfs2/bmap.c @@ -0,0 +1,783 @@ +/* + * bmap.c - NILFS block mapping. + * + * Copyright (C) 2006-2008 Nippon Telegraph and Telephone Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + * Written by Koji Sato . + */ + +#include +#include +#include +#include "nilfs.h" +#include "bmap.h" +#include "sb.h" +#include "btnode.h" +#include "mdt.h" +#include "dat.h" +#include "alloc.h" + +int nilfs_bmap_lookup_at_level(struct nilfs_bmap *bmap, __u64 key, int level, + __u64 *ptrp) +{ + __u64 ptr; + int ret; + + down_read(&bmap->b_sem); + ret = (*bmap->b_ops->bop_lookup)(bmap, key, level, ptrp); + if (ret < 0) + goto out; + if (bmap->b_pops->bpop_translate != NULL) { + ret = (*bmap->b_pops->bpop_translate)(bmap, *ptrp, &ptr); + if (ret < 0) + goto out; + *ptrp = ptr; + } + + out: + up_read(&bmap->b_sem); + return ret; +} + + +/** + * nilfs_bmap_lookup - find a record + * @bmap: bmap + * @key: key + * @recp: pointer to record + * + * Description: nilfs_bmap_lookup() finds a record whose key matches @key in + * @bmap. + * + * Return Value: On success, 0 is returned and the record associated with @key + * is stored in the place pointed by @recp. On error, one of the following + * negative error codes is returned. + * + * %-EIO - I/O error. + * + * %-ENOMEM - Insufficient amount of memory available. + * + * %-ENOENT - A record associated with @key does not exist. + */ +int nilfs_bmap_lookup(struct nilfs_bmap *bmap, + unsigned long key, + unsigned long *recp) +{ + __u64 ptr; + int ret; + + /* XXX: use macro for level 1 */ + ret = nilfs_bmap_lookup_at_level(bmap, key, 1, &ptr); + if (recp != NULL) + *recp = ptr; + return ret; +} + +static int nilfs_bmap_do_insert(struct nilfs_bmap *bmap, __u64 key, __u64 ptr) +{ + __u64 keys[NILFS_BMAP_SMALL_HIGH + 1]; + __u64 ptrs[NILFS_BMAP_SMALL_HIGH + 1]; + int ret, n; + + if (bmap->b_ops->bop_check_insert != NULL) { + ret = (*bmap->b_ops->bop_check_insert)(bmap, key); + if (ret > 0) { + n = (*bmap->b_ops->bop_gather_data)( + bmap, keys, ptrs, NILFS_BMAP_SMALL_HIGH + 1); + if (n < 0) + return n; + ret = nilfs_btree_convert_and_insert( + bmap, key, ptr, keys, ptrs, n, + NILFS_BMAP_LARGE_LOW, NILFS_BMAP_LARGE_HIGH); + if (ret == 0) + bmap->b_u.u_flags |= NILFS_BMAP_LARGE; + + return ret; + } else if (ret < 0) + return ret; + } + + return (*bmap->b_ops->bop_insert)(bmap, key, ptr); +} + +/** + * nilfs_bmap_insert - insert a new key-record pair into a bmap + * @bmap: bmap + * @key: key + * @rec: record + * + * Description: nilfs_bmap_insert() inserts the new key-record pair specified + * by @key and @rec into @bmap. + * + * Return Value: On success, 0 is returned. On error, one of the following + * negative error codes is returned. + * + * %-EIO - I/O error. + * + * %-ENOMEM - Insufficient amount of memory available. + * + * %-EEXIST - A record associated with @key already exist. + */ +int nilfs_bmap_insert(struct nilfs_bmap *bmap, + unsigned long key, + unsigned long rec) +{ + int ret; + + down_write(&bmap->b_sem); + ret = nilfs_bmap_do_insert(bmap, key, rec); + up_write(&bmap->b_sem); + return ret; +} + +static int nilfs_bmap_do_delete(struct nilfs_bmap *bmap, __u64 key) +{ + __u64 keys[NILFS_BMAP_LARGE_LOW + 1]; + __u64 ptrs[NILFS_BMAP_LARGE_LOW + 1]; + int ret, n; + + if (bmap->b_ops->bop_check_delete != NULL) { + ret = (*bmap->b_ops->bop_check_delete)(bmap, key); + if (ret > 0) { + n = (*bmap->b_ops->bop_gather_data)( + bmap, keys, ptrs, NILFS_BMAP_LARGE_LOW + 1); + if (n < 0) + return n; + ret = nilfs_direct_delete_and_convert( + bmap, key, keys, ptrs, n, + NILFS_BMAP_SMALL_LOW, NILFS_BMAP_SMALL_HIGH); + if (ret == 0) + bmap->b_u.u_flags &= ~NILFS_BMAP_LARGE; + + return ret; + } else if (ret < 0) + return ret; + } + + return (*bmap->b_ops->bop_delete)(bmap, key); +} + +int nilfs_bmap_last_key(struct nilfs_bmap *bmap, unsigned long *key) +{ + __u64 lastkey; + int ret; + + down_read(&bmap->b_sem); + ret = (*bmap->b_ops->bop_last_key)(bmap, &lastkey); + if (!ret) + *key = lastkey; + up_read(&bmap->b_sem); + return ret; +} + +/** + * nilfs_bmap_delete - delete a key-record pair from a bmap + * @bmap: bmap + * @key: key + * + * Description: nilfs_bmap_delete() deletes the key-record pair specified by + * @key from @bmap. + * + * Return Value: On success, 0 is returned. On error, one of the following + * negative error codes is returned. + * + * %-EIO - I/O error. + * + * %-ENOMEM - Insufficient amount of memory available. + * + * %-ENOENT - A record associated with @key does not exist. + */ +int nilfs_bmap_delete(struct nilfs_bmap *bmap, unsigned long key) +{ + int ret; + + down_write(&bmap->b_sem); + ret = nilfs_bmap_do_delete(bmap, key); + up_write(&bmap->b_sem); + return ret; +} + +static int nilfs_bmap_do_truncate(struct nilfs_bmap *bmap, unsigned long key) +{ + __u64 lastkey; + int ret; + + ret = (*bmap->b_ops->bop_last_key)(bmap, &lastkey); + if (ret < 0) { + if (ret == -ENOENT) + ret = 0; + return ret; + } + + while (key <= lastkey) { + ret = nilfs_bmap_do_delete(bmap, lastkey); + if (ret < 0) + return ret; + ret = (*bmap->b_ops->bop_last_key)(bmap, &lastkey); + if (ret < 0) { + if (ret == -ENOENT) + ret = 0; + return ret; + } + } + return 0; +} + +/** + * nilfs_bmap_truncate - truncate a bmap to a specified key + * @bmap: bmap + * @key: key + * + * Description: nilfs_bmap_truncate() removes key-record pairs whose keys are + * greater than or equal to @key from @bmap. + * + * Return Value: On success, 0 is returned. On error, one of the following + * negative error codes is returned. + * + * %-EIO - I/O error. + * + * %-ENOMEM - Insufficient amount of memory available. + */ +int nilfs_bmap_truncate(struct nilfs_bmap *bmap, unsigned long key) +{ + int ret; + + down_write(&bmap->b_sem); + ret = nilfs_bmap_do_truncate(bmap, key); + up_write(&bmap->b_sem); + return ret; +} + +/** + * nilfs_bmap_clear - free resources a bmap holds + * @bmap: bmap + * + * Description: nilfs_bmap_clear() frees resources associated with @bmap. + */ +void nilfs_bmap_clear(struct nilfs_bmap *bmap) +{ + down_write(&bmap->b_sem); + if (bmap->b_ops->bop_clear != NULL) + (*bmap->b_ops->bop_clear)(bmap); + up_write(&bmap->b_sem); +} + +/** + * nilfs_bmap_propagate - propagate dirty state + * @bmap: bmap + * @bh: buffer head + * + * Description: nilfs_bmap_propagate() marks the buffers that directly or + * indirectly refer to the block specified by @bh dirty. + * + * Return Value: On success, 0 is returned. On error, one of the following + * negative error codes is returned. + * + * %-EIO - I/O error. + * + * %-ENOMEM - Insufficient amount of memory available. + */ +int nilfs_bmap_propagate(struct nilfs_bmap *bmap, struct buffer_head *bh) +{ + int ret; + + down_write(&bmap->b_sem); + ret = (*bmap->b_ops->bop_propagate)(bmap, bh); + up_write(&bmap->b_sem); + return ret; +} + +/** + * nilfs_bmap_lookup_dirty_buffers - + * @bmap: bmap + * @listp: pointer to buffer head list + */ +void nilfs_bmap_lookup_dirty_buffers(struct nilfs_bmap *bmap, + struct list_head *listp) +{ + if (bmap->b_ops->bop_lookup_dirty_buffers != NULL) + (*bmap->b_ops->bop_lookup_dirty_buffers)(bmap, listp); +} + +/** + * nilfs_bmap_assign - assign a new block number to a block + * @bmap: bmap + * @bhp: pointer to buffer head + * @blocknr: block number + * @binfo: block information + * + * Description: nilfs_bmap_assign() assigns the block number @blocknr to the + * buffer specified by @bh. + * + * Return Value: On success, 0 is returned and the buffer head of a newly + * create buffer and the block information associated with the buffer are + * stored in the place pointed by @bh and @binfo, respectively. On error, one + * of the following negative error codes is returned. + * + * %-EIO - I/O error. + * + * %-ENOMEM - Insufficient amount of memory available. + */ +int nilfs_bmap_assign(struct nilfs_bmap *bmap, + struct buffer_head **bh, + unsigned long blocknr, + union nilfs_binfo *binfo) +{ + int ret; + + down_write(&bmap->b_sem); + ret = (*bmap->b_ops->bop_assign)(bmap, bh, blocknr, binfo); + up_write(&bmap->b_sem); + return ret; +} + +/** + * nilfs_bmap_mark - mark block dirty + * @bmap: bmap + * @key: key + * @level: level + * + * Description: nilfs_bmap_mark() marks the block specified by @key and @level + * as dirty. + * + * Return Value: On success, 0 is returned. On error, one of the following + * negative error codes is returned. + * + * %-EIO - I/O error. + * + * %-ENOMEM - Insufficient amount of memory available. + */ +int nilfs_bmap_mark(struct nilfs_bmap *bmap, __u64 key, int level) +{ + int ret; + + if (bmap->b_ops->bop_mark == NULL) + return 0; + + down_write(&bmap->b_sem); + ret = (*bmap->b_ops->bop_mark)(bmap, key, level); + up_write(&bmap->b_sem); + return ret; +} + +/** + * nilfs_bmap_test_and_clear_dirty - test and clear a bmap dirty state + * @bmap: bmap + * + * Description: nilfs_test_and_clear() is the atomic operation to test and + * clear the dirty state of @bmap. + * + * Return Value: 1 is returned if @bmap is dirty, or 0 if clear. + */ +int nilfs_bmap_test_and_clear_dirty(struct nilfs_bmap *bmap) +{ + int ret; + + down_write(&bmap->b_sem); + ret = nilfs_bmap_dirty(bmap); + nilfs_bmap_clear_dirty(bmap); + up_write(&bmap->b_sem); + return ret; +} + + +/* + * Internal use only + */ + +void nilfs_bmap_add_blocks(const struct nilfs_bmap *bmap, int n) +{ + inode_add_bytes(bmap->b_inode, (1 << bmap->b_inode->i_blkbits) * n); + if (NILFS_MDT(bmap->b_inode)) + nilfs_mdt_mark_dirty(bmap->b_inode); + else + mark_inode_dirty(bmap->b_inode); +} + +void nilfs_bmap_sub_blocks(const struct nilfs_bmap *bmap, int n) +{ + inode_sub_bytes(bmap->b_inode, (1 << bmap->b_inode->i_blkbits) * n); + if (NILFS_MDT(bmap->b_inode)) + nilfs_mdt_mark_dirty(bmap->b_inode); + else + mark_inode_dirty(bmap->b_inode); +} + +int nilfs_bmap_get_block(const struct nilfs_bmap *bmap, __u64 ptr, + struct buffer_head **bhp) +{ + return nilfs_btnode_get(&NILFS_BMAP_I(bmap)->i_btnode_cache, + ptr, 0, bhp, 0); +} + +void nilfs_bmap_put_block(const struct nilfs_bmap *bmap, + struct buffer_head *bh) +{ + brelse(bh); +} + +int nilfs_bmap_get_new_block(const struct nilfs_bmap *bmap, __u64 ptr, + struct buffer_head **bhp) +{ + int ret; + + ret = nilfs_btnode_get(&NILFS_BMAP_I(bmap)->i_btnode_cache, + ptr, 0, bhp, 1); + if (ret < 0) + return ret; + set_buffer_nilfs_volatile(*bhp); + return 0; +} + +void nilfs_bmap_delete_block(const struct nilfs_bmap *bmap, + struct buffer_head *bh) +{ + nilfs_btnode_delete(bh); +} + +__u64 nilfs_bmap_data_get_key(const struct nilfs_bmap *bmap, + const struct buffer_head *bh) +{ + struct buffer_head *pbh; + __u64 key; + + key = page_index(bh->b_page) << (PAGE_CACHE_SHIFT - + bmap->b_inode->i_blkbits); + for (pbh = page_buffers(bh->b_page); pbh != bh; + pbh = pbh->b_this_page, key++); + + return key; +} + +__u64 nilfs_bmap_find_target_seq(const struct nilfs_bmap *bmap, __u64 key) +{ + __s64 diff; + + diff = key - bmap->b_last_allocated_key; + if ((nilfs_bmap_keydiff_abs(diff) < NILFS_INODE_BMAP_SIZE) && + (bmap->b_last_allocated_ptr != NILFS_BMAP_INVALID_PTR) && + (bmap->b_last_allocated_ptr + diff > 0)) + return bmap->b_last_allocated_ptr + diff; + else + return NILFS_BMAP_INVALID_PTR; +} + +static struct inode *nilfs_bmap_get_dat(const struct nilfs_bmap *bmap) +{ + return nilfs_dat_inode(NILFS_I_NILFS(bmap->b_inode)); +} + +#define NILFS_BMAP_GROUP_DIV 8 +__u64 nilfs_bmap_find_target_in_group(const struct nilfs_bmap *bmap) +{ + struct inode *dat = nilfs_bmap_get_dat(bmap); + unsigned long entries_per_group = nilfs_palloc_entries_per_group(dat); + unsigned long group = bmap->b_inode->i_ino / entries_per_group; + + return group * entries_per_group + + (bmap->b_inode->i_ino % NILFS_BMAP_GROUP_DIV) * + (entries_per_group / NILFS_BMAP_GROUP_DIV); +} + +static int nilfs_bmap_prepare_alloc_v(struct nilfs_bmap *bmap, + union nilfs_bmap_ptr_req *req) +{ + return nilfs_dat_prepare_alloc(nilfs_bmap_get_dat(bmap), &req->bpr_req); +} + +static void nilfs_bmap_commit_alloc_v(struct nilfs_bmap *bmap, + union nilfs_bmap_ptr_req *req) +{ + nilfs_dat_commit_alloc(nilfs_bmap_get_dat(bmap), &req->bpr_req); +} + +static void nilfs_bmap_abort_alloc_v(struct nilfs_bmap *bmap, + union nilfs_bmap_ptr_req *req) +{ + nilfs_dat_abort_alloc(nilfs_bmap_get_dat(bmap), &req->bpr_req); +} + +static int nilfs_bmap_prepare_start_v(struct nilfs_bmap *bmap, + union nilfs_bmap_ptr_req *req) +{ + return nilfs_dat_prepare_start(nilfs_bmap_get_dat(bmap), &req->bpr_req); +} + +static void nilfs_bmap_commit_start_v(struct nilfs_bmap *bmap, + union nilfs_bmap_ptr_req *req, + sector_t blocknr) +{ + nilfs_dat_commit_start(nilfs_bmap_get_dat(bmap), &req->bpr_req, + blocknr); +} + +static void nilfs_bmap_abort_start_v(struct nilfs_bmap *bmap, + union nilfs_bmap_ptr_req *req) +{ + nilfs_dat_abort_start(nilfs_bmap_get_dat(bmap), &req->bpr_req); +} + +static int nilfs_bmap_prepare_end_v(struct nilfs_bmap *bmap, + union nilfs_bmap_ptr_req *req) +{ + return nilfs_dat_prepare_end(nilfs_bmap_get_dat(bmap), &req->bpr_req); +} + +static void nilfs_bmap_commit_end_v(struct nilfs_bmap *bmap, + union nilfs_bmap_ptr_req *req) +{ + nilfs_dat_commit_end(nilfs_bmap_get_dat(bmap), &req->bpr_req, 0); +} + +static void nilfs_bmap_commit_end_vmdt(struct nilfs_bmap *bmap, + union nilfs_bmap_ptr_req *req) +{ + nilfs_dat_commit_end(nilfs_bmap_get_dat(bmap), &req->bpr_req, 1); +} + +static void nilfs_bmap_abort_end_v(struct nilfs_bmap *bmap, + union nilfs_bmap_ptr_req *req) +{ + nilfs_dat_abort_end(nilfs_bmap_get_dat(bmap), &req->bpr_req); +} + +int nilfs_bmap_move_v(const struct nilfs_bmap *bmap, __u64 vblocknr, + sector_t blocknr) +{ + return nilfs_dat_move(nilfs_bmap_get_dat(bmap), vblocknr, blocknr); +} + +int nilfs_bmap_mark_dirty(const struct nilfs_bmap *bmap, __u64 vblocknr) +{ + return nilfs_dat_mark_dirty(nilfs_bmap_get_dat(bmap), vblocknr); +} + +int nilfs_bmap_prepare_update(struct nilfs_bmap *bmap, + union nilfs_bmap_ptr_req *oldreq, + union nilfs_bmap_ptr_req *newreq) +{ + int ret; + + ret = (*bmap->b_pops->bpop_prepare_end_ptr)(bmap, oldreq); + if (ret < 0) + return ret; + ret = (*bmap->b_pops->bpop_prepare_alloc_ptr)(bmap, newreq); + if (ret < 0) + (*bmap->b_pops->bpop_abort_end_ptr)(bmap, oldreq); + + return ret; +} + +void nilfs_bmap_commit_update(struct nilfs_bmap *bmap, + union nilfs_bmap_ptr_req *oldreq, + union nilfs_bmap_ptr_req *newreq) +{ + (*bmap->b_pops->bpop_commit_end_ptr)(bmap, oldreq); + (*bmap->b_pops->bpop_commit_alloc_ptr)(bmap, newreq); +} + +void nilfs_bmap_abort_update(struct nilfs_bmap *bmap, + union nilfs_bmap_ptr_req *oldreq, + union nilfs_bmap_ptr_req *newreq) +{ + (*bmap->b_pops->bpop_abort_end_ptr)(bmap, oldreq); + (*bmap->b_pops->bpop_abort_alloc_ptr)(bmap, newreq); +} + +static int nilfs_bmap_translate_v(const struct nilfs_bmap *bmap, __u64 ptr, + __u64 *ptrp) +{ + sector_t blocknr; + int ret; + + ret = nilfs_dat_translate(nilfs_bmap_get_dat(bmap), ptr, &blocknr); + if (ret < 0) + return ret; + if (ptrp != NULL) + *ptrp = blocknr; + return 0; +} + +static int nilfs_bmap_prepare_alloc_p(struct nilfs_bmap *bmap, + union nilfs_bmap_ptr_req *req) +{ + /* ignore target ptr */ + req->bpr_ptr = bmap->b_last_allocated_ptr++; + return 0; +} + +static void nilfs_bmap_commit_alloc_p(struct nilfs_bmap *bmap, + union nilfs_bmap_ptr_req *req) +{ + /* do nothing */ +} + +static void nilfs_bmap_abort_alloc_p(struct nilfs_bmap *bmap, + union nilfs_bmap_ptr_req *req) +{ + bmap->b_last_allocated_ptr--; +} + +static const struct nilfs_bmap_ptr_operations nilfs_bmap_ptr_ops_v = { + .bpop_prepare_alloc_ptr = nilfs_bmap_prepare_alloc_v, + .bpop_commit_alloc_ptr = nilfs_bmap_commit_alloc_v, + .bpop_abort_alloc_ptr = nilfs_bmap_abort_alloc_v, + .bpop_prepare_start_ptr = nilfs_bmap_prepare_start_v, + .bpop_commit_start_ptr = nilfs_bmap_commit_start_v, + .bpop_abort_start_ptr = nilfs_bmap_abort_start_v, + .bpop_prepare_end_ptr = nilfs_bmap_prepare_end_v, + .bpop_commit_end_ptr = nilfs_bmap_commit_end_v, + .bpop_abort_end_ptr = nilfs_bmap_abort_end_v, + + .bpop_translate = nilfs_bmap_translate_v, +}; + +static const struct nilfs_bmap_ptr_operations nilfs_bmap_ptr_ops_vmdt = { + .bpop_prepare_alloc_ptr = nilfs_bmap_prepare_alloc_v, + .bpop_commit_alloc_ptr = nilfs_bmap_commit_alloc_v, + .bpop_abort_alloc_ptr = nilfs_bmap_abort_alloc_v, + .bpop_prepare_start_ptr = nilfs_bmap_prepare_start_v, + .bpop_commit_start_ptr = nilfs_bmap_commit_start_v, + .bpop_abort_start_ptr = nilfs_bmap_abort_start_v, + .bpop_prepare_end_ptr = nilfs_bmap_prepare_end_v, + .bpop_commit_end_ptr = nilfs_bmap_commit_end_vmdt, + .bpop_abort_end_ptr = nilfs_bmap_abort_end_v, + + .bpop_translate = nilfs_bmap_translate_v, +}; + +static const struct nilfs_bmap_ptr_operations nilfs_bmap_ptr_ops_p = { + .bpop_prepare_alloc_ptr = nilfs_bmap_prepare_alloc_p, + .bpop_commit_alloc_ptr = nilfs_bmap_commit_alloc_p, + .bpop_abort_alloc_ptr = nilfs_bmap_abort_alloc_p, + .bpop_prepare_start_ptr = NULL, + .bpop_commit_start_ptr = NULL, + .bpop_abort_start_ptr = NULL, + .bpop_prepare_end_ptr = NULL, + .bpop_commit_end_ptr = NULL, + .bpop_abort_end_ptr = NULL, + + .bpop_translate = NULL, +}; + +static const struct nilfs_bmap_ptr_operations nilfs_bmap_ptr_ops_gc = { + .bpop_prepare_alloc_ptr = NULL, + .bpop_commit_alloc_ptr = NULL, + .bpop_abort_alloc_ptr = NULL, + .bpop_prepare_start_ptr = NULL, + .bpop_commit_start_ptr = NULL, + .bpop_abort_start_ptr = NULL, + .bpop_prepare_end_ptr = NULL, + .bpop_commit_end_ptr = NULL, + .bpop_abort_end_ptr = NULL, + + .bpop_translate = NULL, +}; + +/** + * nilfs_bmap_read - read a bmap from an inode + * @bmap: bmap + * @raw_inode: on-disk inode + * + * Description: nilfs_bmap_read() initializes the bmap @bmap. + * + * Return Value: On success, 0 is returned. On error, the following negative + * error code is returned. + * + * %-ENOMEM - Insufficient amount of memory available. + */ +int nilfs_bmap_read(struct nilfs_bmap *bmap, struct nilfs_inode *raw_inode) +{ + if (raw_inode == NULL) + memset(bmap->b_u.u_data, 0, NILFS_BMAP_SIZE); + else + memcpy(bmap->b_u.u_data, raw_inode->i_bmap, NILFS_BMAP_SIZE); + + init_rwsem(&bmap->b_sem); + bmap->b_state = 0; + bmap->b_inode = &NILFS_BMAP_I(bmap)->vfs_inode; + switch (bmap->b_inode->i_ino) { + case NILFS_DAT_INO: + bmap->b_pops = &nilfs_bmap_ptr_ops_p; + bmap->b_last_allocated_key = 0; /* XXX: use macro */ + bmap->b_last_allocated_ptr = NILFS_BMAP_NEW_PTR_INIT; + break; + case NILFS_CPFILE_INO: + case NILFS_SUFILE_INO: + bmap->b_pops = &nilfs_bmap_ptr_ops_vmdt; + bmap->b_last_allocated_key = 0; /* XXX: use macro */ + bmap->b_last_allocated_ptr = NILFS_BMAP_INVALID_PTR; + break; + default: + bmap->b_pops = &nilfs_bmap_ptr_ops_v; + bmap->b_last_allocated_key = 0; /* XXX: use macro */ + bmap->b_last_allocated_ptr = NILFS_BMAP_INVALID_PTR; + break; + } + + return (bmap->b_u.u_flags & NILFS_BMAP_LARGE) ? + nilfs_btree_init(bmap, + NILFS_BMAP_LARGE_LOW, + NILFS_BMAP_LARGE_HIGH) : + nilfs_direct_init(bmap, + NILFS_BMAP_SMALL_LOW, + NILFS_BMAP_SMALL_HIGH); +} + +/** + * nilfs_bmap_write - write back a bmap to an inode + * @bmap: bmap + * @raw_inode: on-disk inode + * + * Description: nilfs_bmap_write() stores @bmap in @raw_inode. + */ +void nilfs_bmap_write(struct nilfs_bmap *bmap, struct nilfs_inode *raw_inode) +{ + down_write(&bmap->b_sem); + memcpy(raw_inode->i_bmap, bmap->b_u.u_data, + NILFS_INODE_BMAP_SIZE * sizeof(__le64)); + if (bmap->b_inode->i_ino == NILFS_DAT_INO) + bmap->b_last_allocated_ptr = NILFS_BMAP_NEW_PTR_INIT; + + up_write(&bmap->b_sem); +} + +void nilfs_bmap_init_gc(struct nilfs_bmap *bmap) +{ + memset(&bmap->b_u, 0, NILFS_BMAP_SIZE); + init_rwsem(&bmap->b_sem); + bmap->b_inode = &NILFS_BMAP_I(bmap)->vfs_inode; + bmap->b_pops = &nilfs_bmap_ptr_ops_gc; + bmap->b_last_allocated_key = 0; + bmap->b_last_allocated_ptr = NILFS_BMAP_INVALID_PTR; + bmap->b_state = 0; + nilfs_btree_init_gc(bmap); +} + +void nilfs_bmap_init_gcdat(struct nilfs_bmap *gcbmap, struct nilfs_bmap *bmap) +{ + memcpy(gcbmap, bmap, sizeof(union nilfs_bmap_union)); + init_rwsem(&gcbmap->b_sem); + gcbmap->b_inode = &NILFS_BMAP_I(gcbmap)->vfs_inode; +} + +void nilfs_bmap_commit_gcdat(struct nilfs_bmap *gcbmap, struct nilfs_bmap *bmap) +{ + memcpy(bmap, gcbmap, sizeof(union nilfs_bmap_union)); + init_rwsem(&bmap->b_sem); + bmap->b_inode = &NILFS_BMAP_I(bmap)->vfs_inode; +} diff --git a/fs/nilfs2/bmap.h b/fs/nilfs2/bmap.h new file mode 100644 index 00000000000..4f2708abb1b --- /dev/null +++ b/fs/nilfs2/bmap.h @@ -0,0 +1,244 @@ +/* + * bmap.h - NILFS block mapping. + * + * Copyright (C) 2006-2008 Nippon Telegraph and Telephone Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + * Written by Koji Sato . + */ + +#ifndef _NILFS_BMAP_H +#define _NILFS_BMAP_H + +#include +#include +#include +#include +#include "alloc.h" + +#define NILFS_BMAP_INVALID_PTR 0 + +#define nilfs_bmap_dkey_to_key(dkey) le64_to_cpu(dkey) +#define nilfs_bmap_key_to_dkey(key) cpu_to_le64(key) +#define nilfs_bmap_dptr_to_ptr(dptr) le64_to_cpu(dptr) +#define nilfs_bmap_ptr_to_dptr(ptr) cpu_to_le64(ptr) + +#define nilfs_bmap_keydiff_abs(diff) ((diff) < 0 ? -(diff) : (diff)) + + +struct nilfs_bmap; + +/** + * union nilfs_bmap_ptr_req - request for bmap ptr + * @bpr_ptr: bmap pointer + * @bpr_req: request for persistent allocator + */ +union nilfs_bmap_ptr_req { + __u64 bpr_ptr; + struct nilfs_palloc_req bpr_req; +}; + +/** + * struct nilfs_bmap_stats - bmap statistics + * @bs_nblocks: number of blocks created or deleted + */ +struct nilfs_bmap_stats { + unsigned int bs_nblocks; +}; + +/** + * struct nilfs_bmap_operations - bmap operation table + */ +struct nilfs_bmap_operations { + int (*bop_lookup)(const struct nilfs_bmap *, __u64, int, __u64 *); + int (*bop_insert)(struct nilfs_bmap *, __u64, __u64); + int (*bop_delete)(struct nilfs_bmap *, __u64); + void (*bop_clear)(struct nilfs_bmap *); + + int (*bop_propagate)(const struct nilfs_bmap *, struct buffer_head *); + void (*bop_lookup_dirty_buffers)(struct nilfs_bmap *, + struct list_head *); + + int (*bop_assign)(struct nilfs_bmap *, + struct buffer_head **, + sector_t, + union nilfs_binfo *); + int (*bop_mark)(struct nilfs_bmap *, __u64, int); + + /* The following functions are internal use only. */ + int (*bop_last_key)(const struct nilfs_bmap *, __u64 *); + int (*bop_check_insert)(const struct nilfs_bmap *, __u64); + int (*bop_check_delete)(struct nilfs_bmap *, __u64); + int (*bop_gather_data)(struct nilfs_bmap *, __u64 *, __u64 *, int); +}; + + +/** + * struct nilfs_bmap_ptr_operations - bmap ptr operation table + */ +struct nilfs_bmap_ptr_operations { + int (*bpop_prepare_alloc_ptr)(struct nilfs_bmap *, + union nilfs_bmap_ptr_req *); + void (*bpop_commit_alloc_ptr)(struct nilfs_bmap *, + union nilfs_bmap_ptr_req *); + void (*bpop_abort_alloc_ptr)(struct nilfs_bmap *, + union nilfs_bmap_ptr_req *); + int (*bpop_prepare_start_ptr)(struct nilfs_bmap *, + union nilfs_bmap_ptr_req *); + void (*bpop_commit_start_ptr)(struct nilfs_bmap *, + union nilfs_bmap_ptr_req *, + sector_t); + void (*bpop_abort_start_ptr)(struct nilfs_bmap *, + union nilfs_bmap_ptr_req *); + int (*bpop_prepare_end_ptr)(struct nilfs_bmap *, + union nilfs_bmap_ptr_req *); + void (*bpop_commit_end_ptr)(struct nilfs_bmap *, + union nilfs_bmap_ptr_req *); + void (*bpop_abort_end_ptr)(struct nilfs_bmap *, + union nilfs_bmap_ptr_req *); + + int (*bpop_translate)(const struct nilfs_bmap *, __u64, __u64 *); +}; + + +#define NILFS_BMAP_SIZE (NILFS_INODE_BMAP_SIZE * sizeof(__le64)) +#define NILFS_BMAP_KEY_BIT (sizeof(unsigned long) * 8 /* CHAR_BIT */) +#define NILFS_BMAP_NEW_PTR_INIT \ + (1UL << (sizeof(unsigned long) * 8 /* CHAR_BIT */ - 1)) + +static inline int nilfs_bmap_is_new_ptr(unsigned long ptr) +{ + return !!(ptr & NILFS_BMAP_NEW_PTR_INIT); +} + + +/** + * struct nilfs_bmap - bmap structure + * @b_u: raw data + * @b_sem: semaphore + * @b_inode: owner of bmap + * @b_ops: bmap operation table + * @b_pops: bmap ptr operation table + * @b_low: low watermark of conversion + * @b_high: high watermark of conversion + * @b_last_allocated_key: last allocated key for data block + * @b_last_allocated_ptr: last allocated ptr for data block + * @b_state: state + */ +struct nilfs_bmap { + union { + __u8 u_flags; + __le64 u_data[NILFS_BMAP_SIZE / sizeof(__le64)]; + } b_u; + struct rw_semaphore b_sem; + struct inode *b_inode; + const struct nilfs_bmap_operations *b_ops; + const struct nilfs_bmap_ptr_operations *b_pops; + __u64 b_low; + __u64 b_high; + __u64 b_last_allocated_key; + __u64 b_last_allocated_ptr; + int b_state; +}; + +/* state */ +#define NILFS_BMAP_DIRTY 0x00000001 + + +int nilfs_bmap_test_and_clear_dirty(struct nilfs_bmap *); +int nilfs_bmap_read(struct nilfs_bmap *, struct nilfs_inode *); +void nilfs_bmap_write(struct nilfs_bmap *, struct nilfs_inode *); +int nilfs_bmap_lookup(struct nilfs_bmap *, unsigned long, unsigned long *); +int nilfs_bmap_insert(struct nilfs_bmap *, unsigned long, unsigned long); +int nilfs_bmap_delete(struct nilfs_bmap *, unsigned long); +int nilfs_bmap_last_key(struct nilfs_bmap *, unsigned long *); +int nilfs_bmap_truncate(struct nilfs_bmap *, unsigned long); +void nilfs_bmap_clear(struct nilfs_bmap *); +int nilfs_bmap_propagate(struct nilfs_bmap *, struct buffer_head *); +void nilfs_bmap_lookup_dirty_buffers(struct nilfs_bmap *, struct list_head *); +int nilfs_bmap_assign(struct nilfs_bmap *, struct buffer_head **, + unsigned long, union nilfs_binfo *); +int nilfs_bmap_lookup_at_level(struct nilfs_bmap *, __u64, int, __u64 *); +int nilfs_bmap_mark(struct nilfs_bmap *, __u64, int); + +void nilfs_bmap_init_gc(struct nilfs_bmap *); +void nilfs_bmap_init_gcdat(struct nilfs_bmap *, struct nilfs_bmap *); +void nilfs_bmap_commit_gcdat(struct nilfs_bmap *, struct nilfs_bmap *); + + +/* + * Internal use only + */ + +int nilfs_bmap_move_v(const struct nilfs_bmap *, __u64, sector_t); +int nilfs_bmap_mark_dirty(const struct nilfs_bmap *, __u64); + + +__u64 nilfs_bmap_data_get_key(const struct nilfs_bmap *, + const struct buffer_head *); + +__u64 nilfs_bmap_find_target_seq(const struct nilfs_bmap *, __u64); +__u64 nilfs_bmap_find_target_in_group(const struct nilfs_bmap *); + +int nilfs_bmap_prepare_update(struct nilfs_bmap *, + union nilfs_bmap_ptr_req *, + union nilfs_bmap_ptr_req *); +void nilfs_bmap_commit_update(struct nilfs_bmap *, + union nilfs_bmap_ptr_req *, + union nilfs_bmap_ptr_req *); +void nilfs_bmap_abort_update(struct nilfs_bmap *, + union nilfs_bmap_ptr_req *, + union nilfs_bmap_ptr_req *); + +void nilfs_bmap_add_blocks(const struct nilfs_bmap *, int); +void nilfs_bmap_sub_blocks(const struct nilfs_bmap *, int); + + +int nilfs_bmap_get_block(const struct nilfs_bmap *, __u64, + struct buffer_head **); +void nilfs_bmap_put_block(const struct nilfs_bmap *, struct buffer_head *); +int nilfs_bmap_get_new_block(const struct nilfs_bmap *, __u64, + struct buffer_head **); +void nilfs_bmap_delete_block(const struct nilfs_bmap *, struct buffer_head *); + + +/* Assume that bmap semaphore is locked. */ +static inline int nilfs_bmap_dirty(const struct nilfs_bmap *bmap) +{ + return !!(bmap->b_state & NILFS_BMAP_DIRTY); +} + +/* Assume that bmap semaphore is locked. */ +static inline void nilfs_bmap_set_dirty(struct nilfs_bmap *bmap) +{ + bmap->b_state |= NILFS_BMAP_DIRTY; +} + +/* Assume that bmap semaphore is locked. */ +static inline void nilfs_bmap_clear_dirty(struct nilfs_bmap *bmap) +{ + bmap->b_state &= ~NILFS_BMAP_DIRTY; +} + + +#define NILFS_BMAP_LARGE 0x1 + +#define NILFS_BMAP_SMALL_LOW NILFS_DIRECT_KEY_MIN +#define NILFS_BMAP_SMALL_HIGH NILFS_DIRECT_KEY_MAX +#define NILFS_BMAP_LARGE_LOW NILFS_BTREE_ROOT_NCHILDREN_MAX +#define NILFS_BMAP_LARGE_HIGH NILFS_BTREE_KEY_MAX + +#endif /* _NILFS_BMAP_H */ diff --git a/fs/nilfs2/bmap_union.h b/fs/nilfs2/bmap_union.h new file mode 100644 index 00000000000..d41509bff47 --- /dev/null +++ b/fs/nilfs2/bmap_union.h @@ -0,0 +1,42 @@ +/* + * bmap_union.h - NILFS block mapping. + * + * Copyright (C) 2006-2008 Nippon Telegraph and Telephone Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + * Written by Koji Sato . + */ + +#ifndef _NILFS_BMAP_UNION_H +#define _NILFS_BMAP_UNION_H + +#include "bmap.h" +#include "direct.h" +#include "btree.h" + +/** + * nilfs_bmap_union - + * @bi_bmap: bmap structure + * @bi_btree: direct map structure + * @bi_direct: B-tree structure + */ +union nilfs_bmap_union { + struct nilfs_bmap bi_bmap; + struct nilfs_direct bi_direct; + struct nilfs_btree bi_btree; +}; + +#endif /* _NILFS_BMAP_UNION_H */ -- cgit v1.2.3 From 17c76b0104e4a6513983777e1a17e0297a12b0c4 Mon Sep 17 00:00:00 2001 From: Koji Sato Date: Mon, 6 Apr 2009 19:01:24 -0700 Subject: nilfs2: B-tree based block mapping This adds declarations and functions of NILFS2 B-tree. Two variants are integrated in the NILFS2 B-tree. The B-tree for the most files points to the child nodes or data blocks with virtual block addresses, whereas the B-tree of the DAT uses actual block addresses. Signed-off-by: Koji Sato Signed-off-by: Ryusuke Konishi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/nilfs2/btree.c | 2276 +++++++++++++++++++++++++++++++++++++++++++++++++++++ fs/nilfs2/btree.h | 117 +++ 2 files changed, 2393 insertions(+) create mode 100644 fs/nilfs2/btree.c create mode 100644 fs/nilfs2/btree.h (limited to 'fs/nilfs2') diff --git a/fs/nilfs2/btree.c b/fs/nilfs2/btree.c new file mode 100644 index 00000000000..893f0190a61 --- /dev/null +++ b/fs/nilfs2/btree.c @@ -0,0 +1,2276 @@ +/* + * btree.c - NILFS B-tree. + * + * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + * Written by Koji Sato . + */ + +#include +#include +#include +#include +#include "nilfs.h" +#include "page.h" +#include "btnode.h" +#include "btree.h" +#include "alloc.h" + +/** + * struct nilfs_btree_path - A path on which B-tree operations are executed + * @bp_bh: buffer head of node block + * @bp_sib_bh: buffer head of sibling node block + * @bp_index: index of child node + * @bp_oldreq: ptr end request for old ptr + * @bp_newreq: ptr alloc request for new ptr + * @bp_op: rebalance operation + */ +struct nilfs_btree_path { + struct buffer_head *bp_bh; + struct buffer_head *bp_sib_bh; + int bp_index; + union nilfs_bmap_ptr_req bp_oldreq; + union nilfs_bmap_ptr_req bp_newreq; + struct nilfs_btnode_chkey_ctxt bp_ctxt; + void (*bp_op)(struct nilfs_btree *, struct nilfs_btree_path *, + int, __u64 *, __u64 *); +}; + +/* + * B-tree path operations + */ + +static struct kmem_cache *nilfs_btree_path_cache; + +int __init nilfs_btree_path_cache_init(void) +{ + nilfs_btree_path_cache = + kmem_cache_create("nilfs2_btree_path_cache", + sizeof(struct nilfs_btree_path) * + NILFS_BTREE_LEVEL_MAX, 0, 0, NULL); + return (nilfs_btree_path_cache != NULL) ? 0 : -ENOMEM; +} + +void nilfs_btree_path_cache_destroy(void) +{ + kmem_cache_destroy(nilfs_btree_path_cache); +} + +static inline struct nilfs_btree_path * +nilfs_btree_alloc_path(const struct nilfs_btree *btree) +{ + return (struct nilfs_btree_path *) + kmem_cache_alloc(nilfs_btree_path_cache, GFP_NOFS); +} + +static inline void nilfs_btree_free_path(const struct nilfs_btree *btree, + struct nilfs_btree_path *path) +{ + kmem_cache_free(nilfs_btree_path_cache, path); +} + +static void nilfs_btree_init_path(const struct nilfs_btree *btree, + struct nilfs_btree_path *path) +{ + int level; + + for (level = NILFS_BTREE_LEVEL_DATA; + level < NILFS_BTREE_LEVEL_MAX; + level++) { + path[level].bp_bh = NULL; + path[level].bp_sib_bh = NULL; + path[level].bp_index = 0; + path[level].bp_oldreq.bpr_ptr = NILFS_BMAP_INVALID_PTR; + path[level].bp_newreq.bpr_ptr = NILFS_BMAP_INVALID_PTR; + path[level].bp_op = NULL; + } +} + +static void nilfs_btree_clear_path(const struct nilfs_btree *btree, + struct nilfs_btree_path *path) +{ + int level; + + for (level = NILFS_BTREE_LEVEL_DATA; + level < NILFS_BTREE_LEVEL_MAX; + level++) { + if (path[level].bp_bh != NULL) { + nilfs_bmap_put_block(&btree->bt_bmap, + path[level].bp_bh); + path[level].bp_bh = NULL; + } + /* sib_bh is released or deleted by prepare or commit + * operations. */ + path[level].bp_sib_bh = NULL; + path[level].bp_index = 0; + path[level].bp_oldreq.bpr_ptr = NILFS_BMAP_INVALID_PTR; + path[level].bp_newreq.bpr_ptr = NILFS_BMAP_INVALID_PTR; + path[level].bp_op = NULL; + } +} + + +/* + * B-tree node operations + */ + +static inline int +nilfs_btree_node_get_flags(const struct nilfs_btree *btree, + const struct nilfs_btree_node *node) +{ + return node->bn_flags; +} + +static inline void +nilfs_btree_node_set_flags(struct nilfs_btree *btree, + struct nilfs_btree_node *node, + int flags) +{ + node->bn_flags = flags; +} + +static inline int nilfs_btree_node_root(const struct nilfs_btree *btree, + const struct nilfs_btree_node *node) +{ + return nilfs_btree_node_get_flags(btree, node) & NILFS_BTREE_NODE_ROOT; +} + +static inline int +nilfs_btree_node_get_level(const struct nilfs_btree *btree, + const struct nilfs_btree_node *node) +{ + return node->bn_level; +} + +static inline void +nilfs_btree_node_set_level(struct nilfs_btree *btree, + struct nilfs_btree_node *node, + int level) +{ + node->bn_level = level; +} + +static inline int +nilfs_btree_node_get_nchildren(const struct nilfs_btree *btree, + const struct nilfs_btree_node *node) +{ + return le16_to_cpu(node->bn_nchildren); +} + +static inline void +nilfs_btree_node_set_nchildren(struct nilfs_btree *btree, + struct nilfs_btree_node *node, + int nchildren) +{ + node->bn_nchildren = cpu_to_le16(nchildren); +} + +static inline int +nilfs_btree_node_size(const struct nilfs_btree *btree) +{ + return 1 << btree->bt_bmap.b_inode->i_blkbits; +} + +static inline int +nilfs_btree_node_nchildren_min(const struct nilfs_btree *btree, + const struct nilfs_btree_node *node) +{ + return nilfs_btree_node_root(btree, node) ? + NILFS_BTREE_ROOT_NCHILDREN_MIN : + NILFS_BTREE_NODE_NCHILDREN_MIN(nilfs_btree_node_size(btree)); +} + +static inline int +nilfs_btree_node_nchildren_max(const struct nilfs_btree *btree, + const struct nilfs_btree_node *node) +{ + return nilfs_btree_node_root(btree, node) ? + NILFS_BTREE_ROOT_NCHILDREN_MAX : + NILFS_BTREE_NODE_NCHILDREN_MAX(nilfs_btree_node_size(btree)); +} + +static inline __le64 * +nilfs_btree_node_dkeys(const struct nilfs_btree *btree, + const struct nilfs_btree_node *node) +{ + return (__le64 *)((char *)(node + 1) + + (nilfs_btree_node_root(btree, node) ? + 0 : NILFS_BTREE_NODE_EXTRA_PAD_SIZE)); +} + +static inline __le64 * +nilfs_btree_node_dptrs(const struct nilfs_btree *btree, + const struct nilfs_btree_node *node) +{ + return (__le64 *)(nilfs_btree_node_dkeys(btree, node) + + nilfs_btree_node_nchildren_max(btree, node)); +} + +static inline __u64 +nilfs_btree_node_get_key(const struct nilfs_btree *btree, + const struct nilfs_btree_node *node, int index) +{ + return nilfs_bmap_dkey_to_key(*(nilfs_btree_node_dkeys(btree, node) + + index)); +} + +static inline void +nilfs_btree_node_set_key(struct nilfs_btree *btree, + struct nilfs_btree_node *node, int index, __u64 key) +{ + *(nilfs_btree_node_dkeys(btree, node) + index) = + nilfs_bmap_key_to_dkey(key); +} + +static inline __u64 +nilfs_btree_node_get_ptr(const struct nilfs_btree *btree, + const struct nilfs_btree_node *node, + int index) +{ + return nilfs_bmap_dptr_to_ptr(*(nilfs_btree_node_dptrs(btree, node) + + index)); +} + +static inline void +nilfs_btree_node_set_ptr(struct nilfs_btree *btree, + struct nilfs_btree_node *node, + int index, + __u64 ptr) +{ + *(nilfs_btree_node_dptrs(btree, node) + index) = + nilfs_bmap_ptr_to_dptr(ptr); +} + +static void nilfs_btree_node_init(struct nilfs_btree *btree, + struct nilfs_btree_node *node, + int flags, int level, int nchildren, + const __u64 *keys, const __u64 *ptrs) +{ + __le64 *dkeys; + __le64 *dptrs; + int i; + + nilfs_btree_node_set_flags(btree, node, flags); + nilfs_btree_node_set_level(btree, node, level); + nilfs_btree_node_set_nchildren(btree, node, nchildren); + + dkeys = nilfs_btree_node_dkeys(btree, node); + dptrs = nilfs_btree_node_dptrs(btree, node); + for (i = 0; i < nchildren; i++) { + dkeys[i] = nilfs_bmap_key_to_dkey(keys[i]); + dptrs[i] = nilfs_bmap_ptr_to_dptr(ptrs[i]); + } +} + +/* Assume the buffer heads corresponding to left and right are locked. */ +static void nilfs_btree_node_move_left(struct nilfs_btree *btree, + struct nilfs_btree_node *left, + struct nilfs_btree_node *right, + int n) +{ + __le64 *ldkeys, *rdkeys; + __le64 *ldptrs, *rdptrs; + int lnchildren, rnchildren; + + ldkeys = nilfs_btree_node_dkeys(btree, left); + ldptrs = nilfs_btree_node_dptrs(btree, left); + lnchildren = nilfs_btree_node_get_nchildren(btree, left); + + rdkeys = nilfs_btree_node_dkeys(btree, right); + rdptrs = nilfs_btree_node_dptrs(btree, right); + rnchildren = nilfs_btree_node_get_nchildren(btree, right); + + memcpy(ldkeys + lnchildren, rdkeys, n * sizeof(*rdkeys)); + memcpy(ldptrs + lnchildren, rdptrs, n * sizeof(*rdptrs)); + memmove(rdkeys, rdkeys + n, (rnchildren - n) * sizeof(*rdkeys)); + memmove(rdptrs, rdptrs + n, (rnchildren - n) * sizeof(*rdptrs)); + + lnchildren += n; + rnchildren -= n; + nilfs_btree_node_set_nchildren(btree, left, lnchildren); + nilfs_btree_node_set_nchildren(btree, right, rnchildren); +} + +/* Assume that the buffer heads corresponding to left and right are locked. */ +static void nilfs_btree_node_move_right(struct nilfs_btree *btree, + struct nilfs_btree_node *left, + struct nilfs_btree_node *right, + int n) +{ + __le64 *ldkeys, *rdkeys; + __le64 *ldptrs, *rdptrs; + int lnchildren, rnchildren; + + ldkeys = nilfs_btree_node_dkeys(btree, left); + ldptrs = nilfs_btree_node_dptrs(btree, left); + lnchildren = nilfs_btree_node_get_nchildren(btree, left); + + rdkeys = nilfs_btree_node_dkeys(btree, right); + rdptrs = nilfs_btree_node_dptrs(btree, right); + rnchildren = nilfs_btree_node_get_nchildren(btree, right); + + memmove(rdkeys + n, rdkeys, rnchildren * sizeof(*rdkeys)); + memmove(rdptrs + n, rdptrs, rnchildren * sizeof(*rdptrs)); + memcpy(rdkeys, ldkeys + lnchildren - n, n * sizeof(*rdkeys)); + memcpy(rdptrs, ldptrs + lnchildren - n, n * sizeof(*rdptrs)); + + lnchildren -= n; + rnchildren += n; + nilfs_btree_node_set_nchildren(btree, left, lnchildren); + nilfs_btree_node_set_nchildren(btree, right, rnchildren); +} + +/* Assume that the buffer head corresponding to node is locked. */ +static void nilfs_btree_node_insert(struct nilfs_btree *btree, + struct nilfs_btree_node *node, + __u64 key, __u64 ptr, int index) +{ + __le64 *dkeys; + __le64 *dptrs; + int nchildren; + + dkeys = nilfs_btree_node_dkeys(btree, node); + dptrs = nilfs_btree_node_dptrs(btree, node); + nchildren = nilfs_btree_node_get_nchildren(btree, node); + if (index < nchildren) { + memmove(dkeys + index + 1, dkeys + index, + (nchildren - index) * sizeof(*dkeys)); + memmove(dptrs + index + 1, dptrs + index, + (nchildren - index) * sizeof(*dptrs)); + } + dkeys[index] = nilfs_bmap_key_to_dkey(key); + dptrs[index] = nilfs_bmap_ptr_to_dptr(ptr); + nchildren++; + nilfs_btree_node_set_nchildren(btree, node, nchildren); +} + +/* Assume that the buffer head corresponding to node is locked. */ +static void nilfs_btree_node_delete(struct nilfs_btree *btree, + struct nilfs_btree_node *node, + __u64 *keyp, __u64 *ptrp, int index) +{ + __u64 key; + __u64 ptr; + __le64 *dkeys; + __le64 *dptrs; + int nchildren; + + dkeys = nilfs_btree_node_dkeys(btree, node); + dptrs = nilfs_btree_node_dptrs(btree, node); + key = nilfs_bmap_dkey_to_key(dkeys[index]); + ptr = nilfs_bmap_dptr_to_ptr(dptrs[index]); + nchildren = nilfs_btree_node_get_nchildren(btree, node); + if (keyp != NULL) + *keyp = key; + if (ptrp != NULL) + *ptrp = ptr; + + if (index < nchildren - 1) { + memmove(dkeys + index, dkeys + index + 1, + (nchildren - index - 1) * sizeof(*dkeys)); + memmove(dptrs + index, dptrs + index + 1, + (nchildren - index - 1) * sizeof(*dptrs)); + } + nchildren--; + nilfs_btree_node_set_nchildren(btree, node, nchildren); +} + +static int nilfs_btree_node_lookup(const struct nilfs_btree *btree, + const struct nilfs_btree_node *node, + __u64 key, int *indexp) +{ + __u64 nkey; + int index, low, high, s; + + /* binary search */ + low = 0; + high = nilfs_btree_node_get_nchildren(btree, node) - 1; + index = 0; + s = 0; + while (low <= high) { + index = (low + high) / 2; + nkey = nilfs_btree_node_get_key(btree, node, index); + if (nkey == key) { + s = 0; + goto out; + } else if (nkey < key) { + low = index + 1; + s = -1; + } else { + high = index - 1; + s = 1; + } + } + + /* adjust index */ + if (nilfs_btree_node_get_level(btree, node) > + NILFS_BTREE_LEVEL_NODE_MIN) { + if ((s > 0) && (index > 0)) + index--; + } else if (s < 0) + index++; + + out: + BUG_ON(indexp == NULL); + *indexp = index; + + return s == 0; +} + +static inline struct nilfs_btree_node * +nilfs_btree_get_root(const struct nilfs_btree *btree) +{ + return (struct nilfs_btree_node *)btree->bt_bmap.b_u.u_data; +} + +static inline struct nilfs_btree_node * +nilfs_btree_get_nonroot_node(const struct nilfs_btree *btree, + const struct nilfs_btree_path *path, + int level) +{ + return (struct nilfs_btree_node *)path[level].bp_bh->b_data; +} + +static inline struct nilfs_btree_node * +nilfs_btree_get_sib_node(const struct nilfs_btree *btree, + const struct nilfs_btree_path *path, + int level) +{ + return (struct nilfs_btree_node *)path[level].bp_sib_bh->b_data; +} + +static inline int nilfs_btree_height(const struct nilfs_btree *btree) +{ + return nilfs_btree_node_get_level(btree, nilfs_btree_get_root(btree)) + + 1; +} + +static inline struct nilfs_btree_node * +nilfs_btree_get_node(const struct nilfs_btree *btree, + const struct nilfs_btree_path *path, + int level) +{ + return (level == nilfs_btree_height(btree) - 1) ? + nilfs_btree_get_root(btree) : + nilfs_btree_get_nonroot_node(btree, path, level); +} + +static int nilfs_btree_do_lookup(const struct nilfs_btree *btree, + struct nilfs_btree_path *path, + __u64 key, __u64 *ptrp, int minlevel) +{ + struct nilfs_btree_node *node; + __u64 ptr; + int level, index, found, ret; + + BUG_ON(minlevel <= NILFS_BTREE_LEVEL_DATA); + + node = nilfs_btree_get_root(btree); + level = nilfs_btree_node_get_level(btree, node); + if ((level < minlevel) || + (nilfs_btree_node_get_nchildren(btree, node) <= 0)) + return -ENOENT; + + found = nilfs_btree_node_lookup(btree, node, key, &index); + ptr = nilfs_btree_node_get_ptr(btree, node, index); + path[level].bp_bh = NULL; + path[level].bp_index = index; + + for (level--; level >= minlevel; level--) { + ret = nilfs_bmap_get_block(&btree->bt_bmap, ptr, + &path[level].bp_bh); + if (ret < 0) + return ret; + node = nilfs_btree_get_nonroot_node(btree, path, level); + BUG_ON(level != nilfs_btree_node_get_level(btree, node)); + if (!found) + found = nilfs_btree_node_lookup(btree, node, key, + &index); + else + index = 0; + if (index < nilfs_btree_node_nchildren_max(btree, node)) + ptr = nilfs_btree_node_get_ptr(btree, node, index); + else { + BUG_ON(found || level != NILFS_BTREE_LEVEL_NODE_MIN); + /* insert */ + ptr = NILFS_BMAP_INVALID_PTR; + } + path[level].bp_index = index; + } + if (!found) + return -ENOENT; + + if (ptrp != NULL) + *ptrp = ptr; + + return 0; +} + +static int nilfs_btree_do_lookup_last(const struct nilfs_btree *btree, + struct nilfs_btree_path *path, + __u64 *keyp, __u64 *ptrp) +{ + struct nilfs_btree_node *node; + __u64 ptr; + int index, level, ret; + + node = nilfs_btree_get_root(btree); + index = nilfs_btree_node_get_nchildren(btree, node) - 1; + if (index < 0) + return -ENOENT; + level = nilfs_btree_node_get_level(btree, node); + ptr = nilfs_btree_node_get_ptr(btree, node, index); + path[level].bp_bh = NULL; + path[level].bp_index = index; + + for (level--; level > 0; level--) { + ret = nilfs_bmap_get_block(&btree->bt_bmap, ptr, + &path[level].bp_bh); + if (ret < 0) + return ret; + node = nilfs_btree_get_nonroot_node(btree, path, level); + BUG_ON(level != nilfs_btree_node_get_level(btree, node)); + index = nilfs_btree_node_get_nchildren(btree, node) - 1; + ptr = nilfs_btree_node_get_ptr(btree, node, index); + path[level].bp_index = index; + } + + if (keyp != NULL) + *keyp = nilfs_btree_node_get_key(btree, node, index); + if (ptrp != NULL) + *ptrp = ptr; + + return 0; +} + +static int nilfs_btree_lookup(const struct nilfs_bmap *bmap, + __u64 key, int level, __u64 *ptrp) +{ + struct nilfs_btree *btree; + struct nilfs_btree_path *path; + __u64 ptr; + int ret; + + btree = (struct nilfs_btree *)bmap; + path = nilfs_btree_alloc_path(btree); + if (path == NULL) + return -ENOMEM; + nilfs_btree_init_path(btree, path); + + ret = nilfs_btree_do_lookup(btree, path, key, &ptr, level); + + if (ptrp != NULL) + *ptrp = ptr; + + nilfs_btree_clear_path(btree, path); + nilfs_btree_free_path(btree, path); + + return ret; +} + +static void nilfs_btree_promote_key(struct nilfs_btree *btree, + struct nilfs_btree_path *path, + int level, __u64 key) +{ + if (level < nilfs_btree_height(btree) - 1) { + do { + lock_buffer(path[level].bp_bh); + nilfs_btree_node_set_key( + btree, + nilfs_btree_get_nonroot_node( + btree, path, level), + path[level].bp_index, key); + if (!buffer_dirty(path[level].bp_bh)) + nilfs_btnode_mark_dirty(path[level].bp_bh); + unlock_buffer(path[level].bp_bh); + } while ((path[level].bp_index == 0) && + (++level < nilfs_btree_height(btree) - 1)); + } + + /* root */ + if (level == nilfs_btree_height(btree) - 1) { + nilfs_btree_node_set_key(btree, + nilfs_btree_get_root(btree), + path[level].bp_index, key); + } +} + +static void nilfs_btree_do_insert(struct nilfs_btree *btree, + struct nilfs_btree_path *path, + int level, __u64 *keyp, __u64 *ptrp) +{ + struct nilfs_btree_node *node; + + if (level < nilfs_btree_height(btree) - 1) { + lock_buffer(path[level].bp_bh); + node = nilfs_btree_get_nonroot_node(btree, path, level); + nilfs_btree_node_insert(btree, node, *keyp, *ptrp, + path[level].bp_index); + if (!buffer_dirty(path[level].bp_bh)) + nilfs_btnode_mark_dirty(path[level].bp_bh); + unlock_buffer(path[level].bp_bh); + + if (path[level].bp_index == 0) + nilfs_btree_promote_key(btree, path, level + 1, + nilfs_btree_node_get_key( + btree, node, 0)); + } else { + node = nilfs_btree_get_root(btree); + nilfs_btree_node_insert(btree, node, *keyp, *ptrp, + path[level].bp_index); + } +} + +static void nilfs_btree_carry_left(struct nilfs_btree *btree, + struct nilfs_btree_path *path, + int level, __u64 *keyp, __u64 *ptrp) +{ + struct nilfs_btree_node *node, *left; + int nchildren, lnchildren, n, move; + + lock_buffer(path[level].bp_bh); + lock_buffer(path[level].bp_sib_bh); + + node = nilfs_btree_get_nonroot_node(btree, path, level); + left = nilfs_btree_get_sib_node(btree, path, level); + nchildren = nilfs_btree_node_get_nchildren(btree, node); + lnchildren = nilfs_btree_node_get_nchildren(btree, left); + move = 0; + + n = (nchildren + lnchildren + 1) / 2 - lnchildren; + if (n > path[level].bp_index) { + /* move insert point */ + n--; + move = 1; + } + + nilfs_btree_node_move_left(btree, left, node, n); + + if (!buffer_dirty(path[level].bp_bh)) + nilfs_btnode_mark_dirty(path[level].bp_bh); + if (!buffer_dirty(path[level].bp_sib_bh)) + nilfs_btnode_mark_dirty(path[level].bp_sib_bh); + + unlock_buffer(path[level].bp_bh); + unlock_buffer(path[level].bp_sib_bh); + + nilfs_btree_promote_key(btree, path, level + 1, + nilfs_btree_node_get_key(btree, node, 0)); + + if (move) { + nilfs_bmap_put_block(&btree->bt_bmap, path[level].bp_bh); + path[level].bp_bh = path[level].bp_sib_bh; + path[level].bp_sib_bh = NULL; + path[level].bp_index += lnchildren; + path[level + 1].bp_index--; + } else { + nilfs_bmap_put_block(&btree->bt_bmap, path[level].bp_sib_bh); + path[level].bp_sib_bh = NULL; + path[level].bp_index -= n; + } + + nilfs_btree_do_insert(btree, path, level, keyp, ptrp); +} + +static void nilfs_btree_carry_right(struct nilfs_btree *btree, + struct nilfs_btree_path *path, + int level, __u64 *keyp, __u64 *ptrp) +{ + struct nilfs_btree_node *node, *right; + int nchildren, rnchildren, n, move; + + lock_buffer(path[level].bp_bh); + lock_buffer(path[level].bp_sib_bh); + + node = nilfs_btree_get_nonroot_node(btree, path, level); + right = nilfs_btree_get_sib_node(btree, path, level); + nchildren = nilfs_btree_node_get_nchildren(btree, node); + rnchildren = nilfs_btree_node_get_nchildren(btree, right); + move = 0; + + n = (nchildren + rnchildren + 1) / 2 - rnchildren; + if (n > nchildren - path[level].bp_index) { + /* move insert point */ + n--; + move = 1; + } + + nilfs_btree_node_move_right(btree, node, right, n); + + if (!buffer_dirty(path[level].bp_bh)) + nilfs_btnode_mark_dirty(path[level].bp_bh); + if (!buffer_dirty(path[level].bp_sib_bh)) + nilfs_btnode_mark_dirty(path[level].bp_sib_bh); + + unlock_buffer(path[level].bp_bh); + unlock_buffer(path[level].bp_sib_bh); + + path[level + 1].bp_index++; + nilfs_btree_promote_key(btree, path, level + 1, + nilfs_btree_node_get_key(btree, right, 0)); + path[level + 1].bp_index--; + + if (move) { + nilfs_bmap_put_block(&btree->bt_bmap, path[level].bp_bh); + path[level].bp_bh = path[level].bp_sib_bh; + path[level].bp_sib_bh = NULL; + path[level].bp_index -= + nilfs_btree_node_get_nchildren(btree, node); + path[level + 1].bp_index++; + } else { + nilfs_bmap_put_block(&btree->bt_bmap, path[level].bp_sib_bh); + path[level].bp_sib_bh = NULL; + } + + nilfs_btree_do_insert(btree, path, level, keyp, ptrp); +} + +static void nilfs_btree_split(struct nilfs_btree *btree, + struct nilfs_btree_path *path, + int level, __u64 *keyp, __u64 *ptrp) +{ + struct nilfs_btree_node *node, *right; + __u64 newkey; + __u64 newptr; + int nchildren, n, move; + + lock_buffer(path[level].bp_bh); + lock_buffer(path[level].bp_sib_bh); + + node = nilfs_btree_get_nonroot_node(btree, path, level); + right = nilfs_btree_get_sib_node(btree, path, level); + nchildren = nilfs_btree_node_get_nchildren(btree, node); + move = 0; + + n = (nchildren + 1) / 2; + if (n > nchildren - path[level].bp_index) { + n--; + move = 1; + } + + nilfs_btree_node_move_right(btree, node, right, n); + + if (!buffer_dirty(path[level].bp_bh)) + nilfs_btnode_mark_dirty(path[level].bp_bh); + if (!buffer_dirty(path[level].bp_sib_bh)) + nilfs_btnode_mark_dirty(path[level].bp_sib_bh); + + unlock_buffer(path[level].bp_bh); + unlock_buffer(path[level].bp_sib_bh); + + newkey = nilfs_btree_node_get_key(btree, right, 0); + newptr = path[level].bp_newreq.bpr_ptr; + + if (move) { + path[level].bp_index -= + nilfs_btree_node_get_nchildren(btree, node); + nilfs_btree_node_insert(btree, right, *keyp, *ptrp, + path[level].bp_index); + + *keyp = nilfs_btree_node_get_key(btree, right, 0); + *ptrp = path[level].bp_newreq.bpr_ptr; + + nilfs_bmap_put_block(&btree->bt_bmap, path[level].bp_bh); + path[level].bp_bh = path[level].bp_sib_bh; + path[level].bp_sib_bh = NULL; + } else { + nilfs_btree_do_insert(btree, path, level, keyp, ptrp); + + *keyp = nilfs_btree_node_get_key(btree, right, 0); + *ptrp = path[level].bp_newreq.bpr_ptr; + + nilfs_bmap_put_block(&btree->bt_bmap, path[level].bp_sib_bh); + path[level].bp_sib_bh = NULL; + } + + path[level + 1].bp_index++; +} + +static void nilfs_btree_grow(struct nilfs_btree *btree, + struct nilfs_btree_path *path, + int level, __u64 *keyp, __u64 *ptrp) +{ + struct nilfs_btree_node *root, *child; + int n; + + lock_buffer(path[level].bp_sib_bh); + + root = nilfs_btree_get_root(btree); + child = nilfs_btree_get_sib_node(btree, path, level); + + n = nilfs_btree_node_get_nchildren(btree, root); + + nilfs_btree_node_move_right(btree, root, child, n); + nilfs_btree_node_set_level(btree, root, level + 1); + + if (!buffer_dirty(path[level].bp_sib_bh)) + nilfs_btnode_mark_dirty(path[level].bp_sib_bh); + + unlock_buffer(path[level].bp_sib_bh); + + path[level].bp_bh = path[level].bp_sib_bh; + path[level].bp_sib_bh = NULL; + + nilfs_btree_do_insert(btree, path, level, keyp, ptrp); + + *keyp = nilfs_btree_node_get_key(btree, child, 0); + *ptrp = path[level].bp_newreq.bpr_ptr; +} + +static __u64 nilfs_btree_find_near(const struct nilfs_btree *btree, + const struct nilfs_btree_path *path) +{ + struct nilfs_btree_node *node; + int level; + + if (path == NULL) + return NILFS_BMAP_INVALID_PTR; + + /* left sibling */ + level = NILFS_BTREE_LEVEL_NODE_MIN; + if (path[level].bp_index > 0) { + node = nilfs_btree_get_node(btree, path, level); + return nilfs_btree_node_get_ptr(btree, node, + path[level].bp_index - 1); + } + + /* parent */ + level = NILFS_BTREE_LEVEL_NODE_MIN + 1; + if (level <= nilfs_btree_height(btree) - 1) { + node = nilfs_btree_get_node(btree, path, level); + return nilfs_btree_node_get_ptr(btree, node, + path[level].bp_index); + } + + return NILFS_BMAP_INVALID_PTR; +} + +static __u64 nilfs_btree_find_target_v(const struct nilfs_btree *btree, + const struct nilfs_btree_path *path, + __u64 key) +{ + __u64 ptr; + + ptr = nilfs_bmap_find_target_seq(&btree->bt_bmap, key); + if (ptr != NILFS_BMAP_INVALID_PTR) + /* sequential access */ + return ptr; + else { + ptr = nilfs_btree_find_near(btree, path); + if (ptr != NILFS_BMAP_INVALID_PTR) + /* near */ + return ptr; + } + /* block group */ + return nilfs_bmap_find_target_in_group(&btree->bt_bmap); +} + +static void nilfs_btree_set_target_v(struct nilfs_btree *btree, __u64 key, + __u64 ptr) +{ + btree->bt_bmap.b_last_allocated_key = key; + btree->bt_bmap.b_last_allocated_ptr = ptr; +} + +static int nilfs_btree_prepare_insert(struct nilfs_btree *btree, + struct nilfs_btree_path *path, + int *levelp, __u64 key, __u64 ptr, + struct nilfs_bmap_stats *stats) +{ + struct buffer_head *bh; + struct nilfs_btree_node *node, *parent, *sib; + __u64 sibptr; + int pindex, level, ret; + + stats->bs_nblocks = 0; + level = NILFS_BTREE_LEVEL_DATA; + + /* allocate a new ptr for data block */ + if (btree->bt_ops->btop_find_target != NULL) + path[level].bp_newreq.bpr_ptr = + (*btree->bt_ops->btop_find_target)(btree, path, key); + + ret = (*btree->bt_bmap.b_pops->bpop_prepare_alloc_ptr)( + &btree->bt_bmap, &path[level].bp_newreq); + if (ret < 0) + goto err_out_data; + + for (level = NILFS_BTREE_LEVEL_NODE_MIN; + level < nilfs_btree_height(btree) - 1; + level++) { + node = nilfs_btree_get_nonroot_node(btree, path, level); + if (nilfs_btree_node_get_nchildren(btree, node) < + nilfs_btree_node_nchildren_max(btree, node)) { + path[level].bp_op = nilfs_btree_do_insert; + stats->bs_nblocks++; + goto out; + } + + parent = nilfs_btree_get_node(btree, path, level + 1); + pindex = path[level + 1].bp_index; + + /* left sibling */ + if (pindex > 0) { + sibptr = nilfs_btree_node_get_ptr(btree, parent, + pindex - 1); + ret = nilfs_bmap_get_block(&btree->bt_bmap, sibptr, + &bh); + if (ret < 0) + goto err_out_child_node; + sib = (struct nilfs_btree_node *)bh->b_data; + if (nilfs_btree_node_get_nchildren(btree, sib) < + nilfs_btree_node_nchildren_max(btree, sib)) { + path[level].bp_sib_bh = bh; + path[level].bp_op = nilfs_btree_carry_left; + stats->bs_nblocks++; + goto out; + } else + nilfs_bmap_put_block(&btree->bt_bmap, bh); + } + + /* right sibling */ + if (pindex < + nilfs_btree_node_get_nchildren(btree, parent) - 1) { + sibptr = nilfs_btree_node_get_ptr(btree, parent, + pindex + 1); + ret = nilfs_bmap_get_block(&btree->bt_bmap, sibptr, + &bh); + if (ret < 0) + goto err_out_child_node; + sib = (struct nilfs_btree_node *)bh->b_data; + if (nilfs_btree_node_get_nchildren(btree, sib) < + nilfs_btree_node_nchildren_max(btree, sib)) { + path[level].bp_sib_bh = bh; + path[level].bp_op = nilfs_btree_carry_right; + stats->bs_nblocks++; + goto out; + } else + nilfs_bmap_put_block(&btree->bt_bmap, bh); + } + + /* split */ + path[level].bp_newreq.bpr_ptr = + path[level - 1].bp_newreq.bpr_ptr + 1; + ret = (*btree->bt_bmap.b_pops->bpop_prepare_alloc_ptr)( + &btree->bt_bmap, &path[level].bp_newreq); + if (ret < 0) + goto err_out_child_node; + ret = nilfs_bmap_get_new_block(&btree->bt_bmap, + path[level].bp_newreq.bpr_ptr, + &bh); + if (ret < 0) + goto err_out_curr_node; + + stats->bs_nblocks++; + + lock_buffer(bh); + nilfs_btree_node_init(btree, + (struct nilfs_btree_node *)bh->b_data, + 0, level, 0, NULL, NULL); + unlock_buffer(bh); + path[level].bp_sib_bh = bh; + path[level].bp_op = nilfs_btree_split; + } + + /* root */ + node = nilfs_btree_get_root(btree); + if (nilfs_btree_node_get_nchildren(btree, node) < + nilfs_btree_node_nchildren_max(btree, node)) { + path[level].bp_op = nilfs_btree_do_insert; + stats->bs_nblocks++; + goto out; + } + + /* grow */ + path[level].bp_newreq.bpr_ptr = path[level - 1].bp_newreq.bpr_ptr + 1; + ret = (*btree->bt_bmap.b_pops->bpop_prepare_alloc_ptr)( + &btree->bt_bmap, &path[level].bp_newreq); + if (ret < 0) + goto err_out_child_node; + ret = nilfs_bmap_get_new_block(&btree->bt_bmap, + path[level].bp_newreq.bpr_ptr, &bh); + if (ret < 0) + goto err_out_curr_node; + + lock_buffer(bh); + nilfs_btree_node_init(btree, (struct nilfs_btree_node *)bh->b_data, + 0, level, 0, NULL, NULL); + unlock_buffer(bh); + path[level].bp_sib_bh = bh; + path[level].bp_op = nilfs_btree_grow; + + level++; + path[level].bp_op = nilfs_btree_do_insert; + + /* a newly-created node block and a data block are added */ + stats->bs_nblocks += 2; + + /* success */ + out: + *levelp = level; + return ret; + + /* error */ + err_out_curr_node: + (*btree->bt_bmap.b_pops->bpop_abort_alloc_ptr)(&btree->bt_bmap, + &path[level].bp_newreq); + err_out_child_node: + for (level--; level > NILFS_BTREE_LEVEL_DATA; level--) { + nilfs_bmap_delete_block(&btree->bt_bmap, path[level].bp_sib_bh); + (*btree->bt_bmap.b_pops->bpop_abort_alloc_ptr)( + &btree->bt_bmap, &path[level].bp_newreq); + + } + + (*btree->bt_bmap.b_pops->bpop_abort_alloc_ptr)(&btree->bt_bmap, + &path[level].bp_newreq); + err_out_data: + *levelp = level; + stats->bs_nblocks = 0; + return ret; +} + +static void nilfs_btree_commit_insert(struct nilfs_btree *btree, + struct nilfs_btree_path *path, + int maxlevel, __u64 key, __u64 ptr) +{ + int level; + + set_buffer_nilfs_volatile((struct buffer_head *)((unsigned long)ptr)); + ptr = path[NILFS_BTREE_LEVEL_DATA].bp_newreq.bpr_ptr; + if (btree->bt_ops->btop_set_target != NULL) + (*btree->bt_ops->btop_set_target)(btree, key, ptr); + + for (level = NILFS_BTREE_LEVEL_NODE_MIN; level <= maxlevel; level++) { + if (btree->bt_bmap.b_pops->bpop_commit_alloc_ptr != NULL) { + (*btree->bt_bmap.b_pops->bpop_commit_alloc_ptr)( + &btree->bt_bmap, &path[level - 1].bp_newreq); + } + (*path[level].bp_op)(btree, path, level, &key, &ptr); + } + + if (!nilfs_bmap_dirty(&btree->bt_bmap)) + nilfs_bmap_set_dirty(&btree->bt_bmap); +} + +static int nilfs_btree_insert(struct nilfs_bmap *bmap, __u64 key, __u64 ptr) +{ + struct nilfs_btree *btree; + struct nilfs_btree_path *path; + struct nilfs_bmap_stats stats; + int level, ret; + + btree = (struct nilfs_btree *)bmap; + path = nilfs_btree_alloc_path(btree); + if (path == NULL) + return -ENOMEM; + nilfs_btree_init_path(btree, path); + + ret = nilfs_btree_do_lookup(btree, path, key, NULL, + NILFS_BTREE_LEVEL_NODE_MIN); + if (ret != -ENOENT) { + if (ret == 0) + ret = -EEXIST; + goto out; + } + + ret = nilfs_btree_prepare_insert(btree, path, &level, key, ptr, &stats); + if (ret < 0) + goto out; + nilfs_btree_commit_insert(btree, path, level, key, ptr); + nilfs_bmap_add_blocks(bmap, stats.bs_nblocks); + + out: + nilfs_btree_clear_path(btree, path); + nilfs_btree_free_path(btree, path); + return ret; +} + +static void nilfs_btree_do_delete(struct nilfs_btree *btree, + struct nilfs_btree_path *path, + int level, __u64 *keyp, __u64 *ptrp) +{ + struct nilfs_btree_node *node; + + if (level < nilfs_btree_height(btree) - 1) { + lock_buffer(path[level].bp_bh); + node = nilfs_btree_get_nonroot_node(btree, path, level); + nilfs_btree_node_delete(btree, node, keyp, ptrp, + path[level].bp_index); + if (!buffer_dirty(path[level].bp_bh)) + nilfs_btnode_mark_dirty(path[level].bp_bh); + unlock_buffer(path[level].bp_bh); + if (path[level].bp_index == 0) + nilfs_btree_promote_key(btree, path, level + 1, + nilfs_btree_node_get_key(btree, node, 0)); + } else { + node = nilfs_btree_get_root(btree); + nilfs_btree_node_delete(btree, node, keyp, ptrp, + path[level].bp_index); + } +} + +static void nilfs_btree_borrow_left(struct nilfs_btree *btree, + struct nilfs_btree_path *path, + int level, __u64 *keyp, __u64 *ptrp) +{ + struct nilfs_btree_node *node, *left; + int nchildren, lnchildren, n; + + nilfs_btree_do_delete(btree, path, level, keyp, ptrp); + + lock_buffer(path[level].bp_bh); + lock_buffer(path[level].bp_sib_bh); + + node = nilfs_btree_get_nonroot_node(btree, path, level); + left = nilfs_btree_get_sib_node(btree, path, level); + nchildren = nilfs_btree_node_get_nchildren(btree, node); + lnchildren = nilfs_btree_node_get_nchildren(btree, left); + + n = (nchildren + lnchildren) / 2 - nchildren; + + nilfs_btree_node_move_right(btree, left, node, n); + + if (!buffer_dirty(path[level].bp_bh)) + nilfs_btnode_mark_dirty(path[level].bp_bh); + if (!buffer_dirty(path[level].bp_sib_bh)) + nilfs_btnode_mark_dirty(path[level].bp_sib_bh); + + unlock_buffer(path[level].bp_bh); + unlock_buffer(path[level].bp_sib_bh); + + nilfs_btree_promote_key(btree, path, level + 1, + nilfs_btree_node_get_key(btree, node, 0)); + + nilfs_bmap_put_block(&btree->bt_bmap, path[level].bp_sib_bh); + path[level].bp_sib_bh = NULL; + path[level].bp_index += n; +} + +static void nilfs_btree_borrow_right(struct nilfs_btree *btree, + struct nilfs_btree_path *path, + int level, __u64 *keyp, __u64 *ptrp) +{ + struct nilfs_btree_node *node, *right; + int nchildren, rnchildren, n; + + nilfs_btree_do_delete(btree, path, level, keyp, ptrp); + + lock_buffer(path[level].bp_bh); + lock_buffer(path[level].bp_sib_bh); + + node = nilfs_btree_get_nonroot_node(btree, path, level); + right = nilfs_btree_get_sib_node(btree, path, level); + nchildren = nilfs_btree_node_get_nchildren(btree, node); + rnchildren = nilfs_btree_node_get_nchildren(btree, right); + + n = (nchildren + rnchildren) / 2 - nchildren; + + nilfs_btree_node_move_left(btree, node, right, n); + + if (!buffer_dirty(path[level].bp_bh)) + nilfs_btnode_mark_dirty(path[level].bp_bh); + if (!buffer_dirty(path[level].bp_sib_bh)) + nilfs_btnode_mark_dirty(path[level].bp_sib_bh); + + unlock_buffer(path[level].bp_bh); + unlock_buffer(path[level].bp_sib_bh); + + path[level + 1].bp_index++; + nilfs_btree_promote_key(btree, path, level + 1, + nilfs_btree_node_get_key(btree, right, 0)); + path[level + 1].bp_index--; + + nilfs_bmap_put_block(&btree->bt_bmap, path[level].bp_sib_bh); + path[level].bp_sib_bh = NULL; +} + +static void nilfs_btree_concat_left(struct nilfs_btree *btree, + struct nilfs_btree_path *path, + int level, __u64 *keyp, __u64 *ptrp) +{ + struct nilfs_btree_node *node, *left; + int n; + + nilfs_btree_do_delete(btree, path, level, keyp, ptrp); + + lock_buffer(path[level].bp_bh); + lock_buffer(path[level].bp_sib_bh); + + node = nilfs_btree_get_nonroot_node(btree, path, level); + left = nilfs_btree_get_sib_node(btree, path, level); + + n = nilfs_btree_node_get_nchildren(btree, node); + + nilfs_btree_node_move_left(btree, left, node, n); + + if (!buffer_dirty(path[level].bp_sib_bh)) + nilfs_btnode_mark_dirty(path[level].bp_sib_bh); + + unlock_buffer(path[level].bp_bh); + unlock_buffer(path[level].bp_sib_bh); + + nilfs_bmap_delete_block(&btree->bt_bmap, path[level].bp_bh); + path[level].bp_bh = path[level].bp_sib_bh; + path[level].bp_sib_bh = NULL; + path[level].bp_index += nilfs_btree_node_get_nchildren(btree, left); +} + +static void nilfs_btree_concat_right(struct nilfs_btree *btree, + struct nilfs_btree_path *path, + int level, __u64 *keyp, __u64 *ptrp) +{ + struct nilfs_btree_node *node, *right; + int n; + + nilfs_btree_do_delete(btree, path, level, keyp, ptrp); + + lock_buffer(path[level].bp_bh); + lock_buffer(path[level].bp_sib_bh); + + node = nilfs_btree_get_nonroot_node(btree, path, level); + right = nilfs_btree_get_sib_node(btree, path, level); + + n = nilfs_btree_node_get_nchildren(btree, right); + + nilfs_btree_node_move_left(btree, node, right, n); + + if (!buffer_dirty(path[level].bp_bh)) + nilfs_btnode_mark_dirty(path[level].bp_bh); + + unlock_buffer(path[level].bp_bh); + unlock_buffer(path[level].bp_sib_bh); + + nilfs_bmap_delete_block(&btree->bt_bmap, path[level].bp_sib_bh); + path[level].bp_sib_bh = NULL; + path[level + 1].bp_index++; +} + +static void nilfs_btree_shrink(struct nilfs_btree *btree, + struct nilfs_btree_path *path, + int level, __u64 *keyp, __u64 *ptrp) +{ + struct nilfs_btree_node *root, *child; + int n; + + nilfs_btree_do_delete(btree, path, level, keyp, ptrp); + + lock_buffer(path[level].bp_bh); + root = nilfs_btree_get_root(btree); + child = nilfs_btree_get_nonroot_node(btree, path, level); + + nilfs_btree_node_delete(btree, root, NULL, NULL, 0); + nilfs_btree_node_set_level(btree, root, level); + n = nilfs_btree_node_get_nchildren(btree, child); + nilfs_btree_node_move_left(btree, root, child, n); + unlock_buffer(path[level].bp_bh); + + nilfs_bmap_delete_block(&btree->bt_bmap, path[level].bp_bh); + path[level].bp_bh = NULL; +} + + +static int nilfs_btree_prepare_delete(struct nilfs_btree *btree, + struct nilfs_btree_path *path, + int *levelp, + struct nilfs_bmap_stats *stats) +{ + struct buffer_head *bh; + struct nilfs_btree_node *node, *parent, *sib; + __u64 sibptr; + int pindex, level, ret; + + ret = 0; + stats->bs_nblocks = 0; + for (level = NILFS_BTREE_LEVEL_NODE_MIN; + level < nilfs_btree_height(btree) - 1; + level++) { + node = nilfs_btree_get_nonroot_node(btree, path, level); + path[level].bp_oldreq.bpr_ptr = + nilfs_btree_node_get_ptr(btree, node, + path[level].bp_index); + if (btree->bt_bmap.b_pops->bpop_prepare_end_ptr != NULL) { + ret = (*btree->bt_bmap.b_pops->bpop_prepare_end_ptr)( + &btree->bt_bmap, &path[level].bp_oldreq); + if (ret < 0) + goto err_out_child_node; + } + + if (nilfs_btree_node_get_nchildren(btree, node) > + nilfs_btree_node_nchildren_min(btree, node)) { + path[level].bp_op = nilfs_btree_do_delete; + stats->bs_nblocks++; + goto out; + } + + parent = nilfs_btree_get_node(btree, path, level + 1); + pindex = path[level + 1].bp_index; + + if (pindex > 0) { + /* left sibling */ + sibptr = nilfs_btree_node_get_ptr(btree, parent, + pindex - 1); + ret = nilfs_bmap_get_block(&btree->bt_bmap, sibptr, + &bh); + if (ret < 0) + goto err_out_curr_node; + sib = (struct nilfs_btree_node *)bh->b_data; + if (nilfs_btree_node_get_nchildren(btree, sib) > + nilfs_btree_node_nchildren_min(btree, sib)) { + path[level].bp_sib_bh = bh; + path[level].bp_op = nilfs_btree_borrow_left; + stats->bs_nblocks++; + goto out; + } else { + path[level].bp_sib_bh = bh; + path[level].bp_op = nilfs_btree_concat_left; + stats->bs_nblocks++; + /* continue; */ + } + } else if (pindex < + nilfs_btree_node_get_nchildren(btree, parent) - 1) { + /* right sibling */ + sibptr = nilfs_btree_node_get_ptr(btree, parent, + pindex + 1); + ret = nilfs_bmap_get_block(&btree->bt_bmap, sibptr, + &bh); + if (ret < 0) + goto err_out_curr_node; + sib = (struct nilfs_btree_node *)bh->b_data; + if (nilfs_btree_node_get_nchildren(btree, sib) > + nilfs_btree_node_nchildren_min(btree, sib)) { + path[level].bp_sib_bh = bh; + path[level].bp_op = nilfs_btree_borrow_right; + stats->bs_nblocks++; + goto out; + } else { + path[level].bp_sib_bh = bh; + path[level].bp_op = nilfs_btree_concat_right; + stats->bs_nblocks++; + /* continue; */ + } + } else { + /* no siblings */ + /* the only child of the root node */ + BUG_ON(level != nilfs_btree_height(btree) - 2); + if (nilfs_btree_node_get_nchildren(btree, node) - 1 <= + NILFS_BTREE_ROOT_NCHILDREN_MAX) { + path[level].bp_op = nilfs_btree_shrink; + stats->bs_nblocks += 2; + } else { + path[level].bp_op = nilfs_btree_do_delete; + stats->bs_nblocks++; + } + + goto out; + + } + } + + node = nilfs_btree_get_root(btree); + path[level].bp_oldreq.bpr_ptr = + nilfs_btree_node_get_ptr(btree, node, path[level].bp_index); + if (btree->bt_bmap.b_pops->bpop_prepare_end_ptr != NULL) { + ret = (*btree->bt_bmap.b_pops->bpop_prepare_end_ptr)( + &btree->bt_bmap, &path[level].bp_oldreq); + if (ret < 0) + goto err_out_child_node; + } + /* child of the root node is deleted */ + path[level].bp_op = nilfs_btree_do_delete; + stats->bs_nblocks++; + + /* success */ + out: + *levelp = level; + return ret; + + /* error */ + err_out_curr_node: + if (btree->bt_bmap.b_pops->bpop_abort_end_ptr != NULL) + (*btree->bt_bmap.b_pops->bpop_abort_end_ptr)( + &btree->bt_bmap, &path[level].bp_oldreq); + err_out_child_node: + for (level--; level >= NILFS_BTREE_LEVEL_NODE_MIN; level--) { + nilfs_bmap_put_block(&btree->bt_bmap, path[level].bp_sib_bh); + if (btree->bt_bmap.b_pops->bpop_abort_end_ptr != NULL) + (*btree->bt_bmap.b_pops->bpop_abort_end_ptr)( + &btree->bt_bmap, &path[level].bp_oldreq); + } + *levelp = level; + stats->bs_nblocks = 0; + return ret; +} + +static void nilfs_btree_commit_delete(struct nilfs_btree *btree, + struct nilfs_btree_path *path, + int maxlevel) +{ + int level; + + for (level = NILFS_BTREE_LEVEL_NODE_MIN; level <= maxlevel; level++) { + if (btree->bt_bmap.b_pops->bpop_commit_end_ptr != NULL) + (*btree->bt_bmap.b_pops->bpop_commit_end_ptr)( + &btree->bt_bmap, &path[level].bp_oldreq); + (*path[level].bp_op)(btree, path, level, NULL, NULL); + } + + if (!nilfs_bmap_dirty(&btree->bt_bmap)) + nilfs_bmap_set_dirty(&btree->bt_bmap); +} + +static int nilfs_btree_delete(struct nilfs_bmap *bmap, __u64 key) + +{ + struct nilfs_btree *btree; + struct nilfs_btree_path *path; + struct nilfs_bmap_stats stats; + int level, ret; + + btree = (struct nilfs_btree *)bmap; + path = nilfs_btree_alloc_path(btree); + if (path == NULL) + return -ENOMEM; + nilfs_btree_init_path(btree, path); + ret = nilfs_btree_do_lookup(btree, path, key, NULL, + NILFS_BTREE_LEVEL_NODE_MIN); + if (ret < 0) + goto out; + + ret = nilfs_btree_prepare_delete(btree, path, &level, &stats); + if (ret < 0) + goto out; + nilfs_btree_commit_delete(btree, path, level); + nilfs_bmap_sub_blocks(bmap, stats.bs_nblocks); + +out: + nilfs_btree_clear_path(btree, path); + nilfs_btree_free_path(btree, path); + return ret; +} + +static int nilfs_btree_last_key(const struct nilfs_bmap *bmap, __u64 *keyp) +{ + struct nilfs_btree *btree; + struct nilfs_btree_path *path; + int ret; + + btree = (struct nilfs_btree *)bmap; + path = nilfs_btree_alloc_path(btree); + if (path == NULL) + return -ENOMEM; + nilfs_btree_init_path(btree, path); + + ret = nilfs_btree_do_lookup_last(btree, path, keyp, NULL); + + nilfs_btree_clear_path(btree, path); + nilfs_btree_free_path(btree, path); + + return ret; +} + +static int nilfs_btree_check_delete(struct nilfs_bmap *bmap, __u64 key) +{ + struct buffer_head *bh; + struct nilfs_btree *btree; + struct nilfs_btree_node *root, *node; + __u64 maxkey, nextmaxkey; + __u64 ptr; + int nchildren, ret; + + btree = (struct nilfs_btree *)bmap; + root = nilfs_btree_get_root(btree); + switch (nilfs_btree_height(btree)) { + case 2: + bh = NULL; + node = root; + break; + case 3: + nchildren = nilfs_btree_node_get_nchildren(btree, root); + if (nchildren > 1) + return 0; + ptr = nilfs_btree_node_get_ptr(btree, root, nchildren - 1); + ret = nilfs_bmap_get_block(bmap, ptr, &bh); + if (ret < 0) + return ret; + node = (struct nilfs_btree_node *)bh->b_data; + break; + default: + return 0; + } + + nchildren = nilfs_btree_node_get_nchildren(btree, node); + maxkey = nilfs_btree_node_get_key(btree, node, nchildren - 1); + nextmaxkey = (nchildren > 1) ? + nilfs_btree_node_get_key(btree, node, nchildren - 2) : 0; + if (bh != NULL) + nilfs_bmap_put_block(bmap, bh); + + return (maxkey == key) && (nextmaxkey < bmap->b_low); +} + +static int nilfs_btree_gather_data(struct nilfs_bmap *bmap, + __u64 *keys, __u64 *ptrs, int nitems) +{ + struct buffer_head *bh; + struct nilfs_btree *btree; + struct nilfs_btree_node *node, *root; + __le64 *dkeys; + __le64 *dptrs; + __u64 ptr; + int nchildren, i, ret; + + btree = (struct nilfs_btree *)bmap; + root = nilfs_btree_get_root(btree); + switch (nilfs_btree_height(btree)) { + case 2: + bh = NULL; + node = root; + break; + case 3: + nchildren = nilfs_btree_node_get_nchildren(btree, root); + BUG_ON(nchildren > 1); + ptr = nilfs_btree_node_get_ptr(btree, root, nchildren - 1); + ret = nilfs_bmap_get_block(bmap, ptr, &bh); + if (ret < 0) + return ret; + node = (struct nilfs_btree_node *)bh->b_data; + break; + default: + node = NULL; + BUG(); + } + + nchildren = nilfs_btree_node_get_nchildren(btree, node); + if (nchildren < nitems) + nitems = nchildren; + dkeys = nilfs_btree_node_dkeys(btree, node); + dptrs = nilfs_btree_node_dptrs(btree, node); + for (i = 0; i < nitems; i++) { + keys[i] = nilfs_bmap_dkey_to_key(dkeys[i]); + ptrs[i] = nilfs_bmap_dptr_to_ptr(dptrs[i]); + } + + if (bh != NULL) + nilfs_bmap_put_block(bmap, bh); + + return nitems; +} + +static int +nilfs_btree_prepare_convert_and_insert(struct nilfs_bmap *bmap, __u64 key, + union nilfs_bmap_ptr_req *dreq, + union nilfs_bmap_ptr_req *nreq, + struct buffer_head **bhp, + struct nilfs_bmap_stats *stats) +{ + struct buffer_head *bh; + struct nilfs_btree *btree; + int ret; + + btree = (struct nilfs_btree *)bmap; + stats->bs_nblocks = 0; + + /* for data */ + /* cannot find near ptr */ + if (btree->bt_ops->btop_find_target != NULL) + dreq->bpr_ptr + = (*btree->bt_ops->btop_find_target)(btree, NULL, key); + ret = (*bmap->b_pops->bpop_prepare_alloc_ptr)(bmap, dreq); + if (ret < 0) + return ret; + + *bhp = NULL; + stats->bs_nblocks++; + if (nreq != NULL) { + nreq->bpr_ptr = dreq->bpr_ptr + 1; + ret = (*bmap->b_pops->bpop_prepare_alloc_ptr)(bmap, nreq); + if (ret < 0) + goto err_out_dreq; + + ret = nilfs_bmap_get_new_block(bmap, nreq->bpr_ptr, &bh); + if (ret < 0) + goto err_out_nreq; + + *bhp = bh; + stats->bs_nblocks++; + } + + /* success */ + return 0; + + /* error */ + err_out_nreq: + (*bmap->b_pops->bpop_abort_alloc_ptr)(bmap, nreq); + err_out_dreq: + (*bmap->b_pops->bpop_abort_alloc_ptr)(bmap, dreq); + stats->bs_nblocks = 0; + return ret; + +} + +static void +nilfs_btree_commit_convert_and_insert(struct nilfs_bmap *bmap, + __u64 key, __u64 ptr, + const __u64 *keys, const __u64 *ptrs, + int n, __u64 low, __u64 high, + union nilfs_bmap_ptr_req *dreq, + union nilfs_bmap_ptr_req *nreq, + struct buffer_head *bh) +{ + struct nilfs_btree *btree; + struct nilfs_btree_node *node; + __u64 tmpptr; + + /* free resources */ + if (bmap->b_ops->bop_clear != NULL) + (*bmap->b_ops->bop_clear)(bmap); + + /* ptr must be a pointer to a buffer head. */ + set_buffer_nilfs_volatile((struct buffer_head *)((unsigned long)ptr)); + + /* convert and insert */ + btree = (struct nilfs_btree *)bmap; + nilfs_btree_init(bmap, low, high); + if (nreq != NULL) { + if (bmap->b_pops->bpop_commit_alloc_ptr != NULL) { + (*bmap->b_pops->bpop_commit_alloc_ptr)(bmap, dreq); + (*bmap->b_pops->bpop_commit_alloc_ptr)(bmap, nreq); + } + + /* create child node at level 1 */ + lock_buffer(bh); + node = (struct nilfs_btree_node *)bh->b_data; + nilfs_btree_node_init(btree, node, 0, 1, n, keys, ptrs); + nilfs_btree_node_insert(btree, node, + key, dreq->bpr_ptr, n); + if (!buffer_dirty(bh)) + nilfs_btnode_mark_dirty(bh); + if (!nilfs_bmap_dirty(bmap)) + nilfs_bmap_set_dirty(bmap); + + unlock_buffer(bh); + nilfs_bmap_put_block(bmap, bh); + + /* create root node at level 2 */ + node = nilfs_btree_get_root(btree); + tmpptr = nreq->bpr_ptr; + nilfs_btree_node_init(btree, node, NILFS_BTREE_NODE_ROOT, + 2, 1, &keys[0], &tmpptr); + } else { + if (bmap->b_pops->bpop_commit_alloc_ptr != NULL) + (*bmap->b_pops->bpop_commit_alloc_ptr)(bmap, dreq); + + /* create root node at level 1 */ + node = nilfs_btree_get_root(btree); + nilfs_btree_node_init(btree, node, NILFS_BTREE_NODE_ROOT, + 1, n, keys, ptrs); + nilfs_btree_node_insert(btree, node, + key, dreq->bpr_ptr, n); + if (!nilfs_bmap_dirty(bmap)) + nilfs_bmap_set_dirty(bmap); + } + + if (btree->bt_ops->btop_set_target != NULL) + (*btree->bt_ops->btop_set_target)(btree, key, dreq->bpr_ptr); +} + +/** + * nilfs_btree_convert_and_insert - + * @bmap: + * @key: + * @ptr: + * @keys: + * @ptrs: + * @n: + * @low: + * @high: + */ +int nilfs_btree_convert_and_insert(struct nilfs_bmap *bmap, + __u64 key, __u64 ptr, + const __u64 *keys, const __u64 *ptrs, + int n, __u64 low, __u64 high) +{ + struct buffer_head *bh; + union nilfs_bmap_ptr_req dreq, nreq, *di, *ni; + struct nilfs_bmap_stats stats; + int ret; + + if (n + 1 <= NILFS_BTREE_ROOT_NCHILDREN_MAX) { + di = &dreq; + ni = NULL; + } else if ((n + 1) <= NILFS_BTREE_NODE_NCHILDREN_MAX( + 1 << bmap->b_inode->i_blkbits)) { + di = &dreq; + ni = &nreq; + } else { + di = NULL; + ni = NULL; + BUG(); + } + + ret = nilfs_btree_prepare_convert_and_insert(bmap, key, di, ni, &bh, + &stats); + if (ret < 0) + return ret; + nilfs_btree_commit_convert_and_insert(bmap, key, ptr, keys, ptrs, n, + low, high, di, ni, bh); + nilfs_bmap_add_blocks(bmap, stats.bs_nblocks); + return 0; +} + +static int nilfs_btree_propagate_p(struct nilfs_btree *btree, + struct nilfs_btree_path *path, + int level, + struct buffer_head *bh) +{ + while ((++level < nilfs_btree_height(btree) - 1) && + !buffer_dirty(path[level].bp_bh)) + nilfs_btnode_mark_dirty(path[level].bp_bh); + + return 0; +} + +static int nilfs_btree_prepare_update_v(struct nilfs_btree *btree, + struct nilfs_btree_path *path, + int level) +{ + struct nilfs_btree_node *parent; + int ret; + + parent = nilfs_btree_get_node(btree, path, level + 1); + path[level].bp_oldreq.bpr_ptr = + nilfs_btree_node_get_ptr(btree, parent, + path[level + 1].bp_index); + path[level].bp_newreq.bpr_ptr = path[level].bp_oldreq.bpr_ptr + 1; + ret = nilfs_bmap_prepare_update(&btree->bt_bmap, + &path[level].bp_oldreq, + &path[level].bp_newreq); + if (ret < 0) + return ret; + + if (buffer_nilfs_node(path[level].bp_bh)) { + path[level].bp_ctxt.oldkey = path[level].bp_oldreq.bpr_ptr; + path[level].bp_ctxt.newkey = path[level].bp_newreq.bpr_ptr; + path[level].bp_ctxt.bh = path[level].bp_bh; + ret = nilfs_btnode_prepare_change_key( + &NILFS_BMAP_I(&btree->bt_bmap)->i_btnode_cache, + &path[level].bp_ctxt); + if (ret < 0) { + nilfs_bmap_abort_update(&btree->bt_bmap, + &path[level].bp_oldreq, + &path[level].bp_newreq); + return ret; + } + } + + return 0; +} + +static void nilfs_btree_commit_update_v(struct nilfs_btree *btree, + struct nilfs_btree_path *path, + int level) +{ + struct nilfs_btree_node *parent; + + nilfs_bmap_commit_update(&btree->bt_bmap, + &path[level].bp_oldreq, + &path[level].bp_newreq); + + if (buffer_nilfs_node(path[level].bp_bh)) { + nilfs_btnode_commit_change_key( + &NILFS_BMAP_I(&btree->bt_bmap)->i_btnode_cache, + &path[level].bp_ctxt); + path[level].bp_bh = path[level].bp_ctxt.bh; + } + set_buffer_nilfs_volatile(path[level].bp_bh); + + parent = nilfs_btree_get_node(btree, path, level + 1); + nilfs_btree_node_set_ptr(btree, parent, path[level + 1].bp_index, + path[level].bp_newreq.bpr_ptr); +} + +static void nilfs_btree_abort_update_v(struct nilfs_btree *btree, + struct nilfs_btree_path *path, + int level) +{ + nilfs_bmap_abort_update(&btree->bt_bmap, + &path[level].bp_oldreq, + &path[level].bp_newreq); + if (buffer_nilfs_node(path[level].bp_bh)) + nilfs_btnode_abort_change_key( + &NILFS_BMAP_I(&btree->bt_bmap)->i_btnode_cache, + &path[level].bp_ctxt); +} + +static int nilfs_btree_prepare_propagate_v(struct nilfs_btree *btree, + struct nilfs_btree_path *path, + int minlevel, + int *maxlevelp) +{ + int level, ret; + + level = minlevel; + if (!buffer_nilfs_volatile(path[level].bp_bh)) { + ret = nilfs_btree_prepare_update_v(btree, path, level); + if (ret < 0) + return ret; + } + while ((++level < nilfs_btree_height(btree) - 1) && + !buffer_dirty(path[level].bp_bh)) { + + BUG_ON(buffer_nilfs_volatile(path[level].bp_bh)); + ret = nilfs_btree_prepare_update_v(btree, path, level); + if (ret < 0) + goto out; + } + + /* success */ + BUG_ON(maxlevelp == NULL); + *maxlevelp = level - 1; + return 0; + + /* error */ + out: + while (--level > minlevel) + nilfs_btree_abort_update_v(btree, path, level); + if (!buffer_nilfs_volatile(path[level].bp_bh)) + nilfs_btree_abort_update_v(btree, path, level); + return ret; +} + +static void nilfs_btree_commit_propagate_v(struct nilfs_btree *btree, + struct nilfs_btree_path *path, + int minlevel, + int maxlevel, + struct buffer_head *bh) +{ + int level; + + if (!buffer_nilfs_volatile(path[minlevel].bp_bh)) + nilfs_btree_commit_update_v(btree, path, minlevel); + + for (level = minlevel + 1; level <= maxlevel; level++) + nilfs_btree_commit_update_v(btree, path, level); +} + +static int nilfs_btree_propagate_v(struct nilfs_btree *btree, + struct nilfs_btree_path *path, + int level, + struct buffer_head *bh) +{ + int maxlevel, ret; + struct nilfs_btree_node *parent; + __u64 ptr; + + get_bh(bh); + path[level].bp_bh = bh; + ret = nilfs_btree_prepare_propagate_v(btree, path, level, &maxlevel); + if (ret < 0) + goto out; + + if (buffer_nilfs_volatile(path[level].bp_bh)) { + parent = nilfs_btree_get_node(btree, path, level + 1); + ptr = nilfs_btree_node_get_ptr(btree, parent, + path[level + 1].bp_index); + ret = nilfs_bmap_mark_dirty(&btree->bt_bmap, ptr); + if (ret < 0) + goto out; + } + + nilfs_btree_commit_propagate_v(btree, path, level, maxlevel, bh); + + out: + brelse(path[level].bp_bh); + path[level].bp_bh = NULL; + return ret; +} + +static int nilfs_btree_propagate(const struct nilfs_bmap *bmap, + struct buffer_head *bh) +{ + struct nilfs_btree *btree; + struct nilfs_btree_path *path; + struct nilfs_btree_node *node; + __u64 key; + int level, ret; + + BUG_ON(!buffer_dirty(bh)); + + btree = (struct nilfs_btree *)bmap; + path = nilfs_btree_alloc_path(btree); + if (path == NULL) + return -ENOMEM; + nilfs_btree_init_path(btree, path); + + if (buffer_nilfs_node(bh)) { + node = (struct nilfs_btree_node *)bh->b_data; + key = nilfs_btree_node_get_key(btree, node, 0); + level = nilfs_btree_node_get_level(btree, node); + } else { + key = nilfs_bmap_data_get_key(bmap, bh); + level = NILFS_BTREE_LEVEL_DATA; + } + + ret = nilfs_btree_do_lookup(btree, path, key, NULL, level + 1); + if (ret < 0) { + /* BUG_ON(ret == -ENOENT); */ + if (ret == -ENOENT) { + printk(KERN_CRIT "%s: key = %llu, level == %d\n", + __func__, (unsigned long long)key, level); + BUG(); + } + goto out; + } + + ret = (*btree->bt_ops->btop_propagate)(btree, path, level, bh); + + out: + nilfs_btree_clear_path(btree, path); + nilfs_btree_free_path(btree, path); + + return ret; +} + +static int nilfs_btree_propagate_gc(const struct nilfs_bmap *bmap, + struct buffer_head *bh) +{ + return nilfs_bmap_mark_dirty(bmap, bh->b_blocknr); +} + +static void nilfs_btree_add_dirty_buffer(struct nilfs_btree *btree, + struct list_head *lists, + struct buffer_head *bh) +{ + struct list_head *head; + struct buffer_head *cbh; + struct nilfs_btree_node *node, *cnode; + __u64 key, ckey; + int level; + + get_bh(bh); + node = (struct nilfs_btree_node *)bh->b_data; + key = nilfs_btree_node_get_key(btree, node, 0); + level = nilfs_btree_node_get_level(btree, node); + list_for_each(head, &lists[level]) { + cbh = list_entry(head, struct buffer_head, b_assoc_buffers); + cnode = (struct nilfs_btree_node *)cbh->b_data; + ckey = nilfs_btree_node_get_key(btree, cnode, 0); + if (key < ckey) + break; + } + list_add_tail(&bh->b_assoc_buffers, head); +} + +static void nilfs_btree_lookup_dirty_buffers(struct nilfs_bmap *bmap, + struct list_head *listp) +{ + struct nilfs_btree *btree = (struct nilfs_btree *)bmap; + struct address_space *btcache = &NILFS_BMAP_I(bmap)->i_btnode_cache; + struct list_head lists[NILFS_BTREE_LEVEL_MAX]; + struct pagevec pvec; + struct buffer_head *bh, *head; + pgoff_t index = 0; + int level, i; + + for (level = NILFS_BTREE_LEVEL_NODE_MIN; + level < NILFS_BTREE_LEVEL_MAX; + level++) + INIT_LIST_HEAD(&lists[level]); + + pagevec_init(&pvec, 0); + + while (pagevec_lookup_tag(&pvec, btcache, &index, PAGECACHE_TAG_DIRTY, + PAGEVEC_SIZE)) { + for (i = 0; i < pagevec_count(&pvec); i++) { + bh = head = page_buffers(pvec.pages[i]); + do { + if (buffer_dirty(bh)) + nilfs_btree_add_dirty_buffer(btree, + lists, bh); + } while ((bh = bh->b_this_page) != head); + } + pagevec_release(&pvec); + cond_resched(); + } + + for (level = NILFS_BTREE_LEVEL_NODE_MIN; + level < NILFS_BTREE_LEVEL_MAX; + level++) + list_splice(&lists[level], listp->prev); +} + +static int nilfs_btree_assign_p(struct nilfs_btree *btree, + struct nilfs_btree_path *path, + int level, + struct buffer_head **bh, + sector_t blocknr, + union nilfs_binfo *binfo) +{ + struct nilfs_btree_node *parent; + __u64 key; + __u64 ptr; + int ret; + + parent = nilfs_btree_get_node(btree, path, level + 1); + ptr = nilfs_btree_node_get_ptr(btree, parent, + path[level + 1].bp_index); + if (buffer_nilfs_node(*bh)) { + path[level].bp_ctxt.oldkey = ptr; + path[level].bp_ctxt.newkey = blocknr; + path[level].bp_ctxt.bh = *bh; + ret = nilfs_btnode_prepare_change_key( + &NILFS_BMAP_I(&btree->bt_bmap)->i_btnode_cache, + &path[level].bp_ctxt); + if (ret < 0) + return ret; + nilfs_btnode_commit_change_key( + &NILFS_BMAP_I(&btree->bt_bmap)->i_btnode_cache, + &path[level].bp_ctxt); + *bh = path[level].bp_ctxt.bh; + } + + nilfs_btree_node_set_ptr(btree, parent, + path[level + 1].bp_index, blocknr); + + key = nilfs_btree_node_get_key(btree, parent, + path[level + 1].bp_index); + /* on-disk format */ + binfo->bi_dat.bi_blkoff = nilfs_bmap_key_to_dkey(key); + binfo->bi_dat.bi_level = level; + + return 0; +} + +static int nilfs_btree_assign_v(struct nilfs_btree *btree, + struct nilfs_btree_path *path, + int level, + struct buffer_head **bh, + sector_t blocknr, + union nilfs_binfo *binfo) +{ + struct nilfs_btree_node *parent; + __u64 key; + __u64 ptr; + union nilfs_bmap_ptr_req req; + int ret; + + parent = nilfs_btree_get_node(btree, path, level + 1); + ptr = nilfs_btree_node_get_ptr(btree, parent, + path[level + 1].bp_index); + req.bpr_ptr = ptr; + ret = (*btree->bt_bmap.b_pops->bpop_prepare_start_ptr)(&btree->bt_bmap, + &req); + if (ret < 0) + return ret; + (*btree->bt_bmap.b_pops->bpop_commit_start_ptr)(&btree->bt_bmap, + &req, blocknr); + + key = nilfs_btree_node_get_key(btree, parent, + path[level + 1].bp_index); + /* on-disk format */ + binfo->bi_v.bi_vblocknr = nilfs_bmap_ptr_to_dptr(ptr); + binfo->bi_v.bi_blkoff = nilfs_bmap_key_to_dkey(key); + + return 0; +} + +static int nilfs_btree_assign(struct nilfs_bmap *bmap, + struct buffer_head **bh, + sector_t blocknr, + union nilfs_binfo *binfo) +{ + struct nilfs_btree *btree; + struct nilfs_btree_path *path; + struct nilfs_btree_node *node; + __u64 key; + int level, ret; + + btree = (struct nilfs_btree *)bmap; + path = nilfs_btree_alloc_path(btree); + if (path == NULL) + return -ENOMEM; + nilfs_btree_init_path(btree, path); + + if (buffer_nilfs_node(*bh)) { + node = (struct nilfs_btree_node *)(*bh)->b_data; + key = nilfs_btree_node_get_key(btree, node, 0); + level = nilfs_btree_node_get_level(btree, node); + } else { + key = nilfs_bmap_data_get_key(bmap, *bh); + level = NILFS_BTREE_LEVEL_DATA; + } + + ret = nilfs_btree_do_lookup(btree, path, key, NULL, level + 1); + if (ret < 0) { + BUG_ON(ret == -ENOENT); + goto out; + } + + ret = (*btree->bt_ops->btop_assign)(btree, path, level, bh, + blocknr, binfo); + + out: + nilfs_btree_clear_path(btree, path); + nilfs_btree_free_path(btree, path); + + return ret; +} + +static int nilfs_btree_assign_gc(struct nilfs_bmap *bmap, + struct buffer_head **bh, + sector_t blocknr, + union nilfs_binfo *binfo) +{ + struct nilfs_btree *btree; + struct nilfs_btree_node *node; + __u64 key; + int ret; + + btree = (struct nilfs_btree *)bmap; + ret = nilfs_bmap_move_v(bmap, (*bh)->b_blocknr, blocknr); + if (ret < 0) + return ret; + + if (buffer_nilfs_node(*bh)) { + node = (struct nilfs_btree_node *)(*bh)->b_data; + key = nilfs_btree_node_get_key(btree, node, 0); + } else + key = nilfs_bmap_data_get_key(bmap, *bh); + + /* on-disk format */ + binfo->bi_v.bi_vblocknr = cpu_to_le64((*bh)->b_blocknr); + binfo->bi_v.bi_blkoff = nilfs_bmap_key_to_dkey(key); + + return 0; +} + +static int nilfs_btree_mark(struct nilfs_bmap *bmap, __u64 key, int level) +{ + struct buffer_head *bh; + struct nilfs_btree *btree; + struct nilfs_btree_path *path; + __u64 ptr; + int ret; + + btree = (struct nilfs_btree *)bmap; + path = nilfs_btree_alloc_path(btree); + if (path == NULL) + return -ENOMEM; + nilfs_btree_init_path(btree, path); + + ret = nilfs_btree_do_lookup(btree, path, key, &ptr, level + 1); + if (ret < 0) { + BUG_ON(ret == -ENOENT); + goto out; + } + ret = nilfs_bmap_get_block(&btree->bt_bmap, ptr, &bh); + if (ret < 0) { + BUG_ON(ret == -ENOENT); + goto out; + } + + if (!buffer_dirty(bh)) + nilfs_btnode_mark_dirty(bh); + nilfs_bmap_put_block(&btree->bt_bmap, bh); + if (!nilfs_bmap_dirty(&btree->bt_bmap)) + nilfs_bmap_set_dirty(&btree->bt_bmap); + + out: + nilfs_btree_clear_path(btree, path); + nilfs_btree_free_path(btree, path); + return ret; +} + +static const struct nilfs_bmap_operations nilfs_btree_ops = { + .bop_lookup = nilfs_btree_lookup, + .bop_insert = nilfs_btree_insert, + .bop_delete = nilfs_btree_delete, + .bop_clear = NULL, + + .bop_propagate = nilfs_btree_propagate, + + .bop_lookup_dirty_buffers = nilfs_btree_lookup_dirty_buffers, + + .bop_assign = nilfs_btree_assign, + .bop_mark = nilfs_btree_mark, + + .bop_last_key = nilfs_btree_last_key, + .bop_check_insert = NULL, + .bop_check_delete = nilfs_btree_check_delete, + .bop_gather_data = nilfs_btree_gather_data, +}; + +static const struct nilfs_bmap_operations nilfs_btree_ops_gc = { + .bop_lookup = NULL, + .bop_insert = NULL, + .bop_delete = NULL, + .bop_clear = NULL, + + .bop_propagate = nilfs_btree_propagate_gc, + + .bop_lookup_dirty_buffers = nilfs_btree_lookup_dirty_buffers, + + .bop_assign = nilfs_btree_assign_gc, + .bop_mark = NULL, + + .bop_last_key = NULL, + .bop_check_insert = NULL, + .bop_check_delete = NULL, + .bop_gather_data = NULL, +}; + +static const struct nilfs_btree_operations nilfs_btree_ops_v = { + .btop_find_target = nilfs_btree_find_target_v, + .btop_set_target = nilfs_btree_set_target_v, + .btop_propagate = nilfs_btree_propagate_v, + .btop_assign = nilfs_btree_assign_v, +}; + +static const struct nilfs_btree_operations nilfs_btree_ops_p = { + .btop_find_target = NULL, + .btop_set_target = NULL, + .btop_propagate = nilfs_btree_propagate_p, + .btop_assign = nilfs_btree_assign_p, +}; + +int nilfs_btree_init(struct nilfs_bmap *bmap, __u64 low, __u64 high) +{ + struct nilfs_btree *btree; + + btree = (struct nilfs_btree *)bmap; + bmap->b_ops = &nilfs_btree_ops; + bmap->b_low = low; + bmap->b_high = high; + switch (bmap->b_inode->i_ino) { + case NILFS_DAT_INO: + btree->bt_ops = &nilfs_btree_ops_p; + break; + default: + btree->bt_ops = &nilfs_btree_ops_v; + break; + } + + return 0; +} + +void nilfs_btree_init_gc(struct nilfs_bmap *bmap) +{ + bmap->b_low = NILFS_BMAP_LARGE_LOW; + bmap->b_high = NILFS_BMAP_LARGE_HIGH; + bmap->b_ops = &nilfs_btree_ops_gc; +} diff --git a/fs/nilfs2/btree.h b/fs/nilfs2/btree.h new file mode 100644 index 00000000000..4766deb52fb --- /dev/null +++ b/fs/nilfs2/btree.h @@ -0,0 +1,117 @@ +/* + * btree.h - NILFS B-tree. + * + * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + * Written by Koji Sato . + */ + +#ifndef _NILFS_BTREE_H +#define _NILFS_BTREE_H + +#include +#include +#include +#include +#include "btnode.h" +#include "bmap.h" + +struct nilfs_btree; +struct nilfs_btree_path; + +/** + * struct nilfs_btree_operations - B-tree operation table + */ +struct nilfs_btree_operations { + __u64 (*btop_find_target)(const struct nilfs_btree *, + const struct nilfs_btree_path *, __u64); + void (*btop_set_target)(struct nilfs_btree *, __u64, __u64); + + struct the_nilfs *(*btop_get_nilfs)(struct nilfs_btree *); + + int (*btop_propagate)(struct nilfs_btree *, + struct nilfs_btree_path *, + int, + struct buffer_head *); + int (*btop_assign)(struct nilfs_btree *, + struct nilfs_btree_path *, + int, + struct buffer_head **, + sector_t, + union nilfs_binfo *); +}; + +/** + * struct nilfs_btree_node - B-tree node + * @bn_flags: flags + * @bn_level: level + * @bn_nchildren: number of children + * @bn_pad: padding + */ +struct nilfs_btree_node { + __u8 bn_flags; + __u8 bn_level; + __le16 bn_nchildren; + __le32 bn_pad; +}; + +/* flags */ +#define NILFS_BTREE_NODE_ROOT 0x01 + +/* level */ +#define NILFS_BTREE_LEVEL_DATA 0 +#define NILFS_BTREE_LEVEL_NODE_MIN (NILFS_BTREE_LEVEL_DATA + 1) +#define NILFS_BTREE_LEVEL_MAX 14 + +/** + * struct nilfs_btree - B-tree structure + * @bt_bmap: bmap base structure + * @bt_ops: B-tree operation table + */ +struct nilfs_btree { + struct nilfs_bmap bt_bmap; + + /* B-tree-specific members */ + const struct nilfs_btree_operations *bt_ops; +}; + + +#define NILFS_BTREE_ROOT_SIZE NILFS_BMAP_SIZE +#define NILFS_BTREE_ROOT_NCHILDREN_MAX \ + ((NILFS_BTREE_ROOT_SIZE - sizeof(struct nilfs_btree_node)) / \ + (sizeof(__le64 /* dkey */) + sizeof(__le64 /* dptr */))) +#define NILFS_BTREE_ROOT_NCHILDREN_MIN 0 +#define NILFS_BTREE_NODE_EXTRA_PAD_SIZE (sizeof(__le64)) +#define NILFS_BTREE_NODE_NCHILDREN_MAX(nodesize) \ + (((nodesize) - sizeof(struct nilfs_btree_node) - \ + NILFS_BTREE_NODE_EXTRA_PAD_SIZE) / \ + (sizeof(__le64 /* dkey */) + sizeof(__le64 /* dptr */))) +#define NILFS_BTREE_NODE_NCHILDREN_MIN(nodesize) \ + ((NILFS_BTREE_NODE_NCHILDREN_MAX(nodesize) - 1) / 2 + 1) +#define NILFS_BTREE_KEY_MIN ((__u64)0) +#define NILFS_BTREE_KEY_MAX (~(__u64)0) + + +int nilfs_btree_path_cache_init(void); +void nilfs_btree_path_cache_destroy(void); +int nilfs_btree_init(struct nilfs_bmap *, __u64, __u64); +int nilfs_btree_convert_and_insert(struct nilfs_bmap *, __u64, __u64, + const __u64 *, const __u64 *, + int, __u64, __u64); +void nilfs_btree_init_gc(struct nilfs_bmap *); + +#endif /* _NILFS_BTREE_H */ -- cgit v1.2.3 From 36a580eb489f54d81a0534974962e732a314b999 Mon Sep 17 00:00:00 2001 From: Koji Sato Date: Mon, 6 Apr 2009 19:01:25 -0700 Subject: nilfs2: direct block mapping This adds block mappings using direct pointers which are stored in the i_bmap array of inode. Signed-off-by: Koji Sato Signed-off-by: Ryusuke Konishi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/nilfs2/direct.c | 429 +++++++++++++++++++++++++++++++++++++++++++++++++++++ fs/nilfs2/direct.h | 78 ++++++++++ 2 files changed, 507 insertions(+) create mode 100644 fs/nilfs2/direct.c create mode 100644 fs/nilfs2/direct.h (limited to 'fs/nilfs2') diff --git a/fs/nilfs2/direct.c b/fs/nilfs2/direct.c new file mode 100644 index 00000000000..303d7f1982f --- /dev/null +++ b/fs/nilfs2/direct.c @@ -0,0 +1,429 @@ +/* + * direct.c - NILFS direct block pointer. + * + * Copyright (C) 2006-2008 Nippon Telegraph and Telephone Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + * Written by Koji Sato . + */ + +#include +#include "nilfs.h" +#include "page.h" +#include "direct.h" +#include "alloc.h" + +static inline __le64 *nilfs_direct_dptrs(const struct nilfs_direct *direct) +{ + return (__le64 *) + ((struct nilfs_direct_node *)direct->d_bmap.b_u.u_data + 1); +} + +static inline __u64 +nilfs_direct_get_ptr(const struct nilfs_direct *direct, __u64 key) +{ + return nilfs_bmap_dptr_to_ptr(*(nilfs_direct_dptrs(direct) + key)); +} + +static inline void nilfs_direct_set_ptr(struct nilfs_direct *direct, + __u64 key, __u64 ptr) +{ + *(nilfs_direct_dptrs(direct) + key) = nilfs_bmap_ptr_to_dptr(ptr); +} + +static int nilfs_direct_lookup(const struct nilfs_bmap *bmap, + __u64 key, int level, __u64 *ptrp) +{ + struct nilfs_direct *direct; + __u64 ptr; + + direct = (struct nilfs_direct *)bmap; + if ((key > NILFS_DIRECT_KEY_MAX) || + (level != 1) || /* XXX: use macro for level 1 */ + ((ptr = nilfs_direct_get_ptr(direct, key)) == + NILFS_BMAP_INVALID_PTR)) + return -ENOENT; + + if (ptrp != NULL) + *ptrp = ptr; + return 0; +} + +static __u64 +nilfs_direct_find_target_v(const struct nilfs_direct *direct, __u64 key) +{ + __u64 ptr; + + ptr = nilfs_bmap_find_target_seq(&direct->d_bmap, key); + if (ptr != NILFS_BMAP_INVALID_PTR) + /* sequential access */ + return ptr; + else + /* block group */ + return nilfs_bmap_find_target_in_group(&direct->d_bmap); +} + +static void nilfs_direct_set_target_v(struct nilfs_direct *direct, + __u64 key, __u64 ptr) +{ + direct->d_bmap.b_last_allocated_key = key; + direct->d_bmap.b_last_allocated_ptr = ptr; +} + +static int nilfs_direct_prepare_insert(struct nilfs_direct *direct, + __u64 key, + union nilfs_bmap_ptr_req *req, + struct nilfs_bmap_stats *stats) +{ + int ret; + + if (direct->d_ops->dop_find_target != NULL) + req->bpr_ptr = (*direct->d_ops->dop_find_target)(direct, key); + ret = (*direct->d_bmap.b_pops->bpop_prepare_alloc_ptr)(&direct->d_bmap, + req); + if (ret < 0) + return ret; + + stats->bs_nblocks = 1; + return 0; +} + +static void nilfs_direct_commit_insert(struct nilfs_direct *direct, + union nilfs_bmap_ptr_req *req, + __u64 key, __u64 ptr) +{ + struct buffer_head *bh; + + /* ptr must be a pointer to a buffer head. */ + bh = (struct buffer_head *)((unsigned long)ptr); + set_buffer_nilfs_volatile(bh); + + if (direct->d_bmap.b_pops->bpop_commit_alloc_ptr != NULL) + (*direct->d_bmap.b_pops->bpop_commit_alloc_ptr)( + &direct->d_bmap, req); + nilfs_direct_set_ptr(direct, key, req->bpr_ptr); + + if (!nilfs_bmap_dirty(&direct->d_bmap)) + nilfs_bmap_set_dirty(&direct->d_bmap); + + if (direct->d_ops->dop_set_target != NULL) + (*direct->d_ops->dop_set_target)(direct, key, req->bpr_ptr); +} + +static int nilfs_direct_insert(struct nilfs_bmap *bmap, __u64 key, __u64 ptr) +{ + struct nilfs_direct *direct; + union nilfs_bmap_ptr_req req; + struct nilfs_bmap_stats stats; + int ret; + + direct = (struct nilfs_direct *)bmap; + if (key > NILFS_DIRECT_KEY_MAX) + return -ENOENT; + if (nilfs_direct_get_ptr(direct, key) != NILFS_BMAP_INVALID_PTR) + return -EEXIST; + + ret = nilfs_direct_prepare_insert(direct, key, &req, &stats); + if (ret < 0) + return ret; + nilfs_direct_commit_insert(direct, &req, key, ptr); + nilfs_bmap_add_blocks(bmap, stats.bs_nblocks); + + return 0; +} + +static int nilfs_direct_prepare_delete(struct nilfs_direct *direct, + union nilfs_bmap_ptr_req *req, + __u64 key, + struct nilfs_bmap_stats *stats) +{ + int ret; + + if (direct->d_bmap.b_pops->bpop_prepare_end_ptr != NULL) { + req->bpr_ptr = nilfs_direct_get_ptr(direct, key); + ret = (*direct->d_bmap.b_pops->bpop_prepare_end_ptr)( + &direct->d_bmap, req); + if (ret < 0) + return ret; + } + + stats->bs_nblocks = 1; + return 0; +} + +static void nilfs_direct_commit_delete(struct nilfs_direct *direct, + union nilfs_bmap_ptr_req *req, + __u64 key) +{ + if (direct->d_bmap.b_pops->bpop_commit_end_ptr != NULL) + (*direct->d_bmap.b_pops->bpop_commit_end_ptr)( + &direct->d_bmap, req); + nilfs_direct_set_ptr(direct, key, NILFS_BMAP_INVALID_PTR); +} + +static int nilfs_direct_delete(struct nilfs_bmap *bmap, __u64 key) +{ + struct nilfs_direct *direct; + union nilfs_bmap_ptr_req req; + struct nilfs_bmap_stats stats; + int ret; + + direct = (struct nilfs_direct *)bmap; + if ((key > NILFS_DIRECT_KEY_MAX) || + nilfs_direct_get_ptr(direct, key) == NILFS_BMAP_INVALID_PTR) + return -ENOENT; + + ret = nilfs_direct_prepare_delete(direct, &req, key, &stats); + if (ret < 0) + return ret; + nilfs_direct_commit_delete(direct, &req, key); + nilfs_bmap_sub_blocks(bmap, stats.bs_nblocks); + + return 0; +} + +static int nilfs_direct_last_key(const struct nilfs_bmap *bmap, __u64 *keyp) +{ + struct nilfs_direct *direct; + __u64 key, lastkey; + + direct = (struct nilfs_direct *)bmap; + lastkey = NILFS_DIRECT_KEY_MAX + 1; + for (key = NILFS_DIRECT_KEY_MIN; key <= NILFS_DIRECT_KEY_MAX; key++) + if (nilfs_direct_get_ptr(direct, key) != + NILFS_BMAP_INVALID_PTR) + lastkey = key; + + if (lastkey == NILFS_DIRECT_KEY_MAX + 1) + return -ENOENT; + + BUG_ON(keyp == NULL); + *keyp = lastkey; + + return 0; +} + +static int nilfs_direct_check_insert(const struct nilfs_bmap *bmap, __u64 key) +{ + return key > NILFS_DIRECT_KEY_MAX; +} + +static int nilfs_direct_gather_data(struct nilfs_bmap *bmap, + __u64 *keys, __u64 *ptrs, int nitems) +{ + struct nilfs_direct *direct; + __u64 key; + __u64 ptr; + int n; + + direct = (struct nilfs_direct *)bmap; + if (nitems > NILFS_DIRECT_NBLOCKS) + nitems = NILFS_DIRECT_NBLOCKS; + n = 0; + for (key = 0; key < nitems; key++) { + ptr = nilfs_direct_get_ptr(direct, key); + if (ptr != NILFS_BMAP_INVALID_PTR) { + keys[n] = key; + ptrs[n] = ptr; + n++; + } + } + return n; +} + +int nilfs_direct_delete_and_convert(struct nilfs_bmap *bmap, + __u64 key, __u64 *keys, __u64 *ptrs, + int n, __u64 low, __u64 high) +{ + struct nilfs_direct *direct; + __le64 *dptrs; + int ret, i, j; + + /* no need to allocate any resource for conversion */ + + /* delete */ + ret = (*bmap->b_ops->bop_delete)(bmap, key); + if (ret < 0) + return ret; + + /* free resources */ + if (bmap->b_ops->bop_clear != NULL) + (*bmap->b_ops->bop_clear)(bmap); + + /* convert */ + direct = (struct nilfs_direct *)bmap; + dptrs = nilfs_direct_dptrs(direct); + for (i = 0, j = 0; i < NILFS_DIRECT_NBLOCKS; i++) { + if ((j < n) && (i == keys[j])) { + dptrs[i] = (i != key) ? + nilfs_bmap_ptr_to_dptr(ptrs[j]) : + NILFS_BMAP_INVALID_PTR; + j++; + } else + dptrs[i] = NILFS_BMAP_INVALID_PTR; + } + + nilfs_direct_init(bmap, low, high); + + return 0; +} + +static int nilfs_direct_propagate_v(struct nilfs_direct *direct, + struct buffer_head *bh) +{ + union nilfs_bmap_ptr_req oldreq, newreq; + __u64 key; + __u64 ptr; + int ret; + + key = nilfs_bmap_data_get_key(&direct->d_bmap, bh); + ptr = nilfs_direct_get_ptr(direct, key); + if (!buffer_nilfs_volatile(bh)) { + oldreq.bpr_ptr = ptr; + newreq.bpr_ptr = ptr; + ret = nilfs_bmap_prepare_update(&direct->d_bmap, &oldreq, + &newreq); + if (ret < 0) + return ret; + nilfs_bmap_commit_update(&direct->d_bmap, &oldreq, &newreq); + set_buffer_nilfs_volatile(bh); + nilfs_direct_set_ptr(direct, key, newreq.bpr_ptr); + } else + ret = nilfs_bmap_mark_dirty(&direct->d_bmap, ptr); + + return ret; +} + +static int nilfs_direct_propagate(const struct nilfs_bmap *bmap, + struct buffer_head *bh) +{ + struct nilfs_direct *direct; + + direct = (struct nilfs_direct *)bmap; + return (direct->d_ops->dop_propagate != NULL) ? + (*direct->d_ops->dop_propagate)(direct, bh) : + 0; +} + +static int nilfs_direct_assign_v(struct nilfs_direct *direct, + __u64 key, __u64 ptr, + struct buffer_head **bh, + sector_t blocknr, + union nilfs_binfo *binfo) +{ + union nilfs_bmap_ptr_req req; + int ret; + + req.bpr_ptr = ptr; + ret = (*direct->d_bmap.b_pops->bpop_prepare_start_ptr)( + &direct->d_bmap, &req); + if (ret < 0) + return ret; + (*direct->d_bmap.b_pops->bpop_commit_start_ptr)(&direct->d_bmap, + &req, blocknr); + + binfo->bi_v.bi_vblocknr = nilfs_bmap_ptr_to_dptr(ptr); + binfo->bi_v.bi_blkoff = nilfs_bmap_key_to_dkey(key); + + return 0; +} + +static int nilfs_direct_assign_p(struct nilfs_direct *direct, + __u64 key, __u64 ptr, + struct buffer_head **bh, + sector_t blocknr, + union nilfs_binfo *binfo) +{ + nilfs_direct_set_ptr(direct, key, blocknr); + + binfo->bi_dat.bi_blkoff = nilfs_bmap_key_to_dkey(key); + binfo->bi_dat.bi_level = 0; + + return 0; +} + +static int nilfs_direct_assign(struct nilfs_bmap *bmap, + struct buffer_head **bh, + sector_t blocknr, + union nilfs_binfo *binfo) +{ + struct nilfs_direct *direct; + __u64 key; + __u64 ptr; + + direct = (struct nilfs_direct *)bmap; + key = nilfs_bmap_data_get_key(bmap, *bh); + BUG_ON(key > NILFS_DIRECT_KEY_MAX); + ptr = nilfs_direct_get_ptr(direct, key); + BUG_ON(ptr == NILFS_BMAP_INVALID_PTR); + + return (*direct->d_ops->dop_assign)(direct, key, ptr, bh, + blocknr, binfo); +} + +static const struct nilfs_bmap_operations nilfs_direct_ops = { + .bop_lookup = nilfs_direct_lookup, + .bop_insert = nilfs_direct_insert, + .bop_delete = nilfs_direct_delete, + .bop_clear = NULL, + + .bop_propagate = nilfs_direct_propagate, + + .bop_lookup_dirty_buffers = NULL, + + .bop_assign = nilfs_direct_assign, + .bop_mark = NULL, + + .bop_last_key = nilfs_direct_last_key, + .bop_check_insert = nilfs_direct_check_insert, + .bop_check_delete = NULL, + .bop_gather_data = nilfs_direct_gather_data, +}; + + +static const struct nilfs_direct_operations nilfs_direct_ops_v = { + .dop_find_target = nilfs_direct_find_target_v, + .dop_set_target = nilfs_direct_set_target_v, + .dop_propagate = nilfs_direct_propagate_v, + .dop_assign = nilfs_direct_assign_v, +}; + +static const struct nilfs_direct_operations nilfs_direct_ops_p = { + .dop_find_target = NULL, + .dop_set_target = NULL, + .dop_propagate = NULL, + .dop_assign = nilfs_direct_assign_p, +}; + +int nilfs_direct_init(struct nilfs_bmap *bmap, __u64 low, __u64 high) +{ + struct nilfs_direct *direct; + + direct = (struct nilfs_direct *)bmap; + bmap->b_ops = &nilfs_direct_ops; + bmap->b_low = low; + bmap->b_high = high; + switch (bmap->b_inode->i_ino) { + case NILFS_DAT_INO: + direct->d_ops = &nilfs_direct_ops_p; + break; + default: + direct->d_ops = &nilfs_direct_ops_v; + break; + } + + return 0; +} diff --git a/fs/nilfs2/direct.h b/fs/nilfs2/direct.h new file mode 100644 index 00000000000..45d2c5cda81 --- /dev/null +++ b/fs/nilfs2/direct.h @@ -0,0 +1,78 @@ +/* + * direct.h - NILFS direct block pointer. + * + * Copyright (C) 2006-2008 Nippon Telegraph and Telephone Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + * Written by Koji Sato . + */ + +#ifndef _NILFS_DIRECT_H +#define _NILFS_DIRECT_H + +#include +#include +#include "bmap.h" + + +struct nilfs_direct; + +/** + * struct nilfs_direct_operations - direct mapping operation table + */ +struct nilfs_direct_operations { + __u64 (*dop_find_target)(const struct nilfs_direct *, __u64); + void (*dop_set_target)(struct nilfs_direct *, __u64, __u64); + int (*dop_propagate)(struct nilfs_direct *, struct buffer_head *); + int (*dop_assign)(struct nilfs_direct *, __u64, __u64, + struct buffer_head **, sector_t, + union nilfs_binfo *); +}; + +/** + * struct nilfs_direct_node - direct node + * @dn_flags: flags + * @dn_pad: padding + */ +struct nilfs_direct_node { + __u8 dn_flags; + __u8 pad[7]; +}; + +/** + * struct nilfs_direct - direct mapping + * @d_bmap: bmap structure + * @d_ops: direct mapping operation table + */ +struct nilfs_direct { + struct nilfs_bmap d_bmap; + + /* direct-mapping-specific members */ + const struct nilfs_direct_operations *d_ops; +}; + + +#define NILFS_DIRECT_NBLOCKS (NILFS_BMAP_SIZE / sizeof(__le64) - 1) +#define NILFS_DIRECT_KEY_MIN 0 +#define NILFS_DIRECT_KEY_MAX (NILFS_DIRECT_NBLOCKS - 1) + + +int nilfs_direct_init(struct nilfs_bmap *, __u64, __u64); +int nilfs_direct_delete_and_convert(struct nilfs_bmap *, __u64, __u64 *, + __u64 *, int, __u64, __u64); + + +#endif /* _NILFS_DIRECT_H */ -- cgit v1.2.3 From a60be987d45dd510aeb54389526f9957cfab106c Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Mon, 6 Apr 2009 19:01:25 -0700 Subject: nilfs2: B-tree node cache This adds routines for B-tree node buffers. Signed-off-by: Seiji Kihara Signed-off-by: Ryusuke Konishi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/nilfs2/btnode.c | 316 +++++++++++++++++++++++++++++++++++++++++++++++++++++ fs/nilfs2/btnode.h | 58 ++++++++++ 2 files changed, 374 insertions(+) create mode 100644 fs/nilfs2/btnode.c create mode 100644 fs/nilfs2/btnode.h (limited to 'fs/nilfs2') diff --git a/fs/nilfs2/btnode.c b/fs/nilfs2/btnode.c new file mode 100644 index 00000000000..4cc07b2c30e --- /dev/null +++ b/fs/nilfs2/btnode.c @@ -0,0 +1,316 @@ +/* + * btnode.c - NILFS B-tree node cache + * + * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + * This file was originally written by Seiji Kihara + * and fully revised by Ryusuke Konishi for + * stabilization and simplification. + * + */ + +#include +#include +#include +#include +#include "nilfs.h" +#include "mdt.h" +#include "dat.h" +#include "page.h" +#include "btnode.h" + + +void nilfs_btnode_cache_init_once(struct address_space *btnc) +{ + INIT_RADIX_TREE(&btnc->page_tree, GFP_ATOMIC); + spin_lock_init(&btnc->tree_lock); + INIT_LIST_HEAD(&btnc->private_list); + spin_lock_init(&btnc->private_lock); + + spin_lock_init(&btnc->i_mmap_lock); + INIT_RAW_PRIO_TREE_ROOT(&btnc->i_mmap); + INIT_LIST_HEAD(&btnc->i_mmap_nonlinear); +} + +static struct address_space_operations def_btnode_aops; + +void nilfs_btnode_cache_init(struct address_space *btnc) +{ + btnc->host = NULL; /* can safely set to host inode ? */ + btnc->flags = 0; + mapping_set_gfp_mask(btnc, GFP_NOFS); + btnc->assoc_mapping = NULL; + btnc->backing_dev_info = &default_backing_dev_info; + btnc->a_ops = &def_btnode_aops; +} + +void nilfs_btnode_cache_clear(struct address_space *btnc) +{ + invalidate_mapping_pages(btnc, 0, -1); + truncate_inode_pages(btnc, 0); +} + +int nilfs_btnode_submit_block(struct address_space *btnc, __u64 blocknr, + sector_t pblocknr, struct buffer_head **pbh, + int newblk) +{ + struct buffer_head *bh; + struct inode *inode = NILFS_BTNC_I(btnc); + int err; + + bh = nilfs_grab_buffer(inode, btnc, blocknr, 1 << BH_NILFS_Node); + if (unlikely(!bh)) + return -ENOMEM; + + err = -EEXIST; /* internal code */ + if (newblk) { + if (unlikely(buffer_mapped(bh) || buffer_uptodate(bh) || + buffer_dirty(bh))) { + brelse(bh); + BUG(); + } + bh->b_bdev = NILFS_I_NILFS(inode)->ns_bdev; + bh->b_blocknr = blocknr; + set_buffer_mapped(bh); + set_buffer_uptodate(bh); + goto found; + } + + if (buffer_uptodate(bh) || buffer_dirty(bh)) + goto found; + + if (pblocknr == 0) { + pblocknr = blocknr; + if (inode->i_ino != NILFS_DAT_INO) { + struct inode *dat = + nilfs_dat_inode(NILFS_I_NILFS(inode)); + + /* blocknr is a virtual block number */ + err = nilfs_dat_translate(dat, blocknr, &pblocknr); + if (unlikely(err)) { + brelse(bh); + goto out_locked; + } + } + } + lock_buffer(bh); + if (buffer_uptodate(bh)) { + unlock_buffer(bh); + err = -EEXIST; /* internal code */ + goto found; + } + set_buffer_mapped(bh); + bh->b_bdev = NILFS_I_NILFS(inode)->ns_bdev; + bh->b_blocknr = pblocknr; /* set block address for read */ + bh->b_end_io = end_buffer_read_sync; + get_bh(bh); + submit_bh(READ, bh); + bh->b_blocknr = blocknr; /* set back to the given block address */ + err = 0; +found: + *pbh = bh; + +out_locked: + unlock_page(bh->b_page); + page_cache_release(bh->b_page); + return err; +} + +int nilfs_btnode_get(struct address_space *btnc, __u64 blocknr, + sector_t pblocknr, struct buffer_head **pbh, int newblk) +{ + struct buffer_head *bh; + int err; + + err = nilfs_btnode_submit_block(btnc, blocknr, pblocknr, pbh, newblk); + if (err == -EEXIST) /* internal code (cache hit) */ + return 0; + if (unlikely(err)) + return err; + + bh = *pbh; + wait_on_buffer(bh); + if (!buffer_uptodate(bh)) { + brelse(bh); + return -EIO; + } + return 0; +} + +/** + * nilfs_btnode_delete - delete B-tree node buffer + * @bh: buffer to be deleted + * + * nilfs_btnode_delete() invalidates the specified buffer and delete the page + * including the buffer if the page gets unbusy. + */ +void nilfs_btnode_delete(struct buffer_head *bh) +{ + struct address_space *mapping; + struct page *page = bh->b_page; + pgoff_t index = page_index(page); + int still_dirty; + + page_cache_get(page); + lock_page(page); + wait_on_page_writeback(page); + + nilfs_forget_buffer(bh); + still_dirty = PageDirty(page); + mapping = page->mapping; + unlock_page(page); + page_cache_release(page); + + if (!still_dirty && mapping) + invalidate_inode_pages2_range(mapping, index, index); +} + +/** + * nilfs_btnode_prepare_change_key + * prepare to move contents of the block for old key to one of new key. + * the old buffer will not be removed, but might be reused for new buffer. + * it might return -ENOMEM because of memory allocation errors, + * and might return -EIO because of disk read errors. + */ +int nilfs_btnode_prepare_change_key(struct address_space *btnc, + struct nilfs_btnode_chkey_ctxt *ctxt) +{ + struct buffer_head *obh, *nbh; + struct inode *inode = NILFS_BTNC_I(btnc); + __u64 oldkey = ctxt->oldkey, newkey = ctxt->newkey; + int err; + + if (oldkey == newkey) + return 0; + + obh = ctxt->bh; + ctxt->newbh = NULL; + + if (inode->i_blkbits == PAGE_CACHE_SHIFT) { + lock_page(obh->b_page); + /* + * We cannot call radix_tree_preload for the kernels older + * than 2.6.23, because it is not exported for modules. + */ + err = radix_tree_preload(GFP_NOFS & ~__GFP_HIGHMEM); + if (err) + goto failed_unlock; + /* BUG_ON(oldkey != obh->b_page->index); */ + if (unlikely(oldkey != obh->b_page->index)) + NILFS_PAGE_BUG(obh->b_page, + "invalid oldkey %lld (newkey=%lld)", + (unsigned long long)oldkey, + (unsigned long long)newkey); + +retry: + spin_lock_irq(&btnc->tree_lock); + err = radix_tree_insert(&btnc->page_tree, newkey, obh->b_page); + spin_unlock_irq(&btnc->tree_lock); + /* + * Note: page->index will not change to newkey until + * nilfs_btnode_commit_change_key() will be called. + * To protect the page in intermediate state, the page lock + * is held. + */ + radix_tree_preload_end(); + if (!err) + return 0; + else if (err != -EEXIST) + goto failed_unlock; + + err = invalidate_inode_pages2_range(btnc, newkey, newkey); + if (!err) + goto retry; + /* fallback to copy mode */ + unlock_page(obh->b_page); + } + + err = nilfs_btnode_get(btnc, newkey, 0, &nbh, 1); + if (likely(!err)) { + BUG_ON(nbh == obh); + ctxt->newbh = nbh; + } + return err; + + failed_unlock: + unlock_page(obh->b_page); + return err; +} + +/** + * nilfs_btnode_commit_change_key + * commit the change_key operation prepared by prepare_change_key(). + */ +void nilfs_btnode_commit_change_key(struct address_space *btnc, + struct nilfs_btnode_chkey_ctxt *ctxt) +{ + struct buffer_head *obh = ctxt->bh, *nbh = ctxt->newbh; + __u64 oldkey = ctxt->oldkey, newkey = ctxt->newkey; + struct page *opage; + + if (oldkey == newkey) + return; + + if (nbh == NULL) { /* blocksize == pagesize */ + opage = obh->b_page; + if (unlikely(oldkey != opage->index)) + NILFS_PAGE_BUG(opage, + "invalid oldkey %lld (newkey=%lld)", + (unsigned long long)oldkey, + (unsigned long long)newkey); + if (!test_set_buffer_dirty(obh) && TestSetPageDirty(opage)) + BUG(); + + spin_lock_irq(&btnc->tree_lock); + radix_tree_delete(&btnc->page_tree, oldkey); + radix_tree_tag_set(&btnc->page_tree, newkey, + PAGECACHE_TAG_DIRTY); + spin_unlock_irq(&btnc->tree_lock); + + opage->index = obh->b_blocknr = newkey; + unlock_page(opage); + } else { + nilfs_copy_buffer(nbh, obh); + nilfs_btnode_mark_dirty(nbh); + + nbh->b_blocknr = newkey; + ctxt->bh = nbh; + nilfs_btnode_delete(obh); /* will decrement bh->b_count */ + } +} + +/** + * nilfs_btnode_abort_change_key + * abort the change_key operation prepared by prepare_change_key(). + */ +void nilfs_btnode_abort_change_key(struct address_space *btnc, + struct nilfs_btnode_chkey_ctxt *ctxt) +{ + struct buffer_head *nbh = ctxt->newbh; + __u64 oldkey = ctxt->oldkey, newkey = ctxt->newkey; + + if (oldkey == newkey) + return; + + if (nbh == NULL) { /* blocksize == pagesize */ + spin_lock_irq(&btnc->tree_lock); + radix_tree_delete(&btnc->page_tree, newkey); + spin_unlock_irq(&btnc->tree_lock); + unlock_page(ctxt->bh->b_page); + } else + brelse(nbh); +} diff --git a/fs/nilfs2/btnode.h b/fs/nilfs2/btnode.h new file mode 100644 index 00000000000..35faa86444a --- /dev/null +++ b/fs/nilfs2/btnode.h @@ -0,0 +1,58 @@ +/* + * btnode.h - NILFS B-tree node cache + * + * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + * Written by Seiji Kihara + * Revised by Ryusuke Konishi + */ + +#ifndef _NILFS_BTNODE_H +#define _NILFS_BTNODE_H + +#include +#include +#include +#include + + +struct nilfs_btnode_chkey_ctxt { + __u64 oldkey; + __u64 newkey; + struct buffer_head *bh; + struct buffer_head *newbh; +}; + +void nilfs_btnode_cache_init_once(struct address_space *); +void nilfs_btnode_cache_init(struct address_space *); +void nilfs_btnode_cache_clear(struct address_space *); +int nilfs_btnode_submit_block(struct address_space *, __u64, sector_t, + struct buffer_head **, int); +int nilfs_btnode_get(struct address_space *, __u64, sector_t, + struct buffer_head **, int); +void nilfs_btnode_delete(struct buffer_head *); +int nilfs_btnode_prepare_change_key(struct address_space *, + struct nilfs_btnode_chkey_ctxt *); +void nilfs_btnode_commit_change_key(struct address_space *, + struct nilfs_btnode_chkey_ctxt *); +void nilfs_btnode_abort_change_key(struct address_space *, + struct nilfs_btnode_chkey_ctxt *); + +#define nilfs_btnode_mark_dirty(bh) nilfs_mark_buffer_dirty(bh) + + +#endif /* _NILFS_BTNODE_H */ -- cgit v1.2.3 From 0bd49f9446130a6a3914eb07b54db489e3222b34 Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Mon, 6 Apr 2009 19:01:27 -0700 Subject: nilfs2: buffer and page operations This adds common routines for buffer/page operations used in B-tree node caches, meta data files, or segment constructor (log writer). NILFS uses copy functions for buffers and pages due to the following reasons: 1) Relocation required for COW Since NILFS changes address of on-disk blocks, moving buffers in page cache is needed for the buffers which are not addressed by a file offset. If buffer size is smaller than page size, this involves partial copy of pages. 2) Freezing mmapped pages NILFS calculates checksums for each log to ensure its validity. If page data changes after the checksum calculation, this validity check will not work correctly. To avoid this failure for mmaped pages, NILFS freezes their data by copying. 3) Copy-on-write for DAT pages NILFS makes clones of DAT page caches in a copy-on-write manner during GC processes, and this ensures atomicity and consistency of the DAT in the transient state. In addition, NILFS uses two obsolete functions, nilfs_mark_buffer_dirty() and nilfs_clear_page_dirty() respectively. * nilfs_mark_buffer_dirty() was required to avoid NULL pointer dereference faults: Since the page cache of B-tree node pages or data page cache of pseudo inodes does not have a valid mapping->host, calling mark_buffer_dirty() for their buffers causes the fault; it calls __mark_inode_dirty(NULL) through __set_page_dirty(). * nilfs_clear_page_dirty() was needed in the two cases: 1) For B-tree node pages and data pages of the dat/gcdat, NILFS2 clears page dirty flags when it copies back pages from the cloned cache (gcdat->{i_mapping,i_btnode_cache}) to its original cache (dat->{i_mapping,i_btnode_cache}). 2) Some B-tree operations like insertion or deletion may dispose buffers in dirty state, and this needs to cancel the dirty state of their pages. clear_page_dirty_for_io() caused faults because it does not clear the dirty tag on the page cache. Signed-off-by: Seiji Kihara Signed-off-by: Ryusuke Konishi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/nilfs2/page.c | 542 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ fs/nilfs2/page.h | 76 ++++++++ 2 files changed, 618 insertions(+) create mode 100644 fs/nilfs2/page.c create mode 100644 fs/nilfs2/page.h (limited to 'fs/nilfs2') diff --git a/fs/nilfs2/page.c b/fs/nilfs2/page.c new file mode 100644 index 00000000000..7b18be8cd47 --- /dev/null +++ b/fs/nilfs2/page.c @@ -0,0 +1,542 @@ +/* + * page.c - buffer/page management specific to NILFS + * + * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + * Written by Ryusuke Konishi , + * Seiji Kihara . + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include "nilfs.h" +#include "page.h" +#include "mdt.h" + + +#define NILFS_BUFFER_INHERENT_BITS \ + ((1UL << BH_Uptodate) | (1UL << BH_Mapped) | (1UL << BH_NILFS_Node) | \ + (1UL << BH_NILFS_Volatile) | (1UL << BH_NILFS_Allocated)) + +static struct buffer_head * +__nilfs_get_page_block(struct page *page, unsigned long block, pgoff_t index, + int blkbits, unsigned long b_state) + +{ + unsigned long first_block; + struct buffer_head *bh; + + if (!page_has_buffers(page)) + create_empty_buffers(page, 1 << blkbits, b_state); + + first_block = (unsigned long)index << (PAGE_CACHE_SHIFT - blkbits); + bh = nilfs_page_get_nth_block(page, block - first_block); + + touch_buffer(bh); + wait_on_buffer(bh); + return bh; +} + +/* + * Since the page cache of B-tree node pages or data page cache of pseudo + * inodes does not have a valid mapping->host pointer, calling + * mark_buffer_dirty() for their buffers causes a NULL pointer dereference; + * it calls __mark_inode_dirty(NULL) through __set_page_dirty(). + * To avoid this problem, the old style mark_buffer_dirty() is used instead. + */ +void nilfs_mark_buffer_dirty(struct buffer_head *bh) +{ + if (!buffer_dirty(bh) && !test_set_buffer_dirty(bh)) + __set_page_dirty_nobuffers(bh->b_page); +} + +struct buffer_head *nilfs_grab_buffer(struct inode *inode, + struct address_space *mapping, + unsigned long blkoff, + unsigned long b_state) +{ + int blkbits = inode->i_blkbits; + pgoff_t index = blkoff >> (PAGE_CACHE_SHIFT - blkbits); + struct page *page, *opage; + struct buffer_head *bh, *obh; + + page = grab_cache_page(mapping, index); + if (unlikely(!page)) + return NULL; + + bh = __nilfs_get_page_block(page, blkoff, index, blkbits, b_state); + if (unlikely(!bh)) { + unlock_page(page); + page_cache_release(page); + return NULL; + } + if (!buffer_uptodate(bh) && mapping->assoc_mapping != NULL) { + /* + * Shadow page cache uses assoc_mapping to point its original + * page cache. The following code tries the original cache + * if the given cache is a shadow and it didn't hit. + */ + opage = find_lock_page(mapping->assoc_mapping, index); + if (!opage) + return bh; + + obh = __nilfs_get_page_block(opage, blkoff, index, blkbits, + b_state); + if (buffer_uptodate(obh)) { + nilfs_copy_buffer(bh, obh); + if (buffer_dirty(obh)) { + nilfs_mark_buffer_dirty(bh); + if (!buffer_nilfs_node(bh) && NILFS_MDT(inode)) + nilfs_mdt_mark_dirty(inode); + } + } + brelse(obh); + unlock_page(opage); + page_cache_release(opage); + } + return bh; +} + +/** + * nilfs_forget_buffer - discard dirty state + * @inode: owner inode of the buffer + * @bh: buffer head of the buffer to be discarded + */ +void nilfs_forget_buffer(struct buffer_head *bh) +{ + struct page *page = bh->b_page; + + lock_buffer(bh); + clear_buffer_nilfs_volatile(bh); + if (test_clear_buffer_dirty(bh) && nilfs_page_buffers_clean(page)) + __nilfs_clear_page_dirty(page); + + clear_buffer_uptodate(bh); + clear_buffer_mapped(bh); + bh->b_blocknr = -1; + ClearPageUptodate(page); + ClearPageMappedToDisk(page); + unlock_buffer(bh); + brelse(bh); +} + +/** + * nilfs_copy_buffer -- copy buffer data and flags + * @dbh: destination buffer + * @sbh: source buffer + */ +void nilfs_copy_buffer(struct buffer_head *dbh, struct buffer_head *sbh) +{ + void *kaddr0, *kaddr1; + unsigned long bits; + struct page *spage = sbh->b_page, *dpage = dbh->b_page; + struct buffer_head *bh; + + kaddr0 = kmap_atomic(spage, KM_USER0); + kaddr1 = kmap_atomic(dpage, KM_USER1); + memcpy(kaddr1 + bh_offset(dbh), kaddr0 + bh_offset(sbh), sbh->b_size); + kunmap_atomic(kaddr1, KM_USER1); + kunmap_atomic(kaddr0, KM_USER0); + + dbh->b_state = sbh->b_state & NILFS_BUFFER_INHERENT_BITS; + dbh->b_blocknr = sbh->b_blocknr; + dbh->b_bdev = sbh->b_bdev; + + bh = dbh; + bits = sbh->b_state & ((1UL << BH_Uptodate) | (1UL << BH_Mapped)); + while ((bh = bh->b_this_page) != dbh) { + lock_buffer(bh); + bits &= bh->b_state; + unlock_buffer(bh); + } + if (bits & (1UL << BH_Uptodate)) + SetPageUptodate(dpage); + else + ClearPageUptodate(dpage); + if (bits & (1UL << BH_Mapped)) + SetPageMappedToDisk(dpage); + else + ClearPageMappedToDisk(dpage); +} + +/** + * nilfs_page_buffers_clean - check if a page has dirty buffers or not. + * @page: page to be checked + * + * nilfs_page_buffers_clean() returns zero if the page has dirty buffers. + * Otherwise, it returns non-zero value. + */ +int nilfs_page_buffers_clean(struct page *page) +{ + struct buffer_head *bh, *head; + + bh = head = page_buffers(page); + do { + if (buffer_dirty(bh)) + return 0; + bh = bh->b_this_page; + } while (bh != head); + return 1; +} + +void nilfs_page_bug(struct page *page) +{ + struct address_space *m; + unsigned long ino = 0; + + if (unlikely(!page)) { + printk(KERN_CRIT "NILFS_PAGE_BUG(NULL)\n"); + return; + } + + m = page->mapping; + if (m) { + struct inode *inode = NILFS_AS_I(m); + if (inode != NULL) + ino = inode->i_ino; + } + printk(KERN_CRIT "NILFS_PAGE_BUG(%p): cnt=%d index#=%llu flags=0x%lx " + "mapping=%p ino=%lu\n", + page, atomic_read(&page->_count), + (unsigned long long)page->index, page->flags, m, ino); + + if (page_has_buffers(page)) { + struct buffer_head *bh, *head; + int i = 0; + + bh = head = page_buffers(page); + do { + printk(KERN_CRIT + " BH[%d] %p: cnt=%d block#=%llu state=0x%lx\n", + i++, bh, atomic_read(&bh->b_count), + (unsigned long long)bh->b_blocknr, bh->b_state); + bh = bh->b_this_page; + } while (bh != head); + } +} + +/** + * nilfs_alloc_private_page - allocate a private page with buffer heads + * + * Return Value: On success, a pointer to the allocated page is returned. + * On error, NULL is returned. + */ +struct page *nilfs_alloc_private_page(struct block_device *bdev, int size, + unsigned long state) +{ + struct buffer_head *bh, *head, *tail; + struct page *page; + + page = alloc_page(GFP_NOFS); /* page_count of the returned page is 1 */ + if (unlikely(!page)) + return NULL; + + lock_page(page); + head = alloc_page_buffers(page, size, 0); + if (unlikely(!head)) { + unlock_page(page); + __free_page(page); + return NULL; + } + + bh = head; + do { + bh->b_state = (1UL << BH_NILFS_Allocated) | state; + tail = bh; + bh->b_bdev = bdev; + bh = bh->b_this_page; + } while (bh); + + tail->b_this_page = head; + attach_page_buffers(page, head); + + return page; +} + +void nilfs_free_private_page(struct page *page) +{ + BUG_ON(!PageLocked(page)); + BUG_ON(page->mapping); + + if (page_has_buffers(page) && !try_to_free_buffers(page)) + NILFS_PAGE_BUG(page, "failed to free page"); + + unlock_page(page); + __free_page(page); +} + +/** + * nilfs_copy_page -- copy the page with buffers + * @dst: destination page + * @src: source page + * @copy_dirty: flag whether to copy dirty states on the page's buffer heads. + * + * This fuction is for both data pages and btnode pages. The dirty flag + * should be treated by caller. The page must not be under i/o. + * Both src and dst page must be locked + */ +static void nilfs_copy_page(struct page *dst, struct page *src, int copy_dirty) +{ + struct buffer_head *dbh, *dbufs, *sbh, *sbufs; + unsigned long mask = NILFS_BUFFER_INHERENT_BITS; + + BUG_ON(PageWriteback(dst)); + + sbh = sbufs = page_buffers(src); + if (!page_has_buffers(dst)) + create_empty_buffers(dst, sbh->b_size, 0); + + if (copy_dirty) + mask |= (1UL << BH_Dirty); + + dbh = dbufs = page_buffers(dst); + do { + lock_buffer(sbh); + lock_buffer(dbh); + dbh->b_state = sbh->b_state & mask; + dbh->b_blocknr = sbh->b_blocknr; + dbh->b_bdev = sbh->b_bdev; + sbh = sbh->b_this_page; + dbh = dbh->b_this_page; + } while (dbh != dbufs); + + copy_highpage(dst, src); + + if (PageUptodate(src) && !PageUptodate(dst)) + SetPageUptodate(dst); + else if (!PageUptodate(src) && PageUptodate(dst)) + ClearPageUptodate(dst); + if (PageMappedToDisk(src) && !PageMappedToDisk(dst)) + SetPageMappedToDisk(dst); + else if (!PageMappedToDisk(src) && PageMappedToDisk(dst)) + ClearPageMappedToDisk(dst); + + do { + unlock_buffer(sbh); + unlock_buffer(dbh); + sbh = sbh->b_this_page; + dbh = dbh->b_this_page; + } while (dbh != dbufs); +} + +int nilfs_copy_dirty_pages(struct address_space *dmap, + struct address_space *smap) +{ + struct pagevec pvec; + unsigned int i; + pgoff_t index = 0; + int err = 0; + + pagevec_init(&pvec, 0); +repeat: + if (!pagevec_lookup_tag(&pvec, smap, &index, PAGECACHE_TAG_DIRTY, + PAGEVEC_SIZE)) + return 0; + + for (i = 0; i < pagevec_count(&pvec); i++) { + struct page *page = pvec.pages[i], *dpage; + + lock_page(page); + if (unlikely(!PageDirty(page))) + NILFS_PAGE_BUG(page, "inconsistent dirty state"); + + dpage = grab_cache_page(dmap, page->index); + if (unlikely(!dpage)) { + /* No empty page is added to the page cache */ + err = -ENOMEM; + unlock_page(page); + break; + } + if (unlikely(!page_has_buffers(page))) + NILFS_PAGE_BUG(page, + "found empty page in dat page cache"); + + nilfs_copy_page(dpage, page, 1); + __set_page_dirty_nobuffers(dpage); + + unlock_page(dpage); + page_cache_release(dpage); + unlock_page(page); + } + pagevec_release(&pvec); + cond_resched(); + + if (likely(!err)) + goto repeat; + return err; +} + +/** + * nilfs_copy_back_pages -- copy back pages to orignal cache from shadow cache + * @dmap: destination page cache + * @smap: source page cache + * + * No pages must no be added to the cache during this process. + * This must be ensured by the caller. + */ +void nilfs_copy_back_pages(struct address_space *dmap, + struct address_space *smap) +{ + struct pagevec pvec; + unsigned int i, n; + pgoff_t index = 0; + int err; + + pagevec_init(&pvec, 0); +repeat: + n = pagevec_lookup(&pvec, smap, index, PAGEVEC_SIZE); + if (!n) + return; + index = pvec.pages[n - 1]->index + 1; + + for (i = 0; i < pagevec_count(&pvec); i++) { + struct page *page = pvec.pages[i], *dpage; + pgoff_t offset = page->index; + + lock_page(page); + dpage = find_lock_page(dmap, offset); + if (dpage) { + /* override existing page on the destination cache */ + BUG_ON(PageDirty(dpage)); + nilfs_copy_page(dpage, page, 0); + unlock_page(dpage); + page_cache_release(dpage); + } else { + struct page *page2; + + /* move the page to the destination cache */ + spin_lock_irq(&smap->tree_lock); + page2 = radix_tree_delete(&smap->page_tree, offset); + if (unlikely(page2 != page)) + NILFS_PAGE_BUG(page, "page removal failed " + "(offset=%lu, page2=%p)", + offset, page2); + smap->nrpages--; + spin_unlock_irq(&smap->tree_lock); + + spin_lock_irq(&dmap->tree_lock); + err = radix_tree_insert(&dmap->page_tree, offset, page); + if (unlikely(err < 0)) { + BUG_ON(err == -EEXIST); + page->mapping = NULL; + page_cache_release(page); /* for cache */ + } else { + page->mapping = dmap; + dmap->nrpages++; + if (PageDirty(page)) + radix_tree_tag_set(&dmap->page_tree, + offset, + PAGECACHE_TAG_DIRTY); + } + spin_unlock_irq(&dmap->tree_lock); + } + unlock_page(page); + } + pagevec_release(&pvec); + cond_resched(); + + goto repeat; +} + +void nilfs_clear_dirty_pages(struct address_space *mapping) +{ + struct pagevec pvec; + unsigned int i; + pgoff_t index = 0; + + pagevec_init(&pvec, 0); + + while (pagevec_lookup_tag(&pvec, mapping, &index, PAGECACHE_TAG_DIRTY, + PAGEVEC_SIZE)) { + for (i = 0; i < pagevec_count(&pvec); i++) { + struct page *page = pvec.pages[i]; + struct buffer_head *bh, *head; + + lock_page(page); + ClearPageUptodate(page); + ClearPageMappedToDisk(page); + bh = head = page_buffers(page); + do { + lock_buffer(bh); + clear_buffer_dirty(bh); + clear_buffer_nilfs_volatile(bh); + clear_buffer_uptodate(bh); + clear_buffer_mapped(bh); + unlock_buffer(bh); + bh = bh->b_this_page; + } while (bh != head); + + __nilfs_clear_page_dirty(page); + unlock_page(page); + } + pagevec_release(&pvec); + cond_resched(); + } +} + +unsigned nilfs_page_count_clean_buffers(struct page *page, + unsigned from, unsigned to) +{ + unsigned block_start, block_end; + struct buffer_head *bh, *head; + unsigned nc = 0; + + for (bh = head = page_buffers(page), block_start = 0; + bh != head || !block_start; + block_start = block_end, bh = bh->b_this_page) { + block_end = block_start + bh->b_size; + if (block_end > from && block_start < to && !buffer_dirty(bh)) + nc++; + } + return nc; +} + +/* + * NILFS2 needs clear_page_dirty() in the following two cases: + * + * 1) For B-tree node pages and data pages of the dat/gcdat, NILFS2 clears + * page dirty flags when it copies back pages from the shadow cache + * (gcdat->{i_mapping,i_btnode_cache}) to its original cache + * (dat->{i_mapping,i_btnode_cache}). + * + * 2) Some B-tree operations like insertion or deletion may dispose buffers + * in dirty state, and this needs to cancel the dirty state of their pages. + */ +int __nilfs_clear_page_dirty(struct page *page) +{ + struct address_space *mapping = page->mapping; + + if (mapping) { + spin_lock_irq(&mapping->tree_lock); + if (test_bit(PG_dirty, &page->flags)) { + radix_tree_tag_clear(&mapping->page_tree, + page_index(page), + PAGECACHE_TAG_DIRTY); + spin_unlock_irq(&mapping->tree_lock); + return clear_page_dirty_for_io(page); + } + spin_unlock_irq(&mapping->tree_lock); + return 0; + } + return TestClearPageDirty(page); +} diff --git a/fs/nilfs2/page.h b/fs/nilfs2/page.h new file mode 100644 index 00000000000..8abca4d1c1f --- /dev/null +++ b/fs/nilfs2/page.h @@ -0,0 +1,76 @@ +/* + * page.h - buffer/page management specific to NILFS + * + * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + * Written by Ryusuke Konishi , + * Seiji Kihara . + */ + +#ifndef _NILFS_PAGE_H +#define _NILFS_PAGE_H + +#include +#include "nilfs.h" + +/* + * Extended buffer state bits + */ +enum { + BH_NILFS_Allocated = BH_PrivateStart, + BH_NILFS_Node, + BH_NILFS_Volatile, +}; + +BUFFER_FNS(NILFS_Allocated, nilfs_allocated) /* nilfs private buffers */ +BUFFER_FNS(NILFS_Node, nilfs_node) /* nilfs node buffers */ +BUFFER_FNS(NILFS_Volatile, nilfs_volatile) + + +void nilfs_mark_buffer_dirty(struct buffer_head *bh); +int __nilfs_clear_page_dirty(struct page *); + +struct buffer_head *nilfs_grab_buffer(struct inode *, struct address_space *, + unsigned long, unsigned long); +void nilfs_forget_buffer(struct buffer_head *); +void nilfs_copy_buffer(struct buffer_head *, struct buffer_head *); +int nilfs_page_buffers_clean(struct page *); +void nilfs_page_bug(struct page *); +struct page *nilfs_alloc_private_page(struct block_device *, int, + unsigned long); +void nilfs_free_private_page(struct page *); + +int nilfs_copy_dirty_pages(struct address_space *, struct address_space *); +void nilfs_copy_back_pages(struct address_space *, struct address_space *); +void nilfs_clear_dirty_pages(struct address_space *); +unsigned nilfs_page_count_clean_buffers(struct page *, unsigned, unsigned); + +#define NILFS_PAGE_BUG(page, m, a...) \ + do { nilfs_page_bug(page); BUG(); } while (0) + +static inline struct buffer_head * +nilfs_page_get_nth_block(struct page *page, unsigned int count) +{ + struct buffer_head *bh = page_buffers(page); + + while (count-- > 0) + bh = bh->b_this_page; + get_bh(bh); + return bh; +} + +#endif /* _NILFS_PAGE_H */ -- cgit v1.2.3 From 5eb563f5f299a2ed488d9eb52acede45ccb14c7b Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Mon, 6 Apr 2009 19:01:28 -0700 Subject: nilfs2: meta data file This adds the meta data file, which serves common buffer functions to the DAT, sufile, cpfile, ifile, and so forth. Signed-off-by: Ryusuke Konishi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/nilfs2/mdt.c | 562 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ fs/nilfs2/mdt.h | 125 +++++++++++++ 2 files changed, 687 insertions(+) create mode 100644 fs/nilfs2/mdt.c create mode 100644 fs/nilfs2/mdt.h (limited to 'fs/nilfs2') diff --git a/fs/nilfs2/mdt.c b/fs/nilfs2/mdt.c new file mode 100644 index 00000000000..6ab84757861 --- /dev/null +++ b/fs/nilfs2/mdt.c @@ -0,0 +1,562 @@ +/* + * mdt.c - meta data file for NILFS + * + * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + * Written by Ryusuke Konishi + */ + +#include +#include +#include +#include +#include +#include +#include "nilfs.h" +#include "segment.h" +#include "page.h" +#include "mdt.h" + + +#define NILFS_MDT_MAX_RA_BLOCKS (16 - 1) + +#define INIT_UNUSED_INODE_FIELDS + +static int +nilfs_mdt_insert_new_block(struct inode *inode, unsigned long block, + struct buffer_head *bh, + void (*init_block)(struct inode *, + struct buffer_head *, void *)) +{ + struct nilfs_inode_info *ii = NILFS_I(inode); + void *kaddr; + int ret; + + /* Caller exclude read accesses using page lock */ + + /* set_buffer_new(bh); */ + bh->b_blocknr = 0; + + ret = nilfs_bmap_insert(ii->i_bmap, block, (unsigned long)bh); + if (unlikely(ret)) + return ret; + + set_buffer_mapped(bh); + + kaddr = kmap_atomic(bh->b_page, KM_USER0); + memset(kaddr + bh_offset(bh), 0, 1 << inode->i_blkbits); + if (init_block) + init_block(inode, bh, kaddr); + flush_dcache_page(bh->b_page); + kunmap_atomic(kaddr, KM_USER0); + + set_buffer_uptodate(bh); + nilfs_mark_buffer_dirty(bh); + nilfs_mdt_mark_dirty(inode); + return 0; +} + +static int nilfs_mdt_create_block(struct inode *inode, unsigned long block, + struct buffer_head **out_bh, + void (*init_block)(struct inode *, + struct buffer_head *, + void *)) +{ + struct the_nilfs *nilfs = NILFS_MDT(inode)->mi_nilfs; + struct nilfs_sb_info *writer = NULL; + struct super_block *sb = inode->i_sb; + struct nilfs_transaction_info ti; + struct buffer_head *bh; + int err; + + if (!sb) { + writer = nilfs_get_writer(nilfs); + if (!writer) { + err = -EROFS; + goto out; + } + sb = writer->s_super; + } + + nilfs_transaction_begin(sb, &ti, 0); + + err = -ENOMEM; + bh = nilfs_grab_buffer(inode, inode->i_mapping, block, 0); + if (unlikely(!bh)) + goto failed_unlock; + + err = -EEXIST; + if (buffer_uptodate(bh) || buffer_mapped(bh)) + goto failed_bh; +#if 0 + /* The uptodate flag is not protected by the page lock, but + the mapped flag is. Thus, we don't have to wait the buffer. */ + wait_on_buffer(bh); + if (buffer_uptodate(bh)) + goto failed_bh; +#endif + + bh->b_bdev = nilfs->ns_bdev; + err = nilfs_mdt_insert_new_block(inode, block, bh, init_block); + if (likely(!err)) { + get_bh(bh); + *out_bh = bh; + } + + failed_bh: + unlock_page(bh->b_page); + page_cache_release(bh->b_page); + brelse(bh); + + failed_unlock: + nilfs_transaction_end(sb, !err); + if (writer) + nilfs_put_writer(nilfs); + out: + return err; +} + +static int +nilfs_mdt_submit_block(struct inode *inode, unsigned long blkoff, + int mode, struct buffer_head **out_bh) +{ + struct buffer_head *bh; + unsigned long blknum = 0; + int ret = -ENOMEM; + + bh = nilfs_grab_buffer(inode, inode->i_mapping, blkoff, 0); + if (unlikely(!bh)) + goto failed; + + ret = -EEXIST; /* internal code */ + if (buffer_uptodate(bh)) + goto out; + + if (mode == READA) { + if (!trylock_buffer(bh)) { + ret = -EBUSY; + goto failed_bh; + } + } else { + BUG_ON(mode != READ); + lock_buffer(bh); + } + + if (buffer_uptodate(bh)) { + unlock_buffer(bh); + goto out; + } + if (!buffer_mapped(bh)) { /* unused buffer */ + ret = nilfs_bmap_lookup(NILFS_I(inode)->i_bmap, blkoff, + &blknum); + if (unlikely(ret)) { + unlock_buffer(bh); + goto failed_bh; + } + bh->b_bdev = NILFS_MDT(inode)->mi_nilfs->ns_bdev; + bh->b_blocknr = blknum; + set_buffer_mapped(bh); + } + + bh->b_end_io = end_buffer_read_sync; + get_bh(bh); + submit_bh(mode, bh); + ret = 0; + out: + get_bh(bh); + *out_bh = bh; + + failed_bh: + unlock_page(bh->b_page); + page_cache_release(bh->b_page); + brelse(bh); + failed: + return ret; +} + +static int nilfs_mdt_read_block(struct inode *inode, unsigned long block, + struct buffer_head **out_bh) +{ + struct buffer_head *first_bh, *bh; + unsigned long blkoff; + int i, nr_ra_blocks = NILFS_MDT_MAX_RA_BLOCKS; + int err; + + err = nilfs_mdt_submit_block(inode, block, READ, &first_bh); + if (err == -EEXIST) /* internal code */ + goto out; + + if (unlikely(err)) + goto failed; + + blkoff = block + 1; + for (i = 0; i < nr_ra_blocks; i++, blkoff++) { + err = nilfs_mdt_submit_block(inode, blkoff, READA, &bh); + if (likely(!err || err == -EEXIST)) + brelse(bh); + else if (err != -EBUSY) + break; /* abort readahead if bmap lookup failed */ + + if (!buffer_locked(first_bh)) + goto out_no_wait; + } + + wait_on_buffer(first_bh); + + out_no_wait: + err = -EIO; + if (!buffer_uptodate(first_bh)) + goto failed_bh; + out: + *out_bh = first_bh; + return 0; + + failed_bh: + brelse(first_bh); + failed: + return err; +} + +/** + * nilfs_mdt_get_block - read or create a buffer on meta data file. + * @inode: inode of the meta data file + * @blkoff: block offset + * @create: create flag + * @init_block: initializer used for newly allocated block + * @out_bh: output of a pointer to the buffer_head + * + * nilfs_mdt_get_block() looks up the specified buffer and tries to create + * a new buffer if @create is not zero. On success, the returned buffer is + * assured to be either existing or formatted using a buffer lock on success. + * @out_bh is substituted only when zero is returned. + * + * Return Value: On success, it returns 0. On error, the following negative + * error code is returned. + * + * %-ENOMEM - Insufficient memory available. + * + * %-EIO - I/O error + * + * %-ENOENT - the specified block does not exist (hole block) + * + * %-EINVAL - bmap is broken. (the caller should call nilfs_error()) + * + * %-EROFS - Read only filesystem (for create mode) + */ +int nilfs_mdt_get_block(struct inode *inode, unsigned long blkoff, int create, + void (*init_block)(struct inode *, + struct buffer_head *, void *), + struct buffer_head **out_bh) +{ + int ret; + + /* Should be rewritten with merging nilfs_mdt_read_block() */ + retry: + ret = nilfs_mdt_read_block(inode, blkoff, out_bh); + if (!create || ret != -ENOENT) + return ret; + + ret = nilfs_mdt_create_block(inode, blkoff, out_bh, init_block); + if (unlikely(ret == -EEXIST)) { + /* create = 0; */ /* limit read-create loop retries */ + goto retry; + } + return ret; +} + +/** + * nilfs_mdt_delete_block - make a hole on the meta data file. + * @inode: inode of the meta data file + * @block: block offset + * + * Return Value: On success, zero is returned. + * On error, one of the following negative error code is returned. + * + * %-ENOMEM - Insufficient memory available. + * + * %-EIO - I/O error + * + * %-EINVAL - bmap is broken. (the caller should call nilfs_error()) + */ +int nilfs_mdt_delete_block(struct inode *inode, unsigned long block) +{ + struct nilfs_inode_info *ii = NILFS_I(inode); + int err; + + err = nilfs_bmap_delete(ii->i_bmap, block); + if (likely(!err)) { + nilfs_mdt_mark_dirty(inode); + nilfs_mdt_forget_block(inode, block); + } + return err; +} + +/** + * nilfs_mdt_forget_block - discard dirty state and try to remove the page + * @inode: inode of the meta data file + * @block: block offset + * + * nilfs_mdt_forget_block() clears a dirty flag of the specified buffer, and + * tries to release the page including the buffer from a page cache. + * + * Return Value: On success, 0 is returned. On error, one of the following + * negative error code is returned. + * + * %-EBUSY - page has an active buffer. + * + * %-ENOENT - page cache has no page addressed by the offset. + */ +int nilfs_mdt_forget_block(struct inode *inode, unsigned long block) +{ + pgoff_t index = (pgoff_t)block >> + (PAGE_CACHE_SHIFT - inode->i_blkbits); + struct page *page; + unsigned long first_block; + int ret = 0; + int still_dirty; + + page = find_lock_page(inode->i_mapping, index); + if (!page) + return -ENOENT; + + wait_on_page_writeback(page); + + first_block = (unsigned long)index << + (PAGE_CACHE_SHIFT - inode->i_blkbits); + if (page_has_buffers(page)) { + struct buffer_head *bh; + + bh = nilfs_page_get_nth_block(page, block - first_block); + nilfs_forget_buffer(bh); + } + still_dirty = PageDirty(page); + unlock_page(page); + page_cache_release(page); + + if (still_dirty || + invalidate_inode_pages2_range(inode->i_mapping, index, index) != 0) + ret = -EBUSY; + return ret; +} + +/** + * nilfs_mdt_mark_block_dirty - mark a block on the meta data file dirty. + * @inode: inode of the meta data file + * @block: block offset + * + * Return Value: On success, it returns 0. On error, the following negative + * error code is returned. + * + * %-ENOMEM - Insufficient memory available. + * + * %-EIO - I/O error + * + * %-ENOENT - the specified block does not exist (hole block) + * + * %-EINVAL - bmap is broken. (the caller should call nilfs_error()) + */ +int nilfs_mdt_mark_block_dirty(struct inode *inode, unsigned long block) +{ + struct buffer_head *bh; + int err; + + err = nilfs_mdt_read_block(inode, block, &bh); + if (unlikely(err)) + return err; + nilfs_mark_buffer_dirty(bh); + nilfs_mdt_mark_dirty(inode); + brelse(bh); + return 0; +} + +int nilfs_mdt_fetch_dirty(struct inode *inode) +{ + struct nilfs_inode_info *ii = NILFS_I(inode); + + if (nilfs_bmap_test_and_clear_dirty(ii->i_bmap)) { + set_bit(NILFS_I_DIRTY, &ii->i_state); + return 1; + } + return test_bit(NILFS_I_DIRTY, &ii->i_state); +} + +static int +nilfs_mdt_write_page(struct page *page, struct writeback_control *wbc) +{ + struct inode *inode = container_of(page->mapping, + struct inode, i_data); + struct super_block *sb = inode->i_sb; + struct nilfs_sb_info *writer = NULL; + int err = 0; + + redirty_page_for_writepage(wbc, page); + unlock_page(page); + + if (page->mapping->assoc_mapping) + return 0; /* Do not request flush for shadow page cache */ + if (!sb) { + writer = nilfs_get_writer(NILFS_MDT(inode)->mi_nilfs); + if (!writer) + return -EROFS; + sb = writer->s_super; + } + + if (wbc->sync_mode == WB_SYNC_ALL) + err = nilfs_construct_segment(sb); + else if (wbc->for_reclaim) + nilfs_flush_segment(sb, inode->i_ino); + + if (writer) + nilfs_put_writer(NILFS_MDT(inode)->mi_nilfs); + return err; +} + + +static struct address_space_operations def_mdt_aops = { + .writepage = nilfs_mdt_write_page, +}; + +static struct inode_operations def_mdt_iops; +static struct file_operations def_mdt_fops; + +/* + * NILFS2 uses pseudo inodes for meta data files such as DAT, cpfile, sufile, + * ifile, or gcinodes. This allows the B-tree code and segment constructor + * to treat them like regular files, and this helps to simplify the + * implementation. + * On the other hand, some of the pseudo inodes have an irregular point: + * They don't have valid inode->i_sb pointer because their lifetimes are + * longer than those of the super block structs; they may continue for + * several consecutive mounts/umounts. This would need discussions. + */ +struct inode * +nilfs_mdt_new_common(struct the_nilfs *nilfs, struct super_block *sb, + ino_t ino, gfp_t gfp_mask) +{ + struct inode *inode = nilfs_alloc_inode(sb); + + if (!inode) + return NULL; + else { + struct address_space * const mapping = &inode->i_data; + struct nilfs_mdt_info *mi = kzalloc(sizeof(*mi), GFP_NOFS); + + if (!mi) { + nilfs_destroy_inode(inode); + return NULL; + } + mi->mi_nilfs = nilfs; + init_rwsem(&mi->mi_sem); + + inode->i_sb = sb; /* sb may be NULL for some meta data files */ + inode->i_blkbits = nilfs->ns_blocksize_bits; + inode->i_flags = 0; + atomic_set(&inode->i_count, 1); + inode->i_nlink = 1; + inode->i_ino = ino; + inode->i_mode = S_IFREG; + inode->i_private = mi; + +#ifdef INIT_UNUSED_INODE_FIELDS + atomic_set(&inode->i_writecount, 0); + inode->i_size = 0; + inode->i_blocks = 0; + inode->i_bytes = 0; + inode->i_generation = 0; +#ifdef CONFIG_QUOTA + memset(&inode->i_dquot, 0, sizeof(inode->i_dquot)); +#endif + inode->i_pipe = NULL; + inode->i_bdev = NULL; + inode->i_cdev = NULL; + inode->i_rdev = 0; +#ifdef CONFIG_SECURITY + inode->i_security = NULL; +#endif + inode->dirtied_when = 0; + + INIT_LIST_HEAD(&inode->i_list); + INIT_LIST_HEAD(&inode->i_sb_list); + inode->i_state = 0; +#endif + + spin_lock_init(&inode->i_lock); + mutex_init(&inode->i_mutex); + init_rwsem(&inode->i_alloc_sem); + + mapping->host = NULL; /* instead of inode */ + mapping->flags = 0; + mapping_set_gfp_mask(mapping, gfp_mask); + mapping->assoc_mapping = NULL; + mapping->backing_dev_info = nilfs->ns_bdi; + + inode->i_mapping = mapping; + } + + return inode; +} + +struct inode *nilfs_mdt_new(struct the_nilfs *nilfs, struct super_block *sb, + ino_t ino, gfp_t gfp_mask) +{ + struct inode *inode = nilfs_mdt_new_common(nilfs, sb, ino, gfp_mask); + + if (!inode) + return NULL; + + inode->i_op = &def_mdt_iops; + inode->i_fop = &def_mdt_fops; + inode->i_mapping->a_ops = &def_mdt_aops; + return inode; +} + +void nilfs_mdt_set_entry_size(struct inode *inode, unsigned entry_size, + unsigned header_size) +{ + struct nilfs_mdt_info *mi = NILFS_MDT(inode); + + mi->mi_entry_size = entry_size; + mi->mi_entries_per_block = (1 << inode->i_blkbits) / entry_size; + mi->mi_first_entry_offset = DIV_ROUND_UP(header_size, entry_size); +} + +void nilfs_mdt_set_shadow(struct inode *orig, struct inode *shadow) +{ + shadow->i_mapping->assoc_mapping = orig->i_mapping; + NILFS_I(shadow)->i_btnode_cache.assoc_mapping = + &NILFS_I(orig)->i_btnode_cache; +} + +void nilfs_mdt_clear(struct inode *inode) +{ + struct nilfs_inode_info *ii = NILFS_I(inode); + + invalidate_mapping_pages(inode->i_mapping, 0, -1); + truncate_inode_pages(inode->i_mapping, 0); + + nilfs_bmap_clear(ii->i_bmap); + nilfs_btnode_cache_clear(&ii->i_btnode_cache); +} + +void nilfs_mdt_destroy(struct inode *inode) +{ + struct nilfs_mdt_info *mdi = NILFS_MDT(inode); + + kfree(mdi->mi_bgl); /* kfree(NULL) is safe */ + kfree(mdi); + nilfs_destroy_inode(inode); +} diff --git a/fs/nilfs2/mdt.h b/fs/nilfs2/mdt.h new file mode 100644 index 00000000000..df683e0bca6 --- /dev/null +++ b/fs/nilfs2/mdt.h @@ -0,0 +1,125 @@ +/* + * mdt.h - NILFS meta data file prototype and definitions + * + * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + * Written by Ryusuke Konishi + */ + +#ifndef _NILFS_MDT_H +#define _NILFS_MDT_H + +#include +#include +#include "nilfs.h" +#include "page.h" + +/** + * struct nilfs_mdt_info - on-memory private data of meta data files + * @mi_nilfs: back pointer to the_nilfs struct + * @mi_sem: reader/writer semaphore for meta data operations + * @mi_bgl: per-blockgroup locking + * @mi_entry_size: size of an entry + * @mi_first_entry_offset: offset to the first entry + * @mi_entries_per_block: number of entries in a block + * @mi_blocks_per_group: number of blocks in a group + * @mi_blocks_per_desc_block: number of blocks per descriptor block + */ +struct nilfs_mdt_info { + struct the_nilfs *mi_nilfs; + struct rw_semaphore mi_sem; + struct blockgroup_lock *mi_bgl; + unsigned mi_entry_size; + unsigned mi_first_entry_offset; + unsigned long mi_entries_per_block; + unsigned long mi_blocks_per_group; + unsigned long mi_blocks_per_desc_block; +}; + +static inline struct nilfs_mdt_info *NILFS_MDT(const struct inode *inode) +{ + return inode->i_private; +} + +static inline struct the_nilfs *NILFS_I_NILFS(struct inode *inode) +{ + struct super_block *sb = inode->i_sb; + + return sb ? NILFS_SB(sb)->s_nilfs : NILFS_MDT(inode)->mi_nilfs; +} + +/* Default GFP flags using highmem */ +#define NILFS_MDT_GFP (__GFP_WAIT | __GFP_IO | __GFP_HIGHMEM) + +int nilfs_mdt_get_block(struct inode *, unsigned long, int, + void (*init_block)(struct inode *, + struct buffer_head *, void *), + struct buffer_head **); +int nilfs_mdt_delete_block(struct inode *, unsigned long); +int nilfs_mdt_forget_block(struct inode *, unsigned long); +int nilfs_mdt_mark_block_dirty(struct inode *, unsigned long); +int nilfs_mdt_fetch_dirty(struct inode *); + +struct inode *nilfs_mdt_new(struct the_nilfs *, struct super_block *, ino_t, + gfp_t); +struct inode *nilfs_mdt_new_common(struct the_nilfs *, struct super_block *, + ino_t, gfp_t); +void nilfs_mdt_destroy(struct inode *); +void nilfs_mdt_clear(struct inode *); +void nilfs_mdt_set_entry_size(struct inode *, unsigned, unsigned); +void nilfs_mdt_set_shadow(struct inode *, struct inode *); + + +#define nilfs_mdt_mark_buffer_dirty(bh) nilfs_mark_buffer_dirty(bh) + +static inline void nilfs_mdt_mark_dirty(struct inode *inode) +{ + if (!test_bit(NILFS_I_DIRTY, &NILFS_I(inode)->i_state)) + set_bit(NILFS_I_DIRTY, &NILFS_I(inode)->i_state); +} + +static inline void nilfs_mdt_clear_dirty(struct inode *inode) +{ + clear_bit(NILFS_I_DIRTY, &NILFS_I(inode)->i_state); +} + +static inline __u64 nilfs_mdt_cno(struct inode *inode) +{ + return NILFS_MDT(inode)->mi_nilfs->ns_cno; +} + +#define nilfs_mdt_bgl_lock(inode, bg) \ + (&NILFS_MDT(inode)->mi_bgl->locks[(bg) & (NR_BG_LOCKS-1)].lock) + + +static inline int +nilfs_mdt_read_inode_direct(struct inode *inode, struct buffer_head *bh, + unsigned n) +{ + return nilfs_read_inode_common( + inode, (struct nilfs_inode *)(bh->b_data + n)); +} + +static inline void +nilfs_mdt_write_inode_direct(struct inode *inode, struct buffer_head *bh, + unsigned n) +{ + nilfs_write_inode_common( + inode, (struct nilfs_inode *)(bh->b_data + n), 1); +} + +#endif /* _NILFS_MDT_H */ -- cgit v1.2.3 From 5442680fd23abc01f29af585cc1a2f793bd74565 Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Mon, 6 Apr 2009 19:01:29 -0700 Subject: nilfs2: persistent object allocator This adds common functions to allocate or deallocate entries with bitmaps on a meta data file. This feature is used by the DAT and ifile. Signed-off-by: Koji Sato Signed-off-by: Ryusuke Konishi Signed-off-by: Yoshiji Amagai Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/nilfs2/alloc.c | 504 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ fs/nilfs2/alloc.h | 72 ++++++++ 2 files changed, 576 insertions(+) create mode 100644 fs/nilfs2/alloc.c create mode 100644 fs/nilfs2/alloc.h (limited to 'fs/nilfs2') diff --git a/fs/nilfs2/alloc.c b/fs/nilfs2/alloc.c new file mode 100644 index 00000000000..d69e6ae5925 --- /dev/null +++ b/fs/nilfs2/alloc.c @@ -0,0 +1,504 @@ +/* + * alloc.c - NILFS dat/inode allocator + * + * Copyright (C) 2006-2008 Nippon Telegraph and Telephone Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + * Original code was written by Koji Sato . + * Two allocators were unified by Ryusuke Konishi , + * Amagai Yoshiji . + */ + +#include +#include +#include +#include +#include "mdt.h" +#include "alloc.h" + + +static inline unsigned long +nilfs_palloc_groups_per_desc_block(const struct inode *inode) +{ + return (1UL << inode->i_blkbits) / + sizeof(struct nilfs_palloc_group_desc); +} + +static inline unsigned long +nilfs_palloc_groups_count(const struct inode *inode) +{ + return 1UL << (BITS_PER_LONG - (inode->i_blkbits + 3 /* log2(8) */)); +} + +int nilfs_palloc_init_blockgroup(struct inode *inode, unsigned entry_size) +{ + struct nilfs_mdt_info *mi = NILFS_MDT(inode); + + mi->mi_bgl = kmalloc(sizeof(*mi->mi_bgl), GFP_NOFS); + if (!mi->mi_bgl) + return -ENOMEM; + + bgl_lock_init(mi->mi_bgl); + + nilfs_mdt_set_entry_size(inode, entry_size, 0); + + mi->mi_blocks_per_group = + DIV_ROUND_UP(nilfs_palloc_entries_per_group(inode), + mi->mi_entries_per_block) + 1; + /* Number of blocks in a group including entry blocks and + a bitmap block */ + mi->mi_blocks_per_desc_block = + nilfs_palloc_groups_per_desc_block(inode) * + mi->mi_blocks_per_group + 1; + /* Number of blocks per descriptor including the + descriptor block */ + return 0; +} + +static unsigned long nilfs_palloc_group(const struct inode *inode, __u64 nr, + unsigned long *offset) +{ + __u64 group = nr; + + *offset = do_div(group, nilfs_palloc_entries_per_group(inode)); + return group; +} + +static unsigned long +nilfs_palloc_desc_blkoff(const struct inode *inode, unsigned long group) +{ + unsigned long desc_block = + group / nilfs_palloc_groups_per_desc_block(inode); + return desc_block * NILFS_MDT(inode)->mi_blocks_per_desc_block; +} + +static unsigned long +nilfs_palloc_bitmap_blkoff(const struct inode *inode, unsigned long group) +{ + unsigned long desc_offset = + group % nilfs_palloc_groups_per_desc_block(inode); + return nilfs_palloc_desc_blkoff(inode, group) + 1 + + desc_offset * NILFS_MDT(inode)->mi_blocks_per_group; +} + +static unsigned long +nilfs_palloc_group_desc_nfrees(struct inode *inode, unsigned long group, + const struct nilfs_palloc_group_desc *desc) +{ + unsigned long nfree; + + spin_lock(nilfs_mdt_bgl_lock(inode, group)); + nfree = le32_to_cpu(desc->pg_nfrees); + spin_unlock(nilfs_mdt_bgl_lock(inode, group)); + return nfree; +} + +static void +nilfs_palloc_group_desc_add_entries(struct inode *inode, + unsigned long group, + struct nilfs_palloc_group_desc *desc, + u32 n) +{ + spin_lock(nilfs_mdt_bgl_lock(inode, group)); + le32_add_cpu(&desc->pg_nfrees, n); + spin_unlock(nilfs_mdt_bgl_lock(inode, group)); +} + +static unsigned long +nilfs_palloc_entry_blkoff(const struct inode *inode, __u64 nr) +{ + unsigned long group, group_offset; + + group = nilfs_palloc_group(inode, nr, &group_offset); + + return nilfs_palloc_bitmap_blkoff(inode, group) + 1 + + group_offset / NILFS_MDT(inode)->mi_entries_per_block; +} + +static void nilfs_palloc_desc_block_init(struct inode *inode, + struct buffer_head *bh, void *kaddr) +{ + struct nilfs_palloc_group_desc *desc = kaddr + bh_offset(bh); + unsigned long n = nilfs_palloc_groups_per_desc_block(inode); + __le32 nfrees; + + nfrees = cpu_to_le32(nilfs_palloc_entries_per_group(inode)); + while (n-- > 0) { + desc->pg_nfrees = nfrees; + desc++; + } +} + +static int nilfs_palloc_get_desc_block(struct inode *inode, + unsigned long group, + int create, struct buffer_head **bhp) +{ + return nilfs_mdt_get_block(inode, + nilfs_palloc_desc_blkoff(inode, group), + create, nilfs_palloc_desc_block_init, bhp); +} + +static int nilfs_palloc_get_bitmap_block(struct inode *inode, + unsigned long group, + int create, struct buffer_head **bhp) +{ + return nilfs_mdt_get_block(inode, + nilfs_palloc_bitmap_blkoff(inode, group), + create, NULL, bhp); +} + +int nilfs_palloc_get_entry_block(struct inode *inode, __u64 nr, + int create, struct buffer_head **bhp) +{ + return nilfs_mdt_get_block(inode, nilfs_palloc_entry_blkoff(inode, nr), + create, NULL, bhp); +} + +static struct nilfs_palloc_group_desc * +nilfs_palloc_block_get_group_desc(const struct inode *inode, + unsigned long group, + const struct buffer_head *bh, void *kaddr) +{ + return (struct nilfs_palloc_group_desc *)(kaddr + bh_offset(bh)) + + group % nilfs_palloc_groups_per_desc_block(inode); +} + +static unsigned char * +nilfs_palloc_block_get_bitmap(const struct inode *inode, + const struct buffer_head *bh, void *kaddr) +{ + return (unsigned char *)(kaddr + bh_offset(bh)); +} + +void *nilfs_palloc_block_get_entry(const struct inode *inode, __u64 nr, + const struct buffer_head *bh, void *kaddr) +{ + unsigned long entry_offset, group_offset; + + nilfs_palloc_group(inode, nr, &group_offset); + entry_offset = group_offset % NILFS_MDT(inode)->mi_entries_per_block; + + return kaddr + bh_offset(bh) + + entry_offset * NILFS_MDT(inode)->mi_entry_size; +} + +static int nilfs_palloc_find_available_slot(struct inode *inode, + unsigned long group, + unsigned long target, + unsigned char *bitmap, + int bsize) /* size in bits */ +{ + int curr, pos, end, i; + + if (target > 0) { + end = (target + BITS_PER_LONG - 1) & ~(BITS_PER_LONG - 1); + if (end > bsize) + end = bsize; + pos = nilfs_find_next_zero_bit(bitmap, end, target); + if (pos < end && + !nilfs_set_bit_atomic( + nilfs_mdt_bgl_lock(inode, group), pos, bitmap)) + return pos; + } else + end = 0; + + for (i = 0, curr = end; + i < bsize; + i += BITS_PER_LONG, curr += BITS_PER_LONG) { + /* wrap around */ + if (curr >= bsize) + curr = 0; + while (*((unsigned long *)bitmap + curr / BITS_PER_LONG) + != ~0UL) { + end = curr + BITS_PER_LONG; + if (end > bsize) + end = bsize; + pos = nilfs_find_next_zero_bit(bitmap, end, curr); + if ((pos < end) && + !nilfs_set_bit_atomic( + nilfs_mdt_bgl_lock(inode, group), pos, + bitmap)) + return pos; + } + } + return -ENOSPC; +} + +static unsigned long +nilfs_palloc_rest_groups_in_desc_block(const struct inode *inode, + unsigned long curr, unsigned long max) +{ + return min_t(unsigned long, + nilfs_palloc_groups_per_desc_block(inode) - + curr % nilfs_palloc_groups_per_desc_block(inode), + max - curr + 1); +} + +int nilfs_palloc_prepare_alloc_entry(struct inode *inode, + struct nilfs_palloc_req *req) +{ + struct buffer_head *desc_bh, *bitmap_bh; + struct nilfs_palloc_group_desc *desc; + unsigned char *bitmap; + void *desc_kaddr, *bitmap_kaddr; + unsigned long group, maxgroup, ngroups; + unsigned long group_offset, maxgroup_offset; + unsigned long n, entries_per_group, groups_per_desc_block; + unsigned long i, j; + int pos, ret; + + ngroups = nilfs_palloc_groups_count(inode); + maxgroup = ngroups - 1; + group = nilfs_palloc_group(inode, req->pr_entry_nr, &group_offset); + entries_per_group = nilfs_palloc_entries_per_group(inode); + groups_per_desc_block = nilfs_palloc_groups_per_desc_block(inode); + + for (i = 0; i < ngroups; i += n) { + if (group >= ngroups) { + /* wrap around */ + group = 0; + maxgroup = nilfs_palloc_group(inode, req->pr_entry_nr, + &maxgroup_offset) - 1; + } + ret = nilfs_palloc_get_desc_block(inode, group, 1, &desc_bh); + if (ret < 0) + return ret; + desc_kaddr = kmap(desc_bh->b_page); + desc = nilfs_palloc_block_get_group_desc( + inode, group, desc_bh, desc_kaddr); + n = nilfs_palloc_rest_groups_in_desc_block(inode, group, + maxgroup); + for (j = 0; j < n; j++, desc++, group++) { + if (nilfs_palloc_group_desc_nfrees(inode, group, desc) + > 0) { + ret = nilfs_palloc_get_bitmap_block( + inode, group, 1, &bitmap_bh); + if (ret < 0) + goto out_desc; + bitmap_kaddr = kmap(bitmap_bh->b_page); + bitmap = nilfs_palloc_block_get_bitmap( + inode, bitmap_bh, bitmap_kaddr); + pos = nilfs_palloc_find_available_slot( + inode, group, group_offset, bitmap, + entries_per_group); + if (pos >= 0) { + /* found a free entry */ + nilfs_palloc_group_desc_add_entries( + inode, group, desc, -1); + req->pr_entry_nr = + entries_per_group * group + pos; + kunmap(desc_bh->b_page); + kunmap(bitmap_bh->b_page); + + req->pr_desc_bh = desc_bh; + req->pr_bitmap_bh = bitmap_bh; + return 0; + } + kunmap(bitmap_bh->b_page); + brelse(bitmap_bh); + } + + group_offset = 0; + } + + kunmap(desc_bh->b_page); + brelse(desc_bh); + } + + /* no entries left */ + return -ENOSPC; + + out_desc: + kunmap(desc_bh->b_page); + brelse(desc_bh); + return ret; +} + +void nilfs_palloc_commit_alloc_entry(struct inode *inode, + struct nilfs_palloc_req *req) +{ + nilfs_mdt_mark_buffer_dirty(req->pr_bitmap_bh); + nilfs_mdt_mark_buffer_dirty(req->pr_desc_bh); + nilfs_mdt_mark_dirty(inode); + + brelse(req->pr_bitmap_bh); + brelse(req->pr_desc_bh); +} + +void nilfs_palloc_commit_free_entry(struct inode *inode, + struct nilfs_palloc_req *req) +{ + struct nilfs_palloc_group_desc *desc; + unsigned long group, group_offset; + unsigned char *bitmap; + void *desc_kaddr, *bitmap_kaddr; + + group = nilfs_palloc_group(inode, req->pr_entry_nr, &group_offset); + desc_kaddr = kmap(req->pr_desc_bh->b_page); + desc = nilfs_palloc_block_get_group_desc(inode, group, + req->pr_desc_bh, desc_kaddr); + bitmap_kaddr = kmap(req->pr_bitmap_bh->b_page); + bitmap = nilfs_palloc_block_get_bitmap(inode, req->pr_bitmap_bh, + bitmap_kaddr); + + if (!nilfs_clear_bit_atomic(nilfs_mdt_bgl_lock(inode, group), + group_offset, bitmap)) + printk(KERN_WARNING "%s: entry number %llu already freed\n", + __func__, (unsigned long long)req->pr_entry_nr); + + nilfs_palloc_group_desc_add_entries(inode, group, desc, 1); + + kunmap(req->pr_bitmap_bh->b_page); + kunmap(req->pr_desc_bh->b_page); + + nilfs_mdt_mark_buffer_dirty(req->pr_desc_bh); + nilfs_mdt_mark_buffer_dirty(req->pr_bitmap_bh); + nilfs_mdt_mark_dirty(inode); + + brelse(req->pr_bitmap_bh); + brelse(req->pr_desc_bh); +} + +void nilfs_palloc_abort_alloc_entry(struct inode *inode, + struct nilfs_palloc_req *req) +{ + struct nilfs_palloc_group_desc *desc; + void *desc_kaddr, *bitmap_kaddr; + unsigned char *bitmap; + unsigned long group, group_offset; + + group = nilfs_palloc_group(inode, req->pr_entry_nr, &group_offset); + desc_kaddr = kmap(req->pr_desc_bh->b_page); + desc = nilfs_palloc_block_get_group_desc(inode, group, + req->pr_desc_bh, desc_kaddr); + bitmap_kaddr = kmap(req->pr_bitmap_bh->b_page); + bitmap = nilfs_palloc_block_get_bitmap(inode, req->pr_bitmap_bh, + bitmap_kaddr); + if (!nilfs_clear_bit_atomic(nilfs_mdt_bgl_lock(inode, group), + group_offset, bitmap)) + printk(KERN_WARNING "%s: entry numer %llu already freed\n", + __func__, (unsigned long long)req->pr_entry_nr); + + nilfs_palloc_group_desc_add_entries(inode, group, desc, 1); + + kunmap(req->pr_bitmap_bh->b_page); + kunmap(req->pr_desc_bh->b_page); + + brelse(req->pr_bitmap_bh); + brelse(req->pr_desc_bh); + + req->pr_entry_nr = 0; + req->pr_bitmap_bh = NULL; + req->pr_desc_bh = NULL; +} + +int nilfs_palloc_prepare_free_entry(struct inode *inode, + struct nilfs_palloc_req *req) +{ + struct buffer_head *desc_bh, *bitmap_bh; + unsigned long group, group_offset; + int ret; + + group = nilfs_palloc_group(inode, req->pr_entry_nr, &group_offset); + ret = nilfs_palloc_get_desc_block(inode, group, 1, &desc_bh); + if (ret < 0) + return ret; + ret = nilfs_palloc_get_bitmap_block(inode, group, 1, &bitmap_bh); + if (ret < 0) { + brelse(desc_bh); + return ret; + } + + req->pr_desc_bh = desc_bh; + req->pr_bitmap_bh = bitmap_bh; + return 0; +} + +void nilfs_palloc_abort_free_entry(struct inode *inode, + struct nilfs_palloc_req *req) +{ + brelse(req->pr_bitmap_bh); + brelse(req->pr_desc_bh); + + req->pr_entry_nr = 0; + req->pr_bitmap_bh = NULL; + req->pr_desc_bh = NULL; +} + +static int +nilfs_palloc_group_is_in(struct inode *inode, unsigned long group, __u64 nr) +{ + __u64 first, last; + + first = group * nilfs_palloc_entries_per_group(inode); + last = first + nilfs_palloc_entries_per_group(inode) - 1; + return (nr >= first) && (nr <= last); +} + +int nilfs_palloc_freev(struct inode *inode, __u64 *entry_nrs, size_t nitems) +{ + struct buffer_head *desc_bh, *bitmap_bh; + struct nilfs_palloc_group_desc *desc; + unsigned char *bitmap; + void *desc_kaddr, *bitmap_kaddr; + unsigned long group, group_offset; + int i, j, n, ret; + + for (i = 0; i < nitems; i += n) { + group = nilfs_palloc_group(inode, entry_nrs[i], &group_offset); + ret = nilfs_palloc_get_desc_block(inode, group, 0, &desc_bh); + if (ret < 0) + return ret; + ret = nilfs_palloc_get_bitmap_block(inode, group, 0, + &bitmap_bh); + if (ret < 0) { + brelse(desc_bh); + return ret; + } + desc_kaddr = kmap(desc_bh->b_page); + desc = nilfs_palloc_block_get_group_desc( + inode, group, desc_bh, desc_kaddr); + bitmap_kaddr = kmap(bitmap_bh->b_page); + bitmap = nilfs_palloc_block_get_bitmap( + inode, bitmap_bh, bitmap_kaddr); + for (j = i, n = 0; + (j < nitems) && nilfs_palloc_group_is_in(inode, group, + entry_nrs[j]); + j++, n++) { + nilfs_palloc_group(inode, entry_nrs[j], &group_offset); + if (!nilfs_clear_bit_atomic( + nilfs_mdt_bgl_lock(inode, group), + group_offset, bitmap)) { + printk(KERN_WARNING + "%s: entry number %llu already freed\n", + __func__, + (unsigned long long)entry_nrs[j]); + } + } + nilfs_palloc_group_desc_add_entries(inode, group, desc, n); + + kunmap(bitmap_bh->b_page); + kunmap(desc_bh->b_page); + + nilfs_mdt_mark_buffer_dirty(desc_bh); + nilfs_mdt_mark_buffer_dirty(bitmap_bh); + nilfs_mdt_mark_dirty(inode); + + brelse(bitmap_bh); + brelse(desc_bh); + } + return 0; +} diff --git a/fs/nilfs2/alloc.h b/fs/nilfs2/alloc.h new file mode 100644 index 00000000000..4ace5475c2c --- /dev/null +++ b/fs/nilfs2/alloc.h @@ -0,0 +1,72 @@ +/* + * alloc.h - persistent object (dat entry/disk inode) allocator/deallocator + * + * Copyright (C) 2006-2008 Nippon Telegraph and Telephone Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + * Original code was written by Koji Sato . + * Two allocators were unified by Ryusuke Konishi , + * Amagai Yoshiji . + */ + +#ifndef _NILFS_ALLOC_H +#define _NILFS_ALLOC_H + +#include +#include +#include + +static inline unsigned long +nilfs_palloc_entries_per_group(const struct inode *inode) +{ + return 1UL << (inode->i_blkbits + 3 /* log2(8 = CHAR_BITS) */); +} + +int nilfs_palloc_init_blockgroup(struct inode *, unsigned); +int nilfs_palloc_get_entry_block(struct inode *, __u64, int, + struct buffer_head **); +void *nilfs_palloc_block_get_entry(const struct inode *, __u64, + const struct buffer_head *, void *); + +/** + * nilfs_palloc_req - persistent alloctor request and reply + * @pr_entry_nr: entry number (vblocknr or inode number) + * @pr_desc_bh: buffer head of the buffer containing block group descriptors + * @pr_bitmap_bh: buffer head of the buffer containing a block group bitmap + * @pr_entry_bh: buffer head of the buffer containing translation entries + */ +struct nilfs_palloc_req { + __u64 pr_entry_nr; + struct buffer_head *pr_desc_bh; + struct buffer_head *pr_bitmap_bh; + struct buffer_head *pr_entry_bh; +}; + +int nilfs_palloc_prepare_alloc_entry(struct inode *, + struct nilfs_palloc_req *); +void nilfs_palloc_commit_alloc_entry(struct inode *, + struct nilfs_palloc_req *); +void nilfs_palloc_abort_alloc_entry(struct inode *, struct nilfs_palloc_req *); +void nilfs_palloc_commit_free_entry(struct inode *, struct nilfs_palloc_req *); +int nilfs_palloc_prepare_free_entry(struct inode *, struct nilfs_palloc_req *); +void nilfs_palloc_abort_free_entry(struct inode *, struct nilfs_palloc_req *); +int nilfs_palloc_freev(struct inode *, __u64 *, size_t); + +#define nilfs_set_bit_atomic ext2_set_bit_atomic +#define nilfs_clear_bit_atomic ext2_clear_bit_atomic +#define nilfs_find_next_zero_bit ext2_find_next_zero_bit + +#endif /* _NILFS_ALLOC_H */ -- cgit v1.2.3 From a17564f58b11476c011d623fa1f268602a81c27c Mon Sep 17 00:00:00 2001 From: Koji Sato Date: Mon, 6 Apr 2009 19:01:30 -0700 Subject: nilfs2: disk address translator This adds the disk address translation file (DAT) whose primary function is to convert virtual disk block numbers to actual disk block numbers. The virtual block numbers of NILFS are associated with checkpoint generation numbers, and this file also provides functions to manage the lifetime information of each virtual block number. Signed-off-by: Koji Sato Signed-off-by: Ryusuke Konishi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/nilfs2/dat.c | 429 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ fs/nilfs2/dat.h | 52 +++++++ 2 files changed, 481 insertions(+) create mode 100644 fs/nilfs2/dat.c create mode 100644 fs/nilfs2/dat.h (limited to 'fs/nilfs2') diff --git a/fs/nilfs2/dat.c b/fs/nilfs2/dat.c new file mode 100644 index 00000000000..9360920f7d3 --- /dev/null +++ b/fs/nilfs2/dat.c @@ -0,0 +1,429 @@ +/* + * dat.c - NILFS disk address translation. + * + * Copyright (C) 2006-2008 Nippon Telegraph and Telephone Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + * Written by Koji Sato . + */ + +#include +#include +#include +#include +#include "nilfs.h" +#include "mdt.h" +#include "alloc.h" +#include "dat.h" + + +#define NILFS_CNO_MIN ((__u64)1) +#define NILFS_CNO_MAX (~(__u64)0) + +static int nilfs_dat_prepare_entry(struct inode *dat, + struct nilfs_palloc_req *req, int create) +{ + return nilfs_palloc_get_entry_block(dat, req->pr_entry_nr, + create, &req->pr_entry_bh); +} + +static void nilfs_dat_commit_entry(struct inode *dat, + struct nilfs_palloc_req *req) +{ + nilfs_mdt_mark_buffer_dirty(req->pr_entry_bh); + nilfs_mdt_mark_dirty(dat); + brelse(req->pr_entry_bh); +} + +static void nilfs_dat_abort_entry(struct inode *dat, + struct nilfs_palloc_req *req) +{ + brelse(req->pr_entry_bh); +} + +int nilfs_dat_prepare_alloc(struct inode *dat, struct nilfs_palloc_req *req) +{ + int ret; + + ret = nilfs_palloc_prepare_alloc_entry(dat, req); + if (ret < 0) + return ret; + + ret = nilfs_dat_prepare_entry(dat, req, 1); + if (ret < 0) + nilfs_palloc_abort_alloc_entry(dat, req); + + return ret; +} + +void nilfs_dat_commit_alloc(struct inode *dat, struct nilfs_palloc_req *req) +{ + struct nilfs_dat_entry *entry; + void *kaddr; + + kaddr = kmap_atomic(req->pr_entry_bh->b_page, KM_USER0); + entry = nilfs_palloc_block_get_entry(dat, req->pr_entry_nr, + req->pr_entry_bh, kaddr); + entry->de_start = cpu_to_le64(NILFS_CNO_MIN); + entry->de_end = cpu_to_le64(NILFS_CNO_MAX); + entry->de_blocknr = cpu_to_le64(0); + kunmap_atomic(kaddr, KM_USER0); + + nilfs_palloc_commit_alloc_entry(dat, req); + nilfs_dat_commit_entry(dat, req); +} + +void nilfs_dat_abort_alloc(struct inode *dat, struct nilfs_palloc_req *req) +{ + nilfs_dat_abort_entry(dat, req); + nilfs_palloc_abort_alloc_entry(dat, req); +} + +int nilfs_dat_prepare_free(struct inode *dat, struct nilfs_palloc_req *req) +{ + int ret; + + ret = nilfs_palloc_prepare_free_entry(dat, req); + if (ret < 0) + return ret; + ret = nilfs_dat_prepare_entry(dat, req, 0); + if (ret < 0) { + nilfs_palloc_abort_free_entry(dat, req); + return ret; + } + return 0; +} + +void nilfs_dat_commit_free(struct inode *dat, struct nilfs_palloc_req *req) +{ + struct nilfs_dat_entry *entry; + void *kaddr; + + kaddr = kmap_atomic(req->pr_entry_bh->b_page, KM_USER0); + entry = nilfs_palloc_block_get_entry(dat, req->pr_entry_nr, + req->pr_entry_bh, kaddr); + entry->de_start = cpu_to_le64(NILFS_CNO_MIN); + entry->de_end = cpu_to_le64(NILFS_CNO_MIN); + entry->de_blocknr = cpu_to_le64(0); + kunmap_atomic(kaddr, KM_USER0); + + nilfs_dat_commit_entry(dat, req); + nilfs_palloc_commit_free_entry(dat, req); +} + +void nilfs_dat_abort_free(struct inode *dat, struct nilfs_palloc_req *req) +{ + nilfs_dat_abort_entry(dat, req); + nilfs_palloc_abort_free_entry(dat, req); +} + +int nilfs_dat_prepare_start(struct inode *dat, struct nilfs_palloc_req *req) +{ + int ret; + + ret = nilfs_dat_prepare_entry(dat, req, 0); + BUG_ON(ret == -ENOENT); + return ret; +} + +void nilfs_dat_commit_start(struct inode *dat, struct nilfs_palloc_req *req, + sector_t blocknr) +{ + struct nilfs_dat_entry *entry; + void *kaddr; + + kaddr = kmap_atomic(req->pr_entry_bh->b_page, KM_USER0); + entry = nilfs_palloc_block_get_entry(dat, req->pr_entry_nr, + req->pr_entry_bh, kaddr); + entry->de_start = cpu_to_le64(nilfs_mdt_cno(dat)); + if (entry->de_blocknr != cpu_to_le64(0) || + entry->de_end != cpu_to_le64(NILFS_CNO_MAX)) { + printk(KERN_CRIT + "%s: vbn = %llu, start = %llu, end = %llu, pbn = %llu\n", + __func__, (unsigned long long)req->pr_entry_nr, + (unsigned long long)le64_to_cpu(entry->de_start), + (unsigned long long)le64_to_cpu(entry->de_end), + (unsigned long long)le64_to_cpu(entry->de_blocknr)); + BUG(); + } + entry->de_blocknr = cpu_to_le64(blocknr); + kunmap_atomic(kaddr, KM_USER0); + + nilfs_dat_commit_entry(dat, req); +} + +void nilfs_dat_abort_start(struct inode *dat, struct nilfs_palloc_req *req) +{ + nilfs_dat_abort_entry(dat, req); +} + +int nilfs_dat_prepare_end(struct inode *dat, struct nilfs_palloc_req *req) +{ + struct nilfs_dat_entry *entry; + __u64 start; + sector_t blocknr; + void *kaddr; + int ret; + + ret = nilfs_dat_prepare_entry(dat, req, 0); + if (ret < 0) { + BUG_ON(ret == -ENOENT); + return ret; + } + + kaddr = kmap_atomic(req->pr_entry_bh->b_page, KM_USER0); + entry = nilfs_palloc_block_get_entry(dat, req->pr_entry_nr, + req->pr_entry_bh, kaddr); + start = le64_to_cpu(entry->de_start); + blocknr = le64_to_cpu(entry->de_blocknr); + kunmap_atomic(kaddr, KM_USER0); + + if (blocknr == 0) { + ret = nilfs_palloc_prepare_free_entry(dat, req); + if (ret < 0) { + nilfs_dat_abort_entry(dat, req); + return ret; + } + } + + return 0; +} + +void nilfs_dat_commit_end(struct inode *dat, struct nilfs_palloc_req *req, + int dead) +{ + struct nilfs_dat_entry *entry; + __u64 start, end; + sector_t blocknr; + void *kaddr; + + kaddr = kmap_atomic(req->pr_entry_bh->b_page, KM_USER0); + entry = nilfs_palloc_block_get_entry(dat, req->pr_entry_nr, + req->pr_entry_bh, kaddr); + end = start = le64_to_cpu(entry->de_start); + if (!dead) { + end = nilfs_mdt_cno(dat); + BUG_ON(start > end); + } + entry->de_end = cpu_to_le64(end); + blocknr = le64_to_cpu(entry->de_blocknr); + kunmap_atomic(kaddr, KM_USER0); + + if (blocknr == 0) + nilfs_dat_commit_free(dat, req); + else + nilfs_dat_commit_entry(dat, req); +} + +void nilfs_dat_abort_end(struct inode *dat, struct nilfs_palloc_req *req) +{ + struct nilfs_dat_entry *entry; + __u64 start; + sector_t blocknr; + void *kaddr; + + kaddr = kmap_atomic(req->pr_entry_bh->b_page, KM_USER0); + entry = nilfs_palloc_block_get_entry(dat, req->pr_entry_nr, + req->pr_entry_bh, kaddr); + start = le64_to_cpu(entry->de_start); + blocknr = le64_to_cpu(entry->de_blocknr); + kunmap_atomic(kaddr, KM_USER0); + + if (start == nilfs_mdt_cno(dat) && blocknr == 0) + nilfs_palloc_abort_free_entry(dat, req); + nilfs_dat_abort_entry(dat, req); +} + +/** + * nilfs_dat_mark_dirty - + * @dat: DAT file inode + * @vblocknr: virtual block number + * + * Description: + * + * Return Value: On success, 0 is returned. On error, one of the following + * negative error codes is returned. + * + * %-EIO - I/O error. + * + * %-ENOMEM - Insufficient amount of memory available. + */ +int nilfs_dat_mark_dirty(struct inode *dat, __u64 vblocknr) +{ + struct nilfs_palloc_req req; + int ret; + + req.pr_entry_nr = vblocknr; + ret = nilfs_dat_prepare_entry(dat, &req, 0); + if (ret == 0) + nilfs_dat_commit_entry(dat, &req); + return ret; +} + +/** + * nilfs_dat_freev - free virtual block numbers + * @dat: DAT file inode + * @vblocknrs: array of virtual block numbers + * @nitems: number of virtual block numbers + * + * Description: nilfs_dat_freev() frees the virtual block numbers specified by + * @vblocknrs and @nitems. + * + * Return Value: On success, 0 is returned. On error, one of the following + * nagative error codes is returned. + * + * %-EIO - I/O error. + * + * %-ENOMEM - Insufficient amount of memory available. + * + * %-ENOENT - The virtual block number have not been allocated. + */ +int nilfs_dat_freev(struct inode *dat, __u64 *vblocknrs, size_t nitems) +{ + return nilfs_palloc_freev(dat, vblocknrs, nitems); +} + +/** + * nilfs_dat_move - change a block number + * @dat: DAT file inode + * @vblocknr: virtual block number + * @blocknr: block number + * + * Description: nilfs_dat_move() changes the block number associated with + * @vblocknr to @blocknr. + * + * Return Value: On success, 0 is returned. On error, one of the following + * negative error codes is returned. + * + * %-EIO - I/O error. + * + * %-ENOMEM - Insufficient amount of memory available. + */ +int nilfs_dat_move(struct inode *dat, __u64 vblocknr, sector_t blocknr) +{ + struct buffer_head *entry_bh; + struct nilfs_dat_entry *entry; + void *kaddr; + int ret; + + ret = nilfs_palloc_get_entry_block(dat, vblocknr, 0, &entry_bh); + if (ret < 0) + return ret; + kaddr = kmap_atomic(entry_bh->b_page, KM_USER0); + entry = nilfs_palloc_block_get_entry(dat, vblocknr, entry_bh, kaddr); + if (entry->de_blocknr == cpu_to_le64(0)) { + printk(KERN_CRIT "%s: vbn = %llu, [%llu, %llu)\n", __func__, + (unsigned long long)vblocknr, + (unsigned long long)le64_to_cpu(entry->de_start), + (unsigned long long)le64_to_cpu(entry->de_end)); + BUG(); + } + BUG_ON(blocknr == 0); + entry->de_blocknr = cpu_to_le64(blocknr); + kunmap_atomic(kaddr, KM_USER0); + + nilfs_mdt_mark_buffer_dirty(entry_bh); + nilfs_mdt_mark_dirty(dat); + + brelse(entry_bh); + + return 0; +} + +/** + * nilfs_dat_translate - translate a virtual block number to a block number + * @dat: DAT file inode + * @vblocknr: virtual block number + * @blocknrp: pointer to a block number + * + * Description: nilfs_dat_translate() maps the virtual block number @vblocknr + * to the corresponding block number. + * + * Return Value: On success, 0 is returned and the block number associated + * with @vblocknr is stored in the place pointed by @blocknrp. On error, one + * of the following negative error codes is returned. + * + * %-EIO - I/O error. + * + * %-ENOMEM - Insufficient amount of memory available. + * + * %-ENOENT - A block number associated with @vblocknr does not exist. + */ +int nilfs_dat_translate(struct inode *dat, __u64 vblocknr, sector_t *blocknrp) +{ + struct buffer_head *entry_bh; + struct nilfs_dat_entry *entry; + sector_t blocknr; + void *kaddr; + int ret; + + ret = nilfs_palloc_get_entry_block(dat, vblocknr, 0, &entry_bh); + if (ret < 0) + return ret; + + kaddr = kmap_atomic(entry_bh->b_page, KM_USER0); + entry = nilfs_palloc_block_get_entry(dat, vblocknr, entry_bh, kaddr); + blocknr = le64_to_cpu(entry->de_blocknr); + if (blocknr == 0) { + ret = -ENOENT; + goto out; + } + if (blocknrp != NULL) + *blocknrp = blocknr; + + out: + kunmap_atomic(kaddr, KM_USER0); + brelse(entry_bh); + return ret; +} + +ssize_t nilfs_dat_get_vinfo(struct inode *dat, struct nilfs_vinfo *vinfo, + size_t nvi) +{ + struct buffer_head *entry_bh; + struct nilfs_dat_entry *entry; + __u64 first, last; + void *kaddr; + unsigned long entries_per_block = NILFS_MDT(dat)->mi_entries_per_block; + int i, j, n, ret; + + for (i = 0; i < nvi; i += n) { + ret = nilfs_palloc_get_entry_block(dat, vinfo[i].vi_vblocknr, + 0, &entry_bh); + if (ret < 0) + return ret; + kaddr = kmap_atomic(entry_bh->b_page, KM_USER0); + /* last virtual block number in this block */ + first = vinfo[i].vi_vblocknr; + do_div(first, entries_per_block); + first *= entries_per_block; + last = first + entries_per_block - 1; + for (j = i, n = 0; + j < nvi && vinfo[j].vi_vblocknr >= first && + vinfo[j].vi_vblocknr <= last; + j++, n++) { + entry = nilfs_palloc_block_get_entry( + dat, vinfo[j].vi_vblocknr, entry_bh, kaddr); + vinfo[j].vi_start = le64_to_cpu(entry->de_start); + vinfo[j].vi_end = le64_to_cpu(entry->de_end); + vinfo[j].vi_blocknr = le64_to_cpu(entry->de_blocknr); + } + kunmap_atomic(kaddr, KM_USER0); + brelse(entry_bh); + } + + return nvi; +} diff --git a/fs/nilfs2/dat.h b/fs/nilfs2/dat.h new file mode 100644 index 00000000000..d9560654a4b --- /dev/null +++ b/fs/nilfs2/dat.h @@ -0,0 +1,52 @@ +/* + * dat.h - NILFS disk address translation. + * + * Copyright (C) 2006-2008 Nippon Telegraph and Telephone Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + * Written by Koji Sato . + */ + +#ifndef _NILFS_DAT_H +#define _NILFS_DAT_H + +#include +#include +#include + +#define NILFS_DAT_GFP NILFS_MDT_GFP + +struct nilfs_palloc_req; + +int nilfs_dat_translate(struct inode *, __u64, sector_t *); + +int nilfs_dat_prepare_alloc(struct inode *, struct nilfs_palloc_req *); +void nilfs_dat_commit_alloc(struct inode *, struct nilfs_palloc_req *); +void nilfs_dat_abort_alloc(struct inode *, struct nilfs_palloc_req *); +int nilfs_dat_prepare_start(struct inode *, struct nilfs_palloc_req *); +void nilfs_dat_commit_start(struct inode *, struct nilfs_palloc_req *, + sector_t); +void nilfs_dat_abort_start(struct inode *, struct nilfs_palloc_req *); +int nilfs_dat_prepare_end(struct inode *, struct nilfs_palloc_req *); +void nilfs_dat_commit_end(struct inode *, struct nilfs_palloc_req *, int); +void nilfs_dat_abort_end(struct inode *, struct nilfs_palloc_req *); + +int nilfs_dat_mark_dirty(struct inode *, __u64); +int nilfs_dat_freev(struct inode *, __u64 *, size_t); +int nilfs_dat_move(struct inode *, __u64, sector_t); +ssize_t nilfs_dat_get_vinfo(struct inode *, struct nilfs_vinfo *, size_t); + +#endif /* _NILFS_DAT_H */ -- cgit v1.2.3 From 43bfb45ed4feace26157778889be55e4046b7a4b Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Mon, 6 Apr 2009 19:01:30 -0700 Subject: nilfs2: inode map file This adds a meta data file which stores on-disk inodes in its data blocks. Signed-off-by: Ryusuke Konishi Signed-off-by: Yoshiji Amagai Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/nilfs2/ifile.c | 150 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ fs/nilfs2/ifile.h | 53 +++++++++++++++++++ 2 files changed, 203 insertions(+) create mode 100644 fs/nilfs2/ifile.c create mode 100644 fs/nilfs2/ifile.h (limited to 'fs/nilfs2') diff --git a/fs/nilfs2/ifile.c b/fs/nilfs2/ifile.c new file mode 100644 index 00000000000..de86401f209 --- /dev/null +++ b/fs/nilfs2/ifile.c @@ -0,0 +1,150 @@ +/* + * ifile.c - NILFS inode file + * + * Copyright (C) 2006-2008 Nippon Telegraph and Telephone Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + * Written by Amagai Yoshiji . + * Revised by Ryusuke Konishi . + * + */ + +#include +#include +#include "nilfs.h" +#include "mdt.h" +#include "alloc.h" +#include "ifile.h" + +/** + * nilfs_ifile_create_inode - create a new disk inode + * @ifile: ifile inode + * @out_ino: pointer to a variable to store inode number + * @out_bh: buffer_head contains newly allocated disk inode + * + * Return Value: On success, 0 is returned and the newly allocated inode + * number is stored in the place pointed by @ino, and buffer_head pointer + * that contains newly allocated disk inode structure is stored in the + * place pointed by @out_bh + * On error, one of the following negative error codes is returned. + * + * %-EIO - I/O error. + * + * %-ENOMEM - Insufficient amount of memory available. + * + * %-ENOSPC - No inode left. + */ +int nilfs_ifile_create_inode(struct inode *ifile, ino_t *out_ino, + struct buffer_head **out_bh) +{ + struct nilfs_palloc_req req; + int ret; + + req.pr_entry_nr = 0; /* 0 says find free inode from beginning of + a group. dull code!! */ + req.pr_entry_bh = NULL; + + ret = nilfs_palloc_prepare_alloc_entry(ifile, &req); + if (!ret) { + ret = nilfs_palloc_get_entry_block(ifile, req.pr_entry_nr, 1, + &req.pr_entry_bh); + if (ret < 0) + nilfs_palloc_abort_alloc_entry(ifile, &req); + } + if (ret < 0) { + brelse(req.pr_entry_bh); + return ret; + } + nilfs_palloc_commit_alloc_entry(ifile, &req); + nilfs_mdt_mark_buffer_dirty(req.pr_entry_bh); + nilfs_mdt_mark_dirty(ifile); + *out_ino = (ino_t)req.pr_entry_nr; + *out_bh = req.pr_entry_bh; + return 0; +} + +/** + * nilfs_ifile_delete_inode - delete a disk inode + * @ifile: ifile inode + * @ino: inode number + * + * Return Value: On success, 0 is returned. On error, one of the following + * negative error codes is returned. + * + * %-EIO - I/O error. + * + * %-ENOMEM - Insufficient amount of memory available. + * + * %-ENOENT - The inode number @ino have not been allocated. + */ +int nilfs_ifile_delete_inode(struct inode *ifile, ino_t ino) +{ + struct nilfs_palloc_req req = { + .pr_entry_nr = ino, .pr_entry_bh = NULL + }; + struct nilfs_inode *raw_inode; + void *kaddr; + int ret; + + ret = nilfs_palloc_prepare_free_entry(ifile, &req); + if (!ret) { + ret = nilfs_palloc_get_entry_block(ifile, req.pr_entry_nr, 0, + &req.pr_entry_bh); + if (ret < 0) + nilfs_palloc_abort_free_entry(ifile, &req); + } + if (ret < 0) { + brelse(req.pr_entry_bh); + return ret; + } + + kaddr = kmap_atomic(req.pr_entry_bh->b_page, KM_USER0); + raw_inode = nilfs_palloc_block_get_entry(ifile, req.pr_entry_nr, + req.pr_entry_bh, kaddr); + raw_inode->i_flags = 0; + kunmap_atomic(kaddr, KM_USER0); + + nilfs_mdt_mark_buffer_dirty(req.pr_entry_bh); + brelse(req.pr_entry_bh); + + nilfs_palloc_commit_free_entry(ifile, &req); + + return 0; +} + +int nilfs_ifile_get_inode_block(struct inode *ifile, ino_t ino, + struct buffer_head **out_bh) +{ + struct super_block *sb = ifile->i_sb; + int err; + + if (unlikely(!NILFS_VALID_INODE(sb, ino))) { + nilfs_error(sb, __func__, "bad inode number: %lu", + (unsigned long) ino); + return -EINVAL; + } + + err = nilfs_palloc_get_entry_block(ifile, ino, 0, out_bh); + if (unlikely(err)) { + if (err == -EINVAL) + nilfs_error(sb, __func__, "ifile is broken"); + else + nilfs_warning(sb, __func__, + "unable to read inode: %lu", + (unsigned long) ino); + } + return err; +} diff --git a/fs/nilfs2/ifile.h b/fs/nilfs2/ifile.h new file mode 100644 index 00000000000..5d30a35679b --- /dev/null +++ b/fs/nilfs2/ifile.h @@ -0,0 +1,53 @@ +/* + * ifile.h - NILFS inode file + * + * Copyright (C) 2006-2008 Nippon Telegraph and Telephone Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + * Written by Amagai Yoshiji + * Revised by Ryusuke Konishi + * + */ + +#ifndef _NILFS_IFILE_H +#define _NILFS_IFILE_H + +#include +#include +#include +#include "mdt.h" +#include "alloc.h" + +#define NILFS_IFILE_GFP NILFS_MDT_GFP + +static inline struct nilfs_inode * +nilfs_ifile_map_inode(struct inode *ifile, ino_t ino, struct buffer_head *ibh) +{ + void *kaddr = kmap(ibh->b_page); + return nilfs_palloc_block_get_entry(ifile, ino, ibh, kaddr); +} + +static inline void nilfs_ifile_unmap_inode(struct inode *ifile, ino_t ino, + struct buffer_head *ibh) +{ + kunmap(ibh->b_page); +} + +int nilfs_ifile_create_inode(struct inode *, ino_t *, struct buffer_head **); +int nilfs_ifile_delete_inode(struct inode *, ino_t); +int nilfs_ifile_get_inode_block(struct inode *, ino_t, struct buffer_head **); + +#endif /* _NILFS_IFILE_H */ -- cgit v1.2.3 From 29619809727a4e524e26dbd7bfdc93ff7f50aa74 Mon Sep 17 00:00:00 2001 From: Koji Sato Date: Mon, 6 Apr 2009 19:01:31 -0700 Subject: nilfs2: checkpoint file This adds a meta data file which holds checkpoint entries in its data blocks. Signed-off-by: Koji Sato Signed-off-by: Ryusuke Konishi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/nilfs2/cpfile.c | 908 +++++++++++++++++++++++++++++++++++++++++++++++++++++ fs/nilfs2/cpfile.h | 45 +++ 2 files changed, 953 insertions(+) create mode 100644 fs/nilfs2/cpfile.c create mode 100644 fs/nilfs2/cpfile.h (limited to 'fs/nilfs2') diff --git a/fs/nilfs2/cpfile.c b/fs/nilfs2/cpfile.c new file mode 100644 index 00000000000..991633aad1d --- /dev/null +++ b/fs/nilfs2/cpfile.c @@ -0,0 +1,908 @@ +/* + * cpfile.c - NILFS checkpoint file. + * + * Copyright (C) 2006-2008 Nippon Telegraph and Telephone Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + * Written by Koji Sato . + */ + +#include +#include +#include +#include +#include +#include +#include "mdt.h" +#include "cpfile.h" + + +static inline unsigned long +nilfs_cpfile_checkpoints_per_block(const struct inode *cpfile) +{ + return NILFS_MDT(cpfile)->mi_entries_per_block; +} + +/* block number from the beginning of the file */ +static unsigned long +nilfs_cpfile_get_blkoff(const struct inode *cpfile, __u64 cno) +{ + __u64 tcno; + + BUG_ON(cno == 0); /* checkpoint number 0 is invalid */ + tcno = cno + NILFS_MDT(cpfile)->mi_first_entry_offset - 1; + do_div(tcno, nilfs_cpfile_checkpoints_per_block(cpfile)); + return (unsigned long)tcno; +} + +/* offset in block */ +static unsigned long +nilfs_cpfile_get_offset(const struct inode *cpfile, __u64 cno) +{ + __u64 tcno = cno + NILFS_MDT(cpfile)->mi_first_entry_offset - 1; + return do_div(tcno, nilfs_cpfile_checkpoints_per_block(cpfile)); +} + +static unsigned long +nilfs_cpfile_checkpoints_in_block(const struct inode *cpfile, + __u64 curr, + __u64 max) +{ + return min_t(__u64, + nilfs_cpfile_checkpoints_per_block(cpfile) - + nilfs_cpfile_get_offset(cpfile, curr), + max - curr); +} + +static inline int nilfs_cpfile_is_in_first(const struct inode *cpfile, + __u64 cno) +{ + return nilfs_cpfile_get_blkoff(cpfile, cno) == 0; +} + +static unsigned int +nilfs_cpfile_block_add_valid_checkpoints(const struct inode *cpfile, + struct buffer_head *bh, + void *kaddr, + unsigned int n) +{ + struct nilfs_checkpoint *cp = kaddr + bh_offset(bh); + unsigned int count; + + count = le32_to_cpu(cp->cp_checkpoints_count) + n; + cp->cp_checkpoints_count = cpu_to_le32(count); + return count; +} + +static unsigned int +nilfs_cpfile_block_sub_valid_checkpoints(const struct inode *cpfile, + struct buffer_head *bh, + void *kaddr, + unsigned int n) +{ + struct nilfs_checkpoint *cp = kaddr + bh_offset(bh); + unsigned int count; + + BUG_ON(le32_to_cpu(cp->cp_checkpoints_count) < n); + count = le32_to_cpu(cp->cp_checkpoints_count) - n; + cp->cp_checkpoints_count = cpu_to_le32(count); + return count; +} + +static inline struct nilfs_cpfile_header * +nilfs_cpfile_block_get_header(const struct inode *cpfile, + struct buffer_head *bh, + void *kaddr) +{ + return kaddr + bh_offset(bh); +} + +static struct nilfs_checkpoint * +nilfs_cpfile_block_get_checkpoint(const struct inode *cpfile, __u64 cno, + struct buffer_head *bh, + void *kaddr) +{ + return kaddr + bh_offset(bh) + nilfs_cpfile_get_offset(cpfile, cno) * + NILFS_MDT(cpfile)->mi_entry_size; +} + +static void nilfs_cpfile_block_init(struct inode *cpfile, + struct buffer_head *bh, + void *kaddr) +{ + struct nilfs_checkpoint *cp = kaddr + bh_offset(bh); + size_t cpsz = NILFS_MDT(cpfile)->mi_entry_size; + int n = nilfs_cpfile_checkpoints_per_block(cpfile); + + while (n-- > 0) { + nilfs_checkpoint_set_invalid(cp); + cp = (void *)cp + cpsz; + } +} + +static inline int nilfs_cpfile_get_header_block(struct inode *cpfile, + struct buffer_head **bhp) +{ + return nilfs_mdt_get_block(cpfile, 0, 0, NULL, bhp); +} + +static inline int nilfs_cpfile_get_checkpoint_block(struct inode *cpfile, + __u64 cno, + int create, + struct buffer_head **bhp) +{ + return nilfs_mdt_get_block(cpfile, + nilfs_cpfile_get_blkoff(cpfile, cno), + create, nilfs_cpfile_block_init, bhp); +} + +static inline int nilfs_cpfile_delete_checkpoint_block(struct inode *cpfile, + __u64 cno) +{ + return nilfs_mdt_delete_block(cpfile, + nilfs_cpfile_get_blkoff(cpfile, cno)); +} + +/** + * nilfs_cpfile_get_checkpoint - get a checkpoint + * @cpfile: inode of checkpoint file + * @cno: checkpoint number + * @create: create flag + * @cpp: pointer to a checkpoint + * @bhp: pointer to a buffer head + * + * Description: nilfs_cpfile_get_checkpoint() acquires the checkpoint + * specified by @cno. A new checkpoint will be created if @cno is the current + * checkpoint number and @create is nonzero. + * + * Return Value: On success, 0 is returned, and the checkpoint and the + * buffer head of the buffer on which the checkpoint is located are stored in + * the place pointed by @cpp and @bhp, respectively. On error, one of the + * following negative error codes is returned. + * + * %-EIO - I/O error. + * + * %-ENOMEM - Insufficient amount of memory available. + * + * %-ENOENT - No such checkpoint. + */ +int nilfs_cpfile_get_checkpoint(struct inode *cpfile, + __u64 cno, + int create, + struct nilfs_checkpoint **cpp, + struct buffer_head **bhp) +{ + struct buffer_head *header_bh, *cp_bh; + struct nilfs_cpfile_header *header; + struct nilfs_checkpoint *cp; + void *kaddr; + int ret; + + BUG_ON(cno < 1 || cno > nilfs_mdt_cno(cpfile) || + (cno < nilfs_mdt_cno(cpfile) && create)); + + down_write(&NILFS_MDT(cpfile)->mi_sem); + + ret = nilfs_cpfile_get_header_block(cpfile, &header_bh); + if (ret < 0) + goto out_sem; + ret = nilfs_cpfile_get_checkpoint_block(cpfile, cno, create, &cp_bh); + if (ret < 0) + goto out_header; + kaddr = kmap(cp_bh->b_page); + cp = nilfs_cpfile_block_get_checkpoint(cpfile, cno, cp_bh, kaddr); + if (nilfs_checkpoint_invalid(cp)) { + if (!create) { + kunmap(cp_bh->b_page); + brelse(cp_bh); + ret = -ENOENT; + goto out_header; + } + /* a newly-created checkpoint */ + nilfs_checkpoint_clear_invalid(cp); + if (!nilfs_cpfile_is_in_first(cpfile, cno)) + nilfs_cpfile_block_add_valid_checkpoints(cpfile, cp_bh, + kaddr, 1); + nilfs_mdt_mark_buffer_dirty(cp_bh); + + kaddr = kmap_atomic(header_bh->b_page, KM_USER0); + header = nilfs_cpfile_block_get_header(cpfile, header_bh, + kaddr); + le64_add_cpu(&header->ch_ncheckpoints, 1); + kunmap_atomic(kaddr, KM_USER0); + nilfs_mdt_mark_buffer_dirty(header_bh); + nilfs_mdt_mark_dirty(cpfile); + } + + if (cpp != NULL) + *cpp = cp; + *bhp = cp_bh; + + out_header: + brelse(header_bh); + + out_sem: + up_write(&NILFS_MDT(cpfile)->mi_sem); + return ret; +} + +/** + * nilfs_cpfile_put_checkpoint - put a checkpoint + * @cpfile: inode of checkpoint file + * @cno: checkpoint number + * @bh: buffer head + * + * Description: nilfs_cpfile_put_checkpoint() releases the checkpoint + * specified by @cno. @bh must be the buffer head which has been returned by + * a previous call to nilfs_cpfile_get_checkpoint() with @cno. + */ +void nilfs_cpfile_put_checkpoint(struct inode *cpfile, __u64 cno, + struct buffer_head *bh) +{ + kunmap(bh->b_page); + brelse(bh); +} + +/** + * nilfs_cpfile_delete_checkpoints - delete checkpoints + * @cpfile: inode of checkpoint file + * @start: start checkpoint number + * @end: end checkpoint numer + * + * Description: nilfs_cpfile_delete_checkpoints() deletes the checkpoints in + * the period from @start to @end, excluding @end itself. The checkpoints + * which have been already deleted are ignored. + * + * Return Value: On success, 0 is returned. On error, one of the following + * negative error codes is returned. + * + * %-EIO - I/O error. + * + * %-ENOMEM - Insufficient amount of memory available. + * + * %-EINVAL - invalid checkpoints. + */ +int nilfs_cpfile_delete_checkpoints(struct inode *cpfile, + __u64 start, + __u64 end) +{ + struct buffer_head *header_bh, *cp_bh; + struct nilfs_cpfile_header *header; + struct nilfs_checkpoint *cp; + size_t cpsz = NILFS_MDT(cpfile)->mi_entry_size; + __u64 cno; + void *kaddr; + unsigned long tnicps; + int ret, ncps, nicps, count, i; + + if ((start == 0) || (start > end)) { + printk(KERN_CRIT "%s: start = %llu, end = %llu\n", + __func__, + (unsigned long long)start, + (unsigned long long)end); + BUG(); + } + + /* cannot delete the latest checkpoint */ + if (start == nilfs_mdt_cno(cpfile) - 1) + return -EPERM; + + down_write(&NILFS_MDT(cpfile)->mi_sem); + + ret = nilfs_cpfile_get_header_block(cpfile, &header_bh); + if (ret < 0) + goto out_sem; + tnicps = 0; + + for (cno = start; cno < end; cno += ncps) { + ncps = nilfs_cpfile_checkpoints_in_block(cpfile, cno, end); + ret = nilfs_cpfile_get_checkpoint_block(cpfile, cno, 0, &cp_bh); + if (ret < 0) { + if (ret != -ENOENT) + goto out_sem; + /* skip hole */ + ret = 0; + continue; + } + + kaddr = kmap_atomic(cp_bh->b_page, KM_USER0); + cp = nilfs_cpfile_block_get_checkpoint( + cpfile, cno, cp_bh, kaddr); + nicps = 0; + for (i = 0; i < ncps; i++, cp = (void *)cp + cpsz) { + BUG_ON(nilfs_checkpoint_snapshot(cp)); + if (!nilfs_checkpoint_invalid(cp)) { + nilfs_checkpoint_set_invalid(cp); + nicps++; + } + } + if (nicps > 0) { + tnicps += nicps; + nilfs_mdt_mark_buffer_dirty(cp_bh); + nilfs_mdt_mark_dirty(cpfile); + if (!nilfs_cpfile_is_in_first(cpfile, cno) && + (count = nilfs_cpfile_block_sub_valid_checkpoints( + cpfile, cp_bh, kaddr, nicps)) == 0) { + /* make hole */ + kunmap_atomic(kaddr, KM_USER0); + brelse(cp_bh); + ret = nilfs_cpfile_delete_checkpoint_block( + cpfile, cno); + if (ret == 0) + continue; + printk(KERN_ERR "%s: cannot delete block\n", + __func__); + goto out_sem; + } + } + + kunmap_atomic(kaddr, KM_USER0); + brelse(cp_bh); + } + + if (tnicps > 0) { + kaddr = kmap_atomic(header_bh->b_page, KM_USER0); + header = nilfs_cpfile_block_get_header(cpfile, header_bh, + kaddr); + le64_add_cpu(&header->ch_ncheckpoints, -tnicps); + nilfs_mdt_mark_buffer_dirty(header_bh); + nilfs_mdt_mark_dirty(cpfile); + kunmap_atomic(kaddr, KM_USER0); + } + brelse(header_bh); + + out_sem: + up_write(&NILFS_MDT(cpfile)->mi_sem); + return ret; +} + +static void nilfs_cpfile_checkpoint_to_cpinfo(struct inode *cpfile, + struct nilfs_checkpoint *cp, + struct nilfs_cpinfo *ci) +{ + ci->ci_flags = le32_to_cpu(cp->cp_flags); + ci->ci_cno = le64_to_cpu(cp->cp_cno); + ci->ci_create = le64_to_cpu(cp->cp_create); + ci->ci_nblk_inc = le64_to_cpu(cp->cp_nblk_inc); + ci->ci_inodes_count = le64_to_cpu(cp->cp_inodes_count); + ci->ci_blocks_count = le64_to_cpu(cp->cp_blocks_count); + ci->ci_next = le64_to_cpu(cp->cp_snapshot_list.ssl_next); +} + +static ssize_t nilfs_cpfile_do_get_cpinfo(struct inode *cpfile, __u64 cno, + struct nilfs_cpinfo *ci, size_t nci) +{ + struct nilfs_checkpoint *cp; + struct buffer_head *bh; + size_t cpsz = NILFS_MDT(cpfile)->mi_entry_size; + __u64 cur_cno = nilfs_mdt_cno(cpfile); + void *kaddr; + int n, ret; + int ncps, i; + + down_read(&NILFS_MDT(cpfile)->mi_sem); + + for (n = 0; cno < cur_cno && n < nci; cno += ncps) { + ncps = nilfs_cpfile_checkpoints_in_block(cpfile, cno, cur_cno); + ret = nilfs_cpfile_get_checkpoint_block(cpfile, cno, 0, &bh); + if (ret < 0) { + if (ret != -ENOENT) + goto out; + continue; /* skip hole */ + } + + kaddr = kmap_atomic(bh->b_page, KM_USER0); + cp = nilfs_cpfile_block_get_checkpoint(cpfile, cno, bh, kaddr); + for (i = 0; i < ncps && n < nci; i++, cp = (void *)cp + cpsz) { + if (!nilfs_checkpoint_invalid(cp)) + nilfs_cpfile_checkpoint_to_cpinfo( + cpfile, cp, &ci[n++]); + } + kunmap_atomic(kaddr, KM_USER0); + brelse(bh); + } + + ret = n; + + out: + up_read(&NILFS_MDT(cpfile)->mi_sem); + return ret; +} + +static ssize_t nilfs_cpfile_do_get_ssinfo(struct inode *cpfile, __u64 cno, + struct nilfs_cpinfo *ci, size_t nci) +{ + struct buffer_head *bh; + struct nilfs_cpfile_header *header; + struct nilfs_checkpoint *cp; + __u64 curr, next; + unsigned long curr_blkoff, next_blkoff; + void *kaddr; + int n, ret; + + down_read(&NILFS_MDT(cpfile)->mi_sem); + + if (cno == 0) { + ret = nilfs_cpfile_get_header_block(cpfile, &bh); + if (ret < 0) + goto out; + kaddr = kmap_atomic(bh->b_page, KM_USER0); + header = nilfs_cpfile_block_get_header(cpfile, bh, kaddr); + curr = le64_to_cpu(header->ch_snapshot_list.ssl_next); + kunmap_atomic(kaddr, KM_USER0); + brelse(bh); + if (curr == 0) { + ret = 0; + goto out; + } + } else + curr = cno; + curr_blkoff = nilfs_cpfile_get_blkoff(cpfile, curr); + ret = nilfs_cpfile_get_checkpoint_block(cpfile, curr, 0, &bh); + if (ret < 0) + goto out; + kaddr = kmap_atomic(bh->b_page, KM_USER0); + for (n = 0; n < nci; n++) { + cp = nilfs_cpfile_block_get_checkpoint( + cpfile, curr, bh, kaddr); + nilfs_cpfile_checkpoint_to_cpinfo(cpfile, cp, &ci[n]); + next = le64_to_cpu(cp->cp_snapshot_list.ssl_next); + if (next == 0) { + curr = next; + n++; + break; + } + next_blkoff = nilfs_cpfile_get_blkoff(cpfile, next); + if (curr_blkoff != next_blkoff) { + kunmap_atomic(kaddr, KM_USER0); + brelse(bh); + ret = nilfs_cpfile_get_checkpoint_block(cpfile, next, + 0, &bh); + if (ret < 0) + goto out; + kaddr = kmap_atomic(bh->b_page, KM_USER0); + } + curr = next; + curr_blkoff = next_blkoff; + } + kunmap_atomic(kaddr, KM_USER0); + brelse(bh); + ret = n; + + out: + up_read(&NILFS_MDT(cpfile)->mi_sem); + return ret; +} + +/** + * nilfs_cpfile_get_cpinfo - + * @cpfile: + * @cno: + * @ci: + * @nci: + */ +ssize_t nilfs_cpfile_get_cpinfo(struct inode *cpfile, + __u64 cno, int mode, + struct nilfs_cpinfo *ci, size_t nci) +{ + switch (mode) { + case NILFS_CHECKPOINT: + return nilfs_cpfile_do_get_cpinfo(cpfile, cno, ci, nci); + case NILFS_SNAPSHOT: + return nilfs_cpfile_do_get_ssinfo(cpfile, cno, ci, nci); + default: + return -EINVAL; + } +} + +/** + * nilfs_cpfile_delete_checkpoint - + * @cpfile: + * @cno: + */ +int nilfs_cpfile_delete_checkpoint(struct inode *cpfile, __u64 cno) +{ + struct nilfs_cpinfo ci; + ssize_t nci; + int ret; + + /* checkpoint number 0 is invalid */ + if (cno == 0) + return -ENOENT; + nci = nilfs_cpfile_do_get_cpinfo(cpfile, cno, &ci, 1); + if (nci < 0) + return nci; + else if (nci == 0 || ci.ci_cno != cno) + return -ENOENT; + + /* cannot delete the latest checkpoint nor snapshots */ + ret = nilfs_cpinfo_snapshot(&ci); + if (ret < 0) + return ret; + else if (ret > 0 || cno == nilfs_mdt_cno(cpfile) - 1) + return -EPERM; + + return nilfs_cpfile_delete_checkpoints(cpfile, cno, cno + 1); +} + +static struct nilfs_snapshot_list * +nilfs_cpfile_block_get_snapshot_list(const struct inode *cpfile, + __u64 cno, + struct buffer_head *bh, + void *kaddr) +{ + struct nilfs_cpfile_header *header; + struct nilfs_checkpoint *cp; + struct nilfs_snapshot_list *list; + + if (cno != 0) { + cp = nilfs_cpfile_block_get_checkpoint(cpfile, cno, bh, kaddr); + list = &cp->cp_snapshot_list; + } else { + header = nilfs_cpfile_block_get_header(cpfile, bh, kaddr); + list = &header->ch_snapshot_list; + } + return list; +} + +static int nilfs_cpfile_set_snapshot(struct inode *cpfile, __u64 cno) +{ + struct buffer_head *header_bh, *curr_bh, *prev_bh, *cp_bh; + struct nilfs_cpfile_header *header; + struct nilfs_checkpoint *cp; + struct nilfs_snapshot_list *list; + __u64 curr, prev; + unsigned long curr_blkoff, prev_blkoff; + void *kaddr; + int ret; + + down_write(&NILFS_MDT(cpfile)->mi_sem); + + ret = nilfs_cpfile_get_checkpoint_block(cpfile, cno, 0, &cp_bh); + if (ret < 0) + goto out_sem; + kaddr = kmap_atomic(cp_bh->b_page, KM_USER0); + cp = nilfs_cpfile_block_get_checkpoint(cpfile, cno, cp_bh, kaddr); + if (nilfs_checkpoint_invalid(cp)) { + ret = -ENOENT; + kunmap_atomic(kaddr, KM_USER0); + goto out_cp; + } + if (nilfs_checkpoint_snapshot(cp)) { + ret = 0; + kunmap_atomic(kaddr, KM_USER0); + goto out_cp; + } + kunmap_atomic(kaddr, KM_USER0); + + ret = nilfs_cpfile_get_header_block(cpfile, &header_bh); + if (ret < 0) + goto out_cp; + kaddr = kmap_atomic(header_bh->b_page, KM_USER0); + header = nilfs_cpfile_block_get_header(cpfile, header_bh, kaddr); + list = &header->ch_snapshot_list; + curr_bh = header_bh; + get_bh(curr_bh); + curr = 0; + curr_blkoff = 0; + prev = le64_to_cpu(list->ssl_prev); + while (prev > cno) { + prev_blkoff = nilfs_cpfile_get_blkoff(cpfile, prev); + curr = prev; + if (curr_blkoff != prev_blkoff) { + kunmap_atomic(kaddr, KM_USER0); + brelse(curr_bh); + ret = nilfs_cpfile_get_checkpoint_block(cpfile, curr, + 0, &curr_bh); + if (ret < 0) + goto out_header; + kaddr = kmap_atomic(curr_bh->b_page, KM_USER0); + } + curr_blkoff = prev_blkoff; + cp = nilfs_cpfile_block_get_checkpoint( + cpfile, curr, curr_bh, kaddr); + list = &cp->cp_snapshot_list; + prev = le64_to_cpu(list->ssl_prev); + } + kunmap_atomic(kaddr, KM_USER0); + + if (prev != 0) { + ret = nilfs_cpfile_get_checkpoint_block(cpfile, prev, 0, + &prev_bh); + if (ret < 0) + goto out_curr; + } else { + prev_bh = header_bh; + get_bh(prev_bh); + } + + kaddr = kmap_atomic(curr_bh->b_page, KM_USER0); + list = nilfs_cpfile_block_get_snapshot_list( + cpfile, curr, curr_bh, kaddr); + list->ssl_prev = cpu_to_le64(cno); + kunmap_atomic(kaddr, KM_USER0); + + kaddr = kmap_atomic(cp_bh->b_page, KM_USER0); + cp = nilfs_cpfile_block_get_checkpoint(cpfile, cno, cp_bh, kaddr); + cp->cp_snapshot_list.ssl_next = cpu_to_le64(curr); + cp->cp_snapshot_list.ssl_prev = cpu_to_le64(prev); + nilfs_checkpoint_set_snapshot(cp); + kunmap_atomic(kaddr, KM_USER0); + + kaddr = kmap_atomic(prev_bh->b_page, KM_USER0); + list = nilfs_cpfile_block_get_snapshot_list( + cpfile, prev, prev_bh, kaddr); + list->ssl_next = cpu_to_le64(cno); + kunmap_atomic(kaddr, KM_USER0); + + kaddr = kmap_atomic(header_bh->b_page, KM_USER0); + header = nilfs_cpfile_block_get_header(cpfile, header_bh, kaddr); + le64_add_cpu(&header->ch_nsnapshots, 1); + kunmap_atomic(kaddr, KM_USER0); + + nilfs_mdt_mark_buffer_dirty(prev_bh); + nilfs_mdt_mark_buffer_dirty(curr_bh); + nilfs_mdt_mark_buffer_dirty(cp_bh); + nilfs_mdt_mark_buffer_dirty(header_bh); + nilfs_mdt_mark_dirty(cpfile); + + brelse(prev_bh); + + out_curr: + brelse(curr_bh); + + out_header: + brelse(header_bh); + + out_cp: + brelse(cp_bh); + + out_sem: + up_write(&NILFS_MDT(cpfile)->mi_sem); + return ret; +} + +static int nilfs_cpfile_clear_snapshot(struct inode *cpfile, __u64 cno) +{ + struct buffer_head *header_bh, *next_bh, *prev_bh, *cp_bh; + struct nilfs_cpfile_header *header; + struct nilfs_checkpoint *cp; + struct nilfs_snapshot_list *list; + __u64 next, prev; + void *kaddr; + int ret; + + down_write(&NILFS_MDT(cpfile)->mi_sem); + + ret = nilfs_cpfile_get_checkpoint_block(cpfile, cno, 0, &cp_bh); + if (ret < 0) + goto out_sem; + kaddr = kmap_atomic(cp_bh->b_page, KM_USER0); + cp = nilfs_cpfile_block_get_checkpoint(cpfile, cno, cp_bh, kaddr); + if (nilfs_checkpoint_invalid(cp)) { + ret = -ENOENT; + kunmap_atomic(kaddr, KM_USER0); + goto out_cp; + } + if (!nilfs_checkpoint_snapshot(cp)) { + ret = 0; + kunmap_atomic(kaddr, KM_USER0); + goto out_cp; + } + + list = &cp->cp_snapshot_list; + next = le64_to_cpu(list->ssl_next); + prev = le64_to_cpu(list->ssl_prev); + kunmap_atomic(kaddr, KM_USER0); + + ret = nilfs_cpfile_get_header_block(cpfile, &header_bh); + if (ret < 0) + goto out_cp; + if (next != 0) { + ret = nilfs_cpfile_get_checkpoint_block(cpfile, next, 0, + &next_bh); + if (ret < 0) + goto out_header; + } else { + next_bh = header_bh; + get_bh(next_bh); + } + if (prev != 0) { + ret = nilfs_cpfile_get_checkpoint_block(cpfile, prev, 0, + &prev_bh); + if (ret < 0) + goto out_next; + } else { + prev_bh = header_bh; + get_bh(prev_bh); + } + + kaddr = kmap_atomic(next_bh->b_page, KM_USER0); + list = nilfs_cpfile_block_get_snapshot_list( + cpfile, next, next_bh, kaddr); + list->ssl_prev = cpu_to_le64(prev); + kunmap_atomic(kaddr, KM_USER0); + + kaddr = kmap_atomic(prev_bh->b_page, KM_USER0); + list = nilfs_cpfile_block_get_snapshot_list( + cpfile, prev, prev_bh, kaddr); + list->ssl_next = cpu_to_le64(next); + kunmap_atomic(kaddr, KM_USER0); + + kaddr = kmap_atomic(cp_bh->b_page, KM_USER0); + cp = nilfs_cpfile_block_get_checkpoint(cpfile, cno, cp_bh, kaddr); + cp->cp_snapshot_list.ssl_next = cpu_to_le64(0); + cp->cp_snapshot_list.ssl_prev = cpu_to_le64(0); + nilfs_checkpoint_clear_snapshot(cp); + kunmap_atomic(kaddr, KM_USER0); + + kaddr = kmap_atomic(header_bh->b_page, KM_USER0); + header = nilfs_cpfile_block_get_header(cpfile, header_bh, kaddr); + le64_add_cpu(&header->ch_nsnapshots, -1); + kunmap_atomic(kaddr, KM_USER0); + + nilfs_mdt_mark_buffer_dirty(next_bh); + nilfs_mdt_mark_buffer_dirty(prev_bh); + nilfs_mdt_mark_buffer_dirty(cp_bh); + nilfs_mdt_mark_buffer_dirty(header_bh); + nilfs_mdt_mark_dirty(cpfile); + + brelse(prev_bh); + + out_next: + brelse(next_bh); + + out_header: + brelse(header_bh); + + out_cp: + brelse(cp_bh); + + out_sem: + up_write(&NILFS_MDT(cpfile)->mi_sem); + return ret; +} + +/** + * nilfs_cpfile_is_snapshot - + * @cpfile: inode of checkpoint file + * @cno: checkpoint number + * + * Description: + * + * Return Value: On success, 1 is returned if the checkpoint specified by + * @cno is a snapshot, or 0 if not. On error, one of the following negative + * error codes is returned. + * + * %-EIO - I/O error. + * + * %-ENOMEM - Insufficient amount of memory available. + * + * %-ENOENT - No such checkpoint. + */ +int nilfs_cpfile_is_snapshot(struct inode *cpfile, __u64 cno) +{ + struct buffer_head *bh; + struct nilfs_checkpoint *cp; + void *kaddr; + int ret; + + down_read(&NILFS_MDT(cpfile)->mi_sem); + + ret = nilfs_cpfile_get_checkpoint_block(cpfile, cno, 0, &bh); + if (ret < 0) + goto out; + kaddr = kmap_atomic(bh->b_page, KM_USER0); + cp = nilfs_cpfile_block_get_checkpoint(cpfile, cno, bh, kaddr); + ret = nilfs_checkpoint_snapshot(cp); + kunmap_atomic(kaddr, KM_USER0); + brelse(bh); + + out: + up_read(&NILFS_MDT(cpfile)->mi_sem); + return ret; +} + +/** + * nilfs_cpfile_change_cpmode - change checkpoint mode + * @cpfile: inode of checkpoint file + * @cno: checkpoint number + * @status: mode of checkpoint + * + * Description: nilfs_change_cpmode() changes the mode of the checkpoint + * specified by @cno. The mode @mode is NILFS_CHECKPOINT or NILFS_SNAPSHOT. + * + * Return Value: On success, 0 is returned. On error, one of the following + * negative error codes is returned. + * + * %-EIO - I/O error. + * + * %-ENOMEM - Insufficient amount of memory available. + * + * %-ENOENT - No such checkpoint. + */ +int nilfs_cpfile_change_cpmode(struct inode *cpfile, __u64 cno, int mode) +{ + struct the_nilfs *nilfs; + int ret; + + nilfs = NILFS_MDT(cpfile)->mi_nilfs; + + switch (mode) { + case NILFS_CHECKPOINT: + /* + * Check for protecting existing snapshot mounts: + * bd_mount_sem is used to make this operation atomic and + * exclusive with a new mount job. Though it doesn't cover + * umount, it's enough for the purpose. + */ + down(&nilfs->ns_bdev->bd_mount_sem); + if (nilfs_checkpoint_is_mounted(nilfs, cno, 1)) { + /* Current implementation does not have to protect + plain read-only mounts since they are exclusive + with a read/write mount and are protected from the + cleaner. */ + ret = -EBUSY; + } else + ret = nilfs_cpfile_clear_snapshot(cpfile, cno); + up(&nilfs->ns_bdev->bd_mount_sem); + return ret; + case NILFS_SNAPSHOT: + return nilfs_cpfile_set_snapshot(cpfile, cno); + default: + return -EINVAL; + } +} + +/** + * nilfs_cpfile_get_stat - get checkpoint statistics + * @cpfile: inode of checkpoint file + * @stat: pointer to a structure of checkpoint statistics + * + * Description: nilfs_cpfile_get_stat() returns information about checkpoints. + * + * Return Value: On success, 0 is returned, and checkpoints information is + * stored in the place pointed by @stat. On error, one of the following + * negative error codes is returned. + * + * %-EIO - I/O error. + * + * %-ENOMEM - Insufficient amount of memory available. + */ +int nilfs_cpfile_get_stat(struct inode *cpfile, struct nilfs_cpstat *cpstat) +{ + struct buffer_head *bh; + struct nilfs_cpfile_header *header; + void *kaddr; + int ret; + + down_read(&NILFS_MDT(cpfile)->mi_sem); + + ret = nilfs_cpfile_get_header_block(cpfile, &bh); + if (ret < 0) + goto out_sem; + kaddr = kmap_atomic(bh->b_page, KM_USER0); + header = nilfs_cpfile_block_get_header(cpfile, bh, kaddr); + cpstat->cs_cno = nilfs_mdt_cno(cpfile); + cpstat->cs_ncps = le64_to_cpu(header->ch_ncheckpoints); + cpstat->cs_nsss = le64_to_cpu(header->ch_nsnapshots); + kunmap_atomic(kaddr, KM_USER0); + brelse(bh); + + out_sem: + up_read(&NILFS_MDT(cpfile)->mi_sem); + return ret; +} diff --git a/fs/nilfs2/cpfile.h b/fs/nilfs2/cpfile.h new file mode 100644 index 00000000000..f097e90fcb2 --- /dev/null +++ b/fs/nilfs2/cpfile.h @@ -0,0 +1,45 @@ +/* + * cpfile.h - NILFS checkpoint file. + * + * Copyright (C) 2006-2008 Nippon Telegraph and Telephone Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + * Written by Koji Sato . + */ + +#ifndef _NILFS_CPFILE_H +#define _NILFS_CPFILE_H + +#include +#include +#include + +#define NILFS_CPFILE_GFP NILFS_MDT_GFP + + +int nilfs_cpfile_get_checkpoint(struct inode *, __u64, int, + struct nilfs_checkpoint **, + struct buffer_head **); +void nilfs_cpfile_put_checkpoint(struct inode *, __u64, struct buffer_head *); +int nilfs_cpfile_delete_checkpoints(struct inode *, __u64, __u64); +int nilfs_cpfile_delete_checkpoint(struct inode *, __u64); +int nilfs_cpfile_change_cpmode(struct inode *, __u64, int); +int nilfs_cpfile_is_snapshot(struct inode *, __u64); +int nilfs_cpfile_get_stat(struct inode *, struct nilfs_cpstat *); +ssize_t nilfs_cpfile_get_cpinfo(struct inode *, __u64, int, + struct nilfs_cpinfo *, size_t); + +#endif /* _NILFS_CPFILE_H */ -- cgit v1.2.3 From 6c98cd4ecb0ae35f0368c5c2190712689c2064aa Mon Sep 17 00:00:00 2001 From: Koji Sato Date: Mon, 6 Apr 2009 19:01:32 -0700 Subject: nilfs2: segment usage file This adds a meta data file which stores the allocation state of segments. [konishi.ryusuke@lab.ntt.co.jp: fix wrong counting of checkpoints and dirty segments] Signed-off-by: Koji Sato Signed-off-by: Ryusuke Konishi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/nilfs2/cpfile.c | 2 +- fs/nilfs2/sufile.c | 627 +++++++++++++++++++++++++++++++++++++++++++++++++++++ fs/nilfs2/sufile.h | 54 +++++ 3 files changed, 682 insertions(+), 1 deletion(-) create mode 100644 fs/nilfs2/sufile.c create mode 100644 fs/nilfs2/sufile.h (limited to 'fs/nilfs2') diff --git a/fs/nilfs2/cpfile.c b/fs/nilfs2/cpfile.c index 991633aad1d..82462acd06e 100644 --- a/fs/nilfs2/cpfile.c +++ b/fs/nilfs2/cpfile.c @@ -357,7 +357,7 @@ int nilfs_cpfile_delete_checkpoints(struct inode *cpfile, kaddr = kmap_atomic(header_bh->b_page, KM_USER0); header = nilfs_cpfile_block_get_header(cpfile, header_bh, kaddr); - le64_add_cpu(&header->ch_ncheckpoints, -tnicps); + le64_add_cpu(&header->ch_ncheckpoints, -(u64)tnicps); nilfs_mdt_mark_buffer_dirty(header_bh); nilfs_mdt_mark_dirty(cpfile); kunmap_atomic(kaddr, KM_USER0); diff --git a/fs/nilfs2/sufile.c b/fs/nilfs2/sufile.c new file mode 100644 index 00000000000..b3674a8162a --- /dev/null +++ b/fs/nilfs2/sufile.c @@ -0,0 +1,627 @@ +/* + * sufile.c - NILFS segment usage file. + * + * Copyright (C) 2006-2008 Nippon Telegraph and Telephone Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + * Written by Koji Sato . + */ + +#include +#include +#include +#include +#include +#include +#include "mdt.h" +#include "sufile.h" + + +static inline unsigned long +nilfs_sufile_segment_usages_per_block(const struct inode *sufile) +{ + return NILFS_MDT(sufile)->mi_entries_per_block; +} + +static unsigned long +nilfs_sufile_get_blkoff(const struct inode *sufile, __u64 segnum) +{ + __u64 t = segnum + NILFS_MDT(sufile)->mi_first_entry_offset; + do_div(t, nilfs_sufile_segment_usages_per_block(sufile)); + return (unsigned long)t; +} + +static unsigned long +nilfs_sufile_get_offset(const struct inode *sufile, __u64 segnum) +{ + __u64 t = segnum + NILFS_MDT(sufile)->mi_first_entry_offset; + return do_div(t, nilfs_sufile_segment_usages_per_block(sufile)); +} + +static unsigned long +nilfs_sufile_segment_usages_in_block(const struct inode *sufile, __u64 curr, + __u64 max) +{ + return min_t(unsigned long, + nilfs_sufile_segment_usages_per_block(sufile) - + nilfs_sufile_get_offset(sufile, curr), + max - curr + 1); +} + +static inline struct nilfs_sufile_header * +nilfs_sufile_block_get_header(const struct inode *sufile, + struct buffer_head *bh, + void *kaddr) +{ + return kaddr + bh_offset(bh); +} + +static struct nilfs_segment_usage * +nilfs_sufile_block_get_segment_usage(const struct inode *sufile, __u64 segnum, + struct buffer_head *bh, void *kaddr) +{ + return kaddr + bh_offset(bh) + + nilfs_sufile_get_offset(sufile, segnum) * + NILFS_MDT(sufile)->mi_entry_size; +} + +static inline int nilfs_sufile_get_header_block(struct inode *sufile, + struct buffer_head **bhp) +{ + return nilfs_mdt_get_block(sufile, 0, 0, NULL, bhp); +} + +static inline int +nilfs_sufile_get_segment_usage_block(struct inode *sufile, __u64 segnum, + int create, struct buffer_head **bhp) +{ + return nilfs_mdt_get_block(sufile, + nilfs_sufile_get_blkoff(sufile, segnum), + create, NULL, bhp); +} + +/** + * nilfs_sufile_alloc - allocate a segment + * @sufile: inode of segment usage file + * @segnump: pointer to segment number + * + * Description: nilfs_sufile_alloc() allocates a clean segment. + * + * Return Value: On success, 0 is returned and the segment number of the + * allocated segment is stored in the place pointed by @segnump. On error, one + * of the following negative error codes is returned. + * + * %-EIO - I/O error. + * + * %-ENOMEM - Insufficient amount of memory available. + * + * %-ENOSPC - No clean segment left. + */ +int nilfs_sufile_alloc(struct inode *sufile, __u64 *segnump) +{ + struct buffer_head *header_bh, *su_bh; + struct the_nilfs *nilfs; + struct nilfs_sufile_header *header; + struct nilfs_segment_usage *su; + size_t susz = NILFS_MDT(sufile)->mi_entry_size; + __u64 segnum, maxsegnum, last_alloc; + void *kaddr; + unsigned long nsegments, ncleansegs, nsus; + int ret, i, j; + + down_write(&NILFS_MDT(sufile)->mi_sem); + + nilfs = NILFS_MDT(sufile)->mi_nilfs; + + ret = nilfs_sufile_get_header_block(sufile, &header_bh); + if (ret < 0) + goto out_sem; + kaddr = kmap_atomic(header_bh->b_page, KM_USER0); + header = nilfs_sufile_block_get_header(sufile, header_bh, kaddr); + ncleansegs = le64_to_cpu(header->sh_ncleansegs); + last_alloc = le64_to_cpu(header->sh_last_alloc); + kunmap_atomic(kaddr, KM_USER0); + + nsegments = nilfs_sufile_get_nsegments(sufile); + segnum = last_alloc + 1; + maxsegnum = nsegments - 1; + for (i = 0; i < nsegments; i += nsus) { + if (segnum >= nsegments) { + /* wrap around */ + segnum = 0; + maxsegnum = last_alloc; + } + ret = nilfs_sufile_get_segment_usage_block(sufile, segnum, 1, + &su_bh); + if (ret < 0) + goto out_header; + kaddr = kmap_atomic(su_bh->b_page, KM_USER0); + su = nilfs_sufile_block_get_segment_usage( + sufile, segnum, su_bh, kaddr); + + nsus = nilfs_sufile_segment_usages_in_block( + sufile, segnum, maxsegnum); + for (j = 0; j < nsus; j++, su = (void *)su + susz, segnum++) { + if (!nilfs_segment_usage_clean(su)) + continue; + /* found a clean segment */ + nilfs_segment_usage_set_active(su); + nilfs_segment_usage_set_dirty(su); + kunmap_atomic(kaddr, KM_USER0); + + kaddr = kmap_atomic(header_bh->b_page, KM_USER0); + header = nilfs_sufile_block_get_header( + sufile, header_bh, kaddr); + le64_add_cpu(&header->sh_ncleansegs, -1); + le64_add_cpu(&header->sh_ndirtysegs, 1); + header->sh_last_alloc = cpu_to_le64(segnum); + kunmap_atomic(kaddr, KM_USER0); + + nilfs_mdt_mark_buffer_dirty(header_bh); + nilfs_mdt_mark_buffer_dirty(su_bh); + nilfs_mdt_mark_dirty(sufile); + brelse(su_bh); + *segnump = segnum; + goto out_header; + } + + kunmap_atomic(kaddr, KM_USER0); + brelse(su_bh); + } + + /* no segments left */ + ret = -ENOSPC; + + out_header: + brelse(header_bh); + + out_sem: + up_write(&NILFS_MDT(sufile)->mi_sem); + return ret; +} + +/** + * nilfs_sufile_cancel_free - + * @sufile: inode of segment usage file + * @segnum: segment number + * + * Description: + * + * Return Value: On success, 0 is returned. On error, one of the following + * negative error codes is returned. + * + * %-EIO - I/O error. + * + * %-ENOMEM - Insufficient amount of memory available. + */ +int nilfs_sufile_cancel_free(struct inode *sufile, __u64 segnum) +{ + struct buffer_head *header_bh, *su_bh; + struct the_nilfs *nilfs; + struct nilfs_sufile_header *header; + struct nilfs_segment_usage *su; + void *kaddr; + int ret; + + down_write(&NILFS_MDT(sufile)->mi_sem); + + nilfs = NILFS_MDT(sufile)->mi_nilfs; + + ret = nilfs_sufile_get_header_block(sufile, &header_bh); + if (ret < 0) + goto out_sem; + + ret = nilfs_sufile_get_segment_usage_block(sufile, segnum, 0, &su_bh); + if (ret < 0) + goto out_header; + + kaddr = kmap_atomic(su_bh->b_page, KM_USER0); + su = nilfs_sufile_block_get_segment_usage( + sufile, segnum, su_bh, kaddr); + if (!nilfs_segment_usage_clean(su)) { + printk(KERN_CRIT "%s: segment %llu must be clean\n", + __func__, (unsigned long long)segnum); + BUG(); + } + nilfs_segment_usage_set_dirty(su); + kunmap_atomic(kaddr, KM_USER0); + + kaddr = kmap_atomic(header_bh->b_page, KM_USER0); + header = nilfs_sufile_block_get_header(sufile, header_bh, kaddr); + le64_add_cpu(&header->sh_ncleansegs, -1); + le64_add_cpu(&header->sh_ndirtysegs, 1); + kunmap_atomic(kaddr, KM_USER0); + + nilfs_mdt_mark_buffer_dirty(header_bh); + nilfs_mdt_mark_buffer_dirty(su_bh); + nilfs_mdt_mark_dirty(sufile); + + brelse(su_bh); + + out_header: + brelse(header_bh); + + out_sem: + up_write(&NILFS_MDT(sufile)->mi_sem); + return ret; +} + +/** + * nilfs_sufile_freev - free segments + * @sufile: inode of segment usage file + * @segnum: array of segment numbers + * @nsegs: number of segments + * + * Description: nilfs_sufile_freev() frees segments specified by @segnum and + * @nsegs, which must have been returned by a previous call to + * nilfs_sufile_alloc(). + * + * Return Value: On success, 0 is returned. On error, one of the following + * negative error codes is returned. + * + * %-EIO - I/O error. + * + * %-ENOMEM - Insufficient amount of memory available. + */ +#define NILFS_SUFILE_FREEV_PREALLOC 16 +int nilfs_sufile_freev(struct inode *sufile, __u64 *segnum, size_t nsegs) +{ + struct buffer_head *header_bh, **su_bh, + *su_bh_prealloc[NILFS_SUFILE_FREEV_PREALLOC]; + struct the_nilfs *nilfs; + struct nilfs_sufile_header *header; + struct nilfs_segment_usage *su; + void *kaddr; + int ret, i; + + down_write(&NILFS_MDT(sufile)->mi_sem); + + nilfs = NILFS_MDT(sufile)->mi_nilfs; + + /* prepare resources */ + if (nsegs <= NILFS_SUFILE_FREEV_PREALLOC) + su_bh = su_bh_prealloc; + else { + su_bh = kmalloc(sizeof(*su_bh) * nsegs, GFP_NOFS); + if (su_bh == NULL) { + ret = -ENOMEM; + goto out_sem; + } + } + + ret = nilfs_sufile_get_header_block(sufile, &header_bh); + if (ret < 0) + goto out_su_bh; + for (i = 0; i < nsegs; i++) { + ret = nilfs_sufile_get_segment_usage_block(sufile, segnum[i], + 0, &su_bh[i]); + if (ret < 0) + goto out_bh; + } + + /* free segments */ + for (i = 0; i < nsegs; i++) { + kaddr = kmap_atomic(su_bh[i]->b_page, KM_USER0); + su = nilfs_sufile_block_get_segment_usage( + sufile, segnum[i], su_bh[i], kaddr); + BUG_ON(nilfs_segment_usage_error(su)); + nilfs_segment_usage_set_clean(su); + kunmap_atomic(kaddr, KM_USER0); + nilfs_mdt_mark_buffer_dirty(su_bh[i]); + } + kaddr = kmap_atomic(header_bh->b_page, KM_USER0); + header = nilfs_sufile_block_get_header(sufile, header_bh, kaddr); + le64_add_cpu(&header->sh_ncleansegs, nsegs); + le64_add_cpu(&header->sh_ndirtysegs, -(u64)nsegs); + kunmap_atomic(kaddr, KM_USER0); + nilfs_mdt_mark_buffer_dirty(header_bh); + nilfs_mdt_mark_dirty(sufile); + + out_bh: + for (i--; i >= 0; i--) + brelse(su_bh[i]); + brelse(header_bh); + + out_su_bh: + if (su_bh != su_bh_prealloc) + kfree(su_bh); + + out_sem: + up_write(&NILFS_MDT(sufile)->mi_sem); + return ret; +} + +/** + * nilfs_sufile_free - + * @sufile: + * @segnum: + */ +int nilfs_sufile_free(struct inode *sufile, __u64 segnum) +{ + return nilfs_sufile_freev(sufile, &segnum, 1); +} + +/** + * nilfs_sufile_get_segment_usage - get a segment usage + * @sufile: inode of segment usage file + * @segnum: segment number + * @sup: pointer to segment usage + * @bhp: pointer to buffer head + * + * Description: nilfs_sufile_get_segment_usage() acquires the segment usage + * specified by @segnum. + * + * Return Value: On success, 0 is returned, and the segment usage and the + * buffer head of the buffer on which the segment usage is located are stored + * in the place pointed by @sup and @bhp, respectively. On error, one of the + * following negative error codes is returned. + * + * %-EIO - I/O error. + * + * %-ENOMEM - Insufficient amount of memory available. + * + * %-EINVAL - Invalid segment usage number. + */ +int nilfs_sufile_get_segment_usage(struct inode *sufile, __u64 segnum, + struct nilfs_segment_usage **sup, + struct buffer_head **bhp) +{ + struct buffer_head *bh; + struct nilfs_segment_usage *su; + void *kaddr; + int ret; + + /* segnum is 0 origin */ + BUG_ON(segnum >= nilfs_sufile_get_nsegments(sufile)); + + down_write(&NILFS_MDT(sufile)->mi_sem); + ret = nilfs_sufile_get_segment_usage_block(sufile, segnum, 1, &bh); + if (ret < 0) + goto out_sem; + kaddr = kmap(bh->b_page); + su = nilfs_sufile_block_get_segment_usage(sufile, segnum, bh, kaddr); + if (nilfs_segment_usage_error(su)) { + kunmap(bh->b_page); + brelse(bh); + ret = -EINVAL; + goto out_sem; + } + + if (sup != NULL) + *sup = su; + *bhp = bh; + + out_sem: + up_write(&NILFS_MDT(sufile)->mi_sem); + return ret; +} + +/** + * nilfs_sufile_put_segment_usage - put a segment usage + * @sufile: inode of segment usage file + * @segnum: segment number + * @bh: buffer head + * + * Description: nilfs_sufile_put_segment_usage() releases the segment usage + * specified by @segnum. @bh must be the buffer head which have been returned + * by a previous call to nilfs_sufile_get_segment_usage() with @segnum. + */ +void nilfs_sufile_put_segment_usage(struct inode *sufile, __u64 segnum, + struct buffer_head *bh) +{ + kunmap(bh->b_page); + brelse(bh); +} + +/** + * nilfs_sufile_get_stat - get segment usage statistics + * @sufile: inode of segment usage file + * @stat: pointer to a structure of segment usage statistics + * + * Description: nilfs_sufile_get_stat() returns information about segment + * usage. + * + * Return Value: On success, 0 is returned, and segment usage information is + * stored in the place pointed by @stat. On error, one of the following + * negative error codes is returned. + * + * %-EIO - I/O error. + * + * %-ENOMEM - Insufficient amount of memory available. + */ +int nilfs_sufile_get_stat(struct inode *sufile, struct nilfs_sustat *sustat) +{ + struct buffer_head *header_bh; + struct nilfs_sufile_header *header; + void *kaddr; + int ret; + + down_read(&NILFS_MDT(sufile)->mi_sem); + + ret = nilfs_sufile_get_header_block(sufile, &header_bh); + if (ret < 0) + goto out_sem; + + kaddr = kmap_atomic(header_bh->b_page, KM_USER0); + header = nilfs_sufile_block_get_header(sufile, header_bh, kaddr); + sustat->ss_nsegs = nilfs_sufile_get_nsegments(sufile); + sustat->ss_ncleansegs = le64_to_cpu(header->sh_ncleansegs); + sustat->ss_ndirtysegs = le64_to_cpu(header->sh_ndirtysegs); + sustat->ss_ctime = NILFS_MDT(sufile)->mi_nilfs->ns_ctime; + sustat->ss_nongc_ctime = NILFS_MDT(sufile)->mi_nilfs->ns_nongc_ctime; + kunmap_atomic(kaddr, KM_USER0); + brelse(header_bh); + + out_sem: + up_read(&NILFS_MDT(sufile)->mi_sem); + return ret; +} + +/** + * nilfs_sufile_get_ncleansegs - get the number of clean segments + * @sufile: inode of segment usage file + * @nsegsp: pointer to the number of clean segments + * + * Description: nilfs_sufile_get_ncleansegs() acquires the number of clean + * segments. + * + * Return Value: On success, 0 is returned and the number of clean segments is + * stored in the place pointed by @nsegsp. On error, one of the following + * negative error codes is returned. + * + * %-EIO - I/O error. + * + * %-ENOMEM - Insufficient amount of memory available. + */ +int nilfs_sufile_get_ncleansegs(struct inode *sufile, unsigned long *nsegsp) +{ + struct nilfs_sustat sustat; + int ret; + + ret = nilfs_sufile_get_stat(sufile, &sustat); + if (ret == 0) + *nsegsp = sustat.ss_ncleansegs; + return ret; +} + +/** + * nilfs_sufile_set_error - mark a segment as erroneous + * @sufile: inode of segment usage file + * @segnum: segment number + * + * Description: nilfs_sufile_set_error() marks the segment specified by + * @segnum as erroneous. The error segment will never be used again. + * + * Return Value: On success, 0 is returned. On error, one of the following + * negative error codes is returned. + * + * %-EIO - I/O error. + * + * %-ENOMEM - Insufficient amount of memory available. + */ +int nilfs_sufile_set_error(struct inode *sufile, __u64 segnum) +{ + struct buffer_head *header_bh, *su_bh; + struct nilfs_segment_usage *su; + struct nilfs_sufile_header *header; + void *kaddr; + int ret; + + BUG_ON(segnum >= nilfs_sufile_get_nsegments(sufile)); + + down_write(&NILFS_MDT(sufile)->mi_sem); + + ret = nilfs_sufile_get_header_block(sufile, &header_bh); + if (ret < 0) + goto out_sem; + ret = nilfs_sufile_get_segment_usage_block(sufile, segnum, 0, &su_bh); + if (ret < 0) + goto out_header; + + kaddr = kmap_atomic(su_bh->b_page, KM_USER0); + su = nilfs_sufile_block_get_segment_usage(sufile, segnum, su_bh, kaddr); + if (nilfs_segment_usage_error(su)) { + kunmap_atomic(kaddr, KM_USER0); + brelse(su_bh); + goto out_header; + } + + nilfs_segment_usage_set_error(su); + kunmap_atomic(kaddr, KM_USER0); + brelse(su_bh); + + kaddr = kmap_atomic(header_bh->b_page, KM_USER0); + header = nilfs_sufile_block_get_header(sufile, header_bh, kaddr); + le64_add_cpu(&header->sh_ndirtysegs, -1); + kunmap_atomic(kaddr, KM_USER0); + nilfs_mdt_mark_buffer_dirty(header_bh); + nilfs_mdt_mark_buffer_dirty(su_bh); + nilfs_mdt_mark_dirty(sufile); + brelse(su_bh); + + out_header: + brelse(header_bh); + + out_sem: + up_write(&NILFS_MDT(sufile)->mi_sem); + return ret; +} + +/** + * nilfs_sufile_get_suinfo - + * @sufile: inode of segment usage file + * @segnum: segment number to start looking + * @si: array of suinfo + * @nsi: size of suinfo array + * + * Description: + * + * Return Value: On success, 0 is returned and .... On error, one of the + * following negative error codes is returned. + * + * %-EIO - I/O error. + * + * %-ENOMEM - Insufficient amount of memory available. + */ +ssize_t nilfs_sufile_get_suinfo(struct inode *sufile, __u64 segnum, + struct nilfs_suinfo *si, size_t nsi) +{ + struct buffer_head *su_bh; + struct nilfs_segment_usage *su; + size_t susz = NILFS_MDT(sufile)->mi_entry_size; + void *kaddr; + unsigned long nsegs, segusages_per_block; + ssize_t n; + int ret, i, j; + + down_read(&NILFS_MDT(sufile)->mi_sem); + + segusages_per_block = nilfs_sufile_segment_usages_per_block(sufile); + nsegs = min_t(unsigned long, + nilfs_sufile_get_nsegments(sufile) - segnum, + nsi); + for (i = 0; i < nsegs; i += n, segnum += n) { + n = min_t(unsigned long, + segusages_per_block - + nilfs_sufile_get_offset(sufile, segnum), + nsegs - i); + ret = nilfs_sufile_get_segment_usage_block(sufile, segnum, 0, + &su_bh); + if (ret < 0) { + if (ret != -ENOENT) + goto out; + /* hole */ + memset(&si[i], 0, sizeof(struct nilfs_suinfo) * n); + continue; + } + + kaddr = kmap_atomic(su_bh->b_page, KM_USER0); + su = nilfs_sufile_block_get_segment_usage( + sufile, segnum, su_bh, kaddr); + for (j = 0; j < n; j++, su = (void *)su + susz) { + si[i + j].sui_lastmod = le64_to_cpu(su->su_lastmod); + si[i + j].sui_nblocks = le32_to_cpu(su->su_nblocks); + si[i + j].sui_flags = le32_to_cpu(su->su_flags); + } + kunmap_atomic(kaddr, KM_USER0); + brelse(su_bh); + } + ret = nsegs; + + out: + up_read(&NILFS_MDT(sufile)->mi_sem); + return ret; +} diff --git a/fs/nilfs2/sufile.h b/fs/nilfs2/sufile.h new file mode 100644 index 00000000000..d595f33a768 --- /dev/null +++ b/fs/nilfs2/sufile.h @@ -0,0 +1,54 @@ +/* + * sufile.h - NILFS segment usage file. + * + * Copyright (C) 2006-2008 Nippon Telegraph and Telephone Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + * Written by Koji Sato . + */ + +#ifndef _NILFS_SUFILE_H +#define _NILFS_SUFILE_H + +#include +#include +#include +#include "mdt.h" + +#define NILFS_SUFILE_GFP NILFS_MDT_GFP + +static inline unsigned long nilfs_sufile_get_nsegments(struct inode *sufile) +{ + return NILFS_MDT(sufile)->mi_nilfs->ns_nsegments; +} + +int nilfs_sufile_alloc(struct inode *, __u64 *); +int nilfs_sufile_cancel_free(struct inode *, __u64); +int nilfs_sufile_freev(struct inode *, __u64 *, size_t); +int nilfs_sufile_free(struct inode *, __u64); +int nilfs_sufile_get_segment_usage(struct inode *, __u64, + struct nilfs_segment_usage **, + struct buffer_head **); +void nilfs_sufile_put_segment_usage(struct inode *, __u64, + struct buffer_head *); +int nilfs_sufile_get_stat(struct inode *, struct nilfs_sustat *); +int nilfs_sufile_get_ncleansegs(struct inode *, unsigned long *); +int nilfs_sufile_set_error(struct inode *, __u64); +ssize_t nilfs_sufile_get_suinfo(struct inode *, __u64, struct nilfs_suinfo *, + size_t); + + +#endif /* _NILFS_SUFILE_H */ -- cgit v1.2.3 From 05fe58fdc10df9ebea04c0eaed57adc47af5c184 Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Mon, 6 Apr 2009 19:01:32 -0700 Subject: nilfs2: inode operations This adds inode level operations of the nilfs2 file system. Signed-off-by: Ryusuke Konishi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/nilfs2/inode.c | 819 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 819 insertions(+) create mode 100644 fs/nilfs2/inode.c (limited to 'fs/nilfs2') diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c new file mode 100644 index 00000000000..b4697d9d7e5 --- /dev/null +++ b/fs/nilfs2/inode.c @@ -0,0 +1,819 @@ +/* + * inode.c - NILFS inode operations. + * + * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + * Written by Ryusuke Konishi + * + */ + +#include +#include +#include +#include "nilfs.h" +#include "segment.h" +#include "page.h" +#include "mdt.h" +#include "cpfile.h" +#include "ifile.h" + + +/** + * nilfs_get_block() - get a file block on the filesystem (callback function) + * @inode - inode struct of the target file + * @blkoff - file block number + * @bh_result - buffer head to be mapped on + * @create - indicate whether allocating the block or not when it has not + * been allocated yet. + * + * This function does not issue actual read request of the specified data + * block. It is done by VFS. + * Bulk read for direct-io is not supported yet. (should be supported) + */ +int nilfs_get_block(struct inode *inode, sector_t blkoff, + struct buffer_head *bh_result, int create) +{ + struct nilfs_inode_info *ii = NILFS_I(inode); + unsigned long blknum = 0; + int err = 0, ret; + struct inode *dat = nilfs_dat_inode(NILFS_I_NILFS(inode)); + + /* This exclusion control is a workaround; should be revised */ + down_read(&NILFS_MDT(dat)->mi_sem); /* XXX */ + ret = nilfs_bmap_lookup(ii->i_bmap, (unsigned long)blkoff, &blknum); + up_read(&NILFS_MDT(dat)->mi_sem); /* XXX */ + if (ret == 0) { /* found */ + map_bh(bh_result, inode->i_sb, blknum); + goto out; + } + if (unlikely(ret == 1)) { + printk(KERN_ERR "nilfs_get_block: bmap_lookup returns " + "buffer_head pointer (blkoff=%llu, blknum=%lu)\n", + (unsigned long long)blkoff, blknum); + BUG(); + } + /* data block was not found */ + if (ret == -ENOENT && create) { + struct nilfs_transaction_info ti; + + bh_result->b_blocknr = 0; + err = nilfs_transaction_begin(inode->i_sb, &ti, 1); + if (unlikely(err)) + goto out; + err = nilfs_bmap_insert(ii->i_bmap, (unsigned long)blkoff, + (unsigned long)bh_result); + nilfs_transaction_end(inode->i_sb, !err); + if (unlikely(err != 0)) { + if (err == -EEXIST) { + /* + * The get_block() function could be called + * from multiple callers for an inode. + * However, the page having this block must + * be locked in this case. + */ + printk(KERN_ERR + "nilfs_get_block: a race condition " + "while inserting a data block. " + "(inode number=%lu, file block " + "offset=%llu)\n", + inode->i_ino, + (unsigned long long)blkoff); + BUG(); + } else if (err == -EINVAL) { + nilfs_error(inode->i_sb, __func__, + "broken bmap (inode=%lu)\n", + inode->i_ino); + err = -EIO; + } + goto out; + } + /* Error handling should be detailed */ + set_buffer_new(bh_result); + map_bh(bh_result, inode->i_sb, 0); /* dbn must be changed + to proper value */ + } else if (ret == -ENOENT) { + /* not found is not error (e.g. hole); must return without + the mapped state flag. */ + ; + } else { + err = ret; + } + + out: + return err; +} + +/** + * nilfs_readpage() - implement readpage() method of nilfs_aops {} + * address_space_operations. + * @file - file struct of the file to be read + * @page - the page to be read + */ +static int nilfs_readpage(struct file *file, struct page *page) +{ + return mpage_readpage(page, nilfs_get_block); +} + +/** + * nilfs_readpages() - implement readpages() method of nilfs_aops {} + * address_space_operations. + * @file - file struct of the file to be read + * @mapping - address_space struct used for reading multiple pages + * @pages - the pages to be read + * @nr_pages - number of pages to be read + */ +static int nilfs_readpages(struct file *file, struct address_space *mapping, + struct list_head *pages, unsigned nr_pages) +{ + return mpage_readpages(mapping, pages, nr_pages, nilfs_get_block); +} + +static int nilfs_writepages(struct address_space *mapping, + struct writeback_control *wbc) +{ + /* This empty method is required not to call generic_writepages() */ + return 0; +} + +static int nilfs_writepage(struct page *page, struct writeback_control *wbc) +{ + struct inode *inode = page->mapping->host; + int err; + + redirty_page_for_writepage(wbc, page); + unlock_page(page); + + if (wbc->sync_mode == WB_SYNC_ALL) { + err = nilfs_construct_segment(inode->i_sb); + if (unlikely(err)) + return err; + } else if (wbc->for_reclaim) + nilfs_flush_segment(inode->i_sb, inode->i_ino); + + return 0; +} + +static int nilfs_set_page_dirty(struct page *page) +{ + int ret = __set_page_dirty_buffers(page); + + if (ret) { + struct inode *inode = page->mapping->host; + struct nilfs_sb_info *sbi = NILFS_SB(inode->i_sb); + unsigned nr_dirty = 1 << (PAGE_SHIFT - inode->i_blkbits); + + nilfs_set_file_dirty(sbi, inode, nr_dirty); + } + return ret; +} + +static int nilfs_write_begin(struct file *file, struct address_space *mapping, + loff_t pos, unsigned len, unsigned flags, + struct page **pagep, void **fsdata) + +{ + struct inode *inode = mapping->host; + int err = nilfs_transaction_begin(inode->i_sb, NULL, 1); + + if (unlikely(err)) + return err; + + *pagep = NULL; + err = block_write_begin(file, mapping, pos, len, flags, pagep, + fsdata, nilfs_get_block); + if (unlikely(err)) + nilfs_transaction_end(inode->i_sb, 0); + return err; +} + +static int nilfs_write_end(struct file *file, struct address_space *mapping, + loff_t pos, unsigned len, unsigned copied, + struct page *page, void *fsdata) +{ + struct inode *inode = mapping->host; + unsigned start = pos & (PAGE_CACHE_SIZE - 1); + unsigned nr_dirty; + int err; + + nr_dirty = nilfs_page_count_clean_buffers(page, start, + start + copied); + copied = generic_write_end(file, mapping, pos, len, copied, page, + fsdata); + nilfs_set_file_dirty(NILFS_SB(inode->i_sb), inode, nr_dirty); + err = nilfs_transaction_end(inode->i_sb, 1); + return err ? : copied; +} + +static ssize_t +nilfs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, + loff_t offset, unsigned long nr_segs) +{ + struct file *file = iocb->ki_filp; + struct inode *inode = file->f_mapping->host; + ssize_t size; + int err; + + err = nilfs_construct_dsync_segment(inode->i_sb, inode); + if (unlikely(err)) + return err; + + if (rw == WRITE) + return 0; + + /* Needs synchronization with the cleaner */ + size = blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov, + offset, nr_segs, nilfs_get_block, NULL); + return size; +} + +struct address_space_operations nilfs_aops = { + .writepage = nilfs_writepage, + .readpage = nilfs_readpage, + /* .sync_page = nilfs_sync_page, */ + .writepages = nilfs_writepages, + .set_page_dirty = nilfs_set_page_dirty, + .readpages = nilfs_readpages, + .write_begin = nilfs_write_begin, + .write_end = nilfs_write_end, + /* .releasepage = nilfs_releasepage, */ + .invalidatepage = block_invalidatepage, + .direct_IO = nilfs_direct_IO, +}; + +struct inode *nilfs_new_inode(struct inode *dir, int mode) +{ + struct super_block *sb = dir->i_sb; + struct nilfs_sb_info *sbi = NILFS_SB(sb); + struct inode *inode; + struct nilfs_inode_info *ii; + int err = -ENOMEM; + ino_t ino; + + inode = new_inode(sb); + if (unlikely(!inode)) + goto failed; + + mapping_set_gfp_mask(inode->i_mapping, + mapping_gfp_mask(inode->i_mapping) & ~__GFP_FS); + + ii = NILFS_I(inode); + ii->i_state = 1 << NILFS_I_NEW; + + err = nilfs_ifile_create_inode(sbi->s_ifile, &ino, &ii->i_bh); + if (unlikely(err)) + goto failed_ifile_create_inode; + /* reference count of i_bh inherits from nilfs_mdt_read_block() */ + + atomic_inc(&sbi->s_inodes_count); + + inode->i_uid = current_fsuid(); + if (dir->i_mode & S_ISGID) { + inode->i_gid = dir->i_gid; + if (S_ISDIR(mode)) + mode |= S_ISGID; + } else + inode->i_gid = current_fsgid(); + + inode->i_mode = mode; + inode->i_ino = ino; + inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; + + if (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode)) { + err = nilfs_bmap_read(ii->i_bmap, NULL); + if (err < 0) + goto failed_bmap; + + set_bit(NILFS_I_BMAP, &ii->i_state); + /* No lock is needed; iget() ensures it. */ + } + + ii->i_flags = NILFS_I(dir)->i_flags; + if (S_ISLNK(mode)) + ii->i_flags &= ~(NILFS_IMMUTABLE_FL | NILFS_APPEND_FL); + if (!S_ISDIR(mode)) + ii->i_flags &= ~NILFS_DIRSYNC_FL; + + /* ii->i_file_acl = 0; */ + /* ii->i_dir_acl = 0; */ + ii->i_dtime = 0; + ii->i_dir_start_lookup = 0; +#ifdef CONFIG_NILFS_FS_POSIX_ACL + ii->i_acl = NULL; + ii->i_default_acl = NULL; +#endif + ii->i_cno = 0; + nilfs_set_inode_flags(inode); + spin_lock(&sbi->s_next_gen_lock); + inode->i_generation = sbi->s_next_generation++; + spin_unlock(&sbi->s_next_gen_lock); + insert_inode_hash(inode); + + err = nilfs_init_acl(inode, dir); + if (unlikely(err)) + goto failed_acl; /* never occur. When supporting + nilfs_init_acl(), proper cancellation of + above jobs should be considered */ + + mark_inode_dirty(inode); + return inode; + + failed_acl: + failed_bmap: + inode->i_nlink = 0; + iput(inode); /* raw_inode will be deleted through + generic_delete_inode() */ + goto failed; + + failed_ifile_create_inode: + make_bad_inode(inode); + iput(inode); /* if i_nlink == 1, generic_forget_inode() will be + called */ + failed: + return ERR_PTR(err); +} + +void nilfs_free_inode(struct inode *inode) +{ + struct super_block *sb = inode->i_sb; + struct nilfs_sb_info *sbi = NILFS_SB(sb); + + clear_inode(inode); + /* XXX: check error code? Is there any thing I can do? */ + (void) nilfs_ifile_delete_inode(sbi->s_ifile, inode->i_ino); + atomic_dec(&sbi->s_inodes_count); +} + +void nilfs_set_inode_flags(struct inode *inode) +{ + unsigned int flags = NILFS_I(inode)->i_flags; + + inode->i_flags &= ~(S_SYNC | S_APPEND | S_IMMUTABLE | S_NOATIME | + S_DIRSYNC); + if (flags & NILFS_SYNC_FL) + inode->i_flags |= S_SYNC; + if (flags & NILFS_APPEND_FL) + inode->i_flags |= S_APPEND; + if (flags & NILFS_IMMUTABLE_FL) + inode->i_flags |= S_IMMUTABLE; +#ifndef NILFS_ATIME_DISABLE + if (flags & NILFS_NOATIME_FL) +#endif + inode->i_flags |= S_NOATIME; + if (flags & NILFS_DIRSYNC_FL) + inode->i_flags |= S_DIRSYNC; + mapping_set_gfp_mask(inode->i_mapping, + mapping_gfp_mask(inode->i_mapping) & ~__GFP_FS); +} + +int nilfs_read_inode_common(struct inode *inode, + struct nilfs_inode *raw_inode) +{ + struct nilfs_inode_info *ii = NILFS_I(inode); + int err; + + inode->i_mode = le16_to_cpu(raw_inode->i_mode); + inode->i_uid = (uid_t)le32_to_cpu(raw_inode->i_uid); + inode->i_gid = (gid_t)le32_to_cpu(raw_inode->i_gid); + inode->i_nlink = le16_to_cpu(raw_inode->i_links_count); + inode->i_size = le64_to_cpu(raw_inode->i_size); + inode->i_atime.tv_sec = le64_to_cpu(raw_inode->i_mtime); + inode->i_ctime.tv_sec = le64_to_cpu(raw_inode->i_ctime); + inode->i_mtime.tv_sec = le64_to_cpu(raw_inode->i_mtime); + inode->i_atime.tv_nsec = 0; + inode->i_ctime.tv_nsec = 0; + inode->i_mtime.tv_nsec = 0; + ii->i_dtime = le64_to_cpu(raw_inode->i_dtime); + if (inode->i_nlink == 0 && (inode->i_mode == 0 || ii->i_dtime)) + return -EINVAL; /* this inode is deleted */ + + inode->i_blocks = le64_to_cpu(raw_inode->i_blocks); + ii->i_flags = le32_to_cpu(raw_inode->i_flags); +#if 0 + ii->i_file_acl = le32_to_cpu(raw_inode->i_file_acl); + ii->i_dir_acl = S_ISREG(inode->i_mode) ? + 0 : le32_to_cpu(raw_inode->i_dir_acl); +#endif + ii->i_cno = 0; + inode->i_generation = le32_to_cpu(raw_inode->i_generation); + + if (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || + S_ISLNK(inode->i_mode)) { + err = nilfs_bmap_read(ii->i_bmap, raw_inode); + if (err < 0) + return err; + set_bit(NILFS_I_BMAP, &ii->i_state); + /* No lock is needed; iget() ensures it. */ + } + return 0; +} + +static int nilfs_read_sketch_inode(struct inode *inode) +{ + struct nilfs_sb_info *sbi = NILFS_SB(inode->i_sb); + int err = 0; + + if (sbi->s_snapshot_cno) { + struct the_nilfs *nilfs = sbi->s_nilfs; + struct buffer_head *bh_cp; + struct nilfs_checkpoint *raw_cp; + + err = nilfs_cpfile_get_checkpoint( + nilfs->ns_cpfile, sbi->s_snapshot_cno, 0, &raw_cp, + &bh_cp); + if (likely(!err)) { + if (!nilfs_checkpoint_sketch(raw_cp)) + inode->i_size = 0; + nilfs_cpfile_put_checkpoint( + nilfs->ns_cpfile, sbi->s_snapshot_cno, bh_cp); + } + inode->i_flags |= S_NOCMTIME; + } + return err; +} + +static int __nilfs_read_inode(struct super_block *sb, unsigned long ino, + struct inode *inode) +{ + struct nilfs_sb_info *sbi = NILFS_SB(sb); + struct inode *dat = nilfs_dat_inode(sbi->s_nilfs); + struct buffer_head *bh; + struct nilfs_inode *raw_inode; + int err; + + down_read(&NILFS_MDT(dat)->mi_sem); /* XXX */ + err = nilfs_ifile_get_inode_block(sbi->s_ifile, ino, &bh); + if (unlikely(err)) + goto bad_inode; + + raw_inode = nilfs_ifile_map_inode(sbi->s_ifile, ino, bh); + +#ifdef CONFIG_NILFS_FS_POSIX_ACL + ii->i_acl = NILFS_ACL_NOT_CACHED; + ii->i_default_acl = NILFS_ACL_NOT_CACHED; +#endif + if (nilfs_read_inode_common(inode, raw_inode)) + goto failed_unmap; + + if (S_ISREG(inode->i_mode)) { + inode->i_op = &nilfs_file_inode_operations; + inode->i_fop = &nilfs_file_operations; + inode->i_mapping->a_ops = &nilfs_aops; + if (unlikely(inode->i_ino == NILFS_SKETCH_INO)) { + err = nilfs_read_sketch_inode(inode); + if (unlikely(err)) + goto failed_unmap; + } + } else if (S_ISDIR(inode->i_mode)) { + inode->i_op = &nilfs_dir_inode_operations; + inode->i_fop = &nilfs_dir_operations; + inode->i_mapping->a_ops = &nilfs_aops; + } else if (S_ISLNK(inode->i_mode)) { + inode->i_op = &nilfs_symlink_inode_operations; + inode->i_mapping->a_ops = &nilfs_aops; + } else { + inode->i_op = &nilfs_special_inode_operations; + init_special_inode( + inode, inode->i_mode, + new_decode_dev(le64_to_cpu(raw_inode->i_device_code))); + } + nilfs_ifile_unmap_inode(sbi->s_ifile, ino, bh); + brelse(bh); + up_read(&NILFS_MDT(dat)->mi_sem); /* XXX */ + nilfs_set_inode_flags(inode); + return 0; + + failed_unmap: + nilfs_ifile_unmap_inode(sbi->s_ifile, ino, bh); + brelse(bh); + + bad_inode: + up_read(&NILFS_MDT(dat)->mi_sem); /* XXX */ + return err; +} + +struct inode *nilfs_iget(struct super_block *sb, unsigned long ino) +{ + struct inode *inode; + int err; + + inode = iget_locked(sb, ino); + if (unlikely(!inode)) + return ERR_PTR(-ENOMEM); + if (!(inode->i_state & I_NEW)) + return inode; + + err = __nilfs_read_inode(sb, ino, inode); + if (unlikely(err)) { + iget_failed(inode); + return ERR_PTR(err); + } + unlock_new_inode(inode); + return inode; +} + +void nilfs_write_inode_common(struct inode *inode, + struct nilfs_inode *raw_inode, int has_bmap) +{ + struct nilfs_inode_info *ii = NILFS_I(inode); + + raw_inode->i_mode = cpu_to_le16(inode->i_mode); + raw_inode->i_uid = cpu_to_le32(inode->i_uid); + raw_inode->i_gid = cpu_to_le32(inode->i_gid); + raw_inode->i_links_count = cpu_to_le16(inode->i_nlink); + raw_inode->i_size = cpu_to_le64(inode->i_size); + raw_inode->i_ctime = cpu_to_le64(inode->i_ctime.tv_sec); + raw_inode->i_mtime = cpu_to_le64(inode->i_mtime.tv_sec); + raw_inode->i_blocks = cpu_to_le64(inode->i_blocks); + + raw_inode->i_dtime = cpu_to_le64(ii->i_dtime); + raw_inode->i_flags = cpu_to_le32(ii->i_flags); + raw_inode->i_generation = cpu_to_le32(inode->i_generation); + + if (has_bmap) + nilfs_bmap_write(ii->i_bmap, raw_inode); + else if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) + raw_inode->i_device_code = + cpu_to_le64(new_encode_dev(inode->i_rdev)); + /* When extending inode, nilfs->ns_inode_size should be checked + for substitutions of appended fields */ +} + +void nilfs_update_inode(struct inode *inode, struct buffer_head *ibh) +{ + ino_t ino = inode->i_ino; + struct nilfs_inode_info *ii = NILFS_I(inode); + struct super_block *sb = inode->i_sb; + struct nilfs_sb_info *sbi = NILFS_SB(sb); + struct nilfs_inode *raw_inode; + + raw_inode = nilfs_ifile_map_inode(sbi->s_ifile, ino, ibh); + + /* The buffer is guarded with lock_buffer() by the caller */ + if (test_and_clear_bit(NILFS_I_NEW, &ii->i_state)) + memset(raw_inode, 0, NILFS_MDT(sbi->s_ifile)->mi_entry_size); + set_bit(NILFS_I_INODE_DIRTY, &ii->i_state); + + nilfs_write_inode_common(inode, raw_inode, 0); + /* XXX: call with has_bmap = 0 is a workaround to avoid + deadlock of bmap. This delays update of i_bmap to just + before writing */ + nilfs_ifile_unmap_inode(sbi->s_ifile, ino, ibh); +} + +#define NILFS_MAX_TRUNCATE_BLOCKS 16384 /* 64MB for 4KB block */ + +static void nilfs_truncate_bmap(struct nilfs_inode_info *ii, + unsigned long from) +{ + unsigned long b; + int ret; + + if (!test_bit(NILFS_I_BMAP, &ii->i_state)) + return; + repeat: + ret = nilfs_bmap_last_key(ii->i_bmap, &b); + if (ret == -ENOENT) + return; + else if (ret < 0) + goto failed; + + if (b < from) + return; + + b -= min_t(unsigned long, NILFS_MAX_TRUNCATE_BLOCKS, b - from); + ret = nilfs_bmap_truncate(ii->i_bmap, b); + nilfs_relax_pressure_in_lock(ii->vfs_inode.i_sb); + if (!ret || (ret == -ENOMEM && + nilfs_bmap_truncate(ii->i_bmap, b) == 0)) + goto repeat; + + failed: + if (ret == -EINVAL) + nilfs_error(ii->vfs_inode.i_sb, __func__, + "bmap is broken (ino=%lu)", ii->vfs_inode.i_ino); + else + nilfs_warning(ii->vfs_inode.i_sb, __func__, + "failed to truncate bmap (ino=%lu, err=%d)", + ii->vfs_inode.i_ino, ret); +} + +void nilfs_truncate(struct inode *inode) +{ + unsigned long blkoff; + unsigned int blocksize; + struct nilfs_transaction_info ti; + struct super_block *sb = inode->i_sb; + struct nilfs_inode_info *ii = NILFS_I(inode); + int ret; + + if (!test_bit(NILFS_I_BMAP, &ii->i_state)) + return; + if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) + return; + + blocksize = sb->s_blocksize; + blkoff = (inode->i_size + blocksize - 1) >> sb->s_blocksize_bits; + ret = nilfs_transaction_begin(sb, &ti, 0); + BUG_ON(ret); + + block_truncate_page(inode->i_mapping, inode->i_size, nilfs_get_block); + + nilfs_truncate_bmap(ii, blkoff); + + inode->i_mtime = inode->i_ctime = CURRENT_TIME; + if (IS_SYNC(inode)) + nilfs_set_transaction_flag(NILFS_TI_SYNC); + + nilfs_set_file_dirty(NILFS_SB(sb), inode, 0); + nilfs_transaction_end(sb, 1); + /* May construct a logical segment and may fail in sync mode. + But truncate has no return value. */ +} + +void nilfs_delete_inode(struct inode *inode) +{ + struct nilfs_transaction_info ti; + struct super_block *sb = inode->i_sb; + struct nilfs_inode_info *ii = NILFS_I(inode); + int err; + + if (unlikely(is_bad_inode(inode))) { + if (inode->i_data.nrpages) + truncate_inode_pages(&inode->i_data, 0); + clear_inode(inode); + return; + } + err = nilfs_transaction_begin(sb, &ti, 0); + BUG_ON(err); + if (inode->i_data.nrpages) + truncate_inode_pages(&inode->i_data, 0); + + nilfs_truncate_bmap(ii, 0); + nilfs_free_inode(inode); + /* nilfs_free_inode() marks inode buffer dirty */ + if (IS_SYNC(inode)) + nilfs_set_transaction_flag(NILFS_TI_SYNC); + nilfs_transaction_end(sb, 1); + /* May construct a logical segment and may fail in sync mode. + But delete_inode has no return value. */ +} + +int nilfs_setattr(struct dentry *dentry, struct iattr *iattr) +{ + struct nilfs_transaction_info ti; + struct inode *inode = dentry->d_inode; + struct super_block *sb = inode->i_sb; + int err, err2; + + err = inode_change_ok(inode, iattr); + if (err) + return err; + + err = nilfs_transaction_begin(sb, &ti, 0); + if (unlikely(err)) + return err; + err = inode_setattr(inode, iattr); + if (!err && (iattr->ia_valid & ATTR_MODE)) + err = nilfs_acl_chmod(inode); + err2 = nilfs_transaction_end(sb, 1); + return err ? : err2; +} + +int nilfs_load_inode_block(struct nilfs_sb_info *sbi, struct inode *inode, + struct buffer_head **pbh) +{ + struct nilfs_inode_info *ii = NILFS_I(inode); + int err; + + spin_lock(&sbi->s_inode_lock); + /* Caller of this function MUST lock s_inode_lock */ + if (ii->i_bh == NULL) { + spin_unlock(&sbi->s_inode_lock); + err = nilfs_ifile_get_inode_block(sbi->s_ifile, inode->i_ino, + pbh); + if (unlikely(err)) + return err; + spin_lock(&sbi->s_inode_lock); + if (ii->i_bh == NULL) + ii->i_bh = *pbh; + else { + brelse(*pbh); + *pbh = ii->i_bh; + } + } else + *pbh = ii->i_bh; + + get_bh(*pbh); + spin_unlock(&sbi->s_inode_lock); + return 0; +} + +int nilfs_inode_dirty(struct inode *inode) +{ + struct nilfs_inode_info *ii = NILFS_I(inode); + struct nilfs_sb_info *sbi = NILFS_SB(inode->i_sb); + int ret = 0; + + if (!list_empty(&ii->i_dirty)) { + spin_lock(&sbi->s_inode_lock); + ret = test_bit(NILFS_I_DIRTY, &ii->i_state) || + test_bit(NILFS_I_BUSY, &ii->i_state); + spin_unlock(&sbi->s_inode_lock); + } + return ret; +} + +int nilfs_set_file_dirty(struct nilfs_sb_info *sbi, struct inode *inode, + unsigned nr_dirty) +{ + struct nilfs_inode_info *ii = NILFS_I(inode); + + atomic_add(nr_dirty, &sbi->s_nilfs->ns_ndirtyblks); + + if (test_and_set_bit(NILFS_I_DIRTY, &ii->i_state) || + unlikely(inode->i_ino == NILFS_SKETCH_INO)) + return 0; + + spin_lock(&sbi->s_inode_lock); + if (!test_bit(NILFS_I_QUEUED, &ii->i_state) && + !test_bit(NILFS_I_BUSY, &ii->i_state)) { + /* Because this routine may race with nilfs_dispose_list(), + we have to check NILFS_I_QUEUED here, too. */ + if (list_empty(&ii->i_dirty) && igrab(inode) == NULL) { + /* This will happen when somebody is freeing + this inode. */ + nilfs_warning(sbi->s_super, __func__, + "cannot get inode (ino=%lu)\n", + inode->i_ino); + spin_unlock(&sbi->s_inode_lock); + return -EINVAL; /* NILFS_I_DIRTY may remain for + freeing inode */ + } + list_del(&ii->i_dirty); + list_add_tail(&ii->i_dirty, &sbi->s_dirty_files); + set_bit(NILFS_I_QUEUED, &ii->i_state); + } + spin_unlock(&sbi->s_inode_lock); + return 0; +} + +int nilfs_mark_inode_dirty(struct inode *inode) +{ + struct nilfs_sb_info *sbi = NILFS_SB(inode->i_sb); + struct buffer_head *ibh; + int err; + + err = nilfs_load_inode_block(sbi, inode, &ibh); + if (unlikely(err)) { + nilfs_warning(inode->i_sb, __func__, + "failed to reget inode block.\n"); + return err; + } + lock_buffer(ibh); + nilfs_update_inode(inode, ibh); + unlock_buffer(ibh); + nilfs_mdt_mark_buffer_dirty(ibh); + nilfs_mdt_mark_dirty(sbi->s_ifile); + brelse(ibh); + return 0; +} + +/** + * nilfs_dirty_inode - reflect changes on given inode to an inode block. + * @inode: inode of the file to be registered. + * + * nilfs_dirty_inode() loads a inode block containing the specified + * @inode and copies data from a nilfs_inode to a corresponding inode + * entry in the inode block. This operation is excluded from the segment + * construction. This function can be called both as a single operation + * and as a part of indivisible file operations. + */ +void nilfs_dirty_inode(struct inode *inode) +{ + struct nilfs_transaction_info ti; + + if (is_bad_inode(inode)) { + nilfs_warning(inode->i_sb, __func__, + "tried to mark bad_inode dirty. ignored.\n"); + dump_stack(); + return; + } + nilfs_transaction_begin(inode->i_sb, &ti, 0); + if (likely(inode->i_ino != NILFS_SKETCH_INO)) + nilfs_mark_inode_dirty(inode); + nilfs_transaction_end(inode->i_sb, 1); /* never fails */ +} -- cgit v1.2.3 From f183ff4f05317b7929337455b233496f68217c1a Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Mon, 6 Apr 2009 19:01:33 -0700 Subject: nilfs2: file operations This adds primitives for regular file handling. Signed-off-by: Ryusuke Konishi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/nilfs2/file.c | 125 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 125 insertions(+) create mode 100644 fs/nilfs2/file.c (limited to 'fs/nilfs2') diff --git a/fs/nilfs2/file.c b/fs/nilfs2/file.c new file mode 100644 index 00000000000..7ddd42e24f7 --- /dev/null +++ b/fs/nilfs2/file.c @@ -0,0 +1,125 @@ +/* + * file.c - NILFS regular file handling primitives including fsync(). + * + * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + * Written by Amagai Yoshiji , + * Ryusuke Konishi + */ + +#include +#include +#include +#include "nilfs.h" +#include "segment.h" + +int nilfs_sync_file(struct file *file, struct dentry *dentry, int datasync) +{ + /* + * Called from fsync() system call + * This is the only entry point that can catch write and synch + * timing for both data blocks and intermediate blocks. + * + * This function should be implemented when the writeback function + * will be implemented. + */ + struct inode *inode = dentry->d_inode; + int err; + + if (!nilfs_inode_dirty(inode)) + return 0; + + if (datasync) + err = nilfs_construct_dsync_segment(inode->i_sb, inode); + else + err = nilfs_construct_segment(inode->i_sb); + + return err; +} + +static ssize_t +nilfs_file_aio_write(struct kiocb *iocb, const struct iovec *iov, + unsigned long nr_segs, loff_t pos) +{ + struct file *file = iocb->ki_filp; + struct inode *inode = file->f_dentry->d_inode; + ssize_t ret; + + ret = generic_file_aio_write(iocb, iov, nr_segs, pos); + if (ret <= 0) + return ret; + + if ((file->f_flags & O_SYNC) || IS_SYNC(inode)) { + int err; + + err = nilfs_construct_dsync_segment(inode->i_sb, inode); + if (unlikely(err)) + return err; + } + return ret; +} + +static int nilfs_page_mkwrite(struct vm_area_struct *vma, struct page *page) +{ + if (!(vma->vm_flags & (VM_WRITE | VM_MAYWRITE))) + return -EPERM; + SetPageChecked(page); + wait_on_page_writeback(page); + return 0; +} + +struct vm_operations_struct nilfs_file_vm_ops = { + .fault = filemap_fault, + .page_mkwrite = nilfs_page_mkwrite, +}; + +static int nilfs_file_mmap(struct file *file, struct vm_area_struct *vma) +{ + file_accessed(file); + vma->vm_ops = &nilfs_file_vm_ops; + vma->vm_flags |= VM_CAN_NONLINEAR; + return 0; +} + +/* + * We have mostly NULL's here: the current defaults are ok for + * the nilfs filesystem. + */ +struct file_operations nilfs_file_operations = { + .llseek = generic_file_llseek, + .read = do_sync_read, + .write = do_sync_write, + .aio_read = generic_file_aio_read, + .aio_write = nilfs_file_aio_write, + .ioctl = nilfs_ioctl, +#ifdef CONFIG_COMPAT + .compat_ioctl = nilfs_compat_ioctl, +#endif /* CONFIG_COMPAT */ + .mmap = nilfs_file_mmap, + .open = generic_file_open, + /* .release = nilfs_release_file, */ + .fsync = nilfs_sync_file, + .splice_read = generic_file_splice_read, +}; + +struct inode_operations nilfs_file_inode_operations = { + .truncate = nilfs_truncate, + .setattr = nilfs_setattr, + .permission = nilfs_permission, +}; + +/* end of file */ -- cgit v1.2.3 From 2ba466d74ed74f073257f86e61519cb8f8f46184 Mon Sep 17 00:00:00 2001 From: Yoshiji Amagai Date: Mon, 6 Apr 2009 19:01:34 -0700 Subject: nilfs2: directory entry operations This adds directory handling functions, most of which comes from the ext2 file system. Signed-off-by: Ryusuke Konishi Signed-off-by: Yoshiji Amagai Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/nilfs2/dir.c | 711 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 711 insertions(+) create mode 100644 fs/nilfs2/dir.c (limited to 'fs/nilfs2') diff --git a/fs/nilfs2/dir.c b/fs/nilfs2/dir.c new file mode 100644 index 00000000000..1b7e6ddabbe --- /dev/null +++ b/fs/nilfs2/dir.c @@ -0,0 +1,711 @@ +/* + * dir.c - NILFS directory entry operations + * + * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + * Modified for NILFS by Amagai Yoshiji + */ +/* + * linux/fs/ext2/dir.c + * + * Copyright (C) 1992, 1993, 1994, 1995 + * Remy Card (card@masi.ibp.fr) + * Laboratoire MASI - Institut Blaise Pascal + * Universite Pierre et Marie Curie (Paris VI) + * + * from + * + * linux/fs/minix/dir.c + * + * Copyright (C) 1991, 1992 Linus Torvalds + * + * ext2 directory handling functions + * + * Big-endian to little-endian byte-swapping/bitmaps by + * David S. Miller (davem@caip.rutgers.edu), 1995 + * + * All code that works with directory layout had been switched to pagecache + * and moved here. AV + */ + +#include +#include +#include "nilfs.h" +#include "page.h" + +/* + * nilfs uses block-sized chunks. Arguably, sector-sized ones would be + * more robust, but we have what we have + */ +static inline unsigned nilfs_chunk_size(struct inode *inode) +{ + return inode->i_sb->s_blocksize; +} + +static inline void nilfs_put_page(struct page *page) +{ + kunmap(page); + page_cache_release(page); +} + +static inline unsigned long dir_pages(struct inode *inode) +{ + return (inode->i_size+PAGE_CACHE_SIZE-1)>>PAGE_CACHE_SHIFT; +} + +/* + * Return the offset into page `page_nr' of the last valid + * byte in that page, plus one. + */ +static unsigned nilfs_last_byte(struct inode *inode, unsigned long page_nr) +{ + unsigned last_byte = inode->i_size; + + last_byte -= page_nr << PAGE_CACHE_SHIFT; + if (last_byte > PAGE_CACHE_SIZE) + last_byte = PAGE_CACHE_SIZE; + return last_byte; +} + +static int nilfs_prepare_chunk_uninterruptible(struct page *page, + struct address_space *mapping, + unsigned from, unsigned to) +{ + loff_t pos = page_offset(page) + from; + return block_write_begin(NULL, mapping, pos, to - from, + AOP_FLAG_UNINTERRUPTIBLE, &page, + NULL, nilfs_get_block); +} + +static int nilfs_prepare_chunk(struct page *page, + struct address_space *mapping, + unsigned from, unsigned to) +{ + loff_t pos = page_offset(page) + from; + return block_write_begin(NULL, mapping, pos, to - from, 0, &page, + NULL, nilfs_get_block); +} + +static int nilfs_commit_chunk(struct page *page, + struct address_space *mapping, + unsigned from, unsigned to) +{ + struct inode *dir = mapping->host; + struct nilfs_sb_info *sbi = NILFS_SB(dir->i_sb); + loff_t pos = page_offset(page) + from; + unsigned len = to - from; + unsigned nr_dirty, copied; + int err; + + nr_dirty = nilfs_page_count_clean_buffers(page, from, to); + copied = block_write_end(NULL, mapping, pos, len, len, page, NULL); + if (pos + copied > dir->i_size) { + i_size_write(dir, pos + copied); + mark_inode_dirty(dir); + } + if (IS_DIRSYNC(dir)) + nilfs_set_transaction_flag(NILFS_TI_SYNC); + err = nilfs_set_file_dirty(sbi, dir, nr_dirty); + unlock_page(page); + return err; +} + +static void nilfs_check_page(struct page *page) +{ + struct inode *dir = page->mapping->host; + struct super_block *sb = dir->i_sb; + unsigned chunk_size = nilfs_chunk_size(dir); + char *kaddr = page_address(page); + unsigned offs, rec_len; + unsigned limit = PAGE_CACHE_SIZE; + struct nilfs_dir_entry *p; + char *error; + + if ((dir->i_size >> PAGE_CACHE_SHIFT) == page->index) { + limit = dir->i_size & ~PAGE_CACHE_MASK; + if (limit & (chunk_size - 1)) + goto Ebadsize; + if (!limit) + goto out; + } + for (offs = 0; offs <= limit - NILFS_DIR_REC_LEN(1); offs += rec_len) { + p = (struct nilfs_dir_entry *)(kaddr + offs); + rec_len = le16_to_cpu(p->rec_len); + + if (rec_len < NILFS_DIR_REC_LEN(1)) + goto Eshort; + if (rec_len & 3) + goto Ealign; + if (rec_len < NILFS_DIR_REC_LEN(p->name_len)) + goto Enamelen; + if (((offs + rec_len - 1) ^ offs) & ~(chunk_size-1)) + goto Espan; + } + if (offs != limit) + goto Eend; +out: + SetPageChecked(page); + return; + + /* Too bad, we had an error */ + +Ebadsize: + nilfs_error(sb, "nilfs_check_page", + "size of directory #%lu is not a multiple of chunk size", + dir->i_ino + ); + goto fail; +Eshort: + error = "rec_len is smaller than minimal"; + goto bad_entry; +Ealign: + error = "unaligned directory entry"; + goto bad_entry; +Enamelen: + error = "rec_len is too small for name_len"; + goto bad_entry; +Espan: + error = "directory entry across blocks"; +bad_entry: + nilfs_error(sb, "nilfs_check_page", "bad entry in directory #%lu: %s - " + "offset=%lu, inode=%lu, rec_len=%d, name_len=%d", + dir->i_ino, error, (page->index<inode), + rec_len, p->name_len); + goto fail; +Eend: + p = (struct nilfs_dir_entry *)(kaddr + offs); + nilfs_error(sb, "nilfs_check_page", + "entry in directory #%lu spans the page boundary" + "offset=%lu, inode=%lu", + dir->i_ino, (page->index<inode)); +fail: + SetPageChecked(page); + SetPageError(page); +} + +static struct page *nilfs_get_page(struct inode *dir, unsigned long n) +{ + struct address_space *mapping = dir->i_mapping; + struct page *page = read_cache_page(mapping, n, + (filler_t *)mapping->a_ops->readpage, NULL); + if (!IS_ERR(page)) { + wait_on_page_locked(page); + kmap(page); + if (!PageUptodate(page)) + goto fail; + if (!PageChecked(page)) + nilfs_check_page(page); + if (PageError(page)) + goto fail; + } + return page; + +fail: + nilfs_put_page(page); + return ERR_PTR(-EIO); +} + +/* + * NOTE! unlike strncmp, nilfs_match returns 1 for success, 0 for failure. + * + * len <= NILFS_NAME_LEN and de != NULL are guaranteed by caller. + */ +static int +nilfs_match(int len, const char * const name, struct nilfs_dir_entry *de) +{ + if (len != de->name_len) + return 0; + if (!de->inode) + return 0; + return !memcmp(name, de->name, len); +} + +/* + * p is at least 6 bytes before the end of page + */ +static struct nilfs_dir_entry *nilfs_next_entry(struct nilfs_dir_entry *p) +{ + return (struct nilfs_dir_entry *)((char *)p + le16_to_cpu(p->rec_len)); +} + +static unsigned char +nilfs_filetype_table[NILFS_FT_MAX] = { + [NILFS_FT_UNKNOWN] = DT_UNKNOWN, + [NILFS_FT_REG_FILE] = DT_REG, + [NILFS_FT_DIR] = DT_DIR, + [NILFS_FT_CHRDEV] = DT_CHR, + [NILFS_FT_BLKDEV] = DT_BLK, + [NILFS_FT_FIFO] = DT_FIFO, + [NILFS_FT_SOCK] = DT_SOCK, + [NILFS_FT_SYMLINK] = DT_LNK, +}; + +#define S_SHIFT 12 +static unsigned char +nilfs_type_by_mode[S_IFMT >> S_SHIFT] = { + [S_IFREG >> S_SHIFT] = NILFS_FT_REG_FILE, + [S_IFDIR >> S_SHIFT] = NILFS_FT_DIR, + [S_IFCHR >> S_SHIFT] = NILFS_FT_CHRDEV, + [S_IFBLK >> S_SHIFT] = NILFS_FT_BLKDEV, + [S_IFIFO >> S_SHIFT] = NILFS_FT_FIFO, + [S_IFSOCK >> S_SHIFT] = NILFS_FT_SOCK, + [S_IFLNK >> S_SHIFT] = NILFS_FT_SYMLINK, +}; + +static void nilfs_set_de_type(struct nilfs_dir_entry *de, struct inode *inode) +{ + mode_t mode = inode->i_mode; + + de->file_type = nilfs_type_by_mode[(mode & S_IFMT)>>S_SHIFT]; +} + +static int nilfs_readdir(struct file *filp, void *dirent, filldir_t filldir) +{ + loff_t pos = filp->f_pos; + struct inode *inode = filp->f_dentry->d_inode; + struct super_block *sb = inode->i_sb; + unsigned int offset = pos & ~PAGE_CACHE_MASK; + unsigned long n = pos >> PAGE_CACHE_SHIFT; + unsigned long npages = dir_pages(inode); +/* unsigned chunk_mask = ~(nilfs_chunk_size(inode)-1); */ + unsigned char *types = NULL; + int ret; + + if (pos > inode->i_size - NILFS_DIR_REC_LEN(1)) + goto success; + + types = nilfs_filetype_table; + + for ( ; n < npages; n++, offset = 0) { + char *kaddr, *limit; + struct nilfs_dir_entry *de; + struct page *page = nilfs_get_page(inode, n); + + if (IS_ERR(page)) { + nilfs_error(sb, __func__, "bad page in #%lu", + inode->i_ino); + filp->f_pos += PAGE_CACHE_SIZE - offset; + ret = -EIO; + goto done; + } + kaddr = page_address(page); + de = (struct nilfs_dir_entry *)(kaddr + offset); + limit = kaddr + nilfs_last_byte(inode, n) - + NILFS_DIR_REC_LEN(1); + for ( ; (char *)de <= limit; de = nilfs_next_entry(de)) { + if (de->rec_len == 0) { + nilfs_error(sb, __func__, + "zero-length directory entry"); + ret = -EIO; + nilfs_put_page(page); + goto done; + } + if (de->inode) { + int over; + unsigned char d_type = DT_UNKNOWN; + + if (types && de->file_type < NILFS_FT_MAX) + d_type = types[de->file_type]; + + offset = (char *)de - kaddr; + over = filldir(dirent, de->name, de->name_len, + (n<inode), d_type); + if (over) { + nilfs_put_page(page); + goto success; + } + } + filp->f_pos += le16_to_cpu(de->rec_len); + } + nilfs_put_page(page); + } + +success: + ret = 0; +done: + return ret; +} + +/* + * nilfs_find_entry() + * + * finds an entry in the specified directory with the wanted name. It + * returns the page in which the entry was found, and the entry itself + * (as a parameter - res_dir). Page is returned mapped and unlocked. + * Entry is guaranteed to be valid. + */ +struct nilfs_dir_entry * +nilfs_find_entry(struct inode *dir, struct dentry *dentry, + struct page **res_page) +{ + const char *name = dentry->d_name.name; + int namelen = dentry->d_name.len; + unsigned reclen = NILFS_DIR_REC_LEN(namelen); + unsigned long start, n; + unsigned long npages = dir_pages(dir); + struct page *page = NULL; + struct nilfs_inode_info *ei = NILFS_I(dir); + struct nilfs_dir_entry *de; + + if (npages == 0) + goto out; + + /* OFFSET_CACHE */ + *res_page = NULL; + + start = ei->i_dir_start_lookup; + if (start >= npages) + start = 0; + n = start; + do { + char *kaddr; + page = nilfs_get_page(dir, n); + if (!IS_ERR(page)) { + kaddr = page_address(page); + de = (struct nilfs_dir_entry *)kaddr; + kaddr += nilfs_last_byte(dir, n) - reclen; + while ((char *) de <= kaddr) { + if (de->rec_len == 0) { + nilfs_error(dir->i_sb, __func__, + "zero-length directory entry"); + nilfs_put_page(page); + goto out; + } + if (nilfs_match(namelen, name, de)) + goto found; + de = nilfs_next_entry(de); + } + nilfs_put_page(page); + } + if (++n >= npages) + n = 0; + /* next page is past the blocks we've got */ + if (unlikely(n > (dir->i_blocks >> (PAGE_CACHE_SHIFT - 9)))) { + nilfs_error(dir->i_sb, __func__, + "dir %lu size %lld exceeds block cout %llu", + dir->i_ino, dir->i_size, + (unsigned long long)dir->i_blocks); + goto out; + } + } while (n != start); +out: + return NULL; + +found: + *res_page = page; + ei->i_dir_start_lookup = n; + return de; +} + +struct nilfs_dir_entry *nilfs_dotdot(struct inode *dir, struct page **p) +{ + struct page *page = nilfs_get_page(dir, 0); + struct nilfs_dir_entry *de = NULL; + + if (!IS_ERR(page)) { + de = nilfs_next_entry( + (struct nilfs_dir_entry *)page_address(page)); + *p = page; + } + return de; +} + +ino_t nilfs_inode_by_name(struct inode *dir, struct dentry *dentry) +{ + ino_t res = 0; + struct nilfs_dir_entry *de; + struct page *page; + + de = nilfs_find_entry(dir, dentry, &page); + if (de) { + res = le64_to_cpu(de->inode); + kunmap(page); + page_cache_release(page); + } + return res; +} + +/* Releases the page */ +void nilfs_set_link(struct inode *dir, struct nilfs_dir_entry *de, + struct page *page, struct inode *inode) +{ + unsigned from = (char *) de - (char *) page_address(page); + unsigned to = from + le16_to_cpu(de->rec_len); + struct address_space *mapping = page->mapping; + int err; + + lock_page(page); + err = nilfs_prepare_chunk_uninterruptible(page, mapping, from, to); + BUG_ON(err); + de->inode = cpu_to_le64(inode->i_ino); + nilfs_set_de_type(de, inode); + err = nilfs_commit_chunk(page, mapping, from, to); + nilfs_put_page(page); + dir->i_mtime = dir->i_ctime = CURRENT_TIME; +/* NILFS_I(dir)->i_flags &= ~NILFS_BTREE_FL; */ + mark_inode_dirty(dir); +} + +/* + * Parent is locked. + */ +int nilfs_add_link(struct dentry *dentry, struct inode *inode) +{ + struct inode *dir = dentry->d_parent->d_inode; + const char *name = dentry->d_name.name; + int namelen = dentry->d_name.len; + unsigned chunk_size = nilfs_chunk_size(dir); + unsigned reclen = NILFS_DIR_REC_LEN(namelen); + unsigned short rec_len, name_len; + struct page *page = NULL; + struct nilfs_dir_entry *de; + unsigned long npages = dir_pages(dir); + unsigned long n; + char *kaddr; + unsigned from, to; + int err; + + /* + * We take care of directory expansion in the same loop. + * This code plays outside i_size, so it locks the page + * to protect that region. + */ + for (n = 0; n <= npages; n++) { + char *dir_end; + + page = nilfs_get_page(dir, n); + err = PTR_ERR(page); + if (IS_ERR(page)) + goto out; + lock_page(page); + kaddr = page_address(page); + dir_end = kaddr + nilfs_last_byte(dir, n); + de = (struct nilfs_dir_entry *)kaddr; + kaddr += PAGE_CACHE_SIZE - reclen; + while ((char *)de <= kaddr) { + if ((char *)de == dir_end) { + /* We hit i_size */ + name_len = 0; + rec_len = chunk_size; + de->rec_len = cpu_to_le16(chunk_size); + de->inode = 0; + goto got_it; + } + if (de->rec_len == 0) { + nilfs_error(dir->i_sb, __func__, + "zero-length directory entry"); + err = -EIO; + goto out_unlock; + } + err = -EEXIST; + if (nilfs_match(namelen, name, de)) + goto out_unlock; + name_len = NILFS_DIR_REC_LEN(de->name_len); + rec_len = le16_to_cpu(de->rec_len); + if (!de->inode && rec_len >= reclen) + goto got_it; + if (rec_len >= name_len + reclen) + goto got_it; + de = (struct nilfs_dir_entry *)((char *)de + rec_len); + } + unlock_page(page); + nilfs_put_page(page); + } + BUG(); + return -EINVAL; + +got_it: + from = (char *)de - (char *)page_address(page); + to = from + rec_len; + err = nilfs_prepare_chunk(page, page->mapping, from, to); + if (err) + goto out_unlock; + if (de->inode) { + struct nilfs_dir_entry *de1; + + de1 = (struct nilfs_dir_entry *)((char *)de + name_len); + de1->rec_len = cpu_to_le16(rec_len - name_len); + de->rec_len = cpu_to_le16(name_len); + de = de1; + } + de->name_len = namelen; + memcpy(de->name, name, namelen); + de->inode = cpu_to_le64(inode->i_ino); + nilfs_set_de_type(de, inode); + err = nilfs_commit_chunk(page, page->mapping, from, to); + dir->i_mtime = dir->i_ctime = CURRENT_TIME; +/* NILFS_I(dir)->i_flags &= ~NILFS_BTREE_FL; */ + mark_inode_dirty(dir); + /* OFFSET_CACHE */ +out_put: + nilfs_put_page(page); +out: + return err; +out_unlock: + unlock_page(page); + goto out_put; +} + +/* + * nilfs_delete_entry deletes a directory entry by merging it with the + * previous entry. Page is up-to-date. Releases the page. + */ +int nilfs_delete_entry(struct nilfs_dir_entry *dir, struct page *page) +{ + struct address_space *mapping = page->mapping; + struct inode *inode = mapping->host; + char *kaddr = page_address(page); + unsigned from = ((char *)dir - kaddr) & ~(nilfs_chunk_size(inode) - 1); + unsigned to = ((char *)dir - kaddr) + le16_to_cpu(dir->rec_len); + struct nilfs_dir_entry *pde = NULL; + struct nilfs_dir_entry *de = (struct nilfs_dir_entry *)(kaddr + from); + int err; + + while ((char *)de < (char *)dir) { + if (de->rec_len == 0) { + nilfs_error(inode->i_sb, __func__, + "zero-length directory entry"); + err = -EIO; + goto out; + } + pde = de; + de = nilfs_next_entry(de); + } + if (pde) + from = (char *)pde - (char *)page_address(page); + lock_page(page); + err = nilfs_prepare_chunk(page, mapping, from, to); + BUG_ON(err); + if (pde) + pde->rec_len = cpu_to_le16(to - from); + dir->inode = 0; + err = nilfs_commit_chunk(page, mapping, from, to); + inode->i_ctime = inode->i_mtime = CURRENT_TIME; +/* NILFS_I(inode)->i_flags &= ~NILFS_BTREE_FL; */ + mark_inode_dirty(inode); +out: + nilfs_put_page(page); + return err; +} + +/* + * Set the first fragment of directory. + */ +int nilfs_make_empty(struct inode *inode, struct inode *parent) +{ + struct address_space *mapping = inode->i_mapping; + struct page *page = grab_cache_page(mapping, 0); + unsigned chunk_size = nilfs_chunk_size(inode); + struct nilfs_dir_entry *de; + int err; + void *kaddr; + + if (!page) + return -ENOMEM; + + err = nilfs_prepare_chunk(page, mapping, 0, chunk_size); + if (unlikely(err)) { + unlock_page(page); + goto fail; + } + kaddr = kmap_atomic(page, KM_USER0); + memset(kaddr, 0, chunk_size); + de = (struct nilfs_dir_entry *)kaddr; + de->name_len = 1; + de->rec_len = cpu_to_le16(NILFS_DIR_REC_LEN(1)); + memcpy(de->name, ".\0\0", 4); + de->inode = cpu_to_le64(inode->i_ino); + nilfs_set_de_type(de, inode); + + de = (struct nilfs_dir_entry *)(kaddr + NILFS_DIR_REC_LEN(1)); + de->name_len = 2; + de->rec_len = cpu_to_le16(chunk_size - NILFS_DIR_REC_LEN(1)); + de->inode = cpu_to_le64(parent->i_ino); + memcpy(de->name, "..\0", 4); + nilfs_set_de_type(de, inode); + kunmap_atomic(kaddr, KM_USER0); + err = nilfs_commit_chunk(page, mapping, 0, chunk_size); +fail: + page_cache_release(page); + return err; +} + +/* + * routine to check that the specified directory is empty (for rmdir) + */ +int nilfs_empty_dir(struct inode *inode) +{ + struct page *page = NULL; + unsigned long i, npages = dir_pages(inode); + + for (i = 0; i < npages; i++) { + char *kaddr; + struct nilfs_dir_entry *de; + + page = nilfs_get_page(inode, i); + if (IS_ERR(page)) + continue; + + kaddr = page_address(page); + de = (struct nilfs_dir_entry *)kaddr; + kaddr += nilfs_last_byte(inode, i) - NILFS_DIR_REC_LEN(1); + + while ((char *)de <= kaddr) { + if (de->rec_len == 0) { + nilfs_error(inode->i_sb, __func__, + "zero-length directory entry " + "(kaddr=%p, de=%p)\n", kaddr, de); + goto not_empty; + } + if (de->inode != 0) { + /* check for . and .. */ + if (de->name[0] != '.') + goto not_empty; + if (de->name_len > 2) + goto not_empty; + if (de->name_len < 2) { + if (de->inode != + cpu_to_le64(inode->i_ino)) + goto not_empty; + } else if (de->name[1] != '.') + goto not_empty; + } + de = nilfs_next_entry(de); + } + nilfs_put_page(page); + } + return 1; + +not_empty: + nilfs_put_page(page); + return 0; +} + +struct file_operations nilfs_dir_operations = { + .llseek = generic_file_llseek, + .read = generic_read_dir, + .readdir = nilfs_readdir, + .ioctl = nilfs_ioctl, +#ifdef CONFIG_COMPAT + .compat_ioctl = nilfs_compat_ioctl, +#endif /* CONFIG_COMPAT */ + .fsync = nilfs_sync_file, + +}; -- cgit v1.2.3 From d25006523d0b9e49fd097b2e974e7c8c05bd7f54 Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Mon, 6 Apr 2009 19:01:34 -0700 Subject: nilfs2: pathname operations This adds pathname operations, most of which comes from the ext2 file system. Signed-off-by: Ryusuke Konishi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/nilfs2/namei.c | 446 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 446 insertions(+) create mode 100644 fs/nilfs2/namei.c (limited to 'fs/nilfs2') diff --git a/fs/nilfs2/namei.c b/fs/nilfs2/namei.c new file mode 100644 index 00000000000..95d1b29bff3 --- /dev/null +++ b/fs/nilfs2/namei.c @@ -0,0 +1,446 @@ +/* + * namei.c - NILFS pathname lookup operations. + * + * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + * Modified for NILFS by Amagai Yoshiji , + * Ryusuke Konishi + */ +/* + * linux/fs/ext2/namei.c + * + * Copyright (C) 1992, 1993, 1994, 1995 + * Remy Card (card@masi.ibp.fr) + * Laboratoire MASI - Institut Blaise Pascal + * Universite Pierre et Marie Curie (Paris VI) + * + * from + * + * linux/fs/minix/namei.c + * + * Copyright (C) 1991, 1992 Linus Torvalds + * + * Big-endian to little-endian byte-swapping/bitmaps by + * David S. Miller (davem@caip.rutgers.edu), 1995 + */ + +#include +#include "nilfs.h" + + +static inline int nilfs_add_nondir(struct dentry *dentry, struct inode *inode) +{ + int err = nilfs_add_link(dentry, inode); + if (!err) { + d_instantiate(dentry, inode); + return 0; + } + inode_dec_link_count(inode); + iput(inode); + return err; +} + +/* + * Methods themselves. + */ + +static struct dentry * +nilfs_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd) +{ + struct inode *inode; + ino_t ino; + + if (dentry->d_name.len > NILFS_NAME_LEN) + return ERR_PTR(-ENAMETOOLONG); + + ino = nilfs_inode_by_name(dir, dentry); + inode = NULL; + if (ino) { + inode = nilfs_iget(dir->i_sb, ino); + if (IS_ERR(inode)) + return ERR_CAST(inode); + } + return d_splice_alias(inode, dentry); +} + +struct dentry *nilfs_get_parent(struct dentry *child) +{ + unsigned long ino; + struct inode *inode; + struct dentry dotdot; + + dotdot.d_name.name = ".."; + dotdot.d_name.len = 2; + + ino = nilfs_inode_by_name(child->d_inode, &dotdot); + if (!ino) + return ERR_PTR(-ENOENT); + + inode = nilfs_iget(child->d_inode->i_sb, ino); + if (IS_ERR(inode)) + return ERR_CAST(inode); + return d_obtain_alias(inode); +} + +/* + * By the time this is called, we already have created + * the directory cache entry for the new file, but it + * is so far negative - it has no inode. + * + * If the create succeeds, we fill in the inode information + * with d_instantiate(). + */ +static int nilfs_create(struct inode *dir, struct dentry *dentry, int mode, + struct nameidata *nd) +{ + struct inode *inode; + struct nilfs_transaction_info ti; + int err, err2; + + err = nilfs_transaction_begin(dir->i_sb, &ti, 1); + if (err) + return err; + inode = nilfs_new_inode(dir, mode); + err = PTR_ERR(inode); + if (!IS_ERR(inode)) { + inode->i_op = &nilfs_file_inode_operations; + inode->i_fop = &nilfs_file_operations; + inode->i_mapping->a_ops = &nilfs_aops; + mark_inode_dirty(inode); + err = nilfs_add_nondir(dentry, inode); + } + err2 = nilfs_transaction_end(dir->i_sb, !err); + return err ? : err2; +} + +static int +nilfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t rdev) +{ + struct inode *inode; + struct nilfs_transaction_info ti; + int err, err2; + + if (!new_valid_dev(rdev)) + return -EINVAL; + + err = nilfs_transaction_begin(dir->i_sb, &ti, 1); + if (err) + return err; + inode = nilfs_new_inode(dir, mode); + err = PTR_ERR(inode); + if (!IS_ERR(inode)) { + init_special_inode(inode, inode->i_mode, rdev); + mark_inode_dirty(inode); + err = nilfs_add_nondir(dentry, inode); + } + err2 = nilfs_transaction_end(dir->i_sb, !err); + return err ? : err2; +} + +static int nilfs_symlink(struct inode *dir, struct dentry *dentry, + const char *symname) +{ + struct nilfs_transaction_info ti; + struct super_block *sb = dir->i_sb; + unsigned l = strlen(symname)+1; + struct inode *inode; + int err, err2; + + if (l > sb->s_blocksize) + return -ENAMETOOLONG; + + err = nilfs_transaction_begin(dir->i_sb, &ti, 1); + if (err) + return err; + + inode = nilfs_new_inode(dir, S_IFLNK | S_IRWXUGO); + err = PTR_ERR(inode); + if (IS_ERR(inode)) + goto out; + + /* slow symlink */ + inode->i_op = &nilfs_symlink_inode_operations; + inode->i_mapping->a_ops = &nilfs_aops; + err = page_symlink(inode, symname, l); + if (err) + goto out_fail; + + /* mark_inode_dirty(inode); */ + /* nilfs_new_inode() and page_symlink() do this */ + + err = nilfs_add_nondir(dentry, inode); +out: + err2 = nilfs_transaction_end(dir->i_sb, !err); + return err ? : err2; + +out_fail: + inode_dec_link_count(inode); + iput(inode); + goto out; +} + +static int nilfs_link(struct dentry *old_dentry, struct inode *dir, + struct dentry *dentry) +{ + struct inode *inode = old_dentry->d_inode; + struct nilfs_transaction_info ti; + int err, err2; + + if (inode->i_nlink >= NILFS_LINK_MAX) + return -EMLINK; + + err = nilfs_transaction_begin(dir->i_sb, &ti, 1); + if (err) + return err; + + inode->i_ctime = CURRENT_TIME; + inode_inc_link_count(inode); + atomic_inc(&inode->i_count); + + err = nilfs_add_nondir(dentry, inode); + err2 = nilfs_transaction_end(dir->i_sb, !err); + return err ? : err2; +} + +static int nilfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) +{ + struct inode *inode; + struct nilfs_transaction_info ti; + int err, err2; + + if (dir->i_nlink >= NILFS_LINK_MAX) + return -EMLINK; + + err = nilfs_transaction_begin(dir->i_sb, &ti, 1); + if (err) + return err; + + inode_inc_link_count(dir); + + inode = nilfs_new_inode(dir, S_IFDIR | mode); + err = PTR_ERR(inode); + if (IS_ERR(inode)) + goto out_dir; + + inode->i_op = &nilfs_dir_inode_operations; + inode->i_fop = &nilfs_dir_operations; + inode->i_mapping->a_ops = &nilfs_aops; + + inode_inc_link_count(inode); + + err = nilfs_make_empty(inode, dir); + if (err) + goto out_fail; + + err = nilfs_add_link(dentry, inode); + if (err) + goto out_fail; + + d_instantiate(dentry, inode); +out: + err2 = nilfs_transaction_end(dir->i_sb, !err); + return err ? : err2; + +out_fail: + inode_dec_link_count(inode); + inode_dec_link_count(inode); + iput(inode); +out_dir: + inode_dec_link_count(dir); + goto out; +} + +static int nilfs_unlink(struct inode *dir, struct dentry *dentry) +{ + struct inode *inode; + struct nilfs_dir_entry *de; + struct page *page; + struct nilfs_transaction_info ti; + int err, err2; + + err = nilfs_transaction_begin(dir->i_sb, &ti, 0); + if (err) + return err; + + err = -ENOENT; + de = nilfs_find_entry(dir, dentry, &page); + if (!de) + goto out; + + inode = dentry->d_inode; + err = -EIO; + if (le64_to_cpu(de->inode) != inode->i_ino) + goto out; + + if (!inode->i_nlink) { + nilfs_warning(inode->i_sb, __func__, + "deleting nonexistent file (%lu), %d\n", + inode->i_ino, inode->i_nlink); + inode->i_nlink = 1; + } + err = nilfs_delete_entry(de, page); + if (err) + goto out; + + inode->i_ctime = dir->i_ctime; + inode_dec_link_count(inode); + err = 0; +out: + err2 = nilfs_transaction_end(dir->i_sb, !err); + return err ? : err2; +} + +static int nilfs_rmdir(struct inode *dir, struct dentry *dentry) +{ + struct inode *inode = dentry->d_inode; + struct nilfs_transaction_info ti; + int err, err2; + + err = nilfs_transaction_begin(dir->i_sb, &ti, 0); + if (err) + return err; + + err = -ENOTEMPTY; + if (nilfs_empty_dir(inode)) { + err = nilfs_unlink(dir, dentry); + if (!err) { + inode->i_size = 0; + inode_dec_link_count(inode); + inode_dec_link_count(dir); + } + } + err2 = nilfs_transaction_end(dir->i_sb, !err); + return err ? : err2; +} + +static int nilfs_rename(struct inode *old_dir, struct dentry *old_dentry, + struct inode *new_dir, struct dentry *new_dentry) +{ + struct inode *old_inode = old_dentry->d_inode; + struct inode *new_inode = new_dentry->d_inode; + struct page *dir_page = NULL; + struct nilfs_dir_entry *dir_de = NULL; + struct page *old_page; + struct nilfs_dir_entry *old_de; + struct nilfs_transaction_info ti; + int err; + + err = nilfs_transaction_begin(old_dir->i_sb, &ti, 1); + if (unlikely(err)) + return err; + + err = -ENOENT; + old_de = nilfs_find_entry(old_dir, old_dentry, &old_page); + if (!old_de) + goto out; + + if (S_ISDIR(old_inode->i_mode)) { + err = -EIO; + dir_de = nilfs_dotdot(old_inode, &dir_page); + if (!dir_de) + goto out_old; + } + + if (new_inode) { + struct page *new_page; + struct nilfs_dir_entry *new_de; + + err = -ENOTEMPTY; + if (dir_de && !nilfs_empty_dir(new_inode)) + goto out_dir; + + err = -ENOENT; + new_de = nilfs_find_entry(new_dir, new_dentry, &new_page); + if (!new_de) + goto out_dir; + inode_inc_link_count(old_inode); + nilfs_set_link(new_dir, new_de, new_page, old_inode); + new_inode->i_ctime = CURRENT_TIME; + if (dir_de) + drop_nlink(new_inode); + inode_dec_link_count(new_inode); + } else { + if (dir_de) { + err = -EMLINK; + if (new_dir->i_nlink >= NILFS_LINK_MAX) + goto out_dir; + } + inode_inc_link_count(old_inode); + err = nilfs_add_link(new_dentry, old_inode); + if (err) { + inode_dec_link_count(old_inode); + goto out_dir; + } + if (dir_de) + inode_inc_link_count(new_dir); + } + + /* + * Like most other Unix systems, set the ctime for inodes on a + * rename. + * inode_dec_link_count() will mark the inode dirty. + */ + old_inode->i_ctime = CURRENT_TIME; + + nilfs_delete_entry(old_de, old_page); + inode_dec_link_count(old_inode); + + if (dir_de) { + nilfs_set_link(old_inode, dir_de, dir_page, new_dir); + inode_dec_link_count(old_dir); + } + + err = nilfs_transaction_end(old_dir->i_sb, 1); + return err; + +out_dir: + if (dir_de) { + kunmap(dir_page); + page_cache_release(dir_page); + } +out_old: + kunmap(old_page); + page_cache_release(old_page); +out: + nilfs_transaction_end(old_dir->i_sb, 0); + return err; +} + +struct inode_operations nilfs_dir_inode_operations = { + .create = nilfs_create, + .lookup = nilfs_lookup, + .link = nilfs_link, + .unlink = nilfs_unlink, + .symlink = nilfs_symlink, + .mkdir = nilfs_mkdir, + .rmdir = nilfs_rmdir, + .mknod = nilfs_mknod, + .rename = nilfs_rename, + .setattr = nilfs_setattr, + .permission = nilfs_permission, +}; + +struct inode_operations nilfs_special_inode_operations = { + .setattr = nilfs_setattr, + .permission = nilfs_permission, +}; + +struct inode_operations nilfs_symlink_inode_operations = { + .readlink = generic_readlink, + .follow_link = page_follow_link_light, + .put_link = page_put_link, +}; -- cgit v1.2.3 From 8a9d2191e9f43bbcd256a9a6871bd73434c83f2f Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Mon, 6 Apr 2009 19:01:35 -0700 Subject: nilfs2: operations for the_nilfs core object This adds functions on the_nilfs object, which keeps shared resources and states among a read/write mount and snapshots mounts going individually. the_nilfs is allocated per block device; it is created when user first mount a snapshot or a read/write mount on the device, then it is reused for successive mounts. It will be freed when all mount instances on the device are detached. Signed-off-by: Ryusuke Konishi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/nilfs2/the_nilfs.c | 524 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 524 insertions(+) create mode 100644 fs/nilfs2/the_nilfs.c (limited to 'fs/nilfs2') diff --git a/fs/nilfs2/the_nilfs.c b/fs/nilfs2/the_nilfs.c new file mode 100644 index 00000000000..852e0bf3a3c --- /dev/null +++ b/fs/nilfs2/the_nilfs.c @@ -0,0 +1,524 @@ +/* + * the_nilfs.c - the_nilfs shared structure. + * + * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + * Written by Ryusuke Konishi + * + */ + +#include +#include +#include +#include +#include "nilfs.h" +#include "segment.h" +#include "alloc.h" +#include "cpfile.h" +#include "sufile.h" +#include "dat.h" +#include "seglist.h" +#include "segbuf.h" + +void nilfs_set_last_segment(struct the_nilfs *nilfs, + sector_t start_blocknr, u64 seq, __u64 cno) +{ + spin_lock(&nilfs->ns_last_segment_lock); + nilfs->ns_last_pseg = start_blocknr; + nilfs->ns_last_seq = seq; + nilfs->ns_last_cno = cno; + spin_unlock(&nilfs->ns_last_segment_lock); +} + +/** + * alloc_nilfs - allocate the_nilfs structure + * @bdev: block device to which the_nilfs is related + * + * alloc_nilfs() allocates memory for the_nilfs and + * initializes its reference count and locks. + * + * Return Value: On success, pointer to the_nilfs is returned. + * On error, NULL is returned. + */ +struct the_nilfs *alloc_nilfs(struct block_device *bdev) +{ + struct the_nilfs *nilfs; + + nilfs = kzalloc(sizeof(*nilfs), GFP_KERNEL); + if (!nilfs) + return NULL; + + nilfs->ns_bdev = bdev; + atomic_set(&nilfs->ns_count, 1); + atomic_set(&nilfs->ns_writer_refcount, -1); + atomic_set(&nilfs->ns_ndirtyblks, 0); + init_rwsem(&nilfs->ns_sem); + mutex_init(&nilfs->ns_writer_mutex); + INIT_LIST_HEAD(&nilfs->ns_supers); + spin_lock_init(&nilfs->ns_last_segment_lock); + nilfs->ns_gc_inodes_h = NULL; + INIT_LIST_HEAD(&nilfs->ns_used_segments); + init_rwsem(&nilfs->ns_segctor_sem); + init_waitqueue_head(&nilfs->ns_cleanerd_wq); + + return nilfs; +} + +/** + * put_nilfs - release a reference to the_nilfs + * @nilfs: the_nilfs structure to be released + * + * put_nilfs() decrements a reference counter of the_nilfs. + * If the reference count reaches zero, the_nilfs is freed. + */ +void put_nilfs(struct the_nilfs *nilfs) +{ + if (!atomic_dec_and_test(&nilfs->ns_count)) + return; + /* + * Increment of ns_count never occur below because the caller + * of get_nilfs() holds at least one reference to the_nilfs. + * Thus its exclusion control is not required here. + */ + might_sleep(); + if (nilfs_loaded(nilfs)) { + nilfs_dispose_used_segments(nilfs); + nilfs_mdt_clear(nilfs->ns_sufile); + nilfs_mdt_destroy(nilfs->ns_sufile); + nilfs_mdt_clear(nilfs->ns_cpfile); + nilfs_mdt_destroy(nilfs->ns_cpfile); + nilfs_mdt_clear(nilfs->ns_dat); + nilfs_mdt_destroy(nilfs->ns_dat); + /* XXX: how and when to clear nilfs->ns_gc_dat? */ + nilfs_mdt_destroy(nilfs->ns_gc_dat); + } + if (nilfs_init(nilfs)) { + nilfs_destroy_gccache(nilfs); + brelse(nilfs->ns_sbh); + } + kfree(nilfs); +} + +static int nilfs_load_super_root(struct the_nilfs *nilfs, + struct nilfs_sb_info *sbi, sector_t sr_block) +{ + struct buffer_head *bh_sr; + struct nilfs_super_root *raw_sr; + unsigned dat_entry_size, segment_usage_size, checkpoint_size; + unsigned inode_size; + int err; + + err = nilfs_read_super_root_block(sbi->s_super, sr_block, &bh_sr, 1); + if (unlikely(err)) + return err; + + down_read(&nilfs->ns_sem); + dat_entry_size = le16_to_cpu(nilfs->ns_sbp->s_dat_entry_size); + checkpoint_size = le16_to_cpu(nilfs->ns_sbp->s_checkpoint_size); + segment_usage_size = le16_to_cpu(nilfs->ns_sbp->s_segment_usage_size); + up_read(&nilfs->ns_sem); + + inode_size = nilfs->ns_inode_size; + + err = -ENOMEM; + nilfs->ns_dat = nilfs_mdt_new( + nilfs, NULL, NILFS_DAT_INO, NILFS_DAT_GFP); + if (unlikely(!nilfs->ns_dat)) + goto failed; + + nilfs->ns_gc_dat = nilfs_mdt_new( + nilfs, NULL, NILFS_DAT_INO, NILFS_DAT_GFP); + if (unlikely(!nilfs->ns_gc_dat)) + goto failed_dat; + + nilfs->ns_cpfile = nilfs_mdt_new( + nilfs, NULL, NILFS_CPFILE_INO, NILFS_CPFILE_GFP); + if (unlikely(!nilfs->ns_cpfile)) + goto failed_gc_dat; + + nilfs->ns_sufile = nilfs_mdt_new( + nilfs, NULL, NILFS_SUFILE_INO, NILFS_SUFILE_GFP); + if (unlikely(!nilfs->ns_sufile)) + goto failed_cpfile; + + err = nilfs_palloc_init_blockgroup(nilfs->ns_dat, dat_entry_size); + if (unlikely(err)) + goto failed_sufile; + + err = nilfs_palloc_init_blockgroup(nilfs->ns_gc_dat, dat_entry_size); + if (unlikely(err)) + goto failed_sufile; + + nilfs_mdt_set_shadow(nilfs->ns_dat, nilfs->ns_gc_dat); + nilfs_mdt_set_entry_size(nilfs->ns_cpfile, checkpoint_size, + sizeof(struct nilfs_cpfile_header)); + nilfs_mdt_set_entry_size(nilfs->ns_sufile, segment_usage_size, + sizeof(struct nilfs_sufile_header)); + + err = nilfs_mdt_read_inode_direct( + nilfs->ns_dat, bh_sr, NILFS_SR_DAT_OFFSET(inode_size)); + if (unlikely(err)) + goto failed_sufile; + + err = nilfs_mdt_read_inode_direct( + nilfs->ns_cpfile, bh_sr, NILFS_SR_CPFILE_OFFSET(inode_size)); + if (unlikely(err)) + goto failed_sufile; + + err = nilfs_mdt_read_inode_direct( + nilfs->ns_sufile, bh_sr, NILFS_SR_SUFILE_OFFSET(inode_size)); + if (unlikely(err)) + goto failed_sufile; + + raw_sr = (struct nilfs_super_root *)bh_sr->b_data; + nilfs->ns_nongc_ctime = le64_to_cpu(raw_sr->sr_nongc_ctime); + + failed: + brelse(bh_sr); + return err; + + failed_sufile: + nilfs_mdt_destroy(nilfs->ns_sufile); + + failed_cpfile: + nilfs_mdt_destroy(nilfs->ns_cpfile); + + failed_gc_dat: + nilfs_mdt_destroy(nilfs->ns_gc_dat); + + failed_dat: + nilfs_mdt_destroy(nilfs->ns_dat); + goto failed; +} + +static void nilfs_init_recovery_info(struct nilfs_recovery_info *ri) +{ + memset(ri, 0, sizeof(*ri)); + INIT_LIST_HEAD(&ri->ri_used_segments); +} + +static void nilfs_clear_recovery_info(struct nilfs_recovery_info *ri) +{ + nilfs_dispose_segment_list(&ri->ri_used_segments); +} + +/** + * load_nilfs - load and recover the nilfs + * @nilfs: the_nilfs structure to be released + * @sbi: nilfs_sb_info used to recover past segment + * + * load_nilfs() searches and load the latest super root, + * attaches the last segment, and does recovery if needed. + * The caller must call this exclusively for simultaneous mounts. + */ +int load_nilfs(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi) +{ + struct nilfs_recovery_info ri; + unsigned int s_flags = sbi->s_super->s_flags; + int really_read_only = bdev_read_only(nilfs->ns_bdev); + unsigned valid_fs; + int err = 0; + + nilfs_init_recovery_info(&ri); + + down_write(&nilfs->ns_sem); + valid_fs = (nilfs->ns_mount_state & NILFS_VALID_FS); + up_write(&nilfs->ns_sem); + + if (!valid_fs && (s_flags & MS_RDONLY)) { + printk(KERN_INFO "NILFS: INFO: recovery " + "required for readonly filesystem.\n"); + if (really_read_only) { + printk(KERN_ERR "NILFS: write access " + "unavailable, cannot proceed.\n"); + err = -EROFS; + goto failed; + } + printk(KERN_INFO "NILFS: write access will " + "be enabled during recovery.\n"); + sbi->s_super->s_flags &= ~MS_RDONLY; + } + + err = nilfs_search_super_root(nilfs, sbi, &ri); + if (unlikely(err)) { + printk(KERN_ERR "NILFS: error searching super root.\n"); + goto failed; + } + + err = nilfs_load_super_root(nilfs, sbi, ri.ri_super_root); + if (unlikely(err)) { + printk(KERN_ERR "NILFS: error loading super root.\n"); + goto failed; + } + + if (!valid_fs) { + err = nilfs_recover_logical_segments(nilfs, sbi, &ri); + if (unlikely(err)) { + nilfs_mdt_destroy(nilfs->ns_cpfile); + nilfs_mdt_destroy(nilfs->ns_sufile); + nilfs_mdt_destroy(nilfs->ns_dat); + goto failed; + } + if (ri.ri_need_recovery == NILFS_RECOVERY_SR_UPDATED) { + down_write(&nilfs->ns_sem); + nilfs_update_last_segment(sbi, 0); + up_write(&nilfs->ns_sem); + } + } + + set_nilfs_loaded(nilfs); + + failed: + nilfs_clear_recovery_info(&ri); + sbi->s_super->s_flags = s_flags; + return err; +} + +static unsigned long long nilfs_max_size(unsigned int blkbits) +{ + unsigned int max_bits; + unsigned long long res = MAX_LFS_FILESIZE; /* page cache limit */ + + max_bits = blkbits + NILFS_BMAP_KEY_BIT; /* bmap size limit */ + if (max_bits < 64) + res = min_t(unsigned long long, res, (1ULL << max_bits) - 1); + return res; +} + +static int +nilfs_store_disk_layout(struct the_nilfs *nilfs, struct super_block *sb, + struct nilfs_super_block *sbp) +{ + if (le32_to_cpu(sbp->s_rev_level) != NILFS_CURRENT_REV) { + printk(KERN_ERR "NILFS: revision mismatch " + "(superblock rev.=%d.%d, current rev.=%d.%d). " + "Please check the version of mkfs.nilfs.\n", + le32_to_cpu(sbp->s_rev_level), + le16_to_cpu(sbp->s_minor_rev_level), + NILFS_CURRENT_REV, NILFS_MINOR_REV); + return -EINVAL; + } + nilfs->ns_inode_size = le16_to_cpu(sbp->s_inode_size); + nilfs->ns_first_ino = le32_to_cpu(sbp->s_first_ino); + + nilfs->ns_blocks_per_segment = le32_to_cpu(sbp->s_blocks_per_segment); + if (nilfs->ns_blocks_per_segment < NILFS_SEG_MIN_BLOCKS) { + printk(KERN_ERR "NILFS: too short segment. \n"); + return -EINVAL; + } + + nilfs->ns_first_data_block = le64_to_cpu(sbp->s_first_data_block); + nilfs->ns_nsegments = le64_to_cpu(sbp->s_nsegments); + nilfs->ns_r_segments_percentage = + le32_to_cpu(sbp->s_r_segments_percentage); + nilfs->ns_nrsvsegs = + max_t(unsigned long, NILFS_MIN_NRSVSEGS, + DIV_ROUND_UP(nilfs->ns_nsegments * + nilfs->ns_r_segments_percentage, 100)); + nilfs->ns_crc_seed = le32_to_cpu(sbp->s_crc_seed); + return 0; +} + +/** + * init_nilfs - initialize a NILFS instance. + * @nilfs: the_nilfs structure + * @sbi: nilfs_sb_info + * @sb: super block + * @data: mount options + * + * init_nilfs() performs common initialization per block device (e.g. + * reading the super block, getting disk layout information, initializing + * shared fields in the_nilfs). It takes on some portion of the jobs + * typically done by a fill_super() routine. This division arises from + * the nature that multiple NILFS instances may be simultaneously + * mounted on a device. + * For multiple mounts on the same device, only the first mount + * invokes these tasks. + * + * Return Value: On success, 0 is returned. On error, a negative error + * code is returned. + */ +int init_nilfs(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi, char *data) +{ + struct super_block *sb = sbi->s_super; + struct buffer_head *sbh; + struct nilfs_super_block *sbp; + struct backing_dev_info *bdi; + int blocksize; + int err = 0; + + down_write(&nilfs->ns_sem); + if (nilfs_init(nilfs)) { + /* Load values from existing the_nilfs */ + sbp = nilfs->ns_sbp; + err = nilfs_store_magic_and_option(sb, sbp, data); + if (err) + goto out; + + blocksize = BLOCK_SIZE << le32_to_cpu(sbp->s_log_block_size); + if (sb->s_blocksize != blocksize && + !sb_set_blocksize(sb, blocksize)) { + printk(KERN_ERR "NILFS: blocksize %d unfit to device\n", + blocksize); + err = -EINVAL; + } + sb->s_maxbytes = nilfs_max_size(sb->s_blocksize_bits); + goto out; + } + + sbp = nilfs_load_super_block(sb, &sbh); + if (!sbp) { + err = -EINVAL; + goto out; + } + err = nilfs_store_magic_and_option(sb, sbp, data); + if (err) + goto failed_sbh; + + blocksize = BLOCK_SIZE << le32_to_cpu(sbp->s_log_block_size); + if (sb->s_blocksize != blocksize) { + sbp = nilfs_reload_super_block(sb, &sbh, blocksize); + if (!sbp) { + err = -EINVAL; + goto out; + /* not failed_sbh; sbh is released automatically + when reloading fails. */ + } + } + nilfs->ns_blocksize_bits = sb->s_blocksize_bits; + + err = nilfs_store_disk_layout(nilfs, sb, sbp); + if (err) + goto failed_sbh; + + sb->s_maxbytes = nilfs_max_size(sb->s_blocksize_bits); + + nilfs->ns_mount_state = le16_to_cpu(sbp->s_state); + nilfs->ns_sbh = sbh; + nilfs->ns_sbp = sbp; + + bdi = nilfs->ns_bdev->bd_inode_backing_dev_info; + if (!bdi) + bdi = nilfs->ns_bdev->bd_inode->i_mapping->backing_dev_info; + nilfs->ns_bdi = bdi ? : &default_backing_dev_info; + + /* Finding last segment */ + nilfs->ns_last_pseg = le64_to_cpu(sbp->s_last_pseg); + nilfs->ns_last_cno = le64_to_cpu(sbp->s_last_cno); + nilfs->ns_last_seq = le64_to_cpu(sbp->s_last_seq); + + nilfs->ns_seg_seq = nilfs->ns_last_seq; + nilfs->ns_segnum = + nilfs_get_segnum_of_block(nilfs, nilfs->ns_last_pseg); + nilfs->ns_cno = nilfs->ns_last_cno + 1; + if (nilfs->ns_segnum >= nilfs->ns_nsegments) { + printk(KERN_ERR "NILFS invalid last segment number.\n"); + err = -EINVAL; + goto failed_sbh; + } + /* Dummy values */ + nilfs->ns_free_segments_count = + nilfs->ns_nsegments - (nilfs->ns_segnum + 1); + + /* Initialize gcinode cache */ + err = nilfs_init_gccache(nilfs); + if (err) + goto failed_sbh; + + set_nilfs_init(nilfs); + err = 0; + out: + up_write(&nilfs->ns_sem); + return err; + + failed_sbh: + brelse(sbh); + goto out; +} + +int nilfs_count_free_blocks(struct the_nilfs *nilfs, sector_t *nblocks) +{ + struct inode *dat = nilfs_dat_inode(nilfs); + unsigned long ncleansegs; + int err; + + down_read(&NILFS_MDT(dat)->mi_sem); /* XXX */ + err = nilfs_sufile_get_ncleansegs(nilfs->ns_sufile, &ncleansegs); + up_read(&NILFS_MDT(dat)->mi_sem); /* XXX */ + if (likely(!err)) + *nblocks = (sector_t)ncleansegs * nilfs->ns_blocks_per_segment; + return err; +} + +void nilfs_dispose_used_segments(struct the_nilfs *nilfs) +{ + struct nilfs_segment_entry *ent, *n; + + /* nilfs->sem must be locked by the caller. */ + if (!nilfs_loaded(nilfs)) + return; + + list_for_each_entry_safe(ent, n, &nilfs->ns_used_segments, list) { + list_del_init(&ent->list); + nilfs_segment_usage_clear_volatile_active(ent->raw_su); + nilfs_close_segment_entry(ent, nilfs->ns_sufile); + nilfs_free_segment_entry(ent); + } +} + +int nilfs_near_disk_full(struct the_nilfs *nilfs) +{ + struct inode *sufile = nilfs->ns_sufile; + unsigned long ncleansegs, nincsegs; + int ret; + + ret = nilfs_sufile_get_ncleansegs(sufile, &ncleansegs); + if (likely(!ret)) { + nincsegs = atomic_read(&nilfs->ns_ndirtyblks) / + nilfs->ns_blocks_per_segment + 1; + if (ncleansegs <= nilfs->ns_nrsvsegs + nincsegs) + ret++; + } + return ret; +} + +int nilfs_checkpoint_is_mounted(struct the_nilfs *nilfs, __u64 cno, + int snapshot_mount) +{ + struct nilfs_sb_info *sbi; + int ret = 0; + + down_read(&nilfs->ns_sem); + if (cno == 0 || cno > nilfs->ns_cno) + goto out_unlock; + + list_for_each_entry(sbi, &nilfs->ns_supers, s_list) { + if (sbi->s_snapshot_cno == cno && + (!snapshot_mount || nilfs_test_opt(sbi, SNAPSHOT))) { + /* exclude read-only mounts */ + ret++; + break; + } + } + /* for protecting recent checkpoints */ + if (cno >= nilfs_last_cno(nilfs)) + ret++; + + out_unlock: + up_read(&nilfs->ns_sem); + return ret; +} -- cgit v1.2.3 From 783f61843e9d14724f5f79ae955a28c27e300467 Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Mon, 6 Apr 2009 19:01:35 -0700 Subject: nilfs2: super block operations This adds super block operations for the nilfs2 file system. Signed-off-by: Ryusuke Konishi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/nilfs2/super.c | 1366 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 1366 insertions(+) create mode 100644 fs/nilfs2/super.c (limited to 'fs/nilfs2') diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c new file mode 100644 index 00000000000..10e82c00aed --- /dev/null +++ b/fs/nilfs2/super.c @@ -0,0 +1,1366 @@ +/* + * super.c - NILFS module and super block management. + * + * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + * Written by Ryusuke Konishi + */ +/* + * linux/fs/ext2/super.c + * + * Copyright (C) 1992, 1993, 1994, 1995 + * Remy Card (card@masi.ibp.fr) + * Laboratoire MASI - Institut Blaise Pascal + * Universite Pierre et Marie Curie (Paris VI) + * + * from + * + * linux/fs/minix/inode.c + * + * Copyright (C) 1991, 1992 Linus Torvalds + * + * Big-endian to little-endian byte-swapping/bitmaps by + * David S. Miller (davem@caip.rutgers.edu), 1995 + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "nilfs.h" +#include "mdt.h" +#include "alloc.h" +#include "page.h" +#include "cpfile.h" +#include "ifile.h" +#include "dat.h" +#include "segment.h" +#include "segbuf.h" + +MODULE_AUTHOR("NTT Corp."); +MODULE_DESCRIPTION("A New Implementation of the Log-structured Filesystem " + "(NILFS)"); +MODULE_VERSION(NILFS_VERSION); +MODULE_LICENSE("GPL"); + +static int nilfs_remount(struct super_block *sb, int *flags, char *data); +static int test_exclusive_mount(struct file_system_type *fs_type, + struct block_device *bdev, int flags); + +/** + * nilfs_error() - report failure condition on a filesystem + * + * nilfs_error() sets an ERROR_FS flag on the superblock as well as + * reporting an error message. It should be called when NILFS detects + * incoherences or defects of meta data on disk. As for sustainable + * errors such as a single-shot I/O error, nilfs_warning() or the printk() + * function should be used instead. + * + * The segment constructor must not call this function because it can + * kill itself. + */ +void nilfs_error(struct super_block *sb, const char *function, + const char *fmt, ...) +{ + struct nilfs_sb_info *sbi = NILFS_SB(sb); + va_list args; + + va_start(args, fmt); + printk(KERN_CRIT "NILFS error (device %s): %s: ", sb->s_id, function); + vprintk(fmt, args); + printk("\n"); + va_end(args); + + if (!(sb->s_flags & MS_RDONLY)) { + struct the_nilfs *nilfs = sbi->s_nilfs; + + if (!nilfs_test_opt(sbi, ERRORS_CONT)) + nilfs_detach_segment_constructor(sbi); + + down_write(&nilfs->ns_sem); + if (!(nilfs->ns_mount_state & NILFS_ERROR_FS)) { + nilfs->ns_mount_state |= NILFS_ERROR_FS; + nilfs->ns_sbp->s_state |= cpu_to_le16(NILFS_ERROR_FS); + nilfs_commit_super(sbi); + } + up_write(&nilfs->ns_sem); + + if (nilfs_test_opt(sbi, ERRORS_RO)) { + printk(KERN_CRIT "Remounting filesystem read-only\n"); + sb->s_flags |= MS_RDONLY; + } + } + + if (nilfs_test_opt(sbi, ERRORS_PANIC)) + panic("NILFS (device %s): panic forced after error\n", + sb->s_id); +} + +void nilfs_warning(struct super_block *sb, const char *function, + const char *fmt, ...) +{ + va_list args; + + va_start(args, fmt); + printk(KERN_WARNING "NILFS warning (device %s): %s: ", + sb->s_id, function); + vprintk(fmt, args); + printk("\n"); + va_end(args); +} + +static struct kmem_cache *nilfs_inode_cachep; + +struct inode *nilfs_alloc_inode(struct super_block *sb) +{ + struct nilfs_inode_info *ii; + + ii = kmem_cache_alloc(nilfs_inode_cachep, GFP_NOFS); + if (!ii) + return NULL; + ii->i_bh = NULL; + ii->i_state = 0; + ii->vfs_inode.i_version = 1; + nilfs_btnode_cache_init(&ii->i_btnode_cache); + return &ii->vfs_inode; +} + +void nilfs_destroy_inode(struct inode *inode) +{ + kmem_cache_free(nilfs_inode_cachep, NILFS_I(inode)); +} + +static void init_once(void *obj) +{ + struct nilfs_inode_info *ii = obj; + + INIT_LIST_HEAD(&ii->i_dirty); +#ifdef CONFIG_NILFS_XATTR + init_rwsem(&ii->xattr_sem); +#endif + nilfs_btnode_cache_init_once(&ii->i_btnode_cache); + ii->i_bmap = (struct nilfs_bmap *)&ii->i_bmap_union; + inode_init_once(&ii->vfs_inode); +} + +static int nilfs_init_inode_cache(void) +{ + nilfs_inode_cachep = kmem_cache_create("nilfs2_inode_cache", + sizeof(struct nilfs_inode_info), + 0, SLAB_RECLAIM_ACCOUNT, + init_once); + + return (nilfs_inode_cachep == NULL) ? -ENOMEM : 0; +} + +static inline void nilfs_destroy_inode_cache(void) +{ + kmem_cache_destroy(nilfs_inode_cachep); +} + +static void nilfs_clear_inode(struct inode *inode) +{ + struct nilfs_inode_info *ii = NILFS_I(inode); + struct nilfs_transaction_info ti; + struct nilfs_sb_info *sbi = NILFS_SB(inode->i_sb); + +#ifdef CONFIG_NILFS_POSIX_ACL + if (ii->i_acl && ii->i_acl != NILFS_ACL_NOT_CACHED) { + posix_acl_release(ii->i_acl); + ii->i_acl = NILFS_ACL_NOT_CACHED; + } + if (ii->i_default_acl && ii->i_default_acl != NILFS_ACL_NOT_CACHED) { + posix_acl_release(ii->i_default_acl); + ii->i_default_acl = NILFS_ACL_NOT_CACHED; + } +#endif + /* + * Free resources allocated in nilfs_read_inode(), here. + */ + nilfs_transaction_begin(inode->i_sb, &ti, 0); + + spin_lock(&sbi->s_inode_lock); + if (!list_empty(&ii->i_dirty)) + list_del_init(&ii->i_dirty); + brelse(ii->i_bh); + ii->i_bh = NULL; + spin_unlock(&sbi->s_inode_lock); + + if (test_bit(NILFS_I_BMAP, &ii->i_state)) + nilfs_bmap_clear(ii->i_bmap); + + nilfs_btnode_cache_clear(&ii->i_btnode_cache); + + nilfs_transaction_end(inode->i_sb, 0); +} + +/** + * nilfs_update_last_segment - change pointer to the latest segment + * @sbi: nilfs_sb_info + * @update_cno: flag whether to update checkpoint number. + * + * nilfs_update_last_segment() changes information in the super block + * after a partial segment is written out successfully. The super + * block is marked dirty. It will be written out at the next VFS sync + * operations such as sync_supers() and generic_shutdown_super(). + */ +void nilfs_update_last_segment(struct nilfs_sb_info *sbi, int update_cno) +{ + struct the_nilfs *nilfs = sbi->s_nilfs; + struct nilfs_super_block *sbp = nilfs->ns_sbp; + + /* nilfs->sem must be locked by the caller. */ + spin_lock(&nilfs->ns_last_segment_lock); + if (update_cno) + nilfs->ns_last_cno = nilfs->ns_cno++; + sbp->s_last_seq = cpu_to_le64(nilfs->ns_last_seq); + sbp->s_last_pseg = cpu_to_le64(nilfs->ns_last_pseg); + sbp->s_last_cno = cpu_to_le64(nilfs->ns_last_cno); + spin_unlock(&nilfs->ns_last_segment_lock); + + sbi->s_super->s_dirt = 1; /* must be set if delaying the call of + nilfs_commit_super() */ +} + +static int nilfs_sync_super(struct nilfs_sb_info *sbi) +{ + struct the_nilfs *nilfs = sbi->s_nilfs; + int err; + int barrier_done = 0; + + if (nilfs_test_opt(sbi, BARRIER)) { + set_buffer_ordered(nilfs->ns_sbh); + barrier_done = 1; + } + retry: + set_buffer_dirty(nilfs->ns_sbh); + err = sync_dirty_buffer(nilfs->ns_sbh); + if (err == -EOPNOTSUPP && barrier_done) { + nilfs_warning(sbi->s_super, __func__, + "barrier-based sync failed. " + "disabling barriers\n"); + nilfs_clear_opt(sbi, BARRIER); + barrier_done = 0; + clear_buffer_ordered(nilfs->ns_sbh); + goto retry; + } + if (unlikely(err)) + printk(KERN_ERR + "NILFS: unable to write superblock (err=%d)\n", err); + else { + nilfs_dispose_used_segments(nilfs); + clear_nilfs_discontinued(nilfs); + } + + return err; +} + +int nilfs_commit_super(struct nilfs_sb_info *sbi) +{ + struct the_nilfs *nilfs = sbi->s_nilfs; + struct nilfs_super_block *sbp = nilfs->ns_sbp; + sector_t nfreeblocks; + int err; + + /* nilfs->sem must be locked by the caller. */ + err = nilfs_count_free_blocks(nilfs, &nfreeblocks); + if (unlikely(err)) { + printk(KERN_ERR "NILFS: failed to count free blocks\n"); + return err; + } + sbp->s_free_blocks_count = cpu_to_le64(nfreeblocks); + sbp->s_wtime = cpu_to_le64(get_seconds()); + sbp->s_sum = 0; + sbp->s_sum = crc32_le(nilfs->ns_crc_seed, (unsigned char *)sbp, + le16_to_cpu(sbp->s_bytes)); + + sbi->s_super->s_dirt = 0; + return nilfs_sync_super(sbi); +} + +static void nilfs_put_super(struct super_block *sb) +{ + struct nilfs_sb_info *sbi = NILFS_SB(sb); + struct the_nilfs *nilfs = sbi->s_nilfs; + + nilfs_detach_segment_constructor(sbi); + + if (!(sb->s_flags & MS_RDONLY)) { + down_write(&nilfs->ns_sem); + nilfs->ns_sbp->s_state = cpu_to_le16(nilfs->ns_mount_state); + nilfs_commit_super(sbi); + up_write(&nilfs->ns_sem); + } + + nilfs_detach_checkpoint(sbi); + put_nilfs(sbi->s_nilfs); + sbi->s_super = NULL; + sb->s_fs_info = NULL; + kfree(sbi); +} + +/** + * nilfs_write_super - write super block(s) of NILFS + * @sb: super_block + * + * nilfs_write_super() gets a fs-dependent lock, writes super block(s), and + * clears s_dirt. This function is called in the section protected by + * lock_super(). + * + * The s_dirt flag is managed by each filesystem and we protect it by ns_sem + * of the struct the_nilfs. Lock order must be as follows: + * + * 1. lock_super() + * 2. down_write(&nilfs->ns_sem) + * + * Inside NILFS, locking ns_sem is enough to protect s_dirt and the buffer + * of the super block (nilfs->ns_sbp). + * + * In most cases, VFS functions call lock_super() before calling these + * methods. So we must be careful not to bring on deadlocks when using + * lock_super(); see generic_shutdown_super(), write_super(), and so on. + * + * Note that order of lock_kernel() and lock_super() depends on contexts + * of VFS. We should also note that lock_kernel() can be used in its + * protective section and only the outermost one has an effect. + */ +static void nilfs_write_super(struct super_block *sb) +{ + struct nilfs_sb_info *sbi = NILFS_SB(sb); + struct the_nilfs *nilfs = sbi->s_nilfs; + + down_write(&nilfs->ns_sem); + if (!(sb->s_flags & MS_RDONLY)) + nilfs_commit_super(sbi); + sb->s_dirt = 0; + up_write(&nilfs->ns_sem); +} + +static int nilfs_sync_fs(struct super_block *sb, int wait) +{ + int err = 0; + + /* This function is called when super block should be written back */ + if (wait) + err = nilfs_construct_segment(sb); + return err; +} + +int nilfs_attach_checkpoint(struct nilfs_sb_info *sbi, __u64 cno) +{ + struct the_nilfs *nilfs = sbi->s_nilfs; + struct nilfs_checkpoint *raw_cp; + struct buffer_head *bh_cp; + int err; + + down_write(&nilfs->ns_sem); + list_add(&sbi->s_list, &nilfs->ns_supers); + up_write(&nilfs->ns_sem); + + sbi->s_ifile = nilfs_mdt_new( + nilfs, sbi->s_super, NILFS_IFILE_INO, NILFS_IFILE_GFP); + if (!sbi->s_ifile) + return -ENOMEM; + + err = nilfs_palloc_init_blockgroup(sbi->s_ifile, nilfs->ns_inode_size); + if (unlikely(err)) + goto failed; + + err = nilfs_cpfile_get_checkpoint(nilfs->ns_cpfile, cno, 0, &raw_cp, + &bh_cp); + if (unlikely(err)) { + if (err == -ENOENT || err == -EINVAL) { + printk(KERN_ERR + "NILFS: Invalid checkpoint " + "(checkpoint number=%llu)\n", + (unsigned long long)cno); + err = -EINVAL; + } + goto failed; + } + err = nilfs_read_inode_common(sbi->s_ifile, &raw_cp->cp_ifile_inode); + if (unlikely(err)) + goto failed_bh; + atomic_set(&sbi->s_inodes_count, le64_to_cpu(raw_cp->cp_inodes_count)); + atomic_set(&sbi->s_blocks_count, le64_to_cpu(raw_cp->cp_blocks_count)); + + nilfs_cpfile_put_checkpoint(nilfs->ns_cpfile, cno, bh_cp); + return 0; + + failed_bh: + nilfs_cpfile_put_checkpoint(nilfs->ns_cpfile, cno, bh_cp); + failed: + nilfs_mdt_destroy(sbi->s_ifile); + sbi->s_ifile = NULL; + + down_write(&nilfs->ns_sem); + list_del_init(&sbi->s_list); + up_write(&nilfs->ns_sem); + + return err; +} + +void nilfs_detach_checkpoint(struct nilfs_sb_info *sbi) +{ + struct the_nilfs *nilfs = sbi->s_nilfs; + + nilfs_mdt_clear(sbi->s_ifile); + nilfs_mdt_destroy(sbi->s_ifile); + sbi->s_ifile = NULL; + down_write(&nilfs->ns_sem); + list_del_init(&sbi->s_list); + up_write(&nilfs->ns_sem); +} + +static int nilfs_mark_recovery_complete(struct nilfs_sb_info *sbi) +{ + struct the_nilfs *nilfs = sbi->s_nilfs; + int err = 0; + + down_write(&nilfs->ns_sem); + if (!(nilfs->ns_mount_state & NILFS_VALID_FS)) { + nilfs->ns_mount_state |= NILFS_VALID_FS; + err = nilfs_commit_super(sbi); + if (likely(!err)) + printk(KERN_INFO "NILFS: recovery complete.\n"); + } + up_write(&nilfs->ns_sem); + return err; +} + +static int nilfs_statfs(struct dentry *dentry, struct kstatfs *buf) +{ + struct super_block *sb = dentry->d_sb; + struct nilfs_sb_info *sbi = NILFS_SB(sb); + unsigned long long blocks; + unsigned long overhead; + unsigned long nrsvblocks; + sector_t nfreeblocks; + struct the_nilfs *nilfs = sbi->s_nilfs; + int err; + + /* + * Compute all of the segment blocks + * + * The blocks before first segment and after last segment + * are excluded. + */ + blocks = nilfs->ns_blocks_per_segment * nilfs->ns_nsegments + - nilfs->ns_first_data_block; + nrsvblocks = nilfs->ns_nrsvsegs * nilfs->ns_blocks_per_segment; + + /* + * Compute the overhead + * + * When distributing meta data blocks outside semgent structure, + * We must count them as the overhead. + */ + overhead = 0; + + err = nilfs_count_free_blocks(nilfs, &nfreeblocks); + if (unlikely(err)) + return err; + + buf->f_type = NILFS_SUPER_MAGIC; + buf->f_bsize = sb->s_blocksize; + buf->f_blocks = blocks - overhead; + buf->f_bfree = nfreeblocks; + buf->f_bavail = (buf->f_bfree >= nrsvblocks) ? + (buf->f_bfree - nrsvblocks) : 0; + buf->f_files = atomic_read(&sbi->s_inodes_count); + buf->f_ffree = 0; /* nilfs_count_free_inodes(sb); */ + buf->f_namelen = NILFS_NAME_LEN; + return 0; +} + +static struct super_operations nilfs_sops = { + .alloc_inode = nilfs_alloc_inode, + .destroy_inode = nilfs_destroy_inode, + .dirty_inode = nilfs_dirty_inode, + /* .write_inode = nilfs_write_inode, */ + /* .put_inode = nilfs_put_inode, */ + /* .drop_inode = nilfs_drop_inode, */ + .delete_inode = nilfs_delete_inode, + .put_super = nilfs_put_super, + .write_super = nilfs_write_super, + .sync_fs = nilfs_sync_fs, + /* .write_super_lockfs */ + /* .unlockfs */ + .statfs = nilfs_statfs, + .remount_fs = nilfs_remount, + .clear_inode = nilfs_clear_inode, + /* .umount_begin */ + /* .show_options */ +}; + +static struct inode * +nilfs_nfs_get_inode(struct super_block *sb, u64 ino, u32 generation) +{ + struct inode *inode; + + if (ino < NILFS_FIRST_INO(sb) && ino != NILFS_ROOT_INO && + ino != NILFS_SKETCH_INO) + return ERR_PTR(-ESTALE); + + inode = nilfs_iget(sb, ino); + if (IS_ERR(inode)) + return ERR_CAST(inode); + if (generation && inode->i_generation != generation) { + iput(inode); + return ERR_PTR(-ESTALE); + } + + return inode; +} + +static struct dentry * +nilfs_fh_to_dentry(struct super_block *sb, struct fid *fid, int fh_len, + int fh_type) +{ + return generic_fh_to_dentry(sb, fid, fh_len, fh_type, + nilfs_nfs_get_inode); +} + +static struct dentry * +nilfs_fh_to_parent(struct super_block *sb, struct fid *fid, int fh_len, + int fh_type) +{ + return generic_fh_to_parent(sb, fid, fh_len, fh_type, + nilfs_nfs_get_inode); +} + +static struct export_operations nilfs_export_ops = { + .fh_to_dentry = nilfs_fh_to_dentry, + .fh_to_parent = nilfs_fh_to_parent, + .get_parent = nilfs_get_parent, +}; + +enum { + Opt_err_cont, Opt_err_panic, Opt_err_ro, + Opt_barrier, Opt_snapshot, Opt_order, + Opt_err, +}; + +static match_table_t tokens = { + {Opt_err_cont, "errors=continue"}, + {Opt_err_panic, "errors=panic"}, + {Opt_err_ro, "errors=remount-ro"}, + {Opt_barrier, "barrier=%s"}, + {Opt_snapshot, "cp=%u"}, + {Opt_order, "order=%s"}, + {Opt_err, NULL} +}; + +static int match_bool(substring_t *s, int *result) +{ + int len = s->to - s->from; + + if (strncmp(s->from, "on", len) == 0) + *result = 1; + else if (strncmp(s->from, "off", len) == 0) + *result = 0; + else + return 1; + return 0; +} + +static int parse_options(char *options, struct super_block *sb) +{ + struct nilfs_sb_info *sbi = NILFS_SB(sb); + char *p; + substring_t args[MAX_OPT_ARGS]; + int option; + + if (!options) + return 1; + + while ((p = strsep(&options, ",")) != NULL) { + int token; + if (!*p) + continue; + + token = match_token(p, tokens, args); + switch (token) { + case Opt_barrier: + if (match_bool(&args[0], &option)) + return 0; + if (option) + nilfs_set_opt(sbi, BARRIER); + else + nilfs_clear_opt(sbi, BARRIER); + break; + case Opt_order: + if (strcmp(args[0].from, "relaxed") == 0) + /* Ordered data semantics */ + nilfs_clear_opt(sbi, STRICT_ORDER); + else if (strcmp(args[0].from, "strict") == 0) + /* Strict in-order semantics */ + nilfs_set_opt(sbi, STRICT_ORDER); + else + return 0; + break; + case Opt_err_panic: + nilfs_write_opt(sbi, ERROR_MODE, ERRORS_PANIC); + break; + case Opt_err_ro: + nilfs_write_opt(sbi, ERROR_MODE, ERRORS_RO); + break; + case Opt_err_cont: + nilfs_write_opt(sbi, ERROR_MODE, ERRORS_CONT); + break; + case Opt_snapshot: + if (match_int(&args[0], &option) || option <= 0) + return 0; + if (!(sb->s_flags & MS_RDONLY)) + return 0; + sbi->s_snapshot_cno = option; + nilfs_set_opt(sbi, SNAPSHOT); + break; + default: + printk(KERN_ERR + "NILFS: Unrecognized mount option \"%s\"\n", p); + return 0; + } + } + return 1; +} + +static inline void +nilfs_set_default_options(struct nilfs_sb_info *sbi, + struct nilfs_super_block *sbp) +{ + sbi->s_mount_opt = + NILFS_MOUNT_ERRORS_CONT | NILFS_MOUNT_BARRIER; +} + +static int nilfs_setup_super(struct nilfs_sb_info *sbi) +{ + struct the_nilfs *nilfs = sbi->s_nilfs; + struct nilfs_super_block *sbp = nilfs->ns_sbp; + int max_mnt_count = le16_to_cpu(sbp->s_max_mnt_count); + int mnt_count = le16_to_cpu(sbp->s_mnt_count); + + /* nilfs->sem must be locked by the caller. */ + if (!(nilfs->ns_mount_state & NILFS_VALID_FS)) { + printk(KERN_WARNING "NILFS warning: mounting unchecked fs\n"); + } else if (nilfs->ns_mount_state & NILFS_ERROR_FS) { + printk(KERN_WARNING + "NILFS warning: mounting fs with errors\n"); +#if 0 + } else if (max_mnt_count >= 0 && mnt_count >= max_mnt_count) { + printk(KERN_WARNING + "NILFS warning: maximal mount count reached\n"); +#endif + } + if (!max_mnt_count) + sbp->s_max_mnt_count = cpu_to_le16(NILFS_DFL_MAX_MNT_COUNT); + + sbp->s_mnt_count = cpu_to_le16(mnt_count + 1); + sbp->s_state = cpu_to_le16(le16_to_cpu(sbp->s_state) & ~NILFS_VALID_FS); + sbp->s_mtime = cpu_to_le64(get_seconds()); + return nilfs_commit_super(sbi); +} + +struct nilfs_super_block * +nilfs_load_super_block(struct super_block *sb, struct buffer_head **pbh) +{ + int blocksize; + unsigned long offset, sb_index; + + /* + * Adjusting block size + * Blocksize will be enlarged when it is smaller than hardware + * sector size. + * Disk format of superblock does not change. + */ + blocksize = sb_min_blocksize(sb, BLOCK_SIZE); + if (!blocksize) { + printk(KERN_ERR + "NILFS: unable to set blocksize of superblock\n"); + return NULL; + } + sb_index = NILFS_SB_OFFSET_BYTES / blocksize; + offset = NILFS_SB_OFFSET_BYTES % blocksize; + + *pbh = sb_bread(sb, sb_index); + if (!*pbh) { + printk(KERN_ERR "NILFS: unable to read superblock\n"); + return NULL; + } + return (struct nilfs_super_block *)((char *)(*pbh)->b_data + offset); +} + +struct nilfs_super_block * +nilfs_reload_super_block(struct super_block *sb, struct buffer_head **pbh, + int blocksize) +{ + struct nilfs_super_block *sbp; + unsigned long offset, sb_index; + int hw_blocksize = bdev_hardsect_size(sb->s_bdev); + + if (blocksize < hw_blocksize) { + printk(KERN_ERR + "NILFS: blocksize %d too small for device " + "(sector-size = %d).\n", + blocksize, hw_blocksize); + goto failed_sbh; + } + brelse(*pbh); + sb_set_blocksize(sb, blocksize); + + sb_index = NILFS_SB_OFFSET_BYTES / blocksize; + offset = NILFS_SB_OFFSET_BYTES % blocksize; + + *pbh = sb_bread(sb, sb_index); + if (!*pbh) { + printk(KERN_ERR + "NILFS: cannot read superblock on 2nd try.\n"); + goto failed; + } + + sbp = (struct nilfs_super_block *)((char *)(*pbh)->b_data + offset); + if (sbp->s_magic != cpu_to_le16(NILFS_SUPER_MAGIC)) { + printk(KERN_ERR + "NILFS: !? Magic mismatch on 2nd try.\n"); + goto failed_sbh; + } + return sbp; + + failed_sbh: + brelse(*pbh); + + failed: + return NULL; +} + +int nilfs_store_magic_and_option(struct super_block *sb, + struct nilfs_super_block *sbp, + char *data) +{ + struct nilfs_sb_info *sbi = NILFS_SB(sb); + + /* trying to fill super (1st stage) */ + sb->s_magic = le16_to_cpu(sbp->s_magic); + + /* FS independent flags */ +#ifdef NILFS_ATIME_DISABLE + sb->s_flags |= MS_NOATIME; +#endif + + if (sb->s_magic != NILFS_SUPER_MAGIC) { + printk("NILFS: Can't find nilfs on dev %s.\n", sb->s_id); + return -EINVAL; + } + + nilfs_set_default_options(sbi, sbp); + + sbi->s_resuid = le16_to_cpu(sbp->s_def_resuid); + sbi->s_resgid = le16_to_cpu(sbp->s_def_resgid); + sbi->s_interval = le32_to_cpu(sbp->s_c_interval); + sbi->s_watermark = le32_to_cpu(sbp->s_c_block_max); + + if (!parse_options(data, sb)) + return -EINVAL; + + return 0; +} + +/** + * nilfs_fill_super() - initialize a super block instance + * @sb: super_block + * @data: mount options + * @silent: silent mode flag + * @nilfs: the_nilfs struct + * + * This function is called exclusively by bd_mount_mutex. + * So, the recovery process is protected from other simultaneous mounts. + */ +static int +nilfs_fill_super(struct super_block *sb, void *data, int silent, + struct the_nilfs *nilfs) +{ + struct nilfs_sb_info *sbi; + struct inode *root; + __u64 cno; + int err; + + sbi = kzalloc(sizeof(*sbi), GFP_KERNEL); + if (!sbi) + return -ENOMEM; + + sb->s_fs_info = sbi; + + get_nilfs(nilfs); + sbi->s_nilfs = nilfs; + sbi->s_super = sb; + + err = init_nilfs(nilfs, sbi, (char *)data); + if (err) + goto failed_sbi; + + spin_lock_init(&sbi->s_inode_lock); + INIT_LIST_HEAD(&sbi->s_dirty_files); + INIT_LIST_HEAD(&sbi->s_list); + + /* + * Following initialization is overlapped because + * nilfs_sb_info structure has been cleared at the beginning. + * But we reserve them to keep our interest and make ready + * for the future change. + */ + get_random_bytes(&sbi->s_next_generation, + sizeof(sbi->s_next_generation)); + spin_lock_init(&sbi->s_next_gen_lock); + + sb->s_op = &nilfs_sops; + sb->s_export_op = &nilfs_export_ops; + sb->s_root = NULL; + + if (!nilfs_loaded(nilfs)) { + err = load_nilfs(nilfs, sbi); + if (err) + goto failed_sbi; + } + cno = nilfs_last_cno(nilfs); + + if (sb->s_flags & MS_RDONLY) { + if (nilfs_test_opt(sbi, SNAPSHOT)) { + if (!nilfs_cpfile_is_snapshot(nilfs->ns_cpfile, + sbi->s_snapshot_cno)) { + printk(KERN_ERR + "NILFS: The specified checkpoint is " + "not a snapshot " + "(checkpoint number=%llu).\n", + (unsigned long long)sbi->s_snapshot_cno); + err = -EINVAL; + goto failed_sbi; + } + cno = sbi->s_snapshot_cno; + } else + /* Read-only mount */ + sbi->s_snapshot_cno = cno; + } + + err = nilfs_attach_checkpoint(sbi, cno); + if (err) { + printk(KERN_ERR "NILFS: error loading a checkpoint" + " (checkpoint number=%llu).\n", (unsigned long long)cno); + goto failed_sbi; + } + + if (!(sb->s_flags & MS_RDONLY)) { + err = nilfs_attach_segment_constructor(sbi, NULL); + if (err) + goto failed_checkpoint; + } + + root = nilfs_iget(sb, NILFS_ROOT_INO); + if (IS_ERR(root)) { + printk(KERN_ERR "NILFS: get root inode failed\n"); + err = PTR_ERR(root); + goto failed_segctor; + } + if (!S_ISDIR(root->i_mode) || !root->i_blocks || !root->i_size) { + iput(root); + printk(KERN_ERR "NILFS: corrupt root inode.\n"); + err = -EINVAL; + goto failed_segctor; + } + sb->s_root = d_alloc_root(root); + if (!sb->s_root) { + iput(root); + printk(KERN_ERR "NILFS: get root dentry failed\n"); + err = -ENOMEM; + goto failed_segctor; + } + + if (!(sb->s_flags & MS_RDONLY)) { + down_write(&nilfs->ns_sem); + nilfs_setup_super(sbi); + up_write(&nilfs->ns_sem); + } + + err = nilfs_mark_recovery_complete(sbi); + if (unlikely(err)) { + printk(KERN_ERR "NILFS: recovery failed.\n"); + goto failed_root; + } + + return 0; + + failed_root: + dput(sb->s_root); + sb->s_root = NULL; + + failed_segctor: + nilfs_detach_segment_constructor(sbi); + + failed_checkpoint: + nilfs_detach_checkpoint(sbi); + + failed_sbi: + put_nilfs(nilfs); + sb->s_fs_info = NULL; + kfree(sbi); + return err; +} + +static int nilfs_remount(struct super_block *sb, int *flags, char *data) +{ + struct nilfs_sb_info *sbi = NILFS_SB(sb); + struct nilfs_super_block *sbp; + struct the_nilfs *nilfs = sbi->s_nilfs; + unsigned long old_sb_flags; + struct nilfs_mount_options old_opts; + int err; + + old_sb_flags = sb->s_flags; + old_opts.mount_opt = sbi->s_mount_opt; + old_opts.snapshot_cno = sbi->s_snapshot_cno; + + if (!parse_options(data, sb)) { + err = -EINVAL; + goto restore_opts; + } + sb->s_flags = (sb->s_flags & ~MS_POSIXACL); + + if ((*flags & MS_RDONLY) && + sbi->s_snapshot_cno != old_opts.snapshot_cno) { + printk(KERN_WARNING "NILFS (device %s): couldn't " + "remount to a different snapshot. \n", + sb->s_id); + err = -EINVAL; + goto restore_opts; + } + + if ((*flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY)) + goto out; + if (*flags & MS_RDONLY) { + /* Shutting down the segment constructor */ + nilfs_detach_segment_constructor(sbi); + sb->s_flags |= MS_RDONLY; + + sbi->s_snapshot_cno = nilfs_last_cno(nilfs); + /* nilfs_set_opt(sbi, SNAPSHOT); */ + + /* + * Remounting a valid RW partition RDONLY, so set + * the RDONLY flag and then mark the partition as valid again. + */ + down_write(&nilfs->ns_sem); + sbp = nilfs->ns_sbp; + if (!(sbp->s_state & le16_to_cpu(NILFS_VALID_FS)) && + (nilfs->ns_mount_state & NILFS_VALID_FS)) + sbp->s_state = cpu_to_le16(nilfs->ns_mount_state); + sbp->s_mtime = cpu_to_le64(get_seconds()); + nilfs_commit_super(sbi); + up_write(&nilfs->ns_sem); + } else { + /* + * Mounting a RDONLY partition read-write, so reread and + * store the current valid flag. (It may have been changed + * by fsck since we originally mounted the partition.) + */ + down(&sb->s_bdev->bd_mount_sem); + /* Check existing RW-mount */ + if (test_exclusive_mount(sb->s_type, sb->s_bdev, 0)) { + printk(KERN_WARNING "NILFS (device %s): couldn't " + "remount because a RW-mount exists.\n", + sb->s_id); + err = -EBUSY; + goto rw_remount_failed; + } + if (sbi->s_snapshot_cno != nilfs_last_cno(nilfs)) { + printk(KERN_WARNING "NILFS (device %s): couldn't " + "remount because the current RO-mount is not " + "the latest one.\n", + sb->s_id); + err = -EINVAL; + goto rw_remount_failed; + } + sb->s_flags &= ~MS_RDONLY; + nilfs_clear_opt(sbi, SNAPSHOT); + sbi->s_snapshot_cno = 0; + + err = nilfs_attach_segment_constructor(sbi, NULL); + if (err) + goto rw_remount_failed; + + down_write(&nilfs->ns_sem); + nilfs_setup_super(sbi); + up_write(&nilfs->ns_sem); + + up(&sb->s_bdev->bd_mount_sem); + } + out: + return 0; + + rw_remount_failed: + up(&sb->s_bdev->bd_mount_sem); + restore_opts: + sb->s_flags = old_sb_flags; + sbi->s_mount_opt = old_opts.mount_opt; + sbi->s_snapshot_cno = old_opts.snapshot_cno; + return err; +} + +struct nilfs_super_data { + struct block_device *bdev; + __u64 cno; + int flags; +}; + +/** + * nilfs_identify - pre-read mount options needed to identify mount instance + * @data: mount options + * @sd: nilfs_super_data + */ +static int nilfs_identify(char *data, struct nilfs_super_data *sd) +{ + char *p, *options = data; + substring_t args[MAX_OPT_ARGS]; + int option, token; + int ret = 0; + + do { + p = strsep(&options, ","); + if (p != NULL && *p) { + token = match_token(p, tokens, args); + if (token == Opt_snapshot) { + if (!(sd->flags & MS_RDONLY)) + ret++; + else { + ret = match_int(&args[0], &option); + if (!ret) { + if (option > 0) + sd->cno = option; + else + ret++; + } + } + } + if (ret) + printk(KERN_ERR + "NILFS: invalid mount option: %s\n", p); + } + if (!options) + break; + BUG_ON(options == data); + *(options - 1) = ','; + } while (!ret); + return ret; +} + +static int nilfs_set_bdev_super(struct super_block *s, void *data) +{ + struct nilfs_super_data *sd = data; + + s->s_bdev = sd->bdev; + s->s_dev = s->s_bdev->bd_dev; + return 0; +} + +static int nilfs_test_bdev_super(struct super_block *s, void *data) +{ + struct nilfs_super_data *sd = data; + + return s->s_bdev == sd->bdev; +} + +static int nilfs_test_bdev_super2(struct super_block *s, void *data) +{ + struct nilfs_super_data *sd = data; + int ret; + + if (s->s_bdev != sd->bdev) + return 0; + + if (!((s->s_flags | sd->flags) & MS_RDONLY)) + return 1; /* Reuse an old R/W-mode super_block */ + + if (s->s_flags & sd->flags & MS_RDONLY) { + if (down_read_trylock(&s->s_umount)) { + ret = s->s_root && + (sd->cno == NILFS_SB(s)->s_snapshot_cno); + up_read(&s->s_umount); + /* + * This path is locked with sb_lock by sget(). + * So, drop_super() causes deadlock. + */ + return ret; + } + } + return 0; +} + +static int +nilfs_get_sb(struct file_system_type *fs_type, int flags, + const char *dev_name, void *data, struct vfsmount *mnt) +{ + struct nilfs_super_data sd; + struct super_block *s, *s2; + struct the_nilfs *nilfs = NULL; + int err, need_to_close = 1; + + sd.bdev = open_bdev_exclusive(dev_name, flags, fs_type); + if (IS_ERR(sd.bdev)) + return PTR_ERR(sd.bdev); + + /* + * To get mount instance using sget() vfs-routine, NILFS needs + * much more information than normal filesystems to identify mount + * instance. For snapshot mounts, not only a mount type (ro-mount + * or rw-mount) but also a checkpoint number is required. + * The results are passed in sget() using nilfs_super_data. + */ + sd.cno = 0; + sd.flags = flags; + if (nilfs_identify((char *)data, &sd)) { + err = -EINVAL; + goto failed; + } + + /* + * once the super is inserted into the list by sget, s_umount + * will protect the lockfs code from trying to start a snapshot + * while we are mounting + */ + down(&sd.bdev->bd_mount_sem); + if (!sd.cno && + (err = test_exclusive_mount(fs_type, sd.bdev, flags ^ MS_RDONLY))) { + err = (err < 0) ? : -EBUSY; + goto failed_unlock; + } + + /* + * Phase-1: search any existent instance and get the_nilfs + */ + s = sget(fs_type, nilfs_test_bdev_super, nilfs_set_bdev_super, &sd); + if (IS_ERR(s)) + goto error_s; + + if (!s->s_root) { + err = -ENOMEM; + nilfs = alloc_nilfs(sd.bdev); + if (!nilfs) + goto cancel_new; + } else { + struct nilfs_sb_info *sbi = NILFS_SB(s); + + BUG_ON(!sbi || !sbi->s_nilfs); + /* + * s_umount protects super_block from unmount process; + * It covers pointers of nilfs_sb_info and the_nilfs. + */ + nilfs = sbi->s_nilfs; + get_nilfs(nilfs); + up_write(&s->s_umount); + + /* + * Phase-2: search specified snapshot or R/W mode super_block + */ + if (!sd.cno) + /* trying to get the latest checkpoint. */ + sd.cno = nilfs_last_cno(nilfs); + + s2 = sget(fs_type, nilfs_test_bdev_super2, + nilfs_set_bdev_super, &sd); + deactivate_super(s); + /* + * Although deactivate_super() invokes close_bdev_exclusive() at + * kill_block_super(). Here, s is an existent mount; we need + * one more close_bdev_exclusive() call. + */ + s = s2; + if (IS_ERR(s)) + goto error_s; + } + + if (!s->s_root) { + char b[BDEVNAME_SIZE]; + + s->s_flags = flags; + strlcpy(s->s_id, bdevname(sd.bdev, b), sizeof(s->s_id)); + sb_set_blocksize(s, block_size(sd.bdev)); + + err = nilfs_fill_super(s, data, flags & MS_VERBOSE, nilfs); + if (err) + goto cancel_new; + + s->s_flags |= MS_ACTIVE; + need_to_close = 0; + } else if (!(s->s_flags & MS_RDONLY)) { + err = -EBUSY; + } + + up(&sd.bdev->bd_mount_sem); + put_nilfs(nilfs); + if (need_to_close) + close_bdev_exclusive(sd.bdev, flags); + simple_set_mnt(mnt, s); + return 0; + + error_s: + up(&sd.bdev->bd_mount_sem); + if (nilfs) + put_nilfs(nilfs); + close_bdev_exclusive(sd.bdev, flags); + return PTR_ERR(s); + + failed_unlock: + up(&sd.bdev->bd_mount_sem); + failed: + close_bdev_exclusive(sd.bdev, flags); + + return err; + + cancel_new: + /* Abandoning the newly allocated superblock */ + up(&sd.bdev->bd_mount_sem); + if (nilfs) + put_nilfs(nilfs); + up_write(&s->s_umount); + deactivate_super(s); + /* + * deactivate_super() invokes close_bdev_exclusive(). + * We must finish all post-cleaning before this call; + * put_nilfs() and unlocking bd_mount_sem need the block device. + */ + return err; +} + +static int nilfs_test_bdev_super3(struct super_block *s, void *data) +{ + struct nilfs_super_data *sd = data; + int ret; + + if (s->s_bdev != sd->bdev) + return 0; + if (down_read_trylock(&s->s_umount)) { + ret = (s->s_flags & MS_RDONLY) && s->s_root && + nilfs_test_opt(NILFS_SB(s), SNAPSHOT); + up_read(&s->s_umount); + if (ret) + return 0; /* ignore snapshot mounts */ + } + return !((sd->flags ^ s->s_flags) & MS_RDONLY); +} + +static int __false_bdev_super(struct super_block *s, void *data) +{ +#if 0 /* XXX: workaround for lock debug. This is not good idea */ + up_write(&s->s_umount); +#endif + return -EFAULT; +} + +/** + * test_exclusive_mount - check whether an exclusive RW/RO mount exists or not. + * fs_type: filesystem type + * bdev: block device + * flag: 0 (check rw-mount) or MS_RDONLY (check ro-mount) + * res: pointer to an integer to store result + * + * This function must be called within a section protected by bd_mount_mutex. + */ +static int test_exclusive_mount(struct file_system_type *fs_type, + struct block_device *bdev, int flags) +{ + struct super_block *s; + struct nilfs_super_data sd = { .flags = flags, .bdev = bdev }; + + s = sget(fs_type, nilfs_test_bdev_super3, __false_bdev_super, &sd); + if (IS_ERR(s)) { + if (PTR_ERR(s) != -EFAULT) + return PTR_ERR(s); + return 0; /* Not found */ + } + up_write(&s->s_umount); + deactivate_super(s); + return 1; /* Found */ +} + +struct file_system_type nilfs_fs_type = { + .owner = THIS_MODULE, + .name = "nilfs2", + .get_sb = nilfs_get_sb, + .kill_sb = kill_block_super, + .fs_flags = FS_REQUIRES_DEV, +}; + +static int __init init_nilfs_fs(void) +{ + int err; + + err = nilfs_init_inode_cache(); + if (err) + goto failed; + + err = nilfs_init_transaction_cache(); + if (err) + goto failed_inode_cache; + + err = nilfs_init_segbuf_cache(); + if (err) + goto failed_transaction_cache; + + err = nilfs_btree_path_cache_init(); + if (err) + goto failed_segbuf_cache; + + err = register_filesystem(&nilfs_fs_type); + if (err) + goto failed_btree_path_cache; + + return 0; + + failed_btree_path_cache: + nilfs_btree_path_cache_destroy(); + + failed_segbuf_cache: + nilfs_destroy_segbuf_cache(); + + failed_transaction_cache: + nilfs_destroy_transaction_cache(); + + failed_inode_cache: + nilfs_destroy_inode_cache(); + + failed: + return err; +} + +static void __exit exit_nilfs_fs(void) +{ + nilfs_destroy_segbuf_cache(); + nilfs_destroy_transaction_cache(); + nilfs_destroy_inode_cache(); + nilfs_btree_path_cache_destroy(); + unregister_filesystem(&nilfs_fs_type); +} + +module_init(init_nilfs_fs) +module_exit(exit_nilfs_fs) -- cgit v1.2.3 From 64b5a32e0b3680a9655b3f2e668a646068e71d33 Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Mon, 6 Apr 2009 19:01:36 -0700 Subject: nilfs2: segment buffer This adds the segment buffer which is used to constuct logs. [akpm@linux-foundation.org: BIO_RW_SYNC got removed] Signed-off-by: Ryusuke Konishi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/nilfs2/segbuf.c | 461 +++++++++++++++++++++++++++++++++++++++++++++++++++++ fs/nilfs2/segbuf.h | 203 +++++++++++++++++++++++ 2 files changed, 664 insertions(+) create mode 100644 fs/nilfs2/segbuf.c create mode 100644 fs/nilfs2/segbuf.h (limited to 'fs/nilfs2') diff --git a/fs/nilfs2/segbuf.c b/fs/nilfs2/segbuf.c new file mode 100644 index 00000000000..3d3ea8351f6 --- /dev/null +++ b/fs/nilfs2/segbuf.c @@ -0,0 +1,461 @@ +/* + * segbuf.c - NILFS segment buffer + * + * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + * Written by Ryusuke Konishi + * + */ + +#include +#include +#include +#include "page.h" +#include "segbuf.h" +#include "seglist.h" + + +static struct kmem_cache *nilfs_segbuf_cachep; + +static void nilfs_segbuf_init_once(void *obj) +{ + memset(obj, 0, sizeof(struct nilfs_segment_buffer)); +} + +int __init nilfs_init_segbuf_cache(void) +{ + nilfs_segbuf_cachep = + kmem_cache_create("nilfs2_segbuf_cache", + sizeof(struct nilfs_segment_buffer), + 0, SLAB_RECLAIM_ACCOUNT, + nilfs_segbuf_init_once); + + return (nilfs_segbuf_cachep == NULL) ? -ENOMEM : 0; +} + +void nilfs_destroy_segbuf_cache(void) +{ + kmem_cache_destroy(nilfs_segbuf_cachep); +} + +struct nilfs_segment_buffer *nilfs_segbuf_new(struct super_block *sb) +{ + struct nilfs_segment_buffer *segbuf; + + segbuf = kmem_cache_alloc(nilfs_segbuf_cachep, GFP_NOFS); + if (unlikely(!segbuf)) + return NULL; + + segbuf->sb_super = sb; + INIT_LIST_HEAD(&segbuf->sb_list); + INIT_LIST_HEAD(&segbuf->sb_segsum_buffers); + INIT_LIST_HEAD(&segbuf->sb_payload_buffers); + segbuf->sb_segent = NULL; + return segbuf; +} + +void nilfs_segbuf_free(struct nilfs_segment_buffer *segbuf) +{ + struct nilfs_segment_entry *ent = segbuf->sb_segent; + + if (ent != NULL && list_empty(&ent->list)) { + /* free isolated segment list head */ + nilfs_free_segment_entry(segbuf->sb_segent); + segbuf->sb_segent = NULL; + } + kmem_cache_free(nilfs_segbuf_cachep, segbuf); +} + +int nilfs_segbuf_map(struct nilfs_segment_buffer *segbuf, __u64 segnum, + unsigned long offset, struct the_nilfs *nilfs) +{ + struct nilfs_segment_entry *ent; + + segbuf->sb_segnum = segnum; + nilfs_get_segment_range(nilfs, segnum, &segbuf->sb_fseg_start, + &segbuf->sb_fseg_end); + + segbuf->sb_pseg_start = segbuf->sb_fseg_start + offset; + segbuf->sb_rest_blocks = + segbuf->sb_fseg_end - segbuf->sb_pseg_start + 1; + + /* Attach a segment list head */ + ent = segbuf->sb_segent; + if (ent == NULL) { + segbuf->sb_segent = nilfs_alloc_segment_entry(segnum); + if (unlikely(!segbuf->sb_segent)) + return -ENOMEM; + } else { + BUG_ON(ent->bh_su || !list_empty(&ent->list)); + ent->segnum = segnum; + } + return 0; +} + +void nilfs_segbuf_set_next_segnum(struct nilfs_segment_buffer *segbuf, + __u64 nextnum, struct the_nilfs *nilfs) +{ + segbuf->sb_nextnum = nextnum; + segbuf->sb_sum.next = nilfs_get_segment_start_blocknr(nilfs, nextnum); +} + +int nilfs_segbuf_extend_segsum(struct nilfs_segment_buffer *segbuf) +{ + struct buffer_head *bh; + + bh = sb_getblk(segbuf->sb_super, + segbuf->sb_pseg_start + segbuf->sb_sum.nsumblk); + if (unlikely(!bh)) + return -ENOMEM; + + nilfs_segbuf_add_segsum_buffer(segbuf, bh); + return 0; +} + +int nilfs_segbuf_extend_payload(struct nilfs_segment_buffer *segbuf, + struct buffer_head **bhp) +{ + struct buffer_head *bh; + + bh = sb_getblk(segbuf->sb_super, + segbuf->sb_pseg_start + segbuf->sb_sum.nblocks); + if (unlikely(!bh)) + return -ENOMEM; + + nilfs_segbuf_add_payload_buffer(segbuf, bh); + *bhp = bh; + return 0; +} + +int nilfs_segbuf_reset(struct nilfs_segment_buffer *segbuf, unsigned flags, + time_t ctime) +{ + int err; + + segbuf->sb_sum.nblocks = segbuf->sb_sum.nsumblk = 0; + err = nilfs_segbuf_extend_segsum(segbuf); + if (unlikely(err)) + return err; + + segbuf->sb_sum.flags = flags; + segbuf->sb_sum.sumbytes = sizeof(struct nilfs_segment_summary); + segbuf->sb_sum.nfinfo = segbuf->sb_sum.nfileblk = 0; + segbuf->sb_sum.ctime = ctime; + + segbuf->sb_io_error = 0; + return 0; +} + +/* + * Setup segument summary + */ +void nilfs_segbuf_fill_in_segsum(struct nilfs_segment_buffer *segbuf) +{ + struct nilfs_segment_summary *raw_sum; + struct buffer_head *bh_sum; + + bh_sum = list_entry(segbuf->sb_segsum_buffers.next, + struct buffer_head, b_assoc_buffers); + raw_sum = (struct nilfs_segment_summary *)bh_sum->b_data; + + raw_sum->ss_magic = cpu_to_le32(NILFS_SEGSUM_MAGIC); + raw_sum->ss_bytes = cpu_to_le16(sizeof(*raw_sum)); + raw_sum->ss_flags = cpu_to_le16(segbuf->sb_sum.flags); + raw_sum->ss_seq = cpu_to_le64(segbuf->sb_sum.seg_seq); + raw_sum->ss_create = cpu_to_le64(segbuf->sb_sum.ctime); + raw_sum->ss_next = cpu_to_le64(segbuf->sb_sum.next); + raw_sum->ss_nblocks = cpu_to_le32(segbuf->sb_sum.nblocks); + raw_sum->ss_nfinfo = cpu_to_le32(segbuf->sb_sum.nfinfo); + raw_sum->ss_sumbytes = cpu_to_le32(segbuf->sb_sum.sumbytes); + raw_sum->ss_pad = 0; +} + +/* + * CRC calculation routines + */ +void nilfs_segbuf_fill_in_segsum_crc(struct nilfs_segment_buffer *segbuf, + u32 seed) +{ + struct buffer_head *bh; + struct nilfs_segment_summary *raw_sum; + unsigned long size, bytes = segbuf->sb_sum.sumbytes; + u32 crc; + + bh = list_entry(segbuf->sb_segsum_buffers.next, struct buffer_head, + b_assoc_buffers); + + raw_sum = (struct nilfs_segment_summary *)bh->b_data; + size = min_t(unsigned long, bytes, bh->b_size); + crc = crc32_le(seed, + (unsigned char *)raw_sum + + sizeof(raw_sum->ss_datasum) + sizeof(raw_sum->ss_sumsum), + size - (sizeof(raw_sum->ss_datasum) + + sizeof(raw_sum->ss_sumsum))); + + list_for_each_entry_continue(bh, &segbuf->sb_segsum_buffers, + b_assoc_buffers) { + bytes -= size; + size = min_t(unsigned long, bytes, bh->b_size); + crc = crc32_le(crc, bh->b_data, size); + } + raw_sum->ss_sumsum = cpu_to_le32(crc); +} + +void nilfs_segbuf_fill_in_data_crc(struct nilfs_segment_buffer *segbuf, + u32 seed) +{ + struct buffer_head *bh; + struct nilfs_segment_summary *raw_sum; + void *kaddr; + u32 crc; + + bh = list_entry(segbuf->sb_segsum_buffers.next, struct buffer_head, + b_assoc_buffers); + raw_sum = (struct nilfs_segment_summary *)bh->b_data; + crc = crc32_le(seed, + (unsigned char *)raw_sum + sizeof(raw_sum->ss_datasum), + bh->b_size - sizeof(raw_sum->ss_datasum)); + + list_for_each_entry_continue(bh, &segbuf->sb_segsum_buffers, + b_assoc_buffers) { + crc = crc32_le(crc, bh->b_data, bh->b_size); + } + list_for_each_entry(bh, &segbuf->sb_payload_buffers, b_assoc_buffers) { + kaddr = kmap_atomic(bh->b_page, KM_USER0); + crc = crc32_le(crc, kaddr + bh_offset(bh), bh->b_size); + kunmap_atomic(kaddr, KM_USER0); + } + raw_sum->ss_datasum = cpu_to_le32(crc); +} + +void nilfs_release_buffers(struct list_head *list) +{ + struct buffer_head *bh, *n; + + list_for_each_entry_safe(bh, n, list, b_assoc_buffers) { + list_del_init(&bh->b_assoc_buffers); + if (buffer_nilfs_allocated(bh)) { + struct page *clone_page = bh->b_page; + + /* remove clone page */ + brelse(bh); + page_cache_release(clone_page); /* for each bh */ + if (page_count(clone_page) <= 2) { + lock_page(clone_page); + nilfs_free_private_page(clone_page); + } + continue; + } + brelse(bh); + } +} + +/* + * BIO operations + */ +static void nilfs_end_bio_write(struct bio *bio, int err) +{ + const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); + struct nilfs_write_info *wi = bio->bi_private; + + if (err == -EOPNOTSUPP) { + set_bit(BIO_EOPNOTSUPP, &bio->bi_flags); + bio_put(bio); + /* to be detected by submit_seg_bio() */ + } + + if (!uptodate) + atomic_inc(&wi->err); + + bio_put(bio); + complete(&wi->bio_event); +} + +static int nilfs_submit_seg_bio(struct nilfs_write_info *wi, int mode) +{ + struct bio *bio = wi->bio; + int err; + + if (wi->nbio > 0 && bdi_write_congested(wi->bdi)) { + wait_for_completion(&wi->bio_event); + wi->nbio--; + if (unlikely(atomic_read(&wi->err))) { + bio_put(bio); + err = -EIO; + goto failed; + } + } + + bio->bi_end_io = nilfs_end_bio_write; + bio->bi_private = wi; + bio_get(bio); + submit_bio(mode, bio); + if (bio_flagged(bio, BIO_EOPNOTSUPP)) { + bio_put(bio); + err = -EOPNOTSUPP; + goto failed; + } + wi->nbio++; + bio_put(bio); + + wi->bio = NULL; + wi->rest_blocks -= wi->end - wi->start; + wi->nr_vecs = min(wi->max_pages, wi->rest_blocks); + wi->start = wi->end; + return 0; + + failed: + wi->bio = NULL; + return err; +} + +/** + * nilfs_alloc_seg_bio - allocate a bio for writing segment. + * @sb: super block + * @start: beginning disk block number of this BIO. + * @nr_vecs: request size of page vector. + * + * alloc_seg_bio() allocates a new BIO structure and initialize it. + * + * Return Value: On success, pointer to the struct bio is returned. + * On error, NULL is returned. + */ +static struct bio *nilfs_alloc_seg_bio(struct super_block *sb, sector_t start, + int nr_vecs) +{ + struct bio *bio; + + bio = bio_alloc(GFP_NOWAIT, nr_vecs); + if (bio == NULL) { + while (!bio && (nr_vecs >>= 1)) + bio = bio_alloc(GFP_NOWAIT, nr_vecs); + } + if (likely(bio)) { + bio->bi_bdev = sb->s_bdev; + bio->bi_sector = (sector_t)start << (sb->s_blocksize_bits - 9); + } + return bio; +} + +void nilfs_segbuf_prepare_write(struct nilfs_segment_buffer *segbuf, + struct nilfs_write_info *wi) +{ + wi->bio = NULL; + wi->rest_blocks = segbuf->sb_sum.nblocks; + wi->max_pages = bio_get_nr_vecs(wi->sb->s_bdev); + wi->nr_vecs = min(wi->max_pages, wi->rest_blocks); + wi->start = wi->end = 0; + wi->nbio = 0; + wi->blocknr = segbuf->sb_pseg_start; + + atomic_set(&wi->err, 0); + init_completion(&wi->bio_event); +} + +static int nilfs_submit_bh(struct nilfs_write_info *wi, struct buffer_head *bh, + int mode) +{ + int len, err; + + BUG_ON(wi->nr_vecs <= 0); + repeat: + if (!wi->bio) { + wi->bio = nilfs_alloc_seg_bio(wi->sb, wi->blocknr + wi->end, + wi->nr_vecs); + if (unlikely(!wi->bio)) + return -ENOMEM; + } + + len = bio_add_page(wi->bio, bh->b_page, bh->b_size, bh_offset(bh)); + if (len == bh->b_size) { + wi->end++; + return 0; + } + /* bio is FULL */ + err = nilfs_submit_seg_bio(wi, mode); + /* never submit current bh */ + if (likely(!err)) + goto repeat; + return err; +} + +int nilfs_segbuf_write(struct nilfs_segment_buffer *segbuf, + struct nilfs_write_info *wi) +{ + struct buffer_head *bh; + int res, rw = WRITE; + + list_for_each_entry(bh, &segbuf->sb_segsum_buffers, b_assoc_buffers) { + res = nilfs_submit_bh(wi, bh, rw); + if (unlikely(res)) + goto failed_bio; + } + + list_for_each_entry(bh, &segbuf->sb_payload_buffers, b_assoc_buffers) { + res = nilfs_submit_bh(wi, bh, rw); + if (unlikely(res)) + goto failed_bio; + } + + if (wi->bio) { + /* + * Last BIO is always sent through the following + * submission. + */ + rw |= (1 << BIO_RW_SYNCIO); + res = nilfs_submit_seg_bio(wi, rw); + if (unlikely(res)) + goto failed_bio; + } + + res = 0; + out: + return res; + + failed_bio: + atomic_inc(&wi->err); + goto out; +} + +/** + * nilfs_segbuf_wait - wait for completion of requested BIOs + * @wi: nilfs_write_info + * + * Return Value: On Success, 0 is returned. On Error, one of the following + * negative error code is returned. + * + * %-EIO - I/O error + */ +int nilfs_segbuf_wait(struct nilfs_segment_buffer *segbuf, + struct nilfs_write_info *wi) +{ + int err = 0; + + if (!wi->nbio) + return 0; + + do { + wait_for_completion(&wi->bio_event); + } while (--wi->nbio > 0); + + if (unlikely(atomic_read(&wi->err) > 0)) { + printk(KERN_ERR "NILFS: IO error writing segment\n"); + err = -EIO; + segbuf->sb_io_error = 1; + } + return err; +} diff --git a/fs/nilfs2/segbuf.h b/fs/nilfs2/segbuf.h new file mode 100644 index 00000000000..25f2a5faa48 --- /dev/null +++ b/fs/nilfs2/segbuf.h @@ -0,0 +1,203 @@ +/* + * segbuf.h - NILFS Segment buffer prototypes and definitions + * + * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + * Written by Ryusuke Konishi + * + */ +#ifndef _NILFS_SEGBUF_H +#define _NILFS_SEGBUF_H + +#include +#include +#include +#include +#include + +/** + * struct nilfs_segsum_info - On-memory segment summary + * @flags: Flags + * @nfinfo: Number of file information structures + * @nblocks: Number of blocks included in the partial segment + * @nsumblk: Number of summary blocks + * @sumbytes: Byte count of segment summary + * @nfileblk: Total number of file blocks + * @seg_seq: Segment sequence number + * @ctime: Creation time + * @next: Block number of the next full segment + */ +struct nilfs_segsum_info { + unsigned int flags; + unsigned long nfinfo; + unsigned long nblocks; + unsigned long nsumblk; + unsigned long sumbytes; + unsigned long nfileblk; + u64 seg_seq; + time_t ctime; + sector_t next; +}; + +/* macro for the flags */ +#define NILFS_SEG_HAS_SR(sum) ((sum)->flags & NILFS_SS_SR) +#define NILFS_SEG_LOGBGN(sum) ((sum)->flags & NILFS_SS_LOGBGN) +#define NILFS_SEG_LOGEND(sum) ((sum)->flags & NILFS_SS_LOGEND) +#define NILFS_SEG_DSYNC(sum) ((sum)->flags & NILFS_SS_SYNDT) +#define NILFS_SEG_SIMPLEX(sum) \ + (((sum)->flags & (NILFS_SS_LOGBGN | NILFS_SS_LOGEND)) == \ + (NILFS_SS_LOGBGN | NILFS_SS_LOGEND)) + +#define NILFS_SEG_EMPTY(sum) ((sum)->nblocks == (sum)->nsumblk) + +/** + * struct nilfs_segment_buffer - Segment buffer + * @sb_super: back pointer to a superblock struct + * @sb_list: List head to chain this structure + * @sb_segent: Pointer for attaching a segment entry + * @sb_sum: On-memory segment summary + * @sb_segnum: Index number of the full segment + * @sb_nextnum: Index number of the next full segment + * @sb_fseg_start: Start block number of the full segment + * @sb_fseg_end: End block number of the full segment + * @sb_pseg_start: Disk block number of partial segment + * @sb_rest_blocks: Number of residual blocks in the current segment + * @sb_segsum_buffers: List of buffers for segment summaries + * @sb_payload_buffers: List of buffers for segment payload + * @sb_io_error: I/O error status + */ +struct nilfs_segment_buffer { + struct super_block *sb_super; + struct list_head sb_list; + struct nilfs_segment_entry *sb_segent; + + /* Segment information */ + struct nilfs_segsum_info sb_sum; + __u64 sb_segnum; + __u64 sb_nextnum; + sector_t sb_fseg_start, sb_fseg_end; + sector_t sb_pseg_start; + unsigned sb_rest_blocks; + + /* Buffers */ + struct list_head sb_segsum_buffers; + struct list_head sb_payload_buffers; /* including super root */ + + /* io status */ + int sb_io_error; +}; + +#define NILFS_LIST_SEGBUF(head) \ + list_entry((head), struct nilfs_segment_buffer, sb_list) +#define NILFS_NEXT_SEGBUF(segbuf) NILFS_LIST_SEGBUF((segbuf)->sb_list.next) +#define NILFS_PREV_SEGBUF(segbuf) NILFS_LIST_SEGBUF((segbuf)->sb_list.prev) +#define NILFS_LAST_SEGBUF(head) NILFS_LIST_SEGBUF((head)->prev) +#define NILFS_FIRST_SEGBUF(head) NILFS_LIST_SEGBUF((head)->next) +#define NILFS_SEGBUF_IS_LAST(segbuf, head) ((segbuf)->sb_list.next == (head)) + +#define nilfs_for_each_segbuf_before(s, t, h) \ + for ((s) = NILFS_FIRST_SEGBUF(h); (s) != (t); \ + (s) = NILFS_NEXT_SEGBUF(s)) + +#define NILFS_SEGBUF_FIRST_BH(head) \ + (list_entry((head)->next, struct buffer_head, b_assoc_buffers)) +#define NILFS_SEGBUF_NEXT_BH(bh) \ + (list_entry((bh)->b_assoc_buffers.next, struct buffer_head, \ + b_assoc_buffers)) +#define NILFS_SEGBUF_BH_IS_LAST(bh, head) ((bh)->b_assoc_buffers.next == head) + + +int __init nilfs_init_segbuf_cache(void); +void nilfs_destroy_segbuf_cache(void); +struct nilfs_segment_buffer *nilfs_segbuf_new(struct super_block *); +void nilfs_segbuf_free(struct nilfs_segment_buffer *); +int nilfs_segbuf_map(struct nilfs_segment_buffer *, __u64, unsigned long, + struct the_nilfs *); +void nilfs_segbuf_set_next_segnum(struct nilfs_segment_buffer *, __u64, + struct the_nilfs *); +int nilfs_segbuf_reset(struct nilfs_segment_buffer *, unsigned, time_t); +int nilfs_segbuf_extend_segsum(struct nilfs_segment_buffer *); +int nilfs_segbuf_extend_payload(struct nilfs_segment_buffer *, + struct buffer_head **); +void nilfs_segbuf_fill_in_segsum(struct nilfs_segment_buffer *); +void nilfs_segbuf_fill_in_segsum_crc(struct nilfs_segment_buffer *, u32); +void nilfs_segbuf_fill_in_data_crc(struct nilfs_segment_buffer *, u32); + +static inline void +nilfs_segbuf_add_segsum_buffer(struct nilfs_segment_buffer *segbuf, + struct buffer_head *bh) +{ + list_add_tail(&bh->b_assoc_buffers, &segbuf->sb_segsum_buffers); + segbuf->sb_sum.nblocks++; + segbuf->sb_sum.nsumblk++; +} + +static inline void +nilfs_segbuf_add_payload_buffer(struct nilfs_segment_buffer *segbuf, + struct buffer_head *bh) +{ + list_add_tail(&bh->b_assoc_buffers, &segbuf->sb_payload_buffers); + segbuf->sb_sum.nblocks++; +} + +static inline void +nilfs_segbuf_add_file_buffer(struct nilfs_segment_buffer *segbuf, + struct buffer_head *bh) +{ + get_bh(bh); + nilfs_segbuf_add_payload_buffer(segbuf, bh); + segbuf->sb_sum.nfileblk++; +} + +void nilfs_release_buffers(struct list_head *); + +static inline void nilfs_segbuf_clear(struct nilfs_segment_buffer *segbuf) +{ + nilfs_release_buffers(&segbuf->sb_segsum_buffers); + nilfs_release_buffers(&segbuf->sb_payload_buffers); +} + +struct nilfs_write_info { + struct bio *bio; + int start, end; /* The region to be submitted */ + int rest_blocks; + int max_pages; + int nr_vecs; + sector_t blocknr; + + int nbio; + atomic_t err; + struct completion bio_event; + /* completion event of segment write */ + + /* + * The following fields must be set explicitly + */ + struct super_block *sb; + struct backing_dev_info *bdi; /* backing dev info */ + struct buffer_head *bh_sr; +}; + + +void nilfs_segbuf_prepare_write(struct nilfs_segment_buffer *, + struct nilfs_write_info *); +int nilfs_segbuf_write(struct nilfs_segment_buffer *, + struct nilfs_write_info *); +int nilfs_segbuf_wait(struct nilfs_segment_buffer *, + struct nilfs_write_info *); + +#endif /* _NILFS_SEGBUF_H */ -- cgit v1.2.3 From 9ff05123e3bfbb1d2b68ba1d9bf1f7d1dffc1453 Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Mon, 6 Apr 2009 19:01:37 -0700 Subject: nilfs2: segment constructor This adds the segment constructor (also called log writer). The segment constructor collects dirty buffers for every dirty inode, makes summaries of the buffers, assigns disk block addresses to the buffers, and then submits BIOs for the buffers. Signed-off-by: Ryusuke Konishi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/nilfs2/file.c | 62 +- fs/nilfs2/seglist.h | 85 ++ fs/nilfs2/segment.c | 3187 +++++++++++++++++++++++++++++++++++++++++++++++++++ fs/nilfs2/segment.h | 246 ++++ 4 files changed, 3577 insertions(+), 3 deletions(-) create mode 100644 fs/nilfs2/seglist.h create mode 100644 fs/nilfs2/segment.c create mode 100644 fs/nilfs2/segment.h (limited to 'fs/nilfs2') diff --git a/fs/nilfs2/file.c b/fs/nilfs2/file.c index 7ddd42e24f7..8031086db8d 100644 --- a/fs/nilfs2/file.c +++ b/fs/nilfs2/file.c @@ -73,10 +73,66 @@ nilfs_file_aio_write(struct kiocb *iocb, const struct iovec *iov, return ret; } -static int nilfs_page_mkwrite(struct vm_area_struct *vma, struct page *page) +static int nilfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) { - if (!(vma->vm_flags & (VM_WRITE | VM_MAYWRITE))) - return -EPERM; + struct page *page = vmf->page; + struct inode *inode = vma->vm_file->f_dentry->d_inode; + struct nilfs_transaction_info ti; + int ret; + + if (unlikely(nilfs_near_disk_full(NILFS_SB(inode->i_sb)->s_nilfs))) + return VM_FAULT_SIGBUS; /* -ENOSPC */ + + lock_page(page); + if (page->mapping != inode->i_mapping || + page_offset(page) >= i_size_read(inode) || !PageUptodate(page)) { + unlock_page(page); + return VM_FAULT_NOPAGE; /* make the VM retry the fault */ + } + + /* + * check to see if the page is mapped already (no holes) + */ + if (PageMappedToDisk(page)) { + unlock_page(page); + goto mapped; + } + if (page_has_buffers(page)) { + struct buffer_head *bh, *head; + int fully_mapped = 1; + + bh = head = page_buffers(page); + do { + if (!buffer_mapped(bh)) { + fully_mapped = 0; + break; + } + } while (bh = bh->b_this_page, bh != head); + + if (fully_mapped) { + SetPageMappedToDisk(page); + unlock_page(page); + goto mapped; + } + } + unlock_page(page); + + /* + * fill hole blocks + */ + ret = nilfs_transaction_begin(inode->i_sb, &ti, 1); + /* never returns -ENOMEM, but may return -ENOSPC */ + if (unlikely(ret)) + return VM_FAULT_SIGBUS; + + ret = block_page_mkwrite(vma, vmf, nilfs_get_block); + if (unlikely(ret)) { + nilfs_transaction_abort(inode->i_sb); + return ret; + } + nilfs_transaction_commit(inode->i_sb); + + mapped: SetPageChecked(page); wait_on_page_writeback(page); return 0; diff --git a/fs/nilfs2/seglist.h b/fs/nilfs2/seglist.h new file mode 100644 index 00000000000..d39df9144e9 --- /dev/null +++ b/fs/nilfs2/seglist.h @@ -0,0 +1,85 @@ +/* + * seglist.h - expediential structure and routines to handle list of segments + * (would be removed in a future release) + * + * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + * Written by Ryusuke Konishi + * + */ +#ifndef _NILFS_SEGLIST_H +#define _NILFS_SEGLIST_H + +#include +#include +#include +#include "sufile.h" + +struct nilfs_segment_entry { + __u64 segnum; + +#define NILFS_SLH_FREED 0x0001 /* The segment was freed provisonally. + It must be cancelled if + construction aborted */ + + unsigned flags; + struct list_head list; + struct buffer_head *bh_su; + struct nilfs_segment_usage *raw_su; +}; + + +void nilfs_dispose_segment_list(struct list_head *); + +static inline struct nilfs_segment_entry * +nilfs_alloc_segment_entry(__u64 segnum) +{ + struct nilfs_segment_entry *ent = kmalloc(sizeof(*ent), GFP_NOFS); + + if (likely(ent)) { + ent->segnum = segnum; + ent->flags = 0; + ent->bh_su = NULL; + ent->raw_su = NULL; + INIT_LIST_HEAD(&ent->list); + } + return ent; +} + +static inline int nilfs_open_segment_entry(struct nilfs_segment_entry *ent, + struct inode *sufile) +{ + return nilfs_sufile_get_segment_usage(sufile, ent->segnum, + &ent->raw_su, &ent->bh_su); +} + +static inline void nilfs_close_segment_entry(struct nilfs_segment_entry *ent, + struct inode *sufile) +{ + if (!ent->bh_su) + return; + nilfs_sufile_put_segment_usage(sufile, ent->segnum, ent->bh_su); + ent->bh_su = NULL; + ent->raw_su = NULL; +} + +static inline void nilfs_free_segment_entry(struct nilfs_segment_entry *ent) +{ + kfree(ent); +} + +#endif /* _NILFS_SEGLIST_H */ diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c new file mode 100644 index 00000000000..2c4c088059f --- /dev/null +++ b/fs/nilfs2/segment.c @@ -0,0 +1,3187 @@ +/* + * segment.c - NILFS segment constructor. + * + * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + * Written by Ryusuke Konishi + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "nilfs.h" +#include "btnode.h" +#include "page.h" +#include "segment.h" +#include "sufile.h" +#include "cpfile.h" +#include "ifile.h" +#include "seglist.h" +#include "segbuf.h" + + +/* + * Segment constructor + */ +#define SC_N_INODEVEC 16 /* Size of locally allocated inode vector */ + +#define SC_MAX_SEGDELTA 64 /* Upper limit of the number of segments + appended in collection retry loop */ + +/* Construction mode */ +enum { + SC_LSEG_SR = 1, /* Make a logical segment having a super root */ + SC_LSEG_DSYNC, /* Flush data blocks of a given file and make + a logical segment without a super root */ + SC_FLUSH_FILE, /* Flush data files, leads to segment writes without + creating a checkpoint */ + SC_FLUSH_DAT, /* Flush DAT file. This also creates segments without + a checkpoint */ +}; + +/* Stage numbers of dirty block collection */ +enum { + NILFS_ST_INIT = 0, + NILFS_ST_GC, /* Collecting dirty blocks for GC */ + NILFS_ST_FILE, + NILFS_ST_SKETCH, + NILFS_ST_IFILE, + NILFS_ST_CPFILE, + NILFS_ST_SUFILE, + NILFS_ST_DAT, + NILFS_ST_SR, /* Super root */ + NILFS_ST_DSYNC, /* Data sync blocks */ + NILFS_ST_DONE, +}; + +/* State flags of collection */ +#define NILFS_CF_NODE 0x0001 /* Collecting node blocks */ +#define NILFS_CF_IFILE_STARTED 0x0002 /* IFILE stage has started */ +#define NILFS_CF_HISTORY_MASK (NILFS_CF_IFILE_STARTED) + +/* Operations depending on the construction mode and file type */ +struct nilfs_sc_operations { + int (*collect_data)(struct nilfs_sc_info *, struct buffer_head *, + struct inode *); + int (*collect_node)(struct nilfs_sc_info *, struct buffer_head *, + struct inode *); + int (*collect_bmap)(struct nilfs_sc_info *, struct buffer_head *, + struct inode *); + void (*write_data_binfo)(struct nilfs_sc_info *, + struct nilfs_segsum_pointer *, + union nilfs_binfo *); + void (*write_node_binfo)(struct nilfs_sc_info *, + struct nilfs_segsum_pointer *, + union nilfs_binfo *); +}; + +/* + * Other definitions + */ +static void nilfs_segctor_start_timer(struct nilfs_sc_info *); +static void nilfs_segctor_do_flush(struct nilfs_sc_info *, int); +static void nilfs_segctor_do_immediate_flush(struct nilfs_sc_info *); +static void nilfs_dispose_list(struct nilfs_sb_info *, struct list_head *, + int); + +#define nilfs_cnt32_gt(a, b) \ + (typecheck(__u32, a) && typecheck(__u32, b) && \ + ((__s32)(b) - (__s32)(a) < 0)) +#define nilfs_cnt32_ge(a, b) \ + (typecheck(__u32, a) && typecheck(__u32, b) && \ + ((__s32)(a) - (__s32)(b) >= 0)) +#define nilfs_cnt32_lt(a, b) nilfs_cnt32_gt(b, a) +#define nilfs_cnt32_le(a, b) nilfs_cnt32_ge(b, a) + +/* + * Transaction + */ +static struct kmem_cache *nilfs_transaction_cachep; + +/** + * nilfs_init_transaction_cache - create a cache for nilfs_transaction_info + * + * nilfs_init_transaction_cache() creates a slab cache for the struct + * nilfs_transaction_info. + * + * Return Value: On success, it returns 0. On error, one of the following + * negative error code is returned. + * + * %-ENOMEM - Insufficient memory available. + */ +int nilfs_init_transaction_cache(void) +{ + nilfs_transaction_cachep = + kmem_cache_create("nilfs2_transaction_cache", + sizeof(struct nilfs_transaction_info), + 0, SLAB_RECLAIM_ACCOUNT, NULL); + return (nilfs_transaction_cachep == NULL) ? -ENOMEM : 0; +} + +/** + * nilfs_detroy_transaction_cache - destroy the cache for transaction info + * + * nilfs_destroy_transaction_cache() frees the slab cache for the struct + * nilfs_transaction_info. + */ +void nilfs_destroy_transaction_cache(void) +{ + kmem_cache_destroy(nilfs_transaction_cachep); +} + +static int nilfs_prepare_segment_lock(struct nilfs_transaction_info *ti) +{ + struct nilfs_transaction_info *cur_ti = current->journal_info; + void *save = NULL; + + if (cur_ti) { + if (cur_ti->ti_magic == NILFS_TI_MAGIC) + return ++cur_ti->ti_count; + else { + /* + * If journal_info field is occupied by other FS, + * we save it and restore on nilfs_transaction_end(). + * But this should never happen. + */ + printk(KERN_WARNING + "NILFS warning: journal info from a different " + "FS\n"); + save = current->journal_info; + } + } + if (!ti) { + ti = kmem_cache_alloc(nilfs_transaction_cachep, GFP_NOFS); + if (!ti) + return -ENOMEM; + ti->ti_flags = NILFS_TI_DYNAMIC_ALLOC; + } else { + ti->ti_flags = 0; + } + ti->ti_count = 0; + ti->ti_save = save; + ti->ti_magic = NILFS_TI_MAGIC; + current->journal_info = ti; + return 0; +} + +/** + * nilfs_transaction_begin - start indivisible file operations. + * @sb: super block + * @ti: nilfs_transaction_info + * @vacancy_check: flags for vacancy rate checks + * + * nilfs_transaction_begin() acquires a reader/writer semaphore, called + * the segment semaphore, to make a segment construction and write tasks + * exclusive. The function is used with nilfs_transaction_end() in pairs. + * The region enclosed by these two functions can be nested. To avoid a + * deadlock, the semaphore is only acquired or released in the outermost call. + * + * This function allocates a nilfs_transaction_info struct to keep context + * information on it. It is initialized and hooked onto the current task in + * the outermost call. If a pre-allocated struct is given to @ti, it is used + * instead; othewise a new struct is assigned from a slab. + * + * When @vacancy_check flag is set, this function will check the amount of + * free space, and will wait for the GC to reclaim disk space if low capacity. + * + * Return Value: On success, 0 is returned. On error, one of the following + * negative error code is returned. + * + * %-ENOMEM - Insufficient memory available. + * + * %-ERESTARTSYS - Interrupted + * + * %-ENOSPC - No space left on device + */ +int nilfs_transaction_begin(struct super_block *sb, + struct nilfs_transaction_info *ti, + int vacancy_check) +{ + struct nilfs_sb_info *sbi; + struct the_nilfs *nilfs; + int ret = nilfs_prepare_segment_lock(ti); + + if (unlikely(ret < 0)) + return ret; + if (ret > 0) + return 0; + + sbi = NILFS_SB(sb); + nilfs = sbi->s_nilfs; + down_read(&nilfs->ns_segctor_sem); + if (vacancy_check && nilfs_near_disk_full(nilfs)) { + up_read(&nilfs->ns_segctor_sem); + ret = -ENOSPC; + goto failed; + } + return 0; + + failed: + ti = current->journal_info; + current->journal_info = ti->ti_save; + if (ti->ti_flags & NILFS_TI_DYNAMIC_ALLOC) + kmem_cache_free(nilfs_transaction_cachep, ti); + return ret; +} + +/** + * nilfs_transaction_end - end indivisible file operations. + * @sb: super block + * @commit: commit flag (0 for no change) + * + * nilfs_transaction_end() releases the read semaphore which is + * acquired by nilfs_transaction_begin(). Its releasing is only done + * in outermost call of this function. If the nilfs_transaction_info + * was allocated dynamically, it is given back to a slab cache. + */ +int nilfs_transaction_end(struct super_block *sb, int commit) +{ + struct nilfs_transaction_info *ti = current->journal_info; + struct nilfs_sb_info *sbi; + struct nilfs_sc_info *sci; + int err = 0; + + BUG_ON(ti == NULL || ti->ti_magic != NILFS_TI_MAGIC); + + if (commit) + ti->ti_flags |= NILFS_TI_COMMIT; + if (ti->ti_count > 0) { + ti->ti_count--; + return 0; + } + sbi = NILFS_SB(sb); + sci = NILFS_SC(sbi); + if (sci != NULL) { + if (ti->ti_flags & NILFS_TI_COMMIT) + nilfs_segctor_start_timer(sci); + if (atomic_read(&sbi->s_nilfs->ns_ndirtyblks) > + sci->sc_watermark) + nilfs_segctor_do_flush(sci, 0); + } + up_read(&sbi->s_nilfs->ns_segctor_sem); + current->journal_info = ti->ti_save; + + if (ti->ti_flags & NILFS_TI_SYNC) + err = nilfs_construct_segment(sb); + if (ti->ti_flags & NILFS_TI_DYNAMIC_ALLOC) + kmem_cache_free(nilfs_transaction_cachep, ti); + return err; +} + +void nilfs_relax_pressure_in_lock(struct super_block *sb) +{ + struct nilfs_sb_info *sbi = NILFS_SB(sb); + struct nilfs_sc_info *sci = NILFS_SC(sbi); + struct the_nilfs *nilfs = sbi->s_nilfs; + + if (!sci || !sci->sc_flush_request) + return; + + set_bit(NILFS_SC_PRIOR_FLUSH, &sci->sc_flags); + up_read(&nilfs->ns_segctor_sem); + + down_write(&nilfs->ns_segctor_sem); + if (sci->sc_flush_request && + test_bit(NILFS_SC_PRIOR_FLUSH, &sci->sc_flags)) { + struct nilfs_transaction_info *ti = current->journal_info; + + ti->ti_flags |= NILFS_TI_WRITER; + nilfs_segctor_do_immediate_flush(sci); + ti->ti_flags &= ~NILFS_TI_WRITER; + } + downgrade_write(&nilfs->ns_segctor_sem); +} + +static void nilfs_transaction_lock(struct nilfs_sb_info *sbi, + struct nilfs_transaction_info *ti, + int gcflag) +{ + struct nilfs_transaction_info *cur_ti = current->journal_info; + + BUG_ON(cur_ti); + BUG_ON(!ti); + ti->ti_flags = NILFS_TI_WRITER; + ti->ti_count = 0; + ti->ti_save = cur_ti; + ti->ti_magic = NILFS_TI_MAGIC; + INIT_LIST_HEAD(&ti->ti_garbage); + current->journal_info = ti; + + for (;;) { + down_write(&sbi->s_nilfs->ns_segctor_sem); + if (!test_bit(NILFS_SC_PRIOR_FLUSH, &NILFS_SC(sbi)->sc_flags)) + break; + + nilfs_segctor_do_immediate_flush(NILFS_SC(sbi)); + + up_write(&sbi->s_nilfs->ns_segctor_sem); + yield(); + } + if (gcflag) + ti->ti_flags |= NILFS_TI_GC; +} + +static void nilfs_transaction_unlock(struct nilfs_sb_info *sbi) +{ + struct nilfs_transaction_info *ti = current->journal_info; + + BUG_ON(ti == NULL || ti->ti_magic != NILFS_TI_MAGIC); + BUG_ON(ti->ti_count > 0); + + up_write(&sbi->s_nilfs->ns_segctor_sem); + current->journal_info = ti->ti_save; + if (!list_empty(&ti->ti_garbage)) + nilfs_dispose_list(sbi, &ti->ti_garbage, 0); +} + +static void *nilfs_segctor_map_segsum_entry(struct nilfs_sc_info *sci, + struct nilfs_segsum_pointer *ssp, + unsigned bytes) +{ + struct nilfs_segment_buffer *segbuf = sci->sc_curseg; + unsigned blocksize = sci->sc_super->s_blocksize; + void *p; + + if (unlikely(ssp->offset + bytes > blocksize)) { + ssp->offset = 0; + BUG_ON(NILFS_SEGBUF_BH_IS_LAST(ssp->bh, + &segbuf->sb_segsum_buffers)); + ssp->bh = NILFS_SEGBUF_NEXT_BH(ssp->bh); + } + p = ssp->bh->b_data + ssp->offset; + ssp->offset += bytes; + return p; +} + +/** + * nilfs_segctor_reset_segment_buffer - reset the current segment buffer + * @sci: nilfs_sc_info + */ +static int nilfs_segctor_reset_segment_buffer(struct nilfs_sc_info *sci) +{ + struct nilfs_segment_buffer *segbuf = sci->sc_curseg; + struct buffer_head *sumbh; + unsigned sumbytes; + unsigned flags = 0; + int err; + + if (nilfs_doing_gc()) + flags = NILFS_SS_GC; + err = nilfs_segbuf_reset(segbuf, flags, sci->sc_seg_ctime); + if (unlikely(err)) + return err; + + sumbh = NILFS_SEGBUF_FIRST_BH(&segbuf->sb_segsum_buffers); + sumbytes = segbuf->sb_sum.sumbytes; + sci->sc_finfo_ptr.bh = sumbh; sci->sc_finfo_ptr.offset = sumbytes; + sci->sc_binfo_ptr.bh = sumbh; sci->sc_binfo_ptr.offset = sumbytes; + sci->sc_blk_cnt = sci->sc_datablk_cnt = 0; + return 0; +} + +static int nilfs_segctor_feed_segment(struct nilfs_sc_info *sci) +{ + sci->sc_nblk_this_inc += sci->sc_curseg->sb_sum.nblocks; + if (NILFS_SEGBUF_IS_LAST(sci->sc_curseg, &sci->sc_segbufs)) + return -E2BIG; /* The current segment is filled up + (internal code) */ + sci->sc_curseg = NILFS_NEXT_SEGBUF(sci->sc_curseg); + return nilfs_segctor_reset_segment_buffer(sci); +} + +static int nilfs_segctor_add_super_root(struct nilfs_sc_info *sci) +{ + struct nilfs_segment_buffer *segbuf = sci->sc_curseg; + int err; + + if (segbuf->sb_sum.nblocks >= segbuf->sb_rest_blocks) { + err = nilfs_segctor_feed_segment(sci); + if (err) + return err; + segbuf = sci->sc_curseg; + } + err = nilfs_segbuf_extend_payload(segbuf, &sci->sc_super_root); + if (likely(!err)) + segbuf->sb_sum.flags |= NILFS_SS_SR; + return err; +} + +/* + * Functions for making segment summary and payloads + */ +static int nilfs_segctor_segsum_block_required( + struct nilfs_sc_info *sci, const struct nilfs_segsum_pointer *ssp, + unsigned binfo_size) +{ + unsigned blocksize = sci->sc_super->s_blocksize; + /* Size of finfo and binfo is enough small against blocksize */ + + return ssp->offset + binfo_size + + (!sci->sc_blk_cnt ? sizeof(struct nilfs_finfo) : 0) > + blocksize; +} + +static void nilfs_segctor_begin_finfo(struct nilfs_sc_info *sci, + struct inode *inode) +{ + sci->sc_curseg->sb_sum.nfinfo++; + sci->sc_binfo_ptr = sci->sc_finfo_ptr; + nilfs_segctor_map_segsum_entry( + sci, &sci->sc_binfo_ptr, sizeof(struct nilfs_finfo)); + /* skip finfo */ +} + +static void nilfs_segctor_end_finfo(struct nilfs_sc_info *sci, + struct inode *inode) +{ + struct nilfs_finfo *finfo; + struct nilfs_inode_info *ii; + struct nilfs_segment_buffer *segbuf; + + if (sci->sc_blk_cnt == 0) + return; + + ii = NILFS_I(inode); + finfo = nilfs_segctor_map_segsum_entry(sci, &sci->sc_finfo_ptr, + sizeof(*finfo)); + finfo->fi_ino = cpu_to_le64(inode->i_ino); + finfo->fi_nblocks = cpu_to_le32(sci->sc_blk_cnt); + finfo->fi_ndatablk = cpu_to_le32(sci->sc_datablk_cnt); + finfo->fi_cno = cpu_to_le64(ii->i_cno); + + segbuf = sci->sc_curseg; + segbuf->sb_sum.sumbytes = sci->sc_binfo_ptr.offset + + sci->sc_super->s_blocksize * (segbuf->sb_sum.nsumblk - 1); + sci->sc_finfo_ptr = sci->sc_binfo_ptr; + sci->sc_blk_cnt = sci->sc_datablk_cnt = 0; +} + +static int nilfs_segctor_add_file_block(struct nilfs_sc_info *sci, + struct buffer_head *bh, + struct inode *inode, + unsigned binfo_size) +{ + struct nilfs_segment_buffer *segbuf; + int required, err = 0; + + retry: + segbuf = sci->sc_curseg; + required = nilfs_segctor_segsum_block_required( + sci, &sci->sc_binfo_ptr, binfo_size); + if (segbuf->sb_sum.nblocks + required + 1 > segbuf->sb_rest_blocks) { + nilfs_segctor_end_finfo(sci, inode); + err = nilfs_segctor_feed_segment(sci); + if (err) + return err; + goto retry; + } + if (unlikely(required)) { + err = nilfs_segbuf_extend_segsum(segbuf); + if (unlikely(err)) + goto failed; + } + if (sci->sc_blk_cnt == 0) + nilfs_segctor_begin_finfo(sci, inode); + + nilfs_segctor_map_segsum_entry(sci, &sci->sc_binfo_ptr, binfo_size); + /* Substitution to vblocknr is delayed until update_blocknr() */ + nilfs_segbuf_add_file_buffer(segbuf, bh); + sci->sc_blk_cnt++; + failed: + return err; +} + +static int nilfs_handle_bmap_error(int err, const char *fname, + struct inode *inode, struct super_block *sb) +{ + if (err == -EINVAL) { + nilfs_error(sb, fname, "broken bmap (inode=%lu)\n", + inode->i_ino); + err = -EIO; + } + return err; +} + +/* + * Callback functions that enumerate, mark, and collect dirty blocks + */ +static int nilfs_collect_file_data(struct nilfs_sc_info *sci, + struct buffer_head *bh, struct inode *inode) +{ + int err; + + /* BUG_ON(!buffer_dirty(bh)); */ + /* excluded by scan_dirty_data_buffers() */ + err = nilfs_bmap_propagate(NILFS_I(inode)->i_bmap, bh); + if (unlikely(err < 0)) + return nilfs_handle_bmap_error(err, __func__, inode, + sci->sc_super); + + err = nilfs_segctor_add_file_block(sci, bh, inode, + sizeof(struct nilfs_binfo_v)); + if (!err) + sci->sc_datablk_cnt++; + return err; +} + +static int nilfs_collect_file_node(struct nilfs_sc_info *sci, + struct buffer_head *bh, + struct inode *inode) +{ + int err; + + /* BUG_ON(!buffer_dirty(bh)); */ + /* excluded by scan_dirty_node_buffers() */ + err = nilfs_bmap_propagate(NILFS_I(inode)->i_bmap, bh); + if (unlikely(err < 0)) + return nilfs_handle_bmap_error(err, __func__, inode, + sci->sc_super); + return 0; +} + +static int nilfs_collect_file_bmap(struct nilfs_sc_info *sci, + struct buffer_head *bh, + struct inode *inode) +{ + BUG_ON(!buffer_dirty(bh)); + return nilfs_segctor_add_file_block(sci, bh, inode, sizeof(__le64)); +} + +static void nilfs_write_file_data_binfo(struct nilfs_sc_info *sci, + struct nilfs_segsum_pointer *ssp, + union nilfs_binfo *binfo) +{ + struct nilfs_binfo_v *binfo_v = nilfs_segctor_map_segsum_entry( + sci, ssp, sizeof(*binfo_v)); + *binfo_v = binfo->bi_v; +} + +static void nilfs_write_file_node_binfo(struct nilfs_sc_info *sci, + struct nilfs_segsum_pointer *ssp, + union nilfs_binfo *binfo) +{ + __le64 *vblocknr = nilfs_segctor_map_segsum_entry( + sci, ssp, sizeof(*vblocknr)); + *vblocknr = binfo->bi_v.bi_vblocknr; +} + +struct nilfs_sc_operations nilfs_sc_file_ops = { + .collect_data = nilfs_collect_file_data, + .collect_node = nilfs_collect_file_node, + .collect_bmap = nilfs_collect_file_bmap, + .write_data_binfo = nilfs_write_file_data_binfo, + .write_node_binfo = nilfs_write_file_node_binfo, +}; + +static int nilfs_collect_dat_data(struct nilfs_sc_info *sci, + struct buffer_head *bh, struct inode *inode) +{ + int err; + + err = nilfs_bmap_propagate(NILFS_I(inode)->i_bmap, bh); + if (unlikely(err < 0)) + return nilfs_handle_bmap_error(err, __func__, inode, + sci->sc_super); + + err = nilfs_segctor_add_file_block(sci, bh, inode, sizeof(__le64)); + if (!err) + sci->sc_datablk_cnt++; + return err; +} + +static int nilfs_collect_dat_bmap(struct nilfs_sc_info *sci, + struct buffer_head *bh, struct inode *inode) +{ + BUG_ON(!buffer_dirty(bh)); + return nilfs_segctor_add_file_block(sci, bh, inode, + sizeof(struct nilfs_binfo_dat)); +} + +static void nilfs_write_dat_data_binfo(struct nilfs_sc_info *sci, + struct nilfs_segsum_pointer *ssp, + union nilfs_binfo *binfo) +{ + __le64 *blkoff = nilfs_segctor_map_segsum_entry(sci, ssp, + sizeof(*blkoff)); + *blkoff = binfo->bi_dat.bi_blkoff; +} + +static void nilfs_write_dat_node_binfo(struct nilfs_sc_info *sci, + struct nilfs_segsum_pointer *ssp, + union nilfs_binfo *binfo) +{ + struct nilfs_binfo_dat *binfo_dat = + nilfs_segctor_map_segsum_entry(sci, ssp, sizeof(*binfo_dat)); + *binfo_dat = binfo->bi_dat; +} + +struct nilfs_sc_operations nilfs_sc_dat_ops = { + .collect_data = nilfs_collect_dat_data, + .collect_node = nilfs_collect_file_node, + .collect_bmap = nilfs_collect_dat_bmap, + .write_data_binfo = nilfs_write_dat_data_binfo, + .write_node_binfo = nilfs_write_dat_node_binfo, +}; + +struct nilfs_sc_operations nilfs_sc_dsync_ops = { + .collect_data = nilfs_collect_file_data, + .collect_node = NULL, + .collect_bmap = NULL, + .write_data_binfo = nilfs_write_file_data_binfo, + .write_node_binfo = NULL, +}; + +static int nilfs_lookup_dirty_data_buffers(struct inode *inode, + struct list_head *listp, + struct nilfs_sc_info *sci) +{ + struct nilfs_segment_buffer *segbuf = sci->sc_curseg; + struct address_space *mapping = inode->i_mapping; + struct pagevec pvec; + unsigned i, ndirties = 0, nlimit; + pgoff_t index = 0; + int err = 0; + + nlimit = sci->sc_segbuf_nblocks - + (sci->sc_nblk_this_inc + segbuf->sb_sum.nblocks); + pagevec_init(&pvec, 0); + repeat: + if (!pagevec_lookup_tag(&pvec, mapping, &index, PAGECACHE_TAG_DIRTY, + PAGEVEC_SIZE)) + return 0; + + for (i = 0; i < pagevec_count(&pvec); i++) { + struct buffer_head *bh, *head; + struct page *page = pvec.pages[i]; + + if (mapping->host) { + lock_page(page); + if (!page_has_buffers(page)) + create_empty_buffers(page, + 1 << inode->i_blkbits, 0); + unlock_page(page); + } + + bh = head = page_buffers(page); + do { + if (buffer_dirty(bh)) { + if (ndirties > nlimit) { + err = -E2BIG; + break; + } + get_bh(bh); + list_add_tail(&bh->b_assoc_buffers, listp); + ndirties++; + } + bh = bh->b_this_page; + } while (bh != head); + } + pagevec_release(&pvec); + cond_resched(); + + if (!err) + goto repeat; + return err; +} + +static void nilfs_lookup_dirty_node_buffers(struct inode *inode, + struct list_head *listp) +{ + struct nilfs_inode_info *ii = NILFS_I(inode); + struct address_space *mapping = &ii->i_btnode_cache; + struct pagevec pvec; + struct buffer_head *bh, *head; + unsigned int i; + pgoff_t index = 0; + + pagevec_init(&pvec, 0); + + while (pagevec_lookup_tag(&pvec, mapping, &index, PAGECACHE_TAG_DIRTY, + PAGEVEC_SIZE)) { + for (i = 0; i < pagevec_count(&pvec); i++) { + bh = head = page_buffers(pvec.pages[i]); + do { + if (buffer_dirty(bh)) { + get_bh(bh); + list_add_tail(&bh->b_assoc_buffers, + listp); + } + bh = bh->b_this_page; + } while (bh != head); + } + pagevec_release(&pvec); + cond_resched(); + } +} + +static void nilfs_dispose_list(struct nilfs_sb_info *sbi, + struct list_head *head, int force) +{ + struct nilfs_inode_info *ii, *n; + struct nilfs_inode_info *ivec[SC_N_INODEVEC], **pii; + unsigned nv = 0; + + while (!list_empty(head)) { + spin_lock(&sbi->s_inode_lock); + list_for_each_entry_safe(ii, n, head, i_dirty) { + list_del_init(&ii->i_dirty); + if (force) { + if (unlikely(ii->i_bh)) { + brelse(ii->i_bh); + ii->i_bh = NULL; + } + } else if (test_bit(NILFS_I_DIRTY, &ii->i_state)) { + set_bit(NILFS_I_QUEUED, &ii->i_state); + list_add_tail(&ii->i_dirty, + &sbi->s_dirty_files); + continue; + } + ivec[nv++] = ii; + if (nv == SC_N_INODEVEC) + break; + } + spin_unlock(&sbi->s_inode_lock); + + for (pii = ivec; nv > 0; pii++, nv--) + iput(&(*pii)->vfs_inode); + } +} + +static int nilfs_test_metadata_dirty(struct nilfs_sb_info *sbi) +{ + struct the_nilfs *nilfs = sbi->s_nilfs; + int ret = 0; + + if (nilfs_mdt_fetch_dirty(sbi->s_ifile)) + ret++; + if (nilfs_mdt_fetch_dirty(nilfs->ns_cpfile)) + ret++; + if (nilfs_mdt_fetch_dirty(nilfs->ns_sufile)) + ret++; + if (ret || nilfs_doing_gc()) + if (nilfs_mdt_fetch_dirty(nilfs_dat_inode(nilfs))) + ret++; + return ret; +} + +static int nilfs_segctor_clean(struct nilfs_sc_info *sci) +{ + return list_empty(&sci->sc_dirty_files) && + !test_bit(NILFS_SC_DIRTY, &sci->sc_flags) && + list_empty(&sci->sc_cleaning_segments) && + (!nilfs_doing_gc() || list_empty(&sci->sc_gc_inodes)); +} + +static int nilfs_segctor_confirm(struct nilfs_sc_info *sci) +{ + struct nilfs_sb_info *sbi = sci->sc_sbi; + int ret = 0; + + if (nilfs_test_metadata_dirty(sbi)) + set_bit(NILFS_SC_DIRTY, &sci->sc_flags); + + spin_lock(&sbi->s_inode_lock); + if (list_empty(&sbi->s_dirty_files) && nilfs_segctor_clean(sci)) + ret++; + + spin_unlock(&sbi->s_inode_lock); + return ret; +} + +static void nilfs_segctor_clear_metadata_dirty(struct nilfs_sc_info *sci) +{ + struct nilfs_sb_info *sbi = sci->sc_sbi; + struct the_nilfs *nilfs = sbi->s_nilfs; + + nilfs_mdt_clear_dirty(sbi->s_ifile); + nilfs_mdt_clear_dirty(nilfs->ns_cpfile); + nilfs_mdt_clear_dirty(nilfs->ns_sufile); + nilfs_mdt_clear_dirty(nilfs_dat_inode(nilfs)); +} + +static int nilfs_segctor_create_checkpoint(struct nilfs_sc_info *sci) +{ + struct the_nilfs *nilfs = sci->sc_sbi->s_nilfs; + struct buffer_head *bh_cp; + struct nilfs_checkpoint *raw_cp; + int err; + + /* XXX: this interface will be changed */ + err = nilfs_cpfile_get_checkpoint(nilfs->ns_cpfile, nilfs->ns_cno, 1, + &raw_cp, &bh_cp); + if (likely(!err)) { + /* The following code is duplicated with cpfile. But, it is + needed to collect the checkpoint even if it was not newly + created */ + nilfs_mdt_mark_buffer_dirty(bh_cp); + nilfs_mdt_mark_dirty(nilfs->ns_cpfile); + nilfs_cpfile_put_checkpoint( + nilfs->ns_cpfile, nilfs->ns_cno, bh_cp); + } else { + BUG_ON(err == -EINVAL || err == -ENOENT); + } + return err; +} + +static int nilfs_segctor_fill_in_checkpoint(struct nilfs_sc_info *sci) +{ + struct nilfs_sb_info *sbi = sci->sc_sbi; + struct the_nilfs *nilfs = sbi->s_nilfs; + struct buffer_head *bh_cp; + struct nilfs_checkpoint *raw_cp; + int err; + + err = nilfs_cpfile_get_checkpoint(nilfs->ns_cpfile, nilfs->ns_cno, 0, + &raw_cp, &bh_cp); + if (unlikely(err)) { + BUG_ON(err == -EINVAL || err == -ENOENT); + goto failed_ibh; + } + raw_cp->cp_snapshot_list.ssl_next = 0; + raw_cp->cp_snapshot_list.ssl_prev = 0; + raw_cp->cp_inodes_count = + cpu_to_le64(atomic_read(&sbi->s_inodes_count)); + raw_cp->cp_blocks_count = + cpu_to_le64(atomic_read(&sbi->s_blocks_count)); + raw_cp->cp_nblk_inc = + cpu_to_le64(sci->sc_nblk_inc + sci->sc_nblk_this_inc); + raw_cp->cp_create = cpu_to_le64(sci->sc_seg_ctime); + raw_cp->cp_cno = cpu_to_le64(nilfs->ns_cno); + if (sci->sc_sketch_inode && i_size_read(sci->sc_sketch_inode) > 0) + nilfs_checkpoint_set_sketch(raw_cp); + nilfs_write_inode_common(sbi->s_ifile, &raw_cp->cp_ifile_inode, 1); + nilfs_cpfile_put_checkpoint(nilfs->ns_cpfile, nilfs->ns_cno, bh_cp); + return 0; + + failed_ibh: + return err; +} + +static void nilfs_fill_in_file_bmap(struct inode *ifile, + struct nilfs_inode_info *ii) + +{ + struct buffer_head *ibh; + struct nilfs_inode *raw_inode; + + if (test_bit(NILFS_I_BMAP, &ii->i_state)) { + ibh = ii->i_bh; + BUG_ON(!ibh); + raw_inode = nilfs_ifile_map_inode(ifile, ii->vfs_inode.i_ino, + ibh); + nilfs_bmap_write(ii->i_bmap, raw_inode); + nilfs_ifile_unmap_inode(ifile, ii->vfs_inode.i_ino, ibh); + } +} + +static void nilfs_segctor_fill_in_file_bmap(struct nilfs_sc_info *sci, + struct inode *ifile) +{ + struct nilfs_inode_info *ii; + + list_for_each_entry(ii, &sci->sc_dirty_files, i_dirty) { + nilfs_fill_in_file_bmap(ifile, ii); + set_bit(NILFS_I_COLLECTED, &ii->i_state); + } + if (sci->sc_sketch_inode) { + ii = NILFS_I(sci->sc_sketch_inode); + if (test_bit(NILFS_I_DIRTY, &ii->i_state)) + nilfs_fill_in_file_bmap(ifile, ii); + } +} + +/* + * CRC calculation routines + */ +static void nilfs_fill_in_super_root_crc(struct buffer_head *bh_sr, u32 seed) +{ + struct nilfs_super_root *raw_sr = + (struct nilfs_super_root *)bh_sr->b_data; + u32 crc; + + BUG_ON(NILFS_SR_BYTES > bh_sr->b_size); + crc = crc32_le(seed, + (unsigned char *)raw_sr + sizeof(raw_sr->sr_sum), + NILFS_SR_BYTES - sizeof(raw_sr->sr_sum)); + raw_sr->sr_sum = cpu_to_le32(crc); +} + +static void nilfs_segctor_fill_in_checksums(struct nilfs_sc_info *sci, + u32 seed) +{ + struct nilfs_segment_buffer *segbuf; + + if (sci->sc_super_root) + nilfs_fill_in_super_root_crc(sci->sc_super_root, seed); + + list_for_each_entry(segbuf, &sci->sc_segbufs, sb_list) { + nilfs_segbuf_fill_in_segsum_crc(segbuf, seed); + nilfs_segbuf_fill_in_data_crc(segbuf, seed); + } +} + +static void nilfs_segctor_fill_in_super_root(struct nilfs_sc_info *sci, + struct the_nilfs *nilfs) +{ + struct buffer_head *bh_sr = sci->sc_super_root; + struct nilfs_super_root *raw_sr = + (struct nilfs_super_root *)bh_sr->b_data; + unsigned isz = nilfs->ns_inode_size; + + raw_sr->sr_bytes = cpu_to_le16(NILFS_SR_BYTES); + raw_sr->sr_nongc_ctime + = cpu_to_le64(nilfs_doing_gc() ? + nilfs->ns_nongc_ctime : sci->sc_seg_ctime); + raw_sr->sr_flags = 0; + + nilfs_mdt_write_inode_direct( + nilfs_dat_inode(nilfs), bh_sr, NILFS_SR_DAT_OFFSET(isz)); + nilfs_mdt_write_inode_direct( + nilfs->ns_cpfile, bh_sr, NILFS_SR_CPFILE_OFFSET(isz)); + nilfs_mdt_write_inode_direct( + nilfs->ns_sufile, bh_sr, NILFS_SR_SUFILE_OFFSET(isz)); +} + +static void nilfs_redirty_inodes(struct list_head *head) +{ + struct nilfs_inode_info *ii; + + list_for_each_entry(ii, head, i_dirty) { + if (test_bit(NILFS_I_COLLECTED, &ii->i_state)) + clear_bit(NILFS_I_COLLECTED, &ii->i_state); + } +} + +static void nilfs_drop_collected_inodes(struct list_head *head) +{ + struct nilfs_inode_info *ii; + + list_for_each_entry(ii, head, i_dirty) { + if (!test_and_clear_bit(NILFS_I_COLLECTED, &ii->i_state)) + continue; + + clear_bit(NILFS_I_INODE_DIRTY, &ii->i_state); + set_bit(NILFS_I_UPDATED, &ii->i_state); + } +} + +static void nilfs_segctor_cancel_free_segments(struct nilfs_sc_info *sci, + struct inode *sufile) + +{ + struct list_head *head = &sci->sc_cleaning_segments; + struct nilfs_segment_entry *ent; + int err; + + list_for_each_entry(ent, head, list) { + if (!(ent->flags & NILFS_SLH_FREED)) + break; + err = nilfs_sufile_cancel_free(sufile, ent->segnum); + BUG_ON(err); + + ent->flags &= ~NILFS_SLH_FREED; + } +} + +static int nilfs_segctor_prepare_free_segments(struct nilfs_sc_info *sci, + struct inode *sufile) +{ + struct list_head *head = &sci->sc_cleaning_segments; + struct nilfs_segment_entry *ent; + int err; + + list_for_each_entry(ent, head, list) { + err = nilfs_sufile_free(sufile, ent->segnum); + if (unlikely(err)) + return err; + ent->flags |= NILFS_SLH_FREED; + } + return 0; +} + +static void nilfs_segctor_commit_free_segments(struct nilfs_sc_info *sci) +{ + nilfs_dispose_segment_list(&sci->sc_cleaning_segments); +} + +static int nilfs_segctor_apply_buffers(struct nilfs_sc_info *sci, + struct inode *inode, + struct list_head *listp, + int (*collect)(struct nilfs_sc_info *, + struct buffer_head *, + struct inode *)) +{ + struct buffer_head *bh, *n; + int err = 0; + + if (collect) { + list_for_each_entry_safe(bh, n, listp, b_assoc_buffers) { + list_del_init(&bh->b_assoc_buffers); + err = collect(sci, bh, inode); + brelse(bh); + if (unlikely(err)) + goto dispose_buffers; + } + return 0; + } + + dispose_buffers: + while (!list_empty(listp)) { + bh = list_entry(listp->next, struct buffer_head, + b_assoc_buffers); + list_del_init(&bh->b_assoc_buffers); + brelse(bh); + } + return err; +} + +static int nilfs_segctor_scan_file(struct nilfs_sc_info *sci, + struct inode *inode, + struct nilfs_sc_operations *sc_ops) +{ + LIST_HEAD(data_buffers); + LIST_HEAD(node_buffers); + int err, err2; + + if (!(sci->sc_stage.flags & NILFS_CF_NODE)) { + err = nilfs_lookup_dirty_data_buffers(inode, &data_buffers, + sci); + if (err) { + err2 = nilfs_segctor_apply_buffers( + sci, inode, &data_buffers, + err == -E2BIG ? sc_ops->collect_data : NULL); + if (err == -E2BIG) + err = err2; + goto break_or_fail; + } + } + nilfs_lookup_dirty_node_buffers(inode, &node_buffers); + + if (!(sci->sc_stage.flags & NILFS_CF_NODE)) { + err = nilfs_segctor_apply_buffers( + sci, inode, &data_buffers, sc_ops->collect_data); + if (unlikely(err)) { + /* dispose node list */ + nilfs_segctor_apply_buffers( + sci, inode, &node_buffers, NULL); + goto break_or_fail; + } + sci->sc_stage.flags |= NILFS_CF_NODE; + } + /* Collect node */ + err = nilfs_segctor_apply_buffers( + sci, inode, &node_buffers, sc_ops->collect_node); + if (unlikely(err)) + goto break_or_fail; + + nilfs_bmap_lookup_dirty_buffers(NILFS_I(inode)->i_bmap, &node_buffers); + err = nilfs_segctor_apply_buffers( + sci, inode, &node_buffers, sc_ops->collect_bmap); + if (unlikely(err)) + goto break_or_fail; + + nilfs_segctor_end_finfo(sci, inode); + sci->sc_stage.flags &= ~NILFS_CF_NODE; + + break_or_fail: + return err; +} + +static int nilfs_segctor_scan_file_dsync(struct nilfs_sc_info *sci, + struct inode *inode) +{ + LIST_HEAD(data_buffers); + int err, err2; + + err = nilfs_lookup_dirty_data_buffers(inode, &data_buffers, sci); + err2 = nilfs_segctor_apply_buffers(sci, inode, &data_buffers, + (!err || err == -E2BIG) ? + nilfs_collect_file_data : NULL); + if (err == -E2BIG) + err = err2; + if (!err) + nilfs_segctor_end_finfo(sci, inode); + return err; +} + +static int nilfs_segctor_collect_blocks(struct nilfs_sc_info *sci, int mode) +{ + struct nilfs_sb_info *sbi = sci->sc_sbi; + struct the_nilfs *nilfs = sbi->s_nilfs; + struct list_head *head; + struct nilfs_inode_info *ii; + int err = 0; + + switch (sci->sc_stage.scnt) { + case NILFS_ST_INIT: + /* Pre-processes */ + sci->sc_stage.flags = 0; + + if (!test_bit(NILFS_SC_UNCLOSED, &sci->sc_flags)) { + sci->sc_nblk_inc = 0; + sci->sc_curseg->sb_sum.flags = NILFS_SS_LOGBGN; + if (mode == SC_LSEG_DSYNC) { + sci->sc_stage.scnt = NILFS_ST_DSYNC; + goto dsync_mode; + } + } + + sci->sc_stage.dirty_file_ptr = NULL; + sci->sc_stage.gc_inode_ptr = NULL; + if (mode == SC_FLUSH_DAT) { + sci->sc_stage.scnt = NILFS_ST_DAT; + goto dat_stage; + } + sci->sc_stage.scnt++; /* Fall through */ + case NILFS_ST_GC: + if (nilfs_doing_gc()) { + head = &sci->sc_gc_inodes; + ii = list_prepare_entry(sci->sc_stage.gc_inode_ptr, + head, i_dirty); + list_for_each_entry_continue(ii, head, i_dirty) { + err = nilfs_segctor_scan_file( + sci, &ii->vfs_inode, + &nilfs_sc_file_ops); + if (unlikely(err)) { + sci->sc_stage.gc_inode_ptr = list_entry( + ii->i_dirty.prev, + struct nilfs_inode_info, + i_dirty); + goto break_or_fail; + } + set_bit(NILFS_I_COLLECTED, &ii->i_state); + } + sci->sc_stage.gc_inode_ptr = NULL; + } + sci->sc_stage.scnt++; /* Fall through */ + case NILFS_ST_FILE: + head = &sci->sc_dirty_files; + ii = list_prepare_entry(sci->sc_stage.dirty_file_ptr, head, + i_dirty); + list_for_each_entry_continue(ii, head, i_dirty) { + clear_bit(NILFS_I_DIRTY, &ii->i_state); + + err = nilfs_segctor_scan_file(sci, &ii->vfs_inode, + &nilfs_sc_file_ops); + if (unlikely(err)) { + sci->sc_stage.dirty_file_ptr = + list_entry(ii->i_dirty.prev, + struct nilfs_inode_info, + i_dirty); + goto break_or_fail; + } + /* sci->sc_stage.dirty_file_ptr = NILFS_I(inode); */ + /* XXX: required ? */ + } + sci->sc_stage.dirty_file_ptr = NULL; + if (mode == SC_FLUSH_FILE) { + sci->sc_stage.scnt = NILFS_ST_DONE; + return 0; + } + sci->sc_stage.scnt++; /* Fall through */ + case NILFS_ST_SKETCH: + if (mode == SC_LSEG_SR && sci->sc_sketch_inode) { + ii = NILFS_I(sci->sc_sketch_inode); + if (test_bit(NILFS_I_DIRTY, &ii->i_state)) { + sci->sc_sketch_inode->i_ctime.tv_sec + = sci->sc_seg_ctime; + sci->sc_sketch_inode->i_mtime.tv_sec + = sci->sc_seg_ctime; + err = nilfs_mark_inode_dirty( + sci->sc_sketch_inode); + if (unlikely(err)) + goto break_or_fail; + } + err = nilfs_segctor_scan_file(sci, + sci->sc_sketch_inode, + &nilfs_sc_file_ops); + if (unlikely(err)) + goto break_or_fail; + } + sci->sc_stage.scnt++; + sci->sc_stage.flags |= NILFS_CF_IFILE_STARTED; + /* Fall through */ + case NILFS_ST_IFILE: + err = nilfs_segctor_scan_file(sci, sbi->s_ifile, + &nilfs_sc_file_ops); + if (unlikely(err)) + break; + sci->sc_stage.scnt++; + /* Creating a checkpoint */ + err = nilfs_segctor_create_checkpoint(sci); + if (unlikely(err)) + break; + /* Fall through */ + case NILFS_ST_CPFILE: + err = nilfs_segctor_scan_file(sci, nilfs->ns_cpfile, + &nilfs_sc_file_ops); + if (unlikely(err)) + break; + sci->sc_stage.scnt++; /* Fall through */ + case NILFS_ST_SUFILE: + err = nilfs_segctor_prepare_free_segments(sci, + nilfs->ns_sufile); + if (unlikely(err)) + break; + err = nilfs_segctor_scan_file(sci, nilfs->ns_sufile, + &nilfs_sc_file_ops); + if (unlikely(err)) + break; + sci->sc_stage.scnt++; /* Fall through */ + case NILFS_ST_DAT: + dat_stage: + err = nilfs_segctor_scan_file(sci, nilfs_dat_inode(nilfs), + &nilfs_sc_dat_ops); + if (unlikely(err)) + break; + if (mode == SC_FLUSH_DAT) { + sci->sc_stage.scnt = NILFS_ST_DONE; + return 0; + } + sci->sc_stage.scnt++; /* Fall through */ + case NILFS_ST_SR: + if (mode == SC_LSEG_SR) { + /* Appending a super root */ + err = nilfs_segctor_add_super_root(sci); + if (unlikely(err)) + break; + } + /* End of a logical segment */ + sci->sc_curseg->sb_sum.flags |= NILFS_SS_LOGEND; + sci->sc_stage.scnt = NILFS_ST_DONE; + return 0; + case NILFS_ST_DSYNC: + dsync_mode: + sci->sc_curseg->sb_sum.flags |= NILFS_SS_SYNDT; + ii = sci->sc_stage.dirty_file_ptr; + if (!test_bit(NILFS_I_BUSY, &ii->i_state)) + break; + + err = nilfs_segctor_scan_file_dsync(sci, &ii->vfs_inode); + if (unlikely(err)) + break; + sci->sc_stage.dirty_file_ptr = NULL; + sci->sc_curseg->sb_sum.flags |= NILFS_SS_LOGEND; + sci->sc_stage.scnt = NILFS_ST_DONE; + return 0; + case NILFS_ST_DONE: + return 0; + default: + BUG(); + } + + break_or_fail: + return err; +} + +static int nilfs_segctor_terminate_segment(struct nilfs_sc_info *sci, + struct nilfs_segment_buffer *segbuf, + struct inode *sufile) +{ + struct nilfs_segment_entry *ent = segbuf->sb_segent; + int err; + + err = nilfs_open_segment_entry(ent, sufile); + if (unlikely(err)) + return err; + nilfs_mdt_mark_buffer_dirty(ent->bh_su); + nilfs_mdt_mark_dirty(sufile); + nilfs_close_segment_entry(ent, sufile); + + list_add_tail(&ent->list, &sci->sc_active_segments); + segbuf->sb_segent = NULL; + return 0; +} + +static int nilfs_touch_segusage(struct inode *sufile, __u64 segnum) +{ + struct buffer_head *bh_su; + struct nilfs_segment_usage *raw_su; + int err; + + err = nilfs_sufile_get_segment_usage(sufile, segnum, &raw_su, &bh_su); + if (unlikely(err)) + return err; + nilfs_mdt_mark_buffer_dirty(bh_su); + nilfs_mdt_mark_dirty(sufile); + nilfs_sufile_put_segment_usage(sufile, segnum, bh_su); + return 0; +} + +static int nilfs_segctor_begin_construction(struct nilfs_sc_info *sci, + struct the_nilfs *nilfs) +{ + struct nilfs_segment_buffer *segbuf, *n; + struct inode *sufile = nilfs->ns_sufile; + __u64 nextnum; + int err; + + if (list_empty(&sci->sc_segbufs)) { + segbuf = nilfs_segbuf_new(sci->sc_super); + if (unlikely(!segbuf)) + return -ENOMEM; + list_add(&segbuf->sb_list, &sci->sc_segbufs); + } else + segbuf = NILFS_FIRST_SEGBUF(&sci->sc_segbufs); + + err = nilfs_segbuf_map(segbuf, nilfs->ns_segnum, + nilfs->ns_pseg_offset, nilfs); + if (unlikely(err)) + return err; + + if (segbuf->sb_rest_blocks < NILFS_PSEG_MIN_BLOCKS) { + err = nilfs_segctor_terminate_segment(sci, segbuf, sufile); + if (unlikely(err)) + return err; + + nilfs_shift_to_next_segment(nilfs); + err = nilfs_segbuf_map(segbuf, nilfs->ns_segnum, 0, nilfs); + } + sci->sc_segbuf_nblocks = segbuf->sb_rest_blocks; + + err = nilfs_touch_segusage(sufile, segbuf->sb_segnum); + if (unlikely(err)) + return err; + + if (nilfs->ns_segnum == nilfs->ns_nextnum) { + /* Start from the head of a new full segment */ + err = nilfs_sufile_alloc(sufile, &nextnum); + if (unlikely(err)) + return err; + } else + nextnum = nilfs->ns_nextnum; + + segbuf->sb_sum.seg_seq = nilfs->ns_seg_seq; + nilfs_segbuf_set_next_segnum(segbuf, nextnum, nilfs); + + /* truncating segment buffers */ + list_for_each_entry_safe_continue(segbuf, n, &sci->sc_segbufs, + sb_list) { + list_del_init(&segbuf->sb_list); + nilfs_segbuf_free(segbuf); + } + return err; +} + +static int nilfs_segctor_extend_segments(struct nilfs_sc_info *sci, + struct the_nilfs *nilfs, int nadd) +{ + struct nilfs_segment_buffer *segbuf, *prev, *n; + struct inode *sufile = nilfs->ns_sufile; + __u64 nextnextnum; + LIST_HEAD(list); + int err, ret, i; + + prev = NILFS_LAST_SEGBUF(&sci->sc_segbufs); + /* + * Since the segment specified with nextnum might be allocated during + * the previous construction, the buffer including its segusage may + * not be dirty. The following call ensures that the buffer is dirty + * and will pin the buffer on memory until the sufile is written. + */ + err = nilfs_touch_segusage(sufile, prev->sb_nextnum); + if (unlikely(err)) + return err; + + for (i = 0; i < nadd; i++) { + /* extend segment info */ + err = -ENOMEM; + segbuf = nilfs_segbuf_new(sci->sc_super); + if (unlikely(!segbuf)) + goto failed; + + /* map this buffer to region of segment on-disk */ + err = nilfs_segbuf_map(segbuf, prev->sb_nextnum, 0, nilfs); + if (unlikely(err)) + goto failed_segbuf; + + sci->sc_segbuf_nblocks += segbuf->sb_rest_blocks; + + /* allocate the next next full segment */ + err = nilfs_sufile_alloc(sufile, &nextnextnum); + if (unlikely(err)) + goto failed_segbuf; + + segbuf->sb_sum.seg_seq = prev->sb_sum.seg_seq + 1; + nilfs_segbuf_set_next_segnum(segbuf, nextnextnum, nilfs); + + list_add_tail(&segbuf->sb_list, &list); + prev = segbuf; + } + list_splice(&list, sci->sc_segbufs.prev); + return 0; + + failed_segbuf: + nilfs_segbuf_free(segbuf); + failed: + list_for_each_entry_safe(segbuf, n, &list, sb_list) { + ret = nilfs_sufile_free(sufile, segbuf->sb_nextnum); + BUG_ON(ret); + list_del_init(&segbuf->sb_list); + nilfs_segbuf_free(segbuf); + } + return err; +} + +static void nilfs_segctor_free_incomplete_segments(struct nilfs_sc_info *sci, + struct the_nilfs *nilfs) +{ + struct nilfs_segment_buffer *segbuf; + int ret, done = 0; + + segbuf = NILFS_FIRST_SEGBUF(&sci->sc_segbufs); + if (nilfs->ns_nextnum != segbuf->sb_nextnum) { + ret = nilfs_sufile_free(nilfs->ns_sufile, segbuf->sb_nextnum); + BUG_ON(ret); + } + if (segbuf->sb_io_error) { + /* Case 1: The first segment failed */ + if (segbuf->sb_pseg_start != segbuf->sb_fseg_start) + /* Case 1a: Partial segment appended into an existing + segment */ + nilfs_terminate_segment(nilfs, segbuf->sb_fseg_start, + segbuf->sb_fseg_end); + else /* Case 1b: New full segment */ + set_nilfs_discontinued(nilfs); + done++; + } + + list_for_each_entry_continue(segbuf, &sci->sc_segbufs, sb_list) { + ret = nilfs_sufile_free(nilfs->ns_sufile, segbuf->sb_nextnum); + BUG_ON(ret); + if (!done && segbuf->sb_io_error) { + if (segbuf->sb_segnum != nilfs->ns_nextnum) + /* Case 2: extended segment (!= next) failed */ + nilfs_sufile_set_error(nilfs->ns_sufile, + segbuf->sb_segnum); + done++; + } + } +} + +static void nilfs_segctor_clear_segment_buffers(struct nilfs_sc_info *sci) +{ + struct nilfs_segment_buffer *segbuf; + + list_for_each_entry(segbuf, &sci->sc_segbufs, sb_list) + nilfs_segbuf_clear(segbuf); + sci->sc_super_root = NULL; +} + +static void nilfs_segctor_destroy_segment_buffers(struct nilfs_sc_info *sci) +{ + struct nilfs_segment_buffer *segbuf; + + while (!list_empty(&sci->sc_segbufs)) { + segbuf = NILFS_FIRST_SEGBUF(&sci->sc_segbufs); + list_del_init(&segbuf->sb_list); + nilfs_segbuf_free(segbuf); + } + /* sci->sc_curseg = NULL; */ +} + +static void nilfs_segctor_end_construction(struct nilfs_sc_info *sci, + struct the_nilfs *nilfs, int err) +{ + if (unlikely(err)) { + nilfs_segctor_free_incomplete_segments(sci, nilfs); + nilfs_segctor_cancel_free_segments(sci, nilfs->ns_sufile); + } + nilfs_segctor_clear_segment_buffers(sci); +} + +static void nilfs_segctor_update_segusage(struct nilfs_sc_info *sci, + struct inode *sufile) +{ + struct nilfs_segment_buffer *segbuf; + struct buffer_head *bh_su; + struct nilfs_segment_usage *raw_su; + unsigned long live_blocks; + int ret; + + list_for_each_entry(segbuf, &sci->sc_segbufs, sb_list) { + ret = nilfs_sufile_get_segment_usage(sufile, segbuf->sb_segnum, + &raw_su, &bh_su); + BUG_ON(ret); /* always succeed because bh_su is dirty */ + live_blocks = segbuf->sb_sum.nblocks + + (segbuf->sb_pseg_start - segbuf->sb_fseg_start); + raw_su->su_lastmod = cpu_to_le64(sci->sc_seg_ctime); + raw_su->su_nblocks = cpu_to_le32(live_blocks); + nilfs_sufile_put_segment_usage(sufile, segbuf->sb_segnum, + bh_su); + } +} + +static void nilfs_segctor_cancel_segusage(struct nilfs_sc_info *sci, + struct inode *sufile) +{ + struct nilfs_segment_buffer *segbuf; + struct buffer_head *bh_su; + struct nilfs_segment_usage *raw_su; + int ret; + + segbuf = NILFS_FIRST_SEGBUF(&sci->sc_segbufs); + ret = nilfs_sufile_get_segment_usage(sufile, segbuf->sb_segnum, + &raw_su, &bh_su); + BUG_ON(ret); /* always succeed because bh_su is dirty */ + raw_su->su_nblocks = cpu_to_le32(segbuf->sb_pseg_start - + segbuf->sb_fseg_start); + nilfs_sufile_put_segment_usage(sufile, segbuf->sb_segnum, bh_su); + + list_for_each_entry_continue(segbuf, &sci->sc_segbufs, sb_list) { + ret = nilfs_sufile_get_segment_usage(sufile, segbuf->sb_segnum, + &raw_su, &bh_su); + BUG_ON(ret); /* always succeed */ + raw_su->su_nblocks = 0; + nilfs_sufile_put_segment_usage(sufile, segbuf->sb_segnum, + bh_su); + } +} + +static void nilfs_segctor_truncate_segments(struct nilfs_sc_info *sci, + struct nilfs_segment_buffer *last, + struct inode *sufile) +{ + struct nilfs_segment_buffer *segbuf = last, *n; + int ret; + + list_for_each_entry_safe_continue(segbuf, n, &sci->sc_segbufs, + sb_list) { + list_del_init(&segbuf->sb_list); + sci->sc_segbuf_nblocks -= segbuf->sb_rest_blocks; + ret = nilfs_sufile_free(sufile, segbuf->sb_nextnum); + BUG_ON(ret); + nilfs_segbuf_free(segbuf); + } +} + + +static int nilfs_segctor_collect(struct nilfs_sc_info *sci, + struct the_nilfs *nilfs, int mode) +{ + struct nilfs_cstage prev_stage = sci->sc_stage; + int err, nadd = 1; + + /* Collection retry loop */ + for (;;) { + sci->sc_super_root = NULL; + sci->sc_nblk_this_inc = 0; + sci->sc_curseg = NILFS_FIRST_SEGBUF(&sci->sc_segbufs); + + err = nilfs_segctor_reset_segment_buffer(sci); + if (unlikely(err)) + goto failed; + + err = nilfs_segctor_collect_blocks(sci, mode); + sci->sc_nblk_this_inc += sci->sc_curseg->sb_sum.nblocks; + if (!err) + break; + + if (unlikely(err != -E2BIG)) + goto failed; + + /* The current segment is filled up */ + if (mode != SC_LSEG_SR || sci->sc_stage.scnt < NILFS_ST_CPFILE) + break; + + nilfs_segctor_cancel_free_segments(sci, nilfs->ns_sufile); + nilfs_segctor_clear_segment_buffers(sci); + + err = nilfs_segctor_extend_segments(sci, nilfs, nadd); + if (unlikely(err)) + return err; + + nadd = min_t(int, nadd << 1, SC_MAX_SEGDELTA); + sci->sc_stage = prev_stage; + } + nilfs_segctor_truncate_segments(sci, sci->sc_curseg, nilfs->ns_sufile); + return 0; + + failed: + return err; +} + +static void nilfs_list_replace_buffer(struct buffer_head *old_bh, + struct buffer_head *new_bh) +{ + BUG_ON(!list_empty(&new_bh->b_assoc_buffers)); + + list_replace_init(&old_bh->b_assoc_buffers, &new_bh->b_assoc_buffers); + /* The caller must release old_bh */ +} + +static int +nilfs_segctor_update_payload_blocknr(struct nilfs_sc_info *sci, + struct nilfs_segment_buffer *segbuf, + int mode) +{ + struct inode *inode = NULL; + sector_t blocknr; + unsigned long nfinfo = segbuf->sb_sum.nfinfo; + unsigned long nblocks = 0, ndatablk = 0; + struct nilfs_sc_operations *sc_op = NULL; + struct nilfs_segsum_pointer ssp; + struct nilfs_finfo *finfo = NULL; + union nilfs_binfo binfo; + struct buffer_head *bh, *bh_org; + ino_t ino = 0; + int err = 0; + + if (!nfinfo) + goto out; + + blocknr = segbuf->sb_pseg_start + segbuf->sb_sum.nsumblk; + ssp.bh = NILFS_SEGBUF_FIRST_BH(&segbuf->sb_segsum_buffers); + ssp.offset = sizeof(struct nilfs_segment_summary); + + list_for_each_entry(bh, &segbuf->sb_payload_buffers, b_assoc_buffers) { + if (bh == sci->sc_super_root) + break; + if (!finfo) { + finfo = nilfs_segctor_map_segsum_entry( + sci, &ssp, sizeof(*finfo)); + ino = le64_to_cpu(finfo->fi_ino); + nblocks = le32_to_cpu(finfo->fi_nblocks); + ndatablk = le32_to_cpu(finfo->fi_ndatablk); + + if (buffer_nilfs_node(bh)) + inode = NILFS_BTNC_I(bh->b_page->mapping); + else + inode = NILFS_AS_I(bh->b_page->mapping); + + if (mode == SC_LSEG_DSYNC) + sc_op = &nilfs_sc_dsync_ops; + else if (ino == NILFS_DAT_INO) + sc_op = &nilfs_sc_dat_ops; + else /* file blocks */ + sc_op = &nilfs_sc_file_ops; + } + bh_org = bh; + get_bh(bh_org); + err = nilfs_bmap_assign(NILFS_I(inode)->i_bmap, &bh, blocknr, + &binfo); + if (bh != bh_org) + nilfs_list_replace_buffer(bh_org, bh); + brelse(bh_org); + if (unlikely(err)) + goto failed_bmap; + + if (ndatablk > 0) + sc_op->write_data_binfo(sci, &ssp, &binfo); + else + sc_op->write_node_binfo(sci, &ssp, &binfo); + + blocknr++; + if (--nblocks == 0) { + finfo = NULL; + if (--nfinfo == 0) + break; + } else if (ndatablk > 0) + ndatablk--; + } + out: + return 0; + + failed_bmap: + err = nilfs_handle_bmap_error(err, __func__, inode, sci->sc_super); + return err; +} + +static int nilfs_segctor_assign(struct nilfs_sc_info *sci, int mode) +{ + struct nilfs_segment_buffer *segbuf; + int err; + + list_for_each_entry(segbuf, &sci->sc_segbufs, sb_list) { + err = nilfs_segctor_update_payload_blocknr(sci, segbuf, mode); + if (unlikely(err)) + return err; + nilfs_segbuf_fill_in_segsum(segbuf); + } + return 0; +} + +static int +nilfs_copy_replace_page_buffers(struct page *page, struct list_head *out) +{ + struct page *clone_page; + struct buffer_head *bh, *head, *bh2; + void *kaddr; + + bh = head = page_buffers(page); + + clone_page = nilfs_alloc_private_page(bh->b_bdev, bh->b_size, 0); + if (unlikely(!clone_page)) + return -ENOMEM; + + bh2 = page_buffers(clone_page); + kaddr = kmap_atomic(page, KM_USER0); + do { + if (list_empty(&bh->b_assoc_buffers)) + continue; + get_bh(bh2); + page_cache_get(clone_page); /* for each bh */ + memcpy(bh2->b_data, kaddr + bh_offset(bh), bh2->b_size); + bh2->b_blocknr = bh->b_blocknr; + list_replace(&bh->b_assoc_buffers, &bh2->b_assoc_buffers); + list_add_tail(&bh->b_assoc_buffers, out); + } while (bh = bh->b_this_page, bh2 = bh2->b_this_page, bh != head); + kunmap_atomic(kaddr, KM_USER0); + + if (!TestSetPageWriteback(clone_page)) + inc_zone_page_state(clone_page, NR_WRITEBACK); + unlock_page(clone_page); + + return 0; +} + +static int nilfs_test_page_to_be_frozen(struct page *page) +{ + struct address_space *mapping = page->mapping; + + if (!mapping || !mapping->host || S_ISDIR(mapping->host->i_mode)) + return 0; + + if (page_mapped(page)) { + ClearPageChecked(page); + return 1; + } + return PageChecked(page); +} + +static int nilfs_begin_page_io(struct page *page, struct list_head *out) +{ + if (!page || PageWriteback(page)) + /* For split b-tree node pages, this function may be called + twice. We ignore the 2nd or later calls by this check. */ + return 0; + + lock_page(page); + clear_page_dirty_for_io(page); + set_page_writeback(page); + unlock_page(page); + + if (nilfs_test_page_to_be_frozen(page)) { + int err = nilfs_copy_replace_page_buffers(page, out); + if (unlikely(err)) + return err; + } + return 0; +} + +static int nilfs_segctor_prepare_write(struct nilfs_sc_info *sci, + struct page **failed_page) +{ + struct nilfs_segment_buffer *segbuf; + struct page *bd_page = NULL, *fs_page = NULL; + struct list_head *list = &sci->sc_copied_buffers; + int err; + + *failed_page = NULL; + list_for_each_entry(segbuf, &sci->sc_segbufs, sb_list) { + struct buffer_head *bh; + + list_for_each_entry(bh, &segbuf->sb_segsum_buffers, + b_assoc_buffers) { + if (bh->b_page != bd_page) { + if (bd_page) { + lock_page(bd_page); + clear_page_dirty_for_io(bd_page); + set_page_writeback(bd_page); + unlock_page(bd_page); + } + bd_page = bh->b_page; + } + } + + list_for_each_entry(bh, &segbuf->sb_payload_buffers, + b_assoc_buffers) { + if (bh == sci->sc_super_root) { + if (bh->b_page != bd_page) { + lock_page(bd_page); + clear_page_dirty_for_io(bd_page); + set_page_writeback(bd_page); + unlock_page(bd_page); + bd_page = bh->b_page; + } + break; + } + if (bh->b_page != fs_page) { + err = nilfs_begin_page_io(fs_page, list); + if (unlikely(err)) { + *failed_page = fs_page; + goto out; + } + fs_page = bh->b_page; + } + } + } + if (bd_page) { + lock_page(bd_page); + clear_page_dirty_for_io(bd_page); + set_page_writeback(bd_page); + unlock_page(bd_page); + } + err = nilfs_begin_page_io(fs_page, list); + if (unlikely(err)) + *failed_page = fs_page; + out: + return err; +} + +static int nilfs_segctor_write(struct nilfs_sc_info *sci, + struct backing_dev_info *bdi) +{ + struct nilfs_segment_buffer *segbuf; + struct nilfs_write_info wi; + int err, res; + + wi.sb = sci->sc_super; + wi.bh_sr = sci->sc_super_root; + wi.bdi = bdi; + + list_for_each_entry(segbuf, &sci->sc_segbufs, sb_list) { + nilfs_segbuf_prepare_write(segbuf, &wi); + err = nilfs_segbuf_write(segbuf, &wi); + + res = nilfs_segbuf_wait(segbuf, &wi); + err = unlikely(err) ? : res; + if (unlikely(err)) + return err; + } + return 0; +} + +static int nilfs_page_has_uncleared_buffer(struct page *page) +{ + struct buffer_head *head, *bh; + + head = bh = page_buffers(page); + do { + if (buffer_dirty(bh) && !list_empty(&bh->b_assoc_buffers)) + return 1; + bh = bh->b_this_page; + } while (bh != head); + return 0; +} + +static void __nilfs_end_page_io(struct page *page, int err) +{ + /* BUG_ON(err > 0); */ + if (!err) { + if (!nilfs_page_buffers_clean(page)) + __set_page_dirty_nobuffers(page); + ClearPageError(page); + } else { + __set_page_dirty_nobuffers(page); + SetPageError(page); + } + + if (buffer_nilfs_allocated(page_buffers(page))) { + if (TestClearPageWriteback(page)) + dec_zone_page_state(page, NR_WRITEBACK); + } else + end_page_writeback(page); +} + +static void nilfs_end_page_io(struct page *page, int err) +{ + if (!page) + return; + + if (buffer_nilfs_node(page_buffers(page)) && + nilfs_page_has_uncleared_buffer(page)) + /* For b-tree node pages, this function may be called twice + or more because they might be split in a segment. + This check assures that cleanup has been done for all + buffers in a split btnode page. */ + return; + + __nilfs_end_page_io(page, err); +} + +static void nilfs_clear_copied_buffers(struct list_head *list, int err) +{ + struct buffer_head *bh, *head; + struct page *page; + + while (!list_empty(list)) { + bh = list_entry(list->next, struct buffer_head, + b_assoc_buffers); + page = bh->b_page; + page_cache_get(page); + head = bh = page_buffers(page); + do { + if (!list_empty(&bh->b_assoc_buffers)) { + list_del_init(&bh->b_assoc_buffers); + if (!err) { + set_buffer_uptodate(bh); + clear_buffer_dirty(bh); + clear_buffer_nilfs_volatile(bh); + } + brelse(bh); /* for b_assoc_buffers */ + } + } while ((bh = bh->b_this_page) != head); + + __nilfs_end_page_io(page, err); + page_cache_release(page); + } +} + +static void nilfs_segctor_abort_write(struct nilfs_sc_info *sci, + struct page *failed_page, int err) +{ + struct nilfs_segment_buffer *segbuf; + struct page *bd_page = NULL, *fs_page = NULL; + + list_for_each_entry(segbuf, &sci->sc_segbufs, sb_list) { + struct buffer_head *bh; + + list_for_each_entry(bh, &segbuf->sb_segsum_buffers, + b_assoc_buffers) { + if (bh->b_page != bd_page) { + if (bd_page) + end_page_writeback(bd_page); + bd_page = bh->b_page; + } + } + + list_for_each_entry(bh, &segbuf->sb_payload_buffers, + b_assoc_buffers) { + if (bh == sci->sc_super_root) { + if (bh->b_page != bd_page) { + end_page_writeback(bd_page); + bd_page = bh->b_page; + } + break; + } + if (bh->b_page != fs_page) { + nilfs_end_page_io(fs_page, err); + if (unlikely(fs_page == failed_page)) + goto done; + fs_page = bh->b_page; + } + } + } + if (bd_page) + end_page_writeback(bd_page); + + nilfs_end_page_io(fs_page, err); + done: + nilfs_clear_copied_buffers(&sci->sc_copied_buffers, err); +} + +static void nilfs_set_next_segment(struct the_nilfs *nilfs, + struct nilfs_segment_buffer *segbuf) +{ + nilfs->ns_segnum = segbuf->sb_segnum; + nilfs->ns_nextnum = segbuf->sb_nextnum; + nilfs->ns_pseg_offset = segbuf->sb_pseg_start - segbuf->sb_fseg_start + + segbuf->sb_sum.nblocks; + nilfs->ns_seg_seq = segbuf->sb_sum.seg_seq; + nilfs->ns_ctime = segbuf->sb_sum.ctime; +} + +static void nilfs_segctor_complete_write(struct nilfs_sc_info *sci) +{ + struct nilfs_segment_buffer *segbuf; + struct page *bd_page = NULL, *fs_page = NULL; + struct nilfs_sb_info *sbi = sci->sc_sbi; + struct the_nilfs *nilfs = sbi->s_nilfs; + int update_sr = (sci->sc_super_root != NULL); + + list_for_each_entry(segbuf, &sci->sc_segbufs, sb_list) { + struct buffer_head *bh; + + list_for_each_entry(bh, &segbuf->sb_segsum_buffers, + b_assoc_buffers) { + set_buffer_uptodate(bh); + clear_buffer_dirty(bh); + if (bh->b_page != bd_page) { + if (bd_page) + end_page_writeback(bd_page); + bd_page = bh->b_page; + } + } + /* + * We assume that the buffers which belong to the same page + * continue over the buffer list. + * Under this assumption, the last BHs of pages is + * identifiable by the discontinuity of bh->b_page + * (page != fs_page). + * + * For B-tree node blocks, however, this assumption is not + * guaranteed. The cleanup code of B-tree node pages needs + * special care. + */ + list_for_each_entry(bh, &segbuf->sb_payload_buffers, + b_assoc_buffers) { + set_buffer_uptodate(bh); + clear_buffer_dirty(bh); + clear_buffer_nilfs_volatile(bh); + if (bh == sci->sc_super_root) { + if (bh->b_page != bd_page) { + end_page_writeback(bd_page); + bd_page = bh->b_page; + } + break; + } + if (bh->b_page != fs_page) { + nilfs_end_page_io(fs_page, 0); + fs_page = bh->b_page; + } + } + + if (!NILFS_SEG_SIMPLEX(&segbuf->sb_sum)) { + if (NILFS_SEG_LOGBGN(&segbuf->sb_sum)) { + set_bit(NILFS_SC_UNCLOSED, &sci->sc_flags); + sci->sc_lseg_stime = jiffies; + } + if (NILFS_SEG_LOGEND(&segbuf->sb_sum)) + clear_bit(NILFS_SC_UNCLOSED, &sci->sc_flags); + } + } + /* + * Since pages may continue over multiple segment buffers, + * end of the last page must be checked outside of the loop. + */ + if (bd_page) + end_page_writeback(bd_page); + + nilfs_end_page_io(fs_page, 0); + + nilfs_clear_copied_buffers(&sci->sc_copied_buffers, 0); + + nilfs_drop_collected_inodes(&sci->sc_dirty_files); + + if (nilfs_doing_gc()) { + nilfs_drop_collected_inodes(&sci->sc_gc_inodes); + if (update_sr) + nilfs_commit_gcdat_inode(nilfs); + } else { + nilfs->ns_nongc_ctime = sci->sc_seg_ctime; + set_nilfs_cond_nongc_write(nilfs); + wake_up(&nilfs->ns_cleanerd_wq); + } + + sci->sc_nblk_inc += sci->sc_nblk_this_inc; + + segbuf = NILFS_LAST_SEGBUF(&sci->sc_segbufs); + nilfs_set_next_segment(nilfs, segbuf); + + if (update_sr) { + nilfs_set_last_segment(nilfs, segbuf->sb_pseg_start, + segbuf->sb_sum.seg_seq, nilfs->ns_cno); + + clear_bit(NILFS_SC_DIRTY, &sci->sc_flags); + set_bit(NILFS_SC_SUPER_ROOT, &sci->sc_flags); + } else + clear_bit(NILFS_SC_SUPER_ROOT, &sci->sc_flags); +} + +static int nilfs_segctor_check_in_files(struct nilfs_sc_info *sci, + struct nilfs_sb_info *sbi) +{ + struct nilfs_inode_info *ii, *n; + __u64 cno = sbi->s_nilfs->ns_cno; + + spin_lock(&sbi->s_inode_lock); + retry: + list_for_each_entry_safe(ii, n, &sbi->s_dirty_files, i_dirty) { + if (!ii->i_bh) { + struct buffer_head *ibh; + int err; + + spin_unlock(&sbi->s_inode_lock); + err = nilfs_ifile_get_inode_block( + sbi->s_ifile, ii->vfs_inode.i_ino, &ibh); + if (unlikely(err)) { + nilfs_warning(sbi->s_super, __func__, + "failed to get inode block.\n"); + return err; + } + nilfs_mdt_mark_buffer_dirty(ibh); + nilfs_mdt_mark_dirty(sbi->s_ifile); + spin_lock(&sbi->s_inode_lock); + if (likely(!ii->i_bh)) + ii->i_bh = ibh; + else + brelse(ibh); + goto retry; + } + ii->i_cno = cno; + + clear_bit(NILFS_I_QUEUED, &ii->i_state); + set_bit(NILFS_I_BUSY, &ii->i_state); + list_del(&ii->i_dirty); + list_add_tail(&ii->i_dirty, &sci->sc_dirty_files); + } + spin_unlock(&sbi->s_inode_lock); + + NILFS_I(sbi->s_ifile)->i_cno = cno; + + return 0; +} + +static void nilfs_segctor_check_out_files(struct nilfs_sc_info *sci, + struct nilfs_sb_info *sbi) +{ + struct nilfs_transaction_info *ti = current->journal_info; + struct nilfs_inode_info *ii, *n; + __u64 cno = sbi->s_nilfs->ns_cno; + + spin_lock(&sbi->s_inode_lock); + list_for_each_entry_safe(ii, n, &sci->sc_dirty_files, i_dirty) { + if (!test_and_clear_bit(NILFS_I_UPDATED, &ii->i_state) || + test_bit(NILFS_I_DIRTY, &ii->i_state)) { + /* The current checkpoint number (=nilfs->ns_cno) is + changed between check-in and check-out only if the + super root is written out. So, we can update i_cno + for the inodes that remain in the dirty list. */ + ii->i_cno = cno; + continue; + } + clear_bit(NILFS_I_BUSY, &ii->i_state); + brelse(ii->i_bh); + ii->i_bh = NULL; + list_del(&ii->i_dirty); + list_add_tail(&ii->i_dirty, &ti->ti_garbage); + } + spin_unlock(&sbi->s_inode_lock); +} + +/* + * Nasty routines to manipulate active flags on sufile. + * These would be removed in a future release. + */ +static void nilfs_segctor_reactivate_segments(struct nilfs_sc_info *sci, + struct the_nilfs *nilfs) +{ + struct nilfs_segment_buffer *segbuf, *last; + struct nilfs_segment_entry *ent, *n; + struct inode *sufile = nilfs->ns_sufile; + struct list_head *head; + + last = NILFS_LAST_SEGBUF(&sci->sc_segbufs); + nilfs_for_each_segbuf_before(segbuf, last, &sci->sc_segbufs) { + ent = segbuf->sb_segent; + if (!ent) + break; /* ignore unmapped segments (should check it?)*/ + nilfs_segment_usage_set_active(ent->raw_su); + nilfs_close_segment_entry(ent, sufile); + } + + head = &sci->sc_active_segments; + list_for_each_entry_safe(ent, n, head, list) { + nilfs_segment_usage_set_active(ent->raw_su); + nilfs_close_segment_entry(ent, sufile); + } + + down_write(&nilfs->ns_sem); + head = &nilfs->ns_used_segments; + list_for_each_entry(ent, head, list) { + nilfs_segment_usage_set_volatile_active(ent->raw_su); + } + up_write(&nilfs->ns_sem); +} + +static int nilfs_segctor_deactivate_segments(struct nilfs_sc_info *sci, + struct the_nilfs *nilfs) +{ + struct nilfs_segment_buffer *segbuf, *last; + struct nilfs_segment_entry *ent; + struct inode *sufile = nilfs->ns_sufile; + struct list_head *head; + int err; + + last = NILFS_LAST_SEGBUF(&sci->sc_segbufs); + nilfs_for_each_segbuf_before(segbuf, last, &sci->sc_segbufs) { + /* + * Deactivate ongoing full segments. The last segment is kept + * active because it is a start point of recovery, and is not + * relocatable until the super block points to a newer + * checkpoint. + */ + ent = segbuf->sb_segent; + if (!ent) + break; /* ignore unmapped segments (should check it?)*/ + err = nilfs_open_segment_entry(ent, sufile); + if (unlikely(err)) + goto failed; + nilfs_segment_usage_clear_active(ent->raw_su); + BUG_ON(!buffer_dirty(ent->bh_su)); + } + + head = &sci->sc_active_segments; + list_for_each_entry(ent, head, list) { + err = nilfs_open_segment_entry(ent, sufile); + if (unlikely(err)) + goto failed; + nilfs_segment_usage_clear_active(ent->raw_su); + BUG_ON(!buffer_dirty(ent->bh_su)); + } + + down_write(&nilfs->ns_sem); + head = &nilfs->ns_used_segments; + list_for_each_entry(ent, head, list) { + /* clear volatile active for segments of older generations */ + nilfs_segment_usage_clear_volatile_active(ent->raw_su); + } + up_write(&nilfs->ns_sem); + return 0; + + failed: + nilfs_segctor_reactivate_segments(sci, nilfs); + return err; +} + +static void nilfs_segctor_bead_completed_segments(struct nilfs_sc_info *sci) +{ + struct nilfs_segment_buffer *segbuf, *last; + struct nilfs_segment_entry *ent; + + /* move each segbuf->sb_segent to the list of used active segments */ + last = NILFS_LAST_SEGBUF(&sci->sc_segbufs); + nilfs_for_each_segbuf_before(segbuf, last, &sci->sc_segbufs) { + ent = segbuf->sb_segent; + if (!ent) + break; /* ignore unmapped segments (should check it?)*/ + list_add_tail(&ent->list, &sci->sc_active_segments); + segbuf->sb_segent = NULL; + } +} + +static void +__nilfs_segctor_commit_deactivate_segments(struct nilfs_sc_info *sci, + struct the_nilfs *nilfs) + +{ + struct nilfs_segment_entry *ent; + + list_splice_init(&sci->sc_active_segments, + nilfs->ns_used_segments.prev); + + list_for_each_entry(ent, &nilfs->ns_used_segments, list) { + nilfs_segment_usage_set_volatile_active(ent->raw_su); + /* These segments are kept open */ + } +} + +/* + * Main procedure of segment constructor + */ +static int nilfs_segctor_do_construct(struct nilfs_sc_info *sci, int mode) +{ + struct nilfs_sb_info *sbi = sci->sc_sbi; + struct the_nilfs *nilfs = sbi->s_nilfs; + struct page *failed_page; + int err, has_sr = 0; + + sci->sc_stage.scnt = NILFS_ST_INIT; + + err = nilfs_segctor_check_in_files(sci, sbi); + if (unlikely(err)) + goto out; + + if (nilfs_test_metadata_dirty(sbi)) + set_bit(NILFS_SC_DIRTY, &sci->sc_flags); + + if (nilfs_segctor_clean(sci)) + goto out; + + do { + sci->sc_stage.flags &= ~NILFS_CF_HISTORY_MASK; + + err = nilfs_segctor_begin_construction(sci, nilfs); + if (unlikely(err)) + goto out; + + /* Update time stamp */ + sci->sc_seg_ctime = get_seconds(); + + err = nilfs_segctor_collect(sci, nilfs, mode); + if (unlikely(err)) + goto failed; + + has_sr = (sci->sc_super_root != NULL); + + /* Avoid empty segment */ + if (sci->sc_stage.scnt == NILFS_ST_DONE && + NILFS_SEG_EMPTY(&sci->sc_curseg->sb_sum)) { + BUG_ON(mode == SC_LSEG_SR); + nilfs_segctor_end_construction(sci, nilfs, 1); + goto out; + } + + err = nilfs_segctor_assign(sci, mode); + if (unlikely(err)) + goto failed; + + if (has_sr) { + err = nilfs_segctor_deactivate_segments(sci, nilfs); + if (unlikely(err)) + goto failed; + } + if (sci->sc_stage.flags & NILFS_CF_IFILE_STARTED) + nilfs_segctor_fill_in_file_bmap(sci, sbi->s_ifile); + + if (has_sr) { + err = nilfs_segctor_fill_in_checkpoint(sci); + if (unlikely(err)) + goto failed_to_make_up; + + nilfs_segctor_fill_in_super_root(sci, nilfs); + } + nilfs_segctor_update_segusage(sci, nilfs->ns_sufile); + + /* Write partial segments */ + err = nilfs_segctor_prepare_write(sci, &failed_page); + if (unlikely(err)) + goto failed_to_write; + + nilfs_segctor_fill_in_checksums(sci, nilfs->ns_crc_seed); + + err = nilfs_segctor_write(sci, nilfs->ns_bdi); + if (unlikely(err)) + goto failed_to_write; + + nilfs_segctor_complete_write(sci); + + /* Commit segments */ + nilfs_segctor_bead_completed_segments(sci); + if (has_sr) { + down_write(&nilfs->ns_sem); + nilfs_update_last_segment(sbi, 1); + __nilfs_segctor_commit_deactivate_segments(sci, nilfs); + up_write(&nilfs->ns_sem); + nilfs_segctor_commit_free_segments(sci); + nilfs_segctor_clear_metadata_dirty(sci); + } + + nilfs_segctor_end_construction(sci, nilfs, 0); + + } while (sci->sc_stage.scnt != NILFS_ST_DONE); + + /* Clearing sketch data */ + if (has_sr && sci->sc_sketch_inode) { + if (i_size_read(sci->sc_sketch_inode) == 0) + clear_bit(NILFS_I_DIRTY, + &NILFS_I(sci->sc_sketch_inode)->i_state); + i_size_write(sci->sc_sketch_inode, 0); + } + out: + nilfs_segctor_destroy_segment_buffers(sci); + nilfs_segctor_check_out_files(sci, sbi); + return err; + + failed_to_write: + nilfs_segctor_abort_write(sci, failed_page, err); + nilfs_segctor_cancel_segusage(sci, nilfs->ns_sufile); + + failed_to_make_up: + if (sci->sc_stage.flags & NILFS_CF_IFILE_STARTED) + nilfs_redirty_inodes(&sci->sc_dirty_files); + if (has_sr) + nilfs_segctor_reactivate_segments(sci, nilfs); + + failed: + if (nilfs_doing_gc()) + nilfs_redirty_inodes(&sci->sc_gc_inodes); + nilfs_segctor_end_construction(sci, nilfs, err); + goto out; +} + +/** + * nilfs_secgtor_start_timer - set timer of background write + * @sci: nilfs_sc_info + * + * If the timer has already been set, it ignores the new request. + * This function MUST be called within a section locking the segment + * semaphore. + */ +static void nilfs_segctor_start_timer(struct nilfs_sc_info *sci) +{ + spin_lock(&sci->sc_state_lock); + if (sci->sc_timer && !(sci->sc_state & NILFS_SEGCTOR_COMMIT)) { + sci->sc_timer->expires = jiffies + sci->sc_interval; + add_timer(sci->sc_timer); + sci->sc_state |= NILFS_SEGCTOR_COMMIT; + } + spin_unlock(&sci->sc_state_lock); +} + +static void nilfs_segctor_do_flush(struct nilfs_sc_info *sci, int bn) +{ + spin_lock(&sci->sc_state_lock); + if (!(sci->sc_flush_request & (1 << bn))) { + unsigned long prev_req = sci->sc_flush_request; + + sci->sc_flush_request |= (1 << bn); + if (!prev_req) + wake_up(&sci->sc_wait_daemon); + } + spin_unlock(&sci->sc_state_lock); +} + +/** + * nilfs_flush_segment - trigger a segment construction for resource control + * @sb: super block + * @ino: inode number of the file to be flushed out. + */ +void nilfs_flush_segment(struct super_block *sb, ino_t ino) +{ + struct nilfs_sb_info *sbi = NILFS_SB(sb); + struct nilfs_sc_info *sci = NILFS_SC(sbi); + + if (!sci || nilfs_doing_construction()) + return; + nilfs_segctor_do_flush(sci, NILFS_MDT_INODE(sb, ino) ? ino : 0); + /* assign bit 0 to data files */ +} + +int nilfs_segctor_add_segments_to_be_freed(struct nilfs_sc_info *sci, + __u64 *segnum, size_t nsegs) +{ + struct nilfs_segment_entry *ent; + struct the_nilfs *nilfs = sci->sc_sbi->s_nilfs; + struct inode *sufile = nilfs->ns_sufile; + LIST_HEAD(list); + __u64 *pnum; + const char *flag_name; + size_t i; + int err, err2 = 0; + + for (pnum = segnum, i = 0; i < nsegs; pnum++, i++) { + ent = nilfs_alloc_segment_entry(*pnum); + if (unlikely(!ent)) { + err = -ENOMEM; + goto failed; + } + list_add_tail(&ent->list, &list); + + err = nilfs_open_segment_entry(ent, sufile); + if (unlikely(err)) + goto failed; + + if (unlikely(le32_to_cpu(ent->raw_su->su_flags) != + (1UL << NILFS_SEGMENT_USAGE_DIRTY))) { + if (nilfs_segment_usage_clean(ent->raw_su)) + flag_name = "clean"; + else if (nilfs_segment_usage_active(ent->raw_su)) + flag_name = "active"; + else if (nilfs_segment_usage_volatile_active( + ent->raw_su)) + flag_name = "volatile active"; + else if (!nilfs_segment_usage_dirty(ent->raw_su)) + flag_name = "non-dirty"; + else + flag_name = "erroneous"; + + printk(KERN_ERR + "NILFS: %s segment is requested to be cleaned " + "(segnum=%llu)\n", + flag_name, (unsigned long long)ent->segnum); + err2 = -EINVAL; + } + nilfs_close_segment_entry(ent, sufile); + } + if (unlikely(err2)) { + err = err2; + goto failed; + } + list_splice(&list, sci->sc_cleaning_segments.prev); + return 0; + + failed: + nilfs_dispose_segment_list(&list); + return err; +} + +void nilfs_segctor_clear_segments_to_be_freed(struct nilfs_sc_info *sci) +{ + nilfs_dispose_segment_list(&sci->sc_cleaning_segments); +} + +struct nilfs_segctor_wait_request { + wait_queue_t wq; + __u32 seq; + int err; + atomic_t done; +}; + +static int nilfs_segctor_sync(struct nilfs_sc_info *sci) +{ + struct nilfs_segctor_wait_request wait_req; + int err = 0; + + spin_lock(&sci->sc_state_lock); + init_wait(&wait_req.wq); + wait_req.err = 0; + atomic_set(&wait_req.done, 0); + wait_req.seq = ++sci->sc_seq_request; + spin_unlock(&sci->sc_state_lock); + + init_waitqueue_entry(&wait_req.wq, current); + add_wait_queue(&sci->sc_wait_request, &wait_req.wq); + set_current_state(TASK_INTERRUPTIBLE); + wake_up(&sci->sc_wait_daemon); + + for (;;) { + if (atomic_read(&wait_req.done)) { + err = wait_req.err; + break; + } + if (!signal_pending(current)) { + schedule(); + continue; + } + err = -ERESTARTSYS; + break; + } + finish_wait(&sci->sc_wait_request, &wait_req.wq); + return err; +} + +static void nilfs_segctor_wakeup(struct nilfs_sc_info *sci, int err) +{ + struct nilfs_segctor_wait_request *wrq, *n; + unsigned long flags; + + spin_lock_irqsave(&sci->sc_wait_request.lock, flags); + list_for_each_entry_safe(wrq, n, &sci->sc_wait_request.task_list, + wq.task_list) { + if (!atomic_read(&wrq->done) && + nilfs_cnt32_ge(sci->sc_seq_done, wrq->seq)) { + wrq->err = err; + atomic_set(&wrq->done, 1); + } + if (atomic_read(&wrq->done)) { + wrq->wq.func(&wrq->wq, + TASK_UNINTERRUPTIBLE | TASK_INTERRUPTIBLE, + 0, NULL); + } + } + spin_unlock_irqrestore(&sci->sc_wait_request.lock, flags); +} + +/** + * nilfs_construct_segment - construct a logical segment + * @sb: super block + * + * Return Value: On success, 0 is retured. On errors, one of the following + * negative error code is returned. + * + * %-EROFS - Read only filesystem. + * + * %-EIO - I/O error + * + * %-ENOSPC - No space left on device (only in a panic state). + * + * %-ERESTARTSYS - Interrupted. + * + * %-ENOMEM - Insufficient memory available. + */ +int nilfs_construct_segment(struct super_block *sb) +{ + struct nilfs_sb_info *sbi = NILFS_SB(sb); + struct nilfs_sc_info *sci = NILFS_SC(sbi); + struct nilfs_transaction_info *ti; + int err; + + if (!sci) + return -EROFS; + + /* A call inside transactions causes a deadlock. */ + BUG_ON((ti = current->journal_info) && ti->ti_magic == NILFS_TI_MAGIC); + + err = nilfs_segctor_sync(sci); + return err; +} + +/** + * nilfs_construct_dsync_segment - construct a data-only logical segment + * @sb: super block + * @inode: the inode whose data blocks should be written out + * + * Return Value: On success, 0 is retured. On errors, one of the following + * negative error code is returned. + * + * %-EROFS - Read only filesystem. + * + * %-EIO - I/O error + * + * %-ENOSPC - No space left on device (only in a panic state). + * + * %-ERESTARTSYS - Interrupted. + * + * %-ENOMEM - Insufficient memory available. + */ +int nilfs_construct_dsync_segment(struct super_block *sb, + struct inode *inode) +{ + struct nilfs_sb_info *sbi = NILFS_SB(sb); + struct nilfs_sc_info *sci = NILFS_SC(sbi); + struct nilfs_inode_info *ii; + struct nilfs_transaction_info ti; + int err = 0; + + if (!sci) + return -EROFS; + + nilfs_transaction_lock(sbi, &ti, 0); + + ii = NILFS_I(inode); + if (test_bit(NILFS_I_INODE_DIRTY, &ii->i_state) || + nilfs_test_opt(sbi, STRICT_ORDER) || + test_bit(NILFS_SC_UNCLOSED, &sci->sc_flags) || + nilfs_discontinued(sbi->s_nilfs)) { + nilfs_transaction_unlock(sbi); + err = nilfs_segctor_sync(sci); + return err; + } + + spin_lock(&sbi->s_inode_lock); + if (!test_bit(NILFS_I_QUEUED, &ii->i_state) && + !test_bit(NILFS_I_BUSY, &ii->i_state)) { + spin_unlock(&sbi->s_inode_lock); + nilfs_transaction_unlock(sbi); + return 0; + } + spin_unlock(&sbi->s_inode_lock); + sci->sc_stage.dirty_file_ptr = ii; + + err = nilfs_segctor_do_construct(sci, SC_LSEG_DSYNC); + + nilfs_transaction_unlock(sbi); + return err; +} + +struct nilfs_segctor_req { + int mode; + __u32 seq_accepted; + int sc_err; /* construction failure */ + int sb_err; /* super block writeback failure */ +}; + +#define FLUSH_FILE_BIT (0x1) /* data file only */ +#define FLUSH_DAT_BIT (1 << NILFS_DAT_INO) /* DAT only */ + +static void nilfs_segctor_accept(struct nilfs_sc_info *sci, + struct nilfs_segctor_req *req) +{ + BUG_ON(!sci); + + req->sc_err = req->sb_err = 0; + spin_lock(&sci->sc_state_lock); + req->seq_accepted = sci->sc_seq_request; + spin_unlock(&sci->sc_state_lock); + + if (sci->sc_timer) + del_timer_sync(sci->sc_timer); +} + +static void nilfs_segctor_notify(struct nilfs_sc_info *sci, + struct nilfs_segctor_req *req) +{ + /* Clear requests (even when the construction failed) */ + spin_lock(&sci->sc_state_lock); + + sci->sc_state &= ~NILFS_SEGCTOR_COMMIT; + + if (req->mode == SC_LSEG_SR) { + sci->sc_seq_done = req->seq_accepted; + nilfs_segctor_wakeup(sci, req->sc_err ? : req->sb_err); + sci->sc_flush_request = 0; + } else if (req->mode == SC_FLUSH_FILE) + sci->sc_flush_request &= ~FLUSH_FILE_BIT; + else if (req->mode == SC_FLUSH_DAT) + sci->sc_flush_request &= ~FLUSH_DAT_BIT; + + spin_unlock(&sci->sc_state_lock); +} + +static int nilfs_segctor_construct(struct nilfs_sc_info *sci, + struct nilfs_segctor_req *req) +{ + struct nilfs_sb_info *sbi = sci->sc_sbi; + struct the_nilfs *nilfs = sbi->s_nilfs; + int err = 0; + + if (nilfs_discontinued(nilfs)) + req->mode = SC_LSEG_SR; + if (!nilfs_segctor_confirm(sci)) { + err = nilfs_segctor_do_construct(sci, req->mode); + req->sc_err = err; + } + if (likely(!err)) { + if (req->mode != SC_FLUSH_DAT) + atomic_set(&nilfs->ns_ndirtyblks, 0); + if (test_bit(NILFS_SC_SUPER_ROOT, &sci->sc_flags) && + nilfs_discontinued(nilfs)) { + down_write(&nilfs->ns_sem); + req->sb_err = nilfs_commit_super(sbi); + up_write(&nilfs->ns_sem); + } + } + return err; +} + +static void nilfs_construction_timeout(unsigned long data) +{ + struct task_struct *p = (struct task_struct *)data; + wake_up_process(p); +} + +static void +nilfs_remove_written_gcinodes(struct the_nilfs *nilfs, struct list_head *head) +{ + struct nilfs_inode_info *ii, *n; + + list_for_each_entry_safe(ii, n, head, i_dirty) { + if (!test_bit(NILFS_I_UPDATED, &ii->i_state)) + continue; + hlist_del_init(&ii->vfs_inode.i_hash); + list_del_init(&ii->i_dirty); + nilfs_clear_gcinode(&ii->vfs_inode); + } +} + +int nilfs_clean_segments(struct super_block *sb, void __user *argp) +{ + struct nilfs_sb_info *sbi = NILFS_SB(sb); + struct nilfs_sc_info *sci = NILFS_SC(sbi); + struct the_nilfs *nilfs = sbi->s_nilfs; + struct nilfs_transaction_info ti; + struct nilfs_segctor_req req = { .mode = SC_LSEG_SR }; + int err; + + if (unlikely(!sci)) + return -EROFS; + + nilfs_transaction_lock(sbi, &ti, 1); + + err = nilfs_init_gcdat_inode(nilfs); + if (unlikely(err)) + goto out_unlock; + err = nilfs_ioctl_prepare_clean_segments(nilfs, argp); + if (unlikely(err)) + goto out_unlock; + + list_splice_init(&nilfs->ns_gc_inodes, sci->sc_gc_inodes.prev); + + for (;;) { + nilfs_segctor_accept(sci, &req); + err = nilfs_segctor_construct(sci, &req); + nilfs_remove_written_gcinodes(nilfs, &sci->sc_gc_inodes); + nilfs_segctor_notify(sci, &req); + + if (likely(!err)) + break; + + nilfs_warning(sb, __func__, + "segment construction failed. (err=%d)", err); + set_current_state(TASK_INTERRUPTIBLE); + schedule_timeout(sci->sc_interval); + } + + out_unlock: + nilfs_clear_gcdat_inode(nilfs); + nilfs_transaction_unlock(sbi); + return err; +} + +static void nilfs_segctor_thread_construct(struct nilfs_sc_info *sci, int mode) +{ + struct nilfs_sb_info *sbi = sci->sc_sbi; + struct nilfs_transaction_info ti; + struct nilfs_segctor_req req = { .mode = mode }; + + nilfs_transaction_lock(sbi, &ti, 0); + + nilfs_segctor_accept(sci, &req); + nilfs_segctor_construct(sci, &req); + nilfs_segctor_notify(sci, &req); + + /* + * Unclosed segment should be retried. We do this using sc_timer. + * Timeout of sc_timer will invoke complete construction which leads + * to close the current logical segment. + */ + if (test_bit(NILFS_SC_UNCLOSED, &sci->sc_flags)) + nilfs_segctor_start_timer(sci); + + nilfs_transaction_unlock(sbi); +} + +static void nilfs_segctor_do_immediate_flush(struct nilfs_sc_info *sci) +{ + int mode = 0; + int err; + + spin_lock(&sci->sc_state_lock); + mode = (sci->sc_flush_request & FLUSH_DAT_BIT) ? + SC_FLUSH_DAT : SC_FLUSH_FILE; + spin_unlock(&sci->sc_state_lock); + + if (mode) { + err = nilfs_segctor_do_construct(sci, mode); + + spin_lock(&sci->sc_state_lock); + sci->sc_flush_request &= (mode == SC_FLUSH_FILE) ? + ~FLUSH_FILE_BIT : ~FLUSH_DAT_BIT; + spin_unlock(&sci->sc_state_lock); + } + clear_bit(NILFS_SC_PRIOR_FLUSH, &sci->sc_flags); +} + +static int nilfs_segctor_flush_mode(struct nilfs_sc_info *sci) +{ + if (!test_bit(NILFS_SC_UNCLOSED, &sci->sc_flags) || + time_before(jiffies, sci->sc_lseg_stime + sci->sc_mjcp_freq)) { + if (!(sci->sc_flush_request & ~FLUSH_FILE_BIT)) + return SC_FLUSH_FILE; + else if (!(sci->sc_flush_request & ~FLUSH_DAT_BIT)) + return SC_FLUSH_DAT; + } + return SC_LSEG_SR; +} + +/** + * nilfs_segctor_thread - main loop of the segment constructor thread. + * @arg: pointer to a struct nilfs_sc_info. + * + * nilfs_segctor_thread() initializes a timer and serves as a daemon + * to execute segment constructions. + */ +static int nilfs_segctor_thread(void *arg) +{ + struct nilfs_sc_info *sci = (struct nilfs_sc_info *)arg; + struct timer_list timer; + int timeout = 0; + + init_timer(&timer); + timer.data = (unsigned long)current; + timer.function = nilfs_construction_timeout; + sci->sc_timer = &timer; + + /* start sync. */ + sci->sc_task = current; + wake_up(&sci->sc_wait_task); /* for nilfs_segctor_start_thread() */ + printk(KERN_INFO + "segctord starting. Construction interval = %lu seconds, " + "CP frequency < %lu seconds\n", + sci->sc_interval / HZ, sci->sc_mjcp_freq / HZ); + + spin_lock(&sci->sc_state_lock); + loop: + for (;;) { + int mode; + + if (sci->sc_state & NILFS_SEGCTOR_QUIT) + goto end_thread; + + if (timeout || sci->sc_seq_request != sci->sc_seq_done) + mode = SC_LSEG_SR; + else if (!sci->sc_flush_request) + break; + else + mode = nilfs_segctor_flush_mode(sci); + + spin_unlock(&sci->sc_state_lock); + nilfs_segctor_thread_construct(sci, mode); + spin_lock(&sci->sc_state_lock); + timeout = 0; + } + + + if (freezing(current)) { + spin_unlock(&sci->sc_state_lock); + refrigerator(); + spin_lock(&sci->sc_state_lock); + } else { + DEFINE_WAIT(wait); + int should_sleep = 1; + + prepare_to_wait(&sci->sc_wait_daemon, &wait, + TASK_INTERRUPTIBLE); + + if (sci->sc_seq_request != sci->sc_seq_done) + should_sleep = 0; + else if (sci->sc_flush_request) + should_sleep = 0; + else if (sci->sc_state & NILFS_SEGCTOR_COMMIT) + should_sleep = time_before(jiffies, + sci->sc_timer->expires); + + if (should_sleep) { + spin_unlock(&sci->sc_state_lock); + schedule(); + spin_lock(&sci->sc_state_lock); + } + finish_wait(&sci->sc_wait_daemon, &wait); + timeout = ((sci->sc_state & NILFS_SEGCTOR_COMMIT) && + time_after_eq(jiffies, sci->sc_timer->expires)); + } + goto loop; + + end_thread: + spin_unlock(&sci->sc_state_lock); + del_timer_sync(sci->sc_timer); + sci->sc_timer = NULL; + + /* end sync. */ + sci->sc_task = NULL; + wake_up(&sci->sc_wait_task); /* for nilfs_segctor_kill_thread() */ + return 0; +} + +static int nilfs_segctor_start_thread(struct nilfs_sc_info *sci) +{ + struct task_struct *t; + + t = kthread_run(nilfs_segctor_thread, sci, "segctord"); + if (IS_ERR(t)) { + int err = PTR_ERR(t); + + printk(KERN_ERR "NILFS: error %d creating segctord thread\n", + err); + return err; + } + wait_event(sci->sc_wait_task, sci->sc_task != NULL); + return 0; +} + +static void nilfs_segctor_kill_thread(struct nilfs_sc_info *sci) +{ + sci->sc_state |= NILFS_SEGCTOR_QUIT; + + while (sci->sc_task) { + wake_up(&sci->sc_wait_daemon); + spin_unlock(&sci->sc_state_lock); + wait_event(sci->sc_wait_task, sci->sc_task == NULL); + spin_lock(&sci->sc_state_lock); + } +} + +static int nilfs_segctor_init(struct nilfs_sc_info *sci, + struct nilfs_recovery_info *ri) +{ + int err; + struct inode *inode = nilfs_iget(sci->sc_super, NILFS_SKETCH_INO); + + sci->sc_sketch_inode = IS_ERR(inode) ? NULL : inode; + if (sci->sc_sketch_inode) + i_size_write(sci->sc_sketch_inode, 0); + + sci->sc_seq_done = sci->sc_seq_request; + if (ri) + list_splice_init(&ri->ri_used_segments, + sci->sc_active_segments.prev); + + err = nilfs_segctor_start_thread(sci); + if (err) { + if (ri) + list_splice_init(&sci->sc_active_segments, + ri->ri_used_segments.prev); + if (sci->sc_sketch_inode) { + iput(sci->sc_sketch_inode); + sci->sc_sketch_inode = NULL; + } + } + return err; +} + +/* + * Setup & clean-up functions + */ +static struct nilfs_sc_info *nilfs_segctor_new(struct nilfs_sb_info *sbi) +{ + struct nilfs_sc_info *sci; + + sci = kzalloc(sizeof(*sci), GFP_KERNEL); + if (!sci) + return NULL; + + sci->sc_sbi = sbi; + sci->sc_super = sbi->s_super; + + init_waitqueue_head(&sci->sc_wait_request); + init_waitqueue_head(&sci->sc_wait_daemon); + init_waitqueue_head(&sci->sc_wait_task); + spin_lock_init(&sci->sc_state_lock); + INIT_LIST_HEAD(&sci->sc_dirty_files); + INIT_LIST_HEAD(&sci->sc_segbufs); + INIT_LIST_HEAD(&sci->sc_gc_inodes); + INIT_LIST_HEAD(&sci->sc_active_segments); + INIT_LIST_HEAD(&sci->sc_cleaning_segments); + INIT_LIST_HEAD(&sci->sc_copied_buffers); + + sci->sc_interval = HZ * NILFS_SC_DEFAULT_TIMEOUT; + sci->sc_mjcp_freq = HZ * NILFS_SC_DEFAULT_SR_FREQ; + sci->sc_watermark = NILFS_SC_DEFAULT_WATERMARK; + + if (sbi->s_interval) + sci->sc_interval = sbi->s_interval; + if (sbi->s_watermark) + sci->sc_watermark = sbi->s_watermark; + return sci; +} + +static void nilfs_segctor_write_out(struct nilfs_sc_info *sci) +{ + int ret, retrycount = NILFS_SC_CLEANUP_RETRY; + + /* The segctord thread was stopped and its timer was removed. + But some tasks remain. */ + do { + struct nilfs_sb_info *sbi = sci->sc_sbi; + struct nilfs_transaction_info ti; + struct nilfs_segctor_req req = { .mode = SC_LSEG_SR }; + + nilfs_transaction_lock(sbi, &ti, 0); + nilfs_segctor_accept(sci, &req); + ret = nilfs_segctor_construct(sci, &req); + nilfs_segctor_notify(sci, &req); + nilfs_transaction_unlock(sbi); + + } while (ret && retrycount-- > 0); +} + +/** + * nilfs_segctor_destroy - destroy the segment constructor. + * @sci: nilfs_sc_info + * + * nilfs_segctor_destroy() kills the segctord thread and frees + * the nilfs_sc_info struct. + * Caller must hold the segment semaphore. + */ +static void nilfs_segctor_destroy(struct nilfs_sc_info *sci) +{ + struct nilfs_sb_info *sbi = sci->sc_sbi; + int flag; + + up_write(&sbi->s_nilfs->ns_segctor_sem); + + spin_lock(&sci->sc_state_lock); + nilfs_segctor_kill_thread(sci); + flag = ((sci->sc_state & NILFS_SEGCTOR_COMMIT) || sci->sc_flush_request + || sci->sc_seq_request != sci->sc_seq_done); + spin_unlock(&sci->sc_state_lock); + + if (flag || nilfs_segctor_confirm(sci)) + nilfs_segctor_write_out(sci); + + BUG_ON(!list_empty(&sci->sc_copied_buffers)); + + if (!list_empty(&sci->sc_dirty_files)) { + nilfs_warning(sbi->s_super, __func__, + "dirty file(s) after the final construction\n"); + nilfs_dispose_list(sbi, &sci->sc_dirty_files, 1); + } + if (!list_empty(&sci->sc_active_segments)) + nilfs_dispose_segment_list(&sci->sc_active_segments); + + if (!list_empty(&sci->sc_cleaning_segments)) + nilfs_dispose_segment_list(&sci->sc_cleaning_segments); + + BUG_ON(!list_empty(&sci->sc_segbufs)); + + if (sci->sc_sketch_inode) { + iput(sci->sc_sketch_inode); + sci->sc_sketch_inode = NULL; + } + down_write(&sbi->s_nilfs->ns_segctor_sem); + + kfree(sci); +} + +/** + * nilfs_attach_segment_constructor - attach a segment constructor + * @sbi: nilfs_sb_info + * @ri: nilfs_recovery_info + * + * nilfs_attach_segment_constructor() allocates a struct nilfs_sc_info, + * initilizes it, and starts the segment constructor. + * + * Return Value: On success, 0 is returned. On error, one of the following + * negative error code is returned. + * + * %-ENOMEM - Insufficient memory available. + */ +int nilfs_attach_segment_constructor(struct nilfs_sb_info *sbi, + struct nilfs_recovery_info *ri) +{ + struct the_nilfs *nilfs = sbi->s_nilfs; + int err; + + /* Each field of nilfs_segctor is cleared through the initialization + of super-block info */ + sbi->s_sc_info = nilfs_segctor_new(sbi); + if (!sbi->s_sc_info) + return -ENOMEM; + + nilfs_attach_writer(nilfs, sbi); + err = nilfs_segctor_init(NILFS_SC(sbi), ri); + if (err) { + nilfs_detach_writer(nilfs, sbi); + kfree(sbi->s_sc_info); + sbi->s_sc_info = NULL; + } + return err; +} + +/** + * nilfs_detach_segment_constructor - destroy the segment constructor + * @sbi: nilfs_sb_info + * + * nilfs_detach_segment_constructor() kills the segment constructor daemon, + * frees the struct nilfs_sc_info, and destroy the dirty file list. + */ +void nilfs_detach_segment_constructor(struct nilfs_sb_info *sbi) +{ + struct the_nilfs *nilfs = sbi->s_nilfs; + LIST_HEAD(garbage_list); + + down_write(&nilfs->ns_segctor_sem); + if (NILFS_SC(sbi)) { + nilfs_segctor_destroy(NILFS_SC(sbi)); + sbi->s_sc_info = NULL; + } + + /* Force to free the list of dirty files */ + spin_lock(&sbi->s_inode_lock); + if (!list_empty(&sbi->s_dirty_files)) { + list_splice_init(&sbi->s_dirty_files, &garbage_list); + nilfs_warning(sbi->s_super, __func__, + "Non empty dirty list after the last " + "segment construction\n"); + } + spin_unlock(&sbi->s_inode_lock); + up_write(&nilfs->ns_segctor_sem); + + nilfs_dispose_list(sbi, &garbage_list, 1); + nilfs_detach_writer(nilfs, sbi); +} diff --git a/fs/nilfs2/segment.h b/fs/nilfs2/segment.h new file mode 100644 index 00000000000..615654b8c32 --- /dev/null +++ b/fs/nilfs2/segment.h @@ -0,0 +1,246 @@ +/* + * segment.h - NILFS Segment constructor prototypes and definitions + * + * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + * Written by Ryusuke Konishi + * + */ +#ifndef _NILFS_SEGMENT_H +#define _NILFS_SEGMENT_H + +#include +#include +#include +#include +#include "sb.h" + +/** + * struct nilfs_recovery_info - Recovery infomation + * @ri_need_recovery: Recovery status + * @ri_super_root: Block number of the last super root + * @ri_ri_cno: Number of the last checkpoint + * @ri_lsegs_start: Region for roll-forwarding (start block number) + * @ri_lsegs_end: Region for roll-forwarding (end block number) + * @ri_lseg_start_seq: Sequence value of the segment at ri_lsegs_start + * @ri_used_segments: List of segments to be mark active + * @ri_pseg_start: Block number of the last partial segment + * @ri_seq: Sequence number on the last partial segment + * @ri_segnum: Segment number on the last partial segment + * @ri_nextnum: Next segment number on the last partial segment + */ +struct nilfs_recovery_info { + int ri_need_recovery; + sector_t ri_super_root; + __u64 ri_cno; + + sector_t ri_lsegs_start; + sector_t ri_lsegs_end; + u64 ri_lsegs_start_seq; + struct list_head ri_used_segments; + sector_t ri_pseg_start; + u64 ri_seq; + __u64 ri_segnum; + __u64 ri_nextnum; +}; + +/* ri_need_recovery */ +#define NILFS_RECOVERY_SR_UPDATED 1 /* The super root was updated */ +#define NILFS_RECOVERY_ROLLFORWARD_DONE 2 /* Rollforward was carried out */ + +/** + * struct nilfs_cstage - Context of collection stage + * @scnt: Stage count + * @flags: State flags + * @dirty_file_ptr: Pointer on dirty_files list, or inode of a target file + * @gc_inode_ptr: Pointer on the list of gc-inodes + */ +struct nilfs_cstage { + int scnt; + unsigned flags; + struct nilfs_inode_info *dirty_file_ptr; + struct nilfs_inode_info *gc_inode_ptr; +}; + +struct nilfs_segment_buffer; + +struct nilfs_segsum_pointer { + struct buffer_head *bh; + unsigned offset; /* offset in bytes */ +}; + +/** + * struct nilfs_sc_info - Segment constructor information + * @sc_super: Back pointer to super_block struct + * @sc_sbi: Back pointer to nilfs_sb_info struct + * @sc_nblk_inc: Block count of current generation + * @sc_dirty_files: List of files to be written + * @sc_gc_inodes: List of GC inodes having blocks to be written + * @sc_active_segments: List of active segments that were already written out + * @sc_cleaning_segments: List of segments to be freed through construction + * @sc_copied_buffers: List of copied buffers (buffer heads) to freeze data + * @sc_segbufs: List of segment buffers + * @sc_segbuf_nblocks: Number of available blocks in segment buffers. + * @sc_curseg: Current segment buffer + * @sc_super_root: Pointer to the super root buffer + * @sc_stage: Collection stage + * @sc_finfo_ptr: pointer to the current finfo struct in the segment summary + * @sc_binfo_ptr: pointer to the current binfo struct in the segment summary + * @sc_blk_cnt: Block count of a file + * @sc_datablk_cnt: Data block count of a file + * @sc_nblk_this_inc: Number of blocks included in the current logical segment + * @sc_seg_ctime: Creation time + * @sc_flags: Internal flags + * @sc_sketch_inode: Inode of the sketch file + * @sc_state_lock: spinlock for sc_state and so on + * @sc_state: Segctord state flags + * @sc_flush_request: inode bitmap of metadata files to be flushed + * @sc_wait_request: Client request queue + * @sc_wait_daemon: Daemon wait queue + * @sc_wait_task: Start/end wait queue to control segctord task + * @sc_seq_request: Request counter + * @sc_seq_done: Completion counter + * @sc_sync: Request of explicit sync operation + * @sc_interval: Timeout value of background construction + * @sc_mjcp_freq: Frequency of creating checkpoints + * @sc_lseg_stime: Start time of the latest logical segment + * @sc_watermark: Watermark for the number of dirty buffers + * @sc_timer: Timer for segctord + * @sc_task: current thread of segctord + */ +struct nilfs_sc_info { + struct super_block *sc_super; + struct nilfs_sb_info *sc_sbi; + + unsigned long sc_nblk_inc; + + struct list_head sc_dirty_files; + struct list_head sc_gc_inodes; + struct list_head sc_active_segments; + struct list_head sc_cleaning_segments; + struct list_head sc_copied_buffers; + + /* Segment buffers */ + struct list_head sc_segbufs; + unsigned long sc_segbuf_nblocks; + struct nilfs_segment_buffer *sc_curseg; + struct buffer_head *sc_super_root; + + struct nilfs_cstage sc_stage; + + struct nilfs_segsum_pointer sc_finfo_ptr; + struct nilfs_segsum_pointer sc_binfo_ptr; + unsigned long sc_blk_cnt; + unsigned long sc_datablk_cnt; + unsigned long sc_nblk_this_inc; + time_t sc_seg_ctime; + + unsigned long sc_flags; + + /* + * Pointer to an inode of the sketch. + * This pointer is kept only while it contains data. + * We protect it with a semaphore of the segment constructor. + */ + struct inode *sc_sketch_inode; + + spinlock_t sc_state_lock; + unsigned long sc_state; + unsigned long sc_flush_request; + + wait_queue_head_t sc_wait_request; + wait_queue_head_t sc_wait_daemon; + wait_queue_head_t sc_wait_task; + + __u32 sc_seq_request; + __u32 sc_seq_done; + + int sc_sync; + unsigned long sc_interval; + unsigned long sc_mjcp_freq; + unsigned long sc_lseg_stime; /* in 1/HZ seconds */ + unsigned long sc_watermark; + + struct timer_list *sc_timer; + struct task_struct *sc_task; +}; + +/* sc_flags */ +enum { + NILFS_SC_DIRTY, /* One or more dirty meta-data blocks exist */ + NILFS_SC_UNCLOSED, /* Logical segment is not closed */ + NILFS_SC_SUPER_ROOT, /* The latest segment has a super root */ + NILFS_SC_PRIOR_FLUSH, /* Requesting immediate flush without making a + checkpoint */ +}; + +/* sc_state */ +#define NILFS_SEGCTOR_QUIT 0x0001 /* segctord is being destroyed */ +#define NILFS_SEGCTOR_COMMIT 0x0004 /* committed transaction exists */ + +/* + * Constant parameters + */ +#define NILFS_SC_CLEANUP_RETRY 3 /* Retry count of construction when + destroying segctord */ + +/* + * Default values of timeout, in seconds. + */ +#define NILFS_SC_DEFAULT_TIMEOUT 5 /* Timeout value of dirty blocks. + It triggers construction of a + logical segment with a super root */ +#define NILFS_SC_DEFAULT_SR_FREQ 30 /* Maximum frequency of super root + creation */ +#define NILFS_SC_DEFAULT_SB_FREQ 30 /* Minimum interval of periodical + update of superblock (reserved) */ + +/* + * The default threshold amount of data, in block counts. + */ +#define NILFS_SC_DEFAULT_WATERMARK 3600 + + +/* segment.c */ +extern int nilfs_init_transaction_cache(void); +extern void nilfs_destroy_transaction_cache(void); +extern void nilfs_relax_pressure_in_lock(struct super_block *); + +extern int nilfs_construct_segment(struct super_block *); +extern int nilfs_construct_dsync_segment(struct super_block *, + struct inode *); +extern void nilfs_flush_segment(struct super_block *, ino_t); +extern int nilfs_clean_segments(struct super_block *, void __user *); + +extern int nilfs_segctor_add_segments_to_be_freed(struct nilfs_sc_info *, + __u64 *, size_t); +extern void nilfs_segctor_clear_segments_to_be_freed(struct nilfs_sc_info *); + +extern int nilfs_attach_segment_constructor(struct nilfs_sb_info *, + struct nilfs_recovery_info *); +extern void nilfs_detach_segment_constructor(struct nilfs_sb_info *); + +/* recovery.c */ +extern int nilfs_read_super_root_block(struct super_block *, sector_t, + struct buffer_head **, int); +extern int nilfs_search_super_root(struct the_nilfs *, struct nilfs_sb_info *, + struct nilfs_recovery_info *); +extern int nilfs_recover_logical_segments(struct the_nilfs *, + struct nilfs_sb_info *, + struct nilfs_recovery_info *); + +#endif /* _NILFS_SEGMENT_H */ -- cgit v1.2.3 From f30bf3e40f80ef50c17f55271deae3abc03e793e Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Mon, 6 Apr 2009 19:01:38 -0700 Subject: nilfs2: fix missed-sync issue for do_sync_mapping_range() Chris Mason pointed out that there is a missed sync issue in nilfs_writepages(): On Wed, 17 Dec 2008 21:52:55 -0500, Chris Mason wrote: > It looks like nilfs_writepage ignores WB_SYNC_NONE, which is used by > do_sync_mapping_range(). where WB_SYNC_NONE in do_sync_mapping_range() was replaced with WB_SYNC_ALL by Nick's patch (commit: ee53a891f47444c53318b98dac947ede963db400). This fixes the problem by letting nilfs_writepages() write out the log of file data within the range if sync_mode is WB_SYNC_ALL. This involves removal of nilfs_file_aio_write() which was previously needed to ensure O_SYNC sync writes. Cc: Chris Mason Signed-off-by: Ryusuke Konishi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/nilfs2/file.c | 27 ++---------- fs/nilfs2/inode.c | 16 ++++--- fs/nilfs2/segment.c | 120 +++++++++++++++++++++++++++++++--------------------- fs/nilfs2/segment.h | 11 ++++- 4 files changed, 93 insertions(+), 81 deletions(-) (limited to 'fs/nilfs2') diff --git a/fs/nilfs2/file.c b/fs/nilfs2/file.c index 8031086db8d..cd38124372f 100644 --- a/fs/nilfs2/file.c +++ b/fs/nilfs2/file.c @@ -44,35 +44,14 @@ int nilfs_sync_file(struct file *file, struct dentry *dentry, int datasync) return 0; if (datasync) - err = nilfs_construct_dsync_segment(inode->i_sb, inode); + err = nilfs_construct_dsync_segment(inode->i_sb, inode, 0, + LLONG_MAX); else err = nilfs_construct_segment(inode->i_sb); return err; } -static ssize_t -nilfs_file_aio_write(struct kiocb *iocb, const struct iovec *iov, - unsigned long nr_segs, loff_t pos) -{ - struct file *file = iocb->ki_filp; - struct inode *inode = file->f_dentry->d_inode; - ssize_t ret; - - ret = generic_file_aio_write(iocb, iov, nr_segs, pos); - if (ret <= 0) - return ret; - - if ((file->f_flags & O_SYNC) || IS_SYNC(inode)) { - int err; - - err = nilfs_construct_dsync_segment(inode->i_sb, inode); - if (unlikely(err)) - return err; - } - return ret; -} - static int nilfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) { struct page *page = vmf->page; @@ -160,7 +139,7 @@ struct file_operations nilfs_file_operations = { .read = do_sync_read, .write = do_sync_write, .aio_read = generic_file_aio_read, - .aio_write = nilfs_file_aio_write, + .aio_write = generic_file_aio_write, .ioctl = nilfs_ioctl, #ifdef CONFIG_COMPAT .compat_ioctl = nilfs_compat_ioctl, diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c index b4697d9d7e5..289d1798dec 100644 --- a/fs/nilfs2/inode.c +++ b/fs/nilfs2/inode.c @@ -24,6 +24,7 @@ #include #include #include +#include #include "nilfs.h" #include "segment.h" #include "page.h" @@ -145,8 +146,14 @@ static int nilfs_readpages(struct file *file, struct address_space *mapping, static int nilfs_writepages(struct address_space *mapping, struct writeback_control *wbc) { - /* This empty method is required not to call generic_writepages() */ - return 0; + struct inode *inode = mapping->host; + int err = 0; + + if (wbc->sync_mode == WB_SYNC_ALL) + err = nilfs_construct_dsync_segment(inode->i_sb, inode, + wbc->range_start, + wbc->range_end); + return err; } static int nilfs_writepage(struct page *page, struct writeback_control *wbc) @@ -225,11 +232,6 @@ nilfs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, struct file *file = iocb->ki_filp; struct inode *inode = file->f_mapping->host; ssize_t size; - int err; - - err = nilfs_construct_dsync_segment(inode->i_sb, inode); - if (unlikely(err)) - return err; if (rw == WRITE) return 0; diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c index 2c4c088059f..ad65a737aff 100644 --- a/fs/nilfs2/segment.c +++ b/fs/nilfs2/segment.c @@ -654,29 +654,41 @@ struct nilfs_sc_operations nilfs_sc_dsync_ops = { .write_node_binfo = NULL, }; -static int nilfs_lookup_dirty_data_buffers(struct inode *inode, - struct list_head *listp, - struct nilfs_sc_info *sci) +static size_t nilfs_lookup_dirty_data_buffers(struct inode *inode, + struct list_head *listp, + size_t nlimit, + loff_t start, loff_t end) { - struct nilfs_segment_buffer *segbuf = sci->sc_curseg; struct address_space *mapping = inode->i_mapping; struct pagevec pvec; - unsigned i, ndirties = 0, nlimit; - pgoff_t index = 0; - int err = 0; + pgoff_t index = 0, last = ULONG_MAX; + size_t ndirties = 0; + int i; - nlimit = sci->sc_segbuf_nblocks - - (sci->sc_nblk_this_inc + segbuf->sb_sum.nblocks); + if (unlikely(start != 0 || end != LLONG_MAX)) { + /* + * A valid range is given for sync-ing data pages. The + * range is rounded to per-page; extra dirty buffers + * may be included if blocksize < pagesize. + */ + index = start >> PAGE_SHIFT; + last = end >> PAGE_SHIFT; + } pagevec_init(&pvec, 0); repeat: - if (!pagevec_lookup_tag(&pvec, mapping, &index, PAGECACHE_TAG_DIRTY, - PAGEVEC_SIZE)) - return 0; + if (unlikely(index > last) || + !pagevec_lookup_tag(&pvec, mapping, &index, PAGECACHE_TAG_DIRTY, + min_t(pgoff_t, last - index, + PAGEVEC_SIZE - 1) + 1)) + return ndirties; for (i = 0; i < pagevec_count(&pvec); i++) { struct buffer_head *bh, *head; struct page *page = pvec.pages[i]; + if (unlikely(page->index > last)) + break; + if (mapping->host) { lock_page(page); if (!page_has_buffers(page)) @@ -687,24 +699,21 @@ static int nilfs_lookup_dirty_data_buffers(struct inode *inode, bh = head = page_buffers(page); do { - if (buffer_dirty(bh)) { - if (ndirties > nlimit) { - err = -E2BIG; - break; - } - get_bh(bh); - list_add_tail(&bh->b_assoc_buffers, listp); - ndirties++; + if (!buffer_dirty(bh)) + continue; + get_bh(bh); + list_add_tail(&bh->b_assoc_buffers, listp); + ndirties++; + if (unlikely(ndirties >= nlimit)) { + pagevec_release(&pvec); + cond_resched(); + return ndirties; } - bh = bh->b_this_page; - } while (bh != head); + } while (bh = bh->b_this_page, bh != head); } pagevec_release(&pvec); cond_resched(); - - if (!err) - goto repeat; - return err; + goto repeat; } static void nilfs_lookup_dirty_node_buffers(struct inode *inode, @@ -1058,23 +1067,31 @@ static int nilfs_segctor_apply_buffers(struct nilfs_sc_info *sci, return err; } +static size_t nilfs_segctor_buffer_rest(struct nilfs_sc_info *sci) +{ + /* Remaining number of blocks within segment buffer */ + return sci->sc_segbuf_nblocks - + (sci->sc_nblk_this_inc + sci->sc_curseg->sb_sum.nblocks); +} + static int nilfs_segctor_scan_file(struct nilfs_sc_info *sci, struct inode *inode, struct nilfs_sc_operations *sc_ops) { LIST_HEAD(data_buffers); LIST_HEAD(node_buffers); - int err, err2; + int err; if (!(sci->sc_stage.flags & NILFS_CF_NODE)) { - err = nilfs_lookup_dirty_data_buffers(inode, &data_buffers, - sci); - if (err) { - err2 = nilfs_segctor_apply_buffers( + size_t n, rest = nilfs_segctor_buffer_rest(sci); + + n = nilfs_lookup_dirty_data_buffers( + inode, &data_buffers, rest + 1, 0, LLONG_MAX); + if (n > rest) { + err = nilfs_segctor_apply_buffers( sci, inode, &data_buffers, - err == -E2BIG ? sc_ops->collect_data : NULL); - if (err == -E2BIG) - err = err2; + sc_ops->collect_data); + BUG_ON(!err); /* always receive -E2BIG or true error */ goto break_or_fail; } } @@ -1114,16 +1131,20 @@ static int nilfs_segctor_scan_file_dsync(struct nilfs_sc_info *sci, struct inode *inode) { LIST_HEAD(data_buffers); - int err, err2; + size_t n, rest = nilfs_segctor_buffer_rest(sci); + int err; - err = nilfs_lookup_dirty_data_buffers(inode, &data_buffers, sci); - err2 = nilfs_segctor_apply_buffers(sci, inode, &data_buffers, - (!err || err == -E2BIG) ? - nilfs_collect_file_data : NULL); - if (err == -E2BIG) - err = err2; - if (!err) + n = nilfs_lookup_dirty_data_buffers(inode, &data_buffers, rest + 1, + sci->sc_dsync_start, + sci->sc_dsync_end); + + err = nilfs_segctor_apply_buffers(sci, inode, &data_buffers, + nilfs_collect_file_data); + if (!err) { nilfs_segctor_end_finfo(sci, inode); + BUG_ON(n > rest); + /* always receive -E2BIG or true error if n > rest */ + } return err; } @@ -1276,14 +1297,13 @@ static int nilfs_segctor_collect_blocks(struct nilfs_sc_info *sci, int mode) case NILFS_ST_DSYNC: dsync_mode: sci->sc_curseg->sb_sum.flags |= NILFS_SS_SYNDT; - ii = sci->sc_stage.dirty_file_ptr; + ii = sci->sc_dsync_inode; if (!test_bit(NILFS_I_BUSY, &ii->i_state)) break; err = nilfs_segctor_scan_file_dsync(sci, &ii->vfs_inode); if (unlikely(err)) break; - sci->sc_stage.dirty_file_ptr = NULL; sci->sc_curseg->sb_sum.flags |= NILFS_SS_LOGEND; sci->sc_stage.scnt = NILFS_ST_DONE; return 0; @@ -2624,7 +2644,9 @@ int nilfs_construct_segment(struct super_block *sb) /** * nilfs_construct_dsync_segment - construct a data-only logical segment * @sb: super block - * @inode: the inode whose data blocks should be written out + * @inode: inode whose data blocks should be written out + * @start: start byte offset + * @end: end byte offset (inclusive) * * Return Value: On success, 0 is retured. On errors, one of the following * negative error code is returned. @@ -2639,8 +2661,8 @@ int nilfs_construct_segment(struct super_block *sb) * * %-ENOMEM - Insufficient memory available. */ -int nilfs_construct_dsync_segment(struct super_block *sb, - struct inode *inode) +int nilfs_construct_dsync_segment(struct super_block *sb, struct inode *inode, + loff_t start, loff_t end) { struct nilfs_sb_info *sbi = NILFS_SB(sb); struct nilfs_sc_info *sci = NILFS_SC(sbi); @@ -2671,7 +2693,9 @@ int nilfs_construct_dsync_segment(struct super_block *sb, return 0; } spin_unlock(&sbi->s_inode_lock); - sci->sc_stage.dirty_file_ptr = ii; + sci->sc_dsync_inode = ii; + sci->sc_dsync_start = start; + sci->sc_dsync_end = end; err = nilfs_segctor_do_construct(sci, SC_LSEG_DSYNC); diff --git a/fs/nilfs2/segment.h b/fs/nilfs2/segment.h index 615654b8c32..2dd39da9f38 100644 --- a/fs/nilfs2/segment.h +++ b/fs/nilfs2/segment.h @@ -93,6 +93,9 @@ struct nilfs_segsum_pointer { * @sc_active_segments: List of active segments that were already written out * @sc_cleaning_segments: List of segments to be freed through construction * @sc_copied_buffers: List of copied buffers (buffer heads) to freeze data + * @sc_dsync_inode: inode whose data pages are written for a sync operation + * @sc_dsync_start: start byte offset of data pages + * @sc_dsync_end: end byte offset of data pages (inclusive) * @sc_segbufs: List of segment buffers * @sc_segbuf_nblocks: Number of available blocks in segment buffers. * @sc_curseg: Current segment buffer @@ -134,6 +137,10 @@ struct nilfs_sc_info { struct list_head sc_cleaning_segments; struct list_head sc_copied_buffers; + struct nilfs_inode_info *sc_dsync_inode; + loff_t sc_dsync_start; + loff_t sc_dsync_end; + /* Segment buffers */ struct list_head sc_segbufs; unsigned long sc_segbuf_nblocks; @@ -221,8 +228,8 @@ extern void nilfs_destroy_transaction_cache(void); extern void nilfs_relax_pressure_in_lock(struct super_block *); extern int nilfs_construct_segment(struct super_block *); -extern int nilfs_construct_dsync_segment(struct super_block *, - struct inode *); +extern int nilfs_construct_dsync_segment(struct super_block *, struct inode *, + loff_t, loff_t); extern void nilfs_flush_segment(struct super_block *, ino_t); extern int nilfs_clean_segments(struct super_block *, void __user *); -- cgit v1.2.3 From 0f3e1c7f23f8a6f8224fa1d275381f6d9279ad4b Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Mon, 6 Apr 2009 19:01:38 -0700 Subject: nilfs2: recovery functions This adds recovery function on mount. Usually the recovery is achieved by just finding the latest super root. When logs without checkpoints were appended for data sync operations after the latest super root, the recovery function will perform roll forwarding and reconstruct new log(s) with a super root. Signed-off-by: Ryusuke Konishi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/nilfs2/recovery.c | 941 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 941 insertions(+) create mode 100644 fs/nilfs2/recovery.c (limited to 'fs/nilfs2') diff --git a/fs/nilfs2/recovery.c b/fs/nilfs2/recovery.c new file mode 100644 index 00000000000..877dc1ba23f --- /dev/null +++ b/fs/nilfs2/recovery.c @@ -0,0 +1,941 @@ +/* + * recovery.c - NILFS recovery logic + * + * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + * Written by Ryusuke Konishi + */ + +#include +#include +#include +#include +#include "nilfs.h" +#include "segment.h" +#include "sufile.h" +#include "page.h" +#include "seglist.h" +#include "segbuf.h" + +/* + * Segment check result + */ +enum { + NILFS_SEG_VALID, + NILFS_SEG_NO_SUPER_ROOT, + NILFS_SEG_FAIL_IO, + NILFS_SEG_FAIL_MAGIC, + NILFS_SEG_FAIL_SEQ, + NILFS_SEG_FAIL_CHECKSUM_SEGSUM, + NILFS_SEG_FAIL_CHECKSUM_SUPER_ROOT, + NILFS_SEG_FAIL_CHECKSUM_FULL, + NILFS_SEG_FAIL_CONSISTENCY, +}; + +/* work structure for recovery */ +struct nilfs_recovery_block { + ino_t ino; /* Inode number of the file that this block + belongs to */ + sector_t blocknr; /* block number */ + __u64 vblocknr; /* virtual block number */ + unsigned long blkoff; /* File offset of the data block (per block) */ + struct list_head list; +}; + + +static int nilfs_warn_segment_error(int err) +{ + switch (err) { + case NILFS_SEG_FAIL_IO: + printk(KERN_WARNING + "NILFS warning: I/O error on loading last segment\n"); + return -EIO; + case NILFS_SEG_FAIL_MAGIC: + printk(KERN_WARNING + "NILFS warning: Segment magic number invalid\n"); + break; + case NILFS_SEG_FAIL_SEQ: + printk(KERN_WARNING + "NILFS warning: Sequence number mismatch\n"); + break; + case NILFS_SEG_FAIL_CHECKSUM_SEGSUM: + printk(KERN_WARNING + "NILFS warning: Checksum error in segment summary\n"); + break; + case NILFS_SEG_FAIL_CHECKSUM_SUPER_ROOT: + printk(KERN_WARNING + "NILFS warning: Checksum error in super root\n"); + break; + case NILFS_SEG_FAIL_CHECKSUM_FULL: + printk(KERN_WARNING + "NILFS warning: Checksum error in segment payload\n"); + break; + case NILFS_SEG_FAIL_CONSISTENCY: + printk(KERN_WARNING + "NILFS warning: Inconsistent segment\n"); + break; + case NILFS_SEG_NO_SUPER_ROOT: + printk(KERN_WARNING + "NILFS warning: No super root in the last segment\n"); + break; + case NILFS_SEG_VALID: + default: + BUG(); + } + return -EINVAL; +} + +static void store_segsum_info(struct nilfs_segsum_info *ssi, + struct nilfs_segment_summary *sum, + unsigned int blocksize) +{ + ssi->flags = le16_to_cpu(sum->ss_flags); + ssi->seg_seq = le64_to_cpu(sum->ss_seq); + ssi->ctime = le64_to_cpu(sum->ss_create); + ssi->next = le64_to_cpu(sum->ss_next); + ssi->nblocks = le32_to_cpu(sum->ss_nblocks); + ssi->nfinfo = le32_to_cpu(sum->ss_nfinfo); + ssi->sumbytes = le32_to_cpu(sum->ss_sumbytes); + + ssi->nsumblk = DIV_ROUND_UP(ssi->sumbytes, blocksize); + ssi->nfileblk = ssi->nblocks - ssi->nsumblk - !!NILFS_SEG_HAS_SR(ssi); +} + +/** + * calc_crc_cont - check CRC of blocks continuously + * @sbi: nilfs_sb_info + * @bhs: buffer head of start block + * @sum: place to store result + * @offset: offset bytes in the first block + * @check_bytes: number of bytes to be checked + * @start: DBN of start block + * @nblock: number of blocks to be checked + */ +static int calc_crc_cont(struct nilfs_sb_info *sbi, struct buffer_head *bhs, + u32 *sum, unsigned long offset, u64 check_bytes, + sector_t start, unsigned long nblock) +{ + unsigned long blocksize = sbi->s_super->s_blocksize; + unsigned long size; + u32 crc; + + BUG_ON(offset >= blocksize); + check_bytes -= offset; + size = min_t(u64, check_bytes, blocksize - offset); + crc = crc32_le(sbi->s_nilfs->ns_crc_seed, + (unsigned char *)bhs->b_data + offset, size); + if (--nblock > 0) { + do { + struct buffer_head *bh + = sb_bread(sbi->s_super, ++start); + if (!bh) + return -EIO; + check_bytes -= size; + size = min_t(u64, check_bytes, blocksize); + crc = crc32_le(crc, bh->b_data, size); + brelse(bh); + } while (--nblock > 0); + } + *sum = crc; + return 0; +} + +/** + * nilfs_read_super_root_block - read super root block + * @sb: super_block + * @sr_block: disk block number of the super root block + * @pbh: address of a buffer_head pointer to return super root buffer + * @check: CRC check flag + */ +int nilfs_read_super_root_block(struct super_block *sb, sector_t sr_block, + struct buffer_head **pbh, int check) +{ + struct buffer_head *bh_sr; + struct nilfs_super_root *sr; + u32 crc; + int ret; + + *pbh = NULL; + bh_sr = sb_bread(sb, sr_block); + if (unlikely(!bh_sr)) { + ret = NILFS_SEG_FAIL_IO; + goto failed; + } + + sr = (struct nilfs_super_root *)bh_sr->b_data; + if (check) { + unsigned bytes = le16_to_cpu(sr->sr_bytes); + + if (bytes == 0 || bytes > sb->s_blocksize) { + ret = NILFS_SEG_FAIL_CHECKSUM_SUPER_ROOT; + goto failed_bh; + } + if (calc_crc_cont(NILFS_SB(sb), bh_sr, &crc, + sizeof(sr->sr_sum), bytes, sr_block, 1)) { + ret = NILFS_SEG_FAIL_IO; + goto failed_bh; + } + if (crc != le32_to_cpu(sr->sr_sum)) { + ret = NILFS_SEG_FAIL_CHECKSUM_SUPER_ROOT; + goto failed_bh; + } + } + *pbh = bh_sr; + return 0; + + failed_bh: + brelse(bh_sr); + + failed: + return nilfs_warn_segment_error(ret); +} + +/** + * load_segment_summary - read segment summary of the specified partial segment + * @sbi: nilfs_sb_info + * @pseg_start: start disk block number of partial segment + * @seg_seq: sequence number requested + * @ssi: pointer to nilfs_segsum_info struct to store information + * @full_check: full check flag + * (0: only checks segment summary CRC, 1: data CRC) + */ +static int +load_segment_summary(struct nilfs_sb_info *sbi, sector_t pseg_start, + u64 seg_seq, struct nilfs_segsum_info *ssi, + int full_check) +{ + struct buffer_head *bh_sum; + struct nilfs_segment_summary *sum; + unsigned long offset, nblock; + u64 check_bytes; + u32 crc, crc_sum; + int ret = NILFS_SEG_FAIL_IO; + + bh_sum = sb_bread(sbi->s_super, pseg_start); + if (!bh_sum) + goto out; + + sum = (struct nilfs_segment_summary *)bh_sum->b_data; + + /* Check consistency of segment summary */ + if (le32_to_cpu(sum->ss_magic) != NILFS_SEGSUM_MAGIC) { + ret = NILFS_SEG_FAIL_MAGIC; + goto failed; + } + store_segsum_info(ssi, sum, sbi->s_super->s_blocksize); + if (seg_seq != ssi->seg_seq) { + ret = NILFS_SEG_FAIL_SEQ; + goto failed; + } + if (full_check) { + offset = sizeof(sum->ss_datasum); + check_bytes = + ((u64)ssi->nblocks << sbi->s_super->s_blocksize_bits); + nblock = ssi->nblocks; + crc_sum = le32_to_cpu(sum->ss_datasum); + ret = NILFS_SEG_FAIL_CHECKSUM_FULL; + } else { /* only checks segment summary */ + offset = sizeof(sum->ss_datasum) + sizeof(sum->ss_sumsum); + check_bytes = ssi->sumbytes; + nblock = ssi->nsumblk; + crc_sum = le32_to_cpu(sum->ss_sumsum); + ret = NILFS_SEG_FAIL_CHECKSUM_SEGSUM; + } + + if (unlikely(nblock == 0 || + nblock > sbi->s_nilfs->ns_blocks_per_segment)) { + /* This limits the number of blocks read in the CRC check */ + ret = NILFS_SEG_FAIL_CONSISTENCY; + goto failed; + } + if (calc_crc_cont(sbi, bh_sum, &crc, offset, check_bytes, + pseg_start, nblock)) { + ret = NILFS_SEG_FAIL_IO; + goto failed; + } + if (crc == crc_sum) + ret = 0; + failed: + brelse(bh_sum); + out: + return ret; +} + +static void *segsum_get(struct super_block *sb, struct buffer_head **pbh, + unsigned int *offset, unsigned int bytes) +{ + void *ptr; + sector_t blocknr; + + BUG_ON((*pbh)->b_size < *offset); + if (bytes > (*pbh)->b_size - *offset) { + blocknr = (*pbh)->b_blocknr; + brelse(*pbh); + *pbh = sb_bread(sb, blocknr + 1); + if (unlikely(!*pbh)) + return NULL; + *offset = 0; + } + ptr = (*pbh)->b_data + *offset; + *offset += bytes; + return ptr; +} + +static void segsum_skip(struct super_block *sb, struct buffer_head **pbh, + unsigned int *offset, unsigned int bytes, + unsigned long count) +{ + unsigned int rest_item_in_current_block + = ((*pbh)->b_size - *offset) / bytes; + + if (count <= rest_item_in_current_block) { + *offset += bytes * count; + } else { + sector_t blocknr = (*pbh)->b_blocknr; + unsigned int nitem_per_block = (*pbh)->b_size / bytes; + unsigned int bcnt; + + count -= rest_item_in_current_block; + bcnt = DIV_ROUND_UP(count, nitem_per_block); + *offset = bytes * (count - (bcnt - 1) * nitem_per_block); + + brelse(*pbh); + *pbh = sb_bread(sb, blocknr + bcnt); + } +} + +static int +collect_blocks_from_segsum(struct nilfs_sb_info *sbi, sector_t sum_blocknr, + struct nilfs_segsum_info *ssi, + struct list_head *head) +{ + struct buffer_head *bh; + unsigned int offset; + unsigned long nfinfo = ssi->nfinfo; + sector_t blocknr = sum_blocknr + ssi->nsumblk; + ino_t ino; + int err = -EIO; + + if (!nfinfo) + return 0; + + bh = sb_bread(sbi->s_super, sum_blocknr); + if (unlikely(!bh)) + goto out; + + offset = le16_to_cpu( + ((struct nilfs_segment_summary *)bh->b_data)->ss_bytes); + for (;;) { + unsigned long nblocks, ndatablk, nnodeblk; + struct nilfs_finfo *finfo; + + finfo = segsum_get(sbi->s_super, &bh, &offset, sizeof(*finfo)); + if (unlikely(!finfo)) + goto out; + + ino = le64_to_cpu(finfo->fi_ino); + nblocks = le32_to_cpu(finfo->fi_nblocks); + ndatablk = le32_to_cpu(finfo->fi_ndatablk); + nnodeblk = nblocks - ndatablk; + + while (ndatablk-- > 0) { + struct nilfs_recovery_block *rb; + struct nilfs_binfo_v *binfo; + + binfo = segsum_get(sbi->s_super, &bh, &offset, + sizeof(*binfo)); + if (unlikely(!binfo)) + goto out; + + rb = kmalloc(sizeof(*rb), GFP_NOFS); + if (unlikely(!rb)) { + err = -ENOMEM; + goto out; + } + rb->ino = ino; + rb->blocknr = blocknr++; + rb->vblocknr = le64_to_cpu(binfo->bi_vblocknr); + rb->blkoff = le64_to_cpu(binfo->bi_blkoff); + /* INIT_LIST_HEAD(&rb->list); */ + list_add_tail(&rb->list, head); + } + if (--nfinfo == 0) + break; + blocknr += nnodeblk; /* always 0 for the data sync segments */ + segsum_skip(sbi->s_super, &bh, &offset, sizeof(__le64), + nnodeblk); + if (unlikely(!bh)) + goto out; + } + err = 0; + out: + brelse(bh); /* brelse(NULL) is just ignored */ + return err; +} + +static void dispose_recovery_list(struct list_head *head) +{ + while (!list_empty(head)) { + struct nilfs_recovery_block *rb + = list_entry(head->next, + struct nilfs_recovery_block, list); + list_del(&rb->list); + kfree(rb); + } +} + +void nilfs_dispose_segment_list(struct list_head *head) +{ + while (!list_empty(head)) { + struct nilfs_segment_entry *ent + = list_entry(head->next, + struct nilfs_segment_entry, list); + list_del(&ent->list); + nilfs_free_segment_entry(ent); + } +} + +static int nilfs_prepare_segment_for_recovery(struct the_nilfs *nilfs, + struct nilfs_recovery_info *ri) +{ + struct list_head *head = &ri->ri_used_segments; + struct nilfs_segment_entry *ent, *n; + struct inode *sufile = nilfs->ns_sufile; + __u64 segnum[4]; + int err; + int i; + + segnum[0] = nilfs->ns_segnum; + segnum[1] = nilfs->ns_nextnum; + segnum[2] = ri->ri_segnum; + segnum[3] = ri->ri_nextnum; + + /* + * Releasing the next segment of the latest super root. + * The next segment is invalidated by this recovery. + */ + err = nilfs_sufile_free(sufile, segnum[1]); + if (unlikely(err)) + goto failed; + + err = -ENOMEM; + for (i = 1; i < 4; i++) { + ent = nilfs_alloc_segment_entry(segnum[i]); + if (unlikely(!ent)) + goto failed; + list_add_tail(&ent->list, head); + } + + /* + * Collecting segments written after the latest super root. + * These are marked volatile active, and won't be reallocated in + * the next construction. + */ + list_for_each_entry_safe(ent, n, head, list) { + if (ent->segnum == segnum[0]) { + list_del(&ent->list); + nilfs_free_segment_entry(ent); + continue; + } + err = nilfs_open_segment_entry(ent, sufile); + if (unlikely(err)) + goto failed; + if (nilfs_segment_usage_clean(ent->raw_su)) { + nilfs_segment_usage_set_volatile_active(ent->raw_su); + /* Keep it open */ + } else { + /* Removing duplicated entries */ + list_del(&ent->list); + nilfs_close_segment_entry(ent, sufile); + nilfs_free_segment_entry(ent); + } + } + list_splice_init(head, nilfs->ns_used_segments.prev); + + /* + * The segment having the latest super root is active, and + * should be deactivated on the next construction for recovery. + */ + err = -ENOMEM; + ent = nilfs_alloc_segment_entry(segnum[0]); + if (unlikely(!ent)) + goto failed; + list_add_tail(&ent->list, &ri->ri_used_segments); + + /* Allocate new segments for recovery */ + err = nilfs_sufile_alloc(sufile, &segnum[0]); + if (unlikely(err)) + goto failed; + + nilfs->ns_pseg_offset = 0; + nilfs->ns_seg_seq = ri->ri_seq + 2; + nilfs->ns_nextnum = nilfs->ns_segnum = segnum[0]; + return 0; + + failed: + /* No need to recover sufile because it will be destroyed on error */ + return err; +} + +static int nilfs_recovery_copy_block(struct nilfs_sb_info *sbi, + struct nilfs_recovery_block *rb, + struct page *page) +{ + struct buffer_head *bh_org; + void *kaddr; + + bh_org = sb_bread(sbi->s_super, rb->blocknr); + if (unlikely(!bh_org)) + return -EIO; + + kaddr = kmap_atomic(page, KM_USER0); + memcpy(kaddr + bh_offset(bh_org), bh_org->b_data, bh_org->b_size); + kunmap_atomic(kaddr, KM_USER0); + brelse(bh_org); + return 0; +} + +static int recover_dsync_blocks(struct nilfs_sb_info *sbi, + struct list_head *head, + unsigned long *nr_salvaged_blocks) +{ + struct inode *inode; + struct nilfs_recovery_block *rb, *n; + unsigned blocksize = sbi->s_super->s_blocksize; + struct page *page; + loff_t pos; + int err = 0, err2 = 0; + + list_for_each_entry_safe(rb, n, head, list) { + inode = nilfs_iget(sbi->s_super, rb->ino); + if (IS_ERR(inode)) { + err = PTR_ERR(inode); + inode = NULL; + goto failed_inode; + } + + pos = rb->blkoff << inode->i_blkbits; + page = NULL; + err = block_write_begin(NULL, inode->i_mapping, pos, blocksize, + 0, &page, NULL, nilfs_get_block); + if (unlikely(err)) + goto failed_inode; + + err = nilfs_recovery_copy_block(sbi, rb, page); + if (unlikely(err)) + goto failed_page; + + err = nilfs_set_file_dirty(sbi, inode, 1); + if (unlikely(err)) + goto failed_page; + + block_write_end(NULL, inode->i_mapping, pos, blocksize, + blocksize, page, NULL); + + unlock_page(page); + page_cache_release(page); + + (*nr_salvaged_blocks)++; + goto next; + + failed_page: + unlock_page(page); + page_cache_release(page); + + failed_inode: + printk(KERN_WARNING + "NILFS warning: error recovering data block " + "(err=%d, ino=%lu, block-offset=%llu)\n", + err, rb->ino, (unsigned long long)rb->blkoff); + if (!err2) + err2 = err; + next: + iput(inode); /* iput(NULL) is just ignored */ + list_del_init(&rb->list); + kfree(rb); + } + return err2; +} + +/** + * nilfs_do_roll_forward - salvage logical segments newer than the latest + * checkpoint + * @sbi: nilfs_sb_info + * @nilfs: the_nilfs + * @ri: pointer to a nilfs_recovery_info + */ +static int nilfs_do_roll_forward(struct the_nilfs *nilfs, + struct nilfs_sb_info *sbi, + struct nilfs_recovery_info *ri) +{ + struct nilfs_segsum_info ssi; + sector_t pseg_start; + sector_t seg_start, seg_end; /* Starting/ending DBN of full segment */ + unsigned long nsalvaged_blocks = 0; + u64 seg_seq; + __u64 segnum, nextnum = 0; + int empty_seg = 0; + int err = 0, ret; + LIST_HEAD(dsync_blocks); /* list of data blocks to be recovered */ + enum { + RF_INIT_ST, + RF_DSYNC_ST, /* scanning data-sync segments */ + }; + int state = RF_INIT_ST; + + nilfs_attach_writer(nilfs, sbi); + pseg_start = ri->ri_lsegs_start; + seg_seq = ri->ri_lsegs_start_seq; + segnum = nilfs_get_segnum_of_block(nilfs, pseg_start); + nilfs_get_segment_range(nilfs, segnum, &seg_start, &seg_end); + + while (segnum != ri->ri_segnum || pseg_start <= ri->ri_pseg_start) { + + ret = load_segment_summary(sbi, pseg_start, seg_seq, &ssi, 1); + if (ret) { + if (ret == NILFS_SEG_FAIL_IO) { + err = -EIO; + goto failed; + } + goto strayed; + } + if (unlikely(NILFS_SEG_HAS_SR(&ssi))) + goto confused; + + /* Found a valid partial segment; do recovery actions */ + nextnum = nilfs_get_segnum_of_block(nilfs, ssi.next); + empty_seg = 0; + nilfs->ns_ctime = ssi.ctime; + if (!(ssi.flags & NILFS_SS_GC)) + nilfs->ns_nongc_ctime = ssi.ctime; + + switch (state) { + case RF_INIT_ST: + if (!NILFS_SEG_LOGBGN(&ssi) || !NILFS_SEG_DSYNC(&ssi)) + goto try_next_pseg; + state = RF_DSYNC_ST; + /* Fall through */ + case RF_DSYNC_ST: + if (!NILFS_SEG_DSYNC(&ssi)) + goto confused; + + err = collect_blocks_from_segsum( + sbi, pseg_start, &ssi, &dsync_blocks); + if (unlikely(err)) + goto failed; + if (NILFS_SEG_LOGEND(&ssi)) { + err = recover_dsync_blocks( + sbi, &dsync_blocks, &nsalvaged_blocks); + if (unlikely(err)) + goto failed; + state = RF_INIT_ST; + } + break; /* Fall through to try_next_pseg */ + } + + try_next_pseg: + if (pseg_start == ri->ri_lsegs_end) + break; + pseg_start += ssi.nblocks; + if (pseg_start < seg_end) + continue; + goto feed_segment; + + strayed: + if (pseg_start == ri->ri_lsegs_end) + break; + + feed_segment: + /* Looking to the next full segment */ + if (empty_seg++) + break; + seg_seq++; + segnum = nextnum; + nilfs_get_segment_range(nilfs, segnum, &seg_start, &seg_end); + pseg_start = seg_start; + } + + if (nsalvaged_blocks) { + printk(KERN_INFO "NILFS (device %s): salvaged %lu blocks\n", + sbi->s_super->s_id, nsalvaged_blocks); + ri->ri_need_recovery = NILFS_RECOVERY_ROLLFORWARD_DONE; + } + out: + dispose_recovery_list(&dsync_blocks); + nilfs_detach_writer(sbi->s_nilfs, sbi); + return err; + + confused: + err = -EINVAL; + failed: + printk(KERN_ERR + "NILFS (device %s): Error roll-forwarding " + "(err=%d, pseg block=%llu). ", + sbi->s_super->s_id, err, (unsigned long long)pseg_start); + goto out; +} + +static void nilfs_finish_roll_forward(struct the_nilfs *nilfs, + struct nilfs_sb_info *sbi, + struct nilfs_recovery_info *ri) +{ + struct buffer_head *bh; + int err; + + if (nilfs_get_segnum_of_block(nilfs, ri->ri_lsegs_start) != + nilfs_get_segnum_of_block(nilfs, ri->ri_super_root)) + return; + + bh = sb_getblk(sbi->s_super, ri->ri_lsegs_start); + BUG_ON(!bh); + memset(bh->b_data, 0, bh->b_size); + set_buffer_dirty(bh); + err = sync_dirty_buffer(bh); + if (unlikely(err)) + printk(KERN_WARNING + "NILFS warning: buffer sync write failed during " + "post-cleaning of recovery.\n"); + brelse(bh); +} + +/** + * nilfs_recover_logical_segments - salvage logical segments written after + * the latest super root + * @nilfs: the_nilfs + * @sbi: nilfs_sb_info + * @ri: pointer to a nilfs_recovery_info struct to store search results. + * + * Return Value: On success, 0 is returned. On error, one of the following + * negative error code is returned. + * + * %-EINVAL - Inconsistent filesystem state. + * + * %-EIO - I/O error + * + * %-ENOSPC - No space left on device (only in a panic state). + * + * %-ERESTARTSYS - Interrupted. + * + * %-ENOMEM - Insufficient memory available. + */ +int nilfs_recover_logical_segments(struct the_nilfs *nilfs, + struct nilfs_sb_info *sbi, + struct nilfs_recovery_info *ri) +{ + int err; + + if (ri->ri_lsegs_start == 0 || ri->ri_lsegs_end == 0) + return 0; + + err = nilfs_attach_checkpoint(sbi, ri->ri_cno); + if (unlikely(err)) { + printk(KERN_ERR + "NILFS: error loading the latest checkpoint.\n"); + return err; + } + + err = nilfs_do_roll_forward(nilfs, sbi, ri); + if (unlikely(err)) + goto failed; + + if (ri->ri_need_recovery == NILFS_RECOVERY_ROLLFORWARD_DONE) { + err = nilfs_prepare_segment_for_recovery(nilfs, ri); + if (unlikely(err)) { + printk(KERN_ERR "NILFS: Error preparing segments for " + "recovery.\n"); + goto failed; + } + + err = nilfs_attach_segment_constructor(sbi, ri); + if (unlikely(err)) + goto failed; + + set_nilfs_discontinued(nilfs); + err = nilfs_construct_segment(sbi->s_super); + nilfs_detach_segment_constructor(sbi); + + if (unlikely(err)) { + printk(KERN_ERR "NILFS: Oops! recovery failed. " + "(err=%d)\n", err); + goto failed; + } + + nilfs_finish_roll_forward(nilfs, sbi, ri); + } + + nilfs_detach_checkpoint(sbi); + return 0; + + failed: + nilfs_detach_checkpoint(sbi); + nilfs_mdt_clear(nilfs->ns_cpfile); + nilfs_mdt_clear(nilfs->ns_sufile); + nilfs_mdt_clear(nilfs->ns_dat); + return err; +} + +/** + * nilfs_search_super_root - search the latest valid super root + * @nilfs: the_nilfs + * @sbi: nilfs_sb_info + * @ri: pointer to a nilfs_recovery_info struct to store search results. + * + * nilfs_search_super_root() looks for the latest super-root from a partial + * segment pointed by the superblock. It sets up struct the_nilfs through + * this search. It fills nilfs_recovery_info (ri) required for recovery. + * + * Return Value: On success, 0 is returned. On error, one of the following + * negative error code is returned. + * + * %-EINVAL - No valid segment found + * + * %-EIO - I/O error + */ +int nilfs_search_super_root(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi, + struct nilfs_recovery_info *ri) +{ + struct nilfs_segsum_info ssi; + sector_t pseg_start, pseg_end, sr_pseg_start = 0; + sector_t seg_start, seg_end; /* range of full segment (block number) */ + u64 seg_seq; + __u64 segnum, nextnum = 0; + __u64 cno; + struct nilfs_segment_entry *ent; + LIST_HEAD(segments); + int empty_seg = 0, scan_newer = 0; + int ret; + + pseg_start = nilfs->ns_last_pseg; + seg_seq = nilfs->ns_last_seq; + cno = nilfs->ns_last_cno; + segnum = nilfs_get_segnum_of_block(nilfs, pseg_start); + + /* Calculate range of segment */ + nilfs_get_segment_range(nilfs, segnum, &seg_start, &seg_end); + + for (;;) { + /* Load segment summary */ + ret = load_segment_summary(sbi, pseg_start, seg_seq, &ssi, 1); + if (ret) { + if (ret == NILFS_SEG_FAIL_IO) + goto failed; + goto strayed; + } + pseg_end = pseg_start + ssi.nblocks - 1; + if (unlikely(pseg_end > seg_end)) { + ret = NILFS_SEG_FAIL_CONSISTENCY; + goto strayed; + } + + /* A valid partial segment */ + ri->ri_pseg_start = pseg_start; + ri->ri_seq = seg_seq; + ri->ri_segnum = segnum; + nextnum = nilfs_get_segnum_of_block(nilfs, ssi.next); + ri->ri_nextnum = nextnum; + empty_seg = 0; + + if (!NILFS_SEG_HAS_SR(&ssi)) { + if (!scan_newer) { + /* This will never happen because a superblock + (last_segment) always points to a pseg + having a super root. */ + ret = NILFS_SEG_FAIL_CONSISTENCY; + goto failed; + } + if (!ri->ri_lsegs_start && NILFS_SEG_LOGBGN(&ssi)) { + ri->ri_lsegs_start = pseg_start; + ri->ri_lsegs_start_seq = seg_seq; + } + if (NILFS_SEG_LOGEND(&ssi)) + ri->ri_lsegs_end = pseg_start; + goto try_next_pseg; + } + + /* A valid super root was found. */ + ri->ri_cno = cno++; + ri->ri_super_root = pseg_end; + ri->ri_lsegs_start = ri->ri_lsegs_end = 0; + + nilfs_dispose_segment_list(&segments); + nilfs->ns_pseg_offset = (sr_pseg_start = pseg_start) + + ssi.nblocks - seg_start; + nilfs->ns_seg_seq = seg_seq; + nilfs->ns_segnum = segnum; + nilfs->ns_cno = cno; /* nilfs->ns_cno = ri->ri_cno + 1 */ + nilfs->ns_ctime = ssi.ctime; + nilfs->ns_nextnum = nextnum; + + if (scan_newer) + ri->ri_need_recovery = NILFS_RECOVERY_SR_UPDATED; + else if (nilfs->ns_mount_state & NILFS_VALID_FS) + goto super_root_found; + + scan_newer = 1; + + /* reset region for roll-forward */ + pseg_start += ssi.nblocks; + if (pseg_start < seg_end) + continue; + goto feed_segment; + + try_next_pseg: + /* Standing on a course, or met an inconsistent state */ + pseg_start += ssi.nblocks; + if (pseg_start < seg_end) + continue; + goto feed_segment; + + strayed: + /* Off the trail */ + if (!scan_newer) + /* + * This can happen if a checkpoint was written without + * barriers, or as a result of an I/O failure. + */ + goto failed; + + feed_segment: + /* Looking to the next full segment */ + if (empty_seg++) + goto super_root_found; /* found a valid super root */ + + ent = nilfs_alloc_segment_entry(segnum); + if (unlikely(!ent)) { + ret = -ENOMEM; + goto failed; + } + list_add_tail(&ent->list, &segments); + + seg_seq++; + segnum = nextnum; + nilfs_get_segment_range(nilfs, segnum, &seg_start, &seg_end); + pseg_start = seg_start; + } + + super_root_found: + /* Updating pointers relating to the latest checkpoint */ + list_splice(&segments, ri->ri_used_segments.prev); + nilfs->ns_last_pseg = sr_pseg_start; + nilfs->ns_last_seq = nilfs->ns_seg_seq; + nilfs->ns_last_cno = ri->ri_cno; + return 0; + + failed: + nilfs_dispose_segment_list(&segments); + return (ret < 0) ? ret : nilfs_warn_segment_error(ret); +} -- cgit v1.2.3 From 84ef1ecfdea2f9f1e740a4bee0fa9cd629bdda70 Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Mon, 6 Apr 2009 19:01:39 -0700 Subject: nilfs2: another dat for garbage collection NILFS2 uses another DAT inode during garbage collection to ensure atomicity and consistency of the DAT in the transient state. This twin inode is called GCDAT. This adds functions to initialize the GCDAT and to switch page caches and B-tree node caches between these two inodes. Signed-off-by: Seiji Kihara Signed-off-by: Ryusuke Konishi Signed-off-by: Yoshiji Amagai Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/nilfs2/gcdat.c | 84 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 84 insertions(+) create mode 100644 fs/nilfs2/gcdat.c (limited to 'fs/nilfs2') diff --git a/fs/nilfs2/gcdat.c b/fs/nilfs2/gcdat.c new file mode 100644 index 00000000000..93383c5cee9 --- /dev/null +++ b/fs/nilfs2/gcdat.c @@ -0,0 +1,84 @@ +/* + * gcdat.c - NILFS shadow DAT inode for GC + * + * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + * Written by Seiji Kihara , Amagai Yoshiji , + * and Ryusuke Konishi . + * + */ + +#include +#include "nilfs.h" +#include "page.h" +#include "mdt.h" + +int nilfs_init_gcdat_inode(struct the_nilfs *nilfs) +{ + struct inode *dat = nilfs->ns_dat, *gcdat = nilfs->ns_gc_dat; + struct nilfs_inode_info *dii = NILFS_I(dat), *gii = NILFS_I(gcdat); + int err; + + gcdat->i_state = 0; + gcdat->i_blocks = dat->i_blocks; + gii->i_flags = dii->i_flags; + gii->i_state = dii->i_state | (1 << NILFS_I_GCDAT); + gii->i_cno = 0; + nilfs_bmap_init_gcdat(gii->i_bmap, dii->i_bmap); + err = nilfs_copy_dirty_pages(gcdat->i_mapping, dat->i_mapping); + if (unlikely(err)) + return err; + + return nilfs_copy_dirty_pages(&gii->i_btnode_cache, + &dii->i_btnode_cache); +} + +void nilfs_commit_gcdat_inode(struct the_nilfs *nilfs) +{ + struct inode *dat = nilfs->ns_dat, *gcdat = nilfs->ns_gc_dat; + struct nilfs_inode_info *dii = NILFS_I(dat), *gii = NILFS_I(gcdat); + struct address_space *mapping = dat->i_mapping; + struct address_space *gmapping = gcdat->i_mapping; + + down_write(&NILFS_MDT(dat)->mi_sem); + dat->i_blocks = gcdat->i_blocks; + dii->i_flags = gii->i_flags; + dii->i_state = gii->i_state & ~(1 << NILFS_I_GCDAT); + + nilfs_bmap_commit_gcdat(gii->i_bmap, dii->i_bmap); + + nilfs_clear_dirty_pages(mapping); + nilfs_copy_back_pages(mapping, gmapping); + /* note: mdt dirty flags should be cleared by segctor. */ + + nilfs_clear_dirty_pages(&dii->i_btnode_cache); + nilfs_copy_back_pages(&dii->i_btnode_cache, &gii->i_btnode_cache); + + up_write(&NILFS_MDT(dat)->mi_sem); +} + +void nilfs_clear_gcdat_inode(struct the_nilfs *nilfs) +{ + struct inode *gcdat = nilfs->ns_gc_dat; + struct nilfs_inode_info *gii = NILFS_I(gcdat); + + gcdat->i_state = I_CLEAR; + gii->i_flags = 0; + + truncate_inode_pages(gcdat->i_mapping, 0); + truncate_inode_pages(&gii->i_btnode_cache, 0); +} -- cgit v1.2.3 From a3d93f709e893187d301aa5458b2248db9f22bd1 Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Mon, 6 Apr 2009 19:01:40 -0700 Subject: nilfs2: block cache for garbage collection This adds the cache of on-disk blocks to be moved in garbage collection. The disk blocks are held with dummy inodes (called gcinodes), and this file provides lookup function of the dummy inodes, and their buffer read function. Signed-off-by: Seiji Kihara Signed-off-by: Ryusuke Konishi Signed-off-by: Yoshiji Amagai Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/nilfs2/gcinode.c | 270 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 270 insertions(+) create mode 100644 fs/nilfs2/gcinode.c (limited to 'fs/nilfs2') diff --git a/fs/nilfs2/gcinode.c b/fs/nilfs2/gcinode.c new file mode 100644 index 00000000000..00139526632 --- /dev/null +++ b/fs/nilfs2/gcinode.c @@ -0,0 +1,270 @@ +/* + * gcinode.c - NILFS memory inode for GC + * + * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + * Written by Seiji Kihara , Amagai Yoshiji , + * and Ryusuke Konishi . + * Revised by Ryusuke Konishi . + * + */ + +#include +#include +#include +#include +#include "nilfs.h" +#include "page.h" +#include "mdt.h" +#include "dat.h" +#include "ifile.h" + +static struct address_space_operations def_gcinode_aops = {}; +/* XXX need def_gcinode_iops/fops? */ + +/* + * nilfs_gccache_submit_read_data() - add data buffer and submit read request + * @inode - gc inode + * @blkoff - dummy offset treated as the key for the page cache + * @pbn - physical block number of the block + * @vbn - virtual block number of the block, 0 for non-virtual block + * @out_bh - indirect pointer to a buffer_head struct to receive the results + * + * Description: nilfs_gccache_submit_read_data() registers the data buffer + * specified by @pbn to the GC pagecache with the key @blkoff. + * This function sets @vbn (@pbn if @vbn is zero) in b_blocknr of the buffer. + * + * Return Value: On success, 0 is returned. On Error, one of the following + * negative error code is returned. + * + * %-EIO - I/O error. + * + * %-ENOMEM - Insufficient amount of memory available. + * + * %-ENOENT - The block specified with @pbn does not exist. + */ +int nilfs_gccache_submit_read_data(struct inode *inode, sector_t blkoff, + sector_t pbn, __u64 vbn, + struct buffer_head **out_bh) +{ + struct buffer_head *bh; + int err; + + bh = nilfs_grab_buffer(inode, inode->i_mapping, blkoff, 0); + if (unlikely(!bh)) + return -ENOMEM; + + if (buffer_uptodate(bh)) + goto out; + + if (pbn == 0) { + struct inode *dat_inode = NILFS_I_NILFS(inode)->ns_dat; + /* use original dat, not gc dat. */ + err = nilfs_dat_translate(dat_inode, vbn, &pbn); + if (unlikely(err)) { /* -EIO, -ENOMEM, -ENOENT */ + brelse(bh); + goto failed; + } + } + + lock_buffer(bh); + if (buffer_uptodate(bh)) { + unlock_buffer(bh); + goto out; + } + + if (!buffer_mapped(bh)) { + bh->b_bdev = NILFS_I_NILFS(inode)->ns_bdev; + set_buffer_mapped(bh); + } + bh->b_blocknr = pbn; + bh->b_end_io = end_buffer_read_sync; + get_bh(bh); + submit_bh(READ, bh); + if (vbn) + bh->b_blocknr = vbn; + out: + err = 0; + *out_bh = bh; + + failed: + unlock_page(bh->b_page); + page_cache_release(bh->b_page); + return err; +} + +/* + * nilfs_gccache_submit_read_node() - add node buffer and submit read request + * @inode - gc inode + * @pbn - physical block number for the block + * @vbn - virtual block number for the block + * @out_bh - indirect pointer to a buffer_head struct to receive the results + * + * Description: nilfs_gccache_submit_read_node() registers the node buffer + * specified by @vbn to the GC pagecache. @pbn can be supplied by the + * caller to avoid translation of the disk block address. + * + * Return Value: On success, 0 is returned. On Error, one of the following + * negative error code is returned. + * + * %-EIO - I/O error. + * + * %-ENOMEM - Insufficient amount of memory available. + */ +int nilfs_gccache_submit_read_node(struct inode *inode, sector_t pbn, + __u64 vbn, struct buffer_head **out_bh) +{ + int ret = nilfs_btnode_submit_block(&NILFS_I(inode)->i_btnode_cache, + vbn ? : pbn, pbn, out_bh, 0); + if (ret == -EEXIST) /* internal code (cache hit) */ + ret = 0; + return ret; +} + +int nilfs_gccache_wait_and_mark_dirty(struct buffer_head *bh) +{ + wait_on_buffer(bh); + if (!buffer_uptodate(bh)) + return -EIO; + if (buffer_dirty(bh)) + return -EEXIST; + + if (buffer_nilfs_node(bh)) + nilfs_btnode_mark_dirty(bh); + else + nilfs_mdt_mark_buffer_dirty(bh); + return 0; +} + +/* + * nilfs_init_gccache() - allocate and initialize gc_inode hash table + * @nilfs - the_nilfs + * + * Return Value: On success, 0. + * On error, a negative error code is returned. + */ +int nilfs_init_gccache(struct the_nilfs *nilfs) +{ + int loop; + + BUG_ON(nilfs->ns_gc_inodes_h); + + INIT_LIST_HEAD(&nilfs->ns_gc_inodes); + + nilfs->ns_gc_inodes_h = + kmalloc(sizeof(struct hlist_head) * NILFS_GCINODE_HASH_SIZE, + GFP_NOFS); + if (nilfs->ns_gc_inodes_h == NULL) + return -ENOMEM; + + for (loop = 0; loop < NILFS_GCINODE_HASH_SIZE; loop++) + INIT_HLIST_HEAD(&nilfs->ns_gc_inodes_h[loop]); + return 0; +} + +/* + * nilfs_destroy_gccache() - free gc_inode hash table + * @nilfs - the nilfs + */ +void nilfs_destroy_gccache(struct the_nilfs *nilfs) +{ + if (nilfs->ns_gc_inodes_h) { + nilfs_remove_all_gcinode(nilfs); + kfree(nilfs->ns_gc_inodes_h); + nilfs->ns_gc_inodes_h = NULL; + } +} + +static struct inode *alloc_gcinode(struct the_nilfs *nilfs, ino_t ino, + __u64 cno) +{ + struct inode *inode = nilfs_mdt_new_common(nilfs, NULL, ino, GFP_NOFS); + struct nilfs_inode_info *ii; + + if (!inode) + return NULL; + + inode->i_op = NULL; + inode->i_fop = NULL; + inode->i_mapping->a_ops = &def_gcinode_aops; + + ii = NILFS_I(inode); + ii->i_cno = cno; + ii->i_flags = 0; + ii->i_state = 1 << NILFS_I_GCINODE; + ii->i_bh = NULL; + ii->i_dtime = 0; + nilfs_bmap_init_gc(ii->i_bmap); + + return inode; +} + +static unsigned long ihash(ino_t ino, __u64 cno) +{ + return hash_long((unsigned long)((ino << 2) + cno), + NILFS_GCINODE_HASH_BITS); +} + +/* + * nilfs_gc_iget() - find or create gc inode with specified (ino,cno) + */ +struct inode *nilfs_gc_iget(struct the_nilfs *nilfs, ino_t ino, __u64 cno) +{ + struct hlist_head *head = nilfs->ns_gc_inodes_h + ihash(ino, cno); + struct hlist_node *node; + struct inode *inode; + + hlist_for_each_entry(inode, node, head, i_hash) { + if (inode->i_ino == ino && NILFS_I(inode)->i_cno == cno) + return inode; + } + + inode = alloc_gcinode(nilfs, ino, cno); + if (likely(inode)) { + hlist_add_head(&inode->i_hash, head); + list_add(&NILFS_I(inode)->i_dirty, &nilfs->ns_gc_inodes); + } + return inode; +} + +/* + * nilfs_clear_gcinode() - clear and free a gc inode + */ +void nilfs_clear_gcinode(struct inode *inode) +{ + nilfs_mdt_clear(inode); + nilfs_mdt_destroy(inode); +} + +/* + * nilfs_remove_all_gcinode() - remove all inodes from the_nilfs + */ +void nilfs_remove_all_gcinode(struct the_nilfs *nilfs) +{ + struct hlist_head *head = nilfs->ns_gc_inodes_h; + struct hlist_node *node, *n; + struct inode *inode; + int loop; + + for (loop = 0; loop < NILFS_GCINODE_HASH_SIZE; loop++, head++) { + hlist_for_each_entry_safe(inode, node, n, head, i_hash) { + hlist_del_init(&inode->i_hash); + list_del_init(&NILFS_I(inode)->i_dirty); + nilfs_clear_gcinode(inode); /* might sleep */ + } + } +} -- cgit v1.2.3 From 7942b919f7321f95a777d396ff7894a7a83dc9b0 Mon Sep 17 00:00:00 2001 From: Koji Sato Date: Mon, 6 Apr 2009 19:01:41 -0700 Subject: nilfs2: ioctl operations This adds userland interface implemented with ioctl. Signed-off-by: Koji Sato Signed-off-by: Ryusuke Konishi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/nilfs2/ioctl.c | 941 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 941 insertions(+) create mode 100644 fs/nilfs2/ioctl.c (limited to 'fs/nilfs2') diff --git a/fs/nilfs2/ioctl.c b/fs/nilfs2/ioctl.c new file mode 100644 index 00000000000..35ba60ea961 --- /dev/null +++ b/fs/nilfs2/ioctl.c @@ -0,0 +1,941 @@ +/* + * ioctl.c - NILFS ioctl operations. + * + * Copyright (C) 2007, 2008 Nippon Telegraph and Telephone Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + * Written by Koji Sato . + */ + +#include +#include +#include /* lock_kernel(), unlock_kernel() */ +#include /* capable() */ +#include /* copy_from_user(), copy_to_user() */ +#include +#include "nilfs.h" +#include "segment.h" +#include "bmap.h" +#include "cpfile.h" +#include "sufile.h" +#include "dat.h" + + +#define KMALLOC_SIZE_MIN 4096 /* 4KB */ +#define KMALLOC_SIZE_MAX 131072 /* 128 KB */ + +static int nilfs_ioctl_wrap_copy(struct the_nilfs *nilfs, + struct nilfs_argv *argv, int dir, + ssize_t (*dofunc)(struct the_nilfs *, + int, int, + void *, size_t, size_t)) +{ + void *buf; + size_t ksize, maxmembs, total, n; + ssize_t nr; + int ret, i; + + if (argv->v_nmembs == 0) + return 0; + + for (ksize = KMALLOC_SIZE_MAX; ksize >= KMALLOC_SIZE_MIN; ksize /= 2) { + buf = kmalloc(ksize, GFP_NOFS); + if (buf != NULL) + break; + } + if (ksize < KMALLOC_SIZE_MIN) + return -ENOMEM; + maxmembs = ksize / argv->v_size; + + ret = 0; + total = 0; + for (i = 0; i < argv->v_nmembs; i += n) { + n = (argv->v_nmembs - i < maxmembs) ? + argv->v_nmembs - i : maxmembs; + if ((dir & _IOC_WRITE) && + copy_from_user(buf, + (void __user *)argv->v_base + argv->v_size * i, + argv->v_size * n)) { + ret = -EFAULT; + break; + } + nr = (*dofunc)(nilfs, argv->v_index + i, argv->v_flags, buf, + argv->v_size, n); + if (nr < 0) { + ret = nr; + break; + } + if ((dir & _IOC_READ) && + copy_to_user( + (void __user *)argv->v_base + argv->v_size * i, + buf, argv->v_size * nr)) { + ret = -EFAULT; + break; + } + total += nr; + } + argv->v_nmembs = total; + + kfree(buf); + return ret; +} + +static int nilfs_ioctl_change_cpmode(struct inode *inode, struct file *filp, + unsigned int cmd, void __user *argp) +{ + struct inode *cpfile = NILFS_SB(inode->i_sb)->s_nilfs->ns_cpfile; + struct nilfs_transaction_info ti; + struct nilfs_cpmode cpmode; + int ret; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + if (copy_from_user(&cpmode, argp, sizeof(cpmode))) + return -EFAULT; + + nilfs_transaction_begin(inode->i_sb, &ti, 0); + ret = nilfs_cpfile_change_cpmode( + cpfile, cpmode.cm_cno, cpmode.cm_mode); + nilfs_transaction_end(inode->i_sb, !ret); + return ret; +} + +static int +nilfs_ioctl_delete_checkpoint(struct inode *inode, struct file *filp, + unsigned int cmd, void __user *argp) +{ + struct inode *cpfile = NILFS_SB(inode->i_sb)->s_nilfs->ns_cpfile; + struct nilfs_transaction_info ti; + __u64 cno; + int ret; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + if (copy_from_user(&cno, argp, sizeof(cno))) + return -EFAULT; + + nilfs_transaction_begin(inode->i_sb, &ti, 0); + ret = nilfs_cpfile_delete_checkpoint(cpfile, cno); + nilfs_transaction_end(inode->i_sb, !ret); + return ret; +} + +static ssize_t +nilfs_ioctl_do_get_cpinfo(struct the_nilfs *nilfs, int index, int flags, + void *buf, size_t size, size_t nmembs) +{ + return nilfs_cpfile_get_cpinfo(nilfs->ns_cpfile, index, flags, buf, + nmembs); +} + +static int nilfs_ioctl_get_cpinfo(struct inode *inode, struct file *filp, + unsigned int cmd, void __user *argp) +{ + struct the_nilfs *nilfs = NILFS_SB(inode->i_sb)->s_nilfs; + struct nilfs_argv argv; + struct nilfs_transaction_info ti; + int ret; + + if (copy_from_user(&argv, argp, sizeof(argv))) + return -EFAULT; + + nilfs_transaction_begin(inode->i_sb, &ti, 0); + ret = nilfs_ioctl_wrap_copy(nilfs, &argv, _IOC_DIR(cmd), + nilfs_ioctl_do_get_cpinfo); + nilfs_transaction_end(inode->i_sb, 0); + + if (copy_to_user(argp, &argv, sizeof(argv))) + ret = -EFAULT; + return ret; +} + +static int nilfs_ioctl_get_cpstat(struct inode *inode, struct file *filp, + unsigned int cmd, void __user *argp) +{ + struct inode *cpfile = NILFS_SB(inode->i_sb)->s_nilfs->ns_cpfile; + struct nilfs_cpstat cpstat; + struct nilfs_transaction_info ti; + int ret; + + nilfs_transaction_begin(inode->i_sb, &ti, 0); + ret = nilfs_cpfile_get_stat(cpfile, &cpstat); + nilfs_transaction_end(inode->i_sb, 0); + if (ret < 0) + return ret; + + if (copy_to_user(argp, &cpstat, sizeof(cpstat))) + ret = -EFAULT; + return ret; +} + +static ssize_t +nilfs_ioctl_do_get_suinfo(struct the_nilfs *nilfs, int index, int flags, + void *buf, size_t size, size_t nmembs) +{ + return nilfs_sufile_get_suinfo(nilfs->ns_sufile, index, buf, nmembs); +} + +static int nilfs_ioctl_get_suinfo(struct inode *inode, struct file *filp, + unsigned int cmd, void __user *argp) +{ + struct the_nilfs *nilfs = NILFS_SB(inode->i_sb)->s_nilfs; + struct nilfs_argv argv; + struct nilfs_transaction_info ti; + int ret; + + if (copy_from_user(&argv, argp, sizeof(argv))) + return -EFAULT; + + nilfs_transaction_begin(inode->i_sb, &ti, 0); + ret = nilfs_ioctl_wrap_copy(nilfs, &argv, _IOC_DIR(cmd), + nilfs_ioctl_do_get_suinfo); + nilfs_transaction_end(inode->i_sb, 0); + + if (copy_to_user(argp, &argv, sizeof(argv))) + ret = -EFAULT; + return ret; +} + +static int nilfs_ioctl_get_sustat(struct inode *inode, struct file *filp, + unsigned int cmd, void __user *argp) +{ + struct inode *sufile = NILFS_SB(inode->i_sb)->s_nilfs->ns_sufile; + struct nilfs_sustat sustat; + struct nilfs_transaction_info ti; + int ret; + + nilfs_transaction_begin(inode->i_sb, &ti, 0); + ret = nilfs_sufile_get_stat(sufile, &sustat); + nilfs_transaction_end(inode->i_sb, 0); + if (ret < 0) + return ret; + + if (copy_to_user(argp, &sustat, sizeof(sustat))) + ret = -EFAULT; + return ret; +} + +static ssize_t +nilfs_ioctl_do_get_vinfo(struct the_nilfs *nilfs, int index, int flags, + void *buf, size_t size, size_t nmembs) +{ + return nilfs_dat_get_vinfo(nilfs_dat_inode(nilfs), buf, nmembs); +} + +static int nilfs_ioctl_get_vinfo(struct inode *inode, struct file *filp, + unsigned int cmd, void __user *argp) +{ + struct the_nilfs *nilfs = NILFS_SB(inode->i_sb)->s_nilfs; + struct nilfs_argv argv; + struct nilfs_transaction_info ti; + int ret; + + if (copy_from_user(&argv, argp, sizeof(argv))) + return -EFAULT; + + nilfs_transaction_begin(inode->i_sb, &ti, 0); + ret = nilfs_ioctl_wrap_copy(nilfs, &argv, _IOC_DIR(cmd), + nilfs_ioctl_do_get_vinfo); + nilfs_transaction_end(inode->i_sb, 0); + + if (copy_to_user(argp, &argv, sizeof(argv))) + ret = -EFAULT; + return ret; +} + +static ssize_t +nilfs_ioctl_do_get_bdescs(struct the_nilfs *nilfs, int index, int flags, + void *buf, size_t size, size_t nmembs) +{ + struct inode *dat = nilfs_dat_inode(nilfs); + struct nilfs_bmap *bmap = NILFS_I(dat)->i_bmap; + struct nilfs_bdesc *bdescs = buf; + int ret, i; + + for (i = 0; i < nmembs; i++) { + ret = nilfs_bmap_lookup_at_level(bmap, + bdescs[i].bd_offset, + bdescs[i].bd_level + 1, + &bdescs[i].bd_blocknr); + if (ret < 0) { + if (ret != -ENOENT) + return ret; + bdescs[i].bd_blocknr = 0; + } + } + return nmembs; +} + +static int nilfs_ioctl_get_bdescs(struct inode *inode, struct file *filp, + unsigned int cmd, void __user *argp) +{ + struct the_nilfs *nilfs = NILFS_SB(inode->i_sb)->s_nilfs; + struct nilfs_argv argv; + struct nilfs_transaction_info ti; + int ret; + + if (copy_from_user(&argv, argp, sizeof(argv))) + return -EFAULT; + + nilfs_transaction_begin(inode->i_sb, &ti, 0); + ret = nilfs_ioctl_wrap_copy(nilfs, &argv, _IOC_DIR(cmd), + nilfs_ioctl_do_get_bdescs); + nilfs_transaction_end(inode->i_sb, 0); + + if (copy_to_user(argp, &argv, sizeof(argv))) + ret = -EFAULT; + return ret; +} + +static int nilfs_ioctl_move_inode_block(struct inode *inode, + struct nilfs_vdesc *vdesc, + struct list_head *buffers) +{ + struct buffer_head *bh; + int ret; + + if (vdesc->vd_flags == 0) + ret = nilfs_gccache_submit_read_data( + inode, vdesc->vd_offset, vdesc->vd_blocknr, + vdesc->vd_vblocknr, &bh); + else + ret = nilfs_gccache_submit_read_node( + inode, vdesc->vd_blocknr, vdesc->vd_vblocknr, &bh); + + if (unlikely(ret < 0)) { + if (ret == -ENOENT) + printk(KERN_CRIT + "%s: invalid virtual block address (%s): " + "ino=%llu, cno=%llu, offset=%llu, " + "blocknr=%llu, vblocknr=%llu\n", + __func__, vdesc->vd_flags ? "node" : "data", + (unsigned long long)vdesc->vd_ino, + (unsigned long long)vdesc->vd_cno, + (unsigned long long)vdesc->vd_offset, + (unsigned long long)vdesc->vd_blocknr, + (unsigned long long)vdesc->vd_vblocknr); + return ret; + } + bh->b_private = vdesc; + list_add_tail(&bh->b_assoc_buffers, buffers); + return 0; +} + +static ssize_t +nilfs_ioctl_do_move_blocks(struct the_nilfs *nilfs, int index, int flags, + void *buf, size_t size, size_t nmembs) +{ + struct inode *inode; + struct nilfs_vdesc *vdesc; + struct buffer_head *bh, *n; + LIST_HEAD(buffers); + ino_t ino; + __u64 cno; + int i, ret; + + for (i = 0, vdesc = buf; i < nmembs; ) { + ino = vdesc->vd_ino; + cno = vdesc->vd_cno; + inode = nilfs_gc_iget(nilfs, ino, cno); + if (unlikely(inode == NULL)) { + ret = -ENOMEM; + goto failed; + } + do { + ret = nilfs_ioctl_move_inode_block(inode, vdesc, + &buffers); + if (unlikely(ret < 0)) + goto failed; + vdesc++; + } while (++i < nmembs && + vdesc->vd_ino == ino && vdesc->vd_cno == cno); + } + + list_for_each_entry_safe(bh, n, &buffers, b_assoc_buffers) { + ret = nilfs_gccache_wait_and_mark_dirty(bh); + if (unlikely(ret < 0)) { + if (ret == -EEXIST) { + vdesc = bh->b_private; + printk(KERN_CRIT + "%s: conflicting %s buffer: " + "ino=%llu, cno=%llu, offset=%llu, " + "blocknr=%llu, vblocknr=%llu\n", + __func__, + vdesc->vd_flags ? "node" : "data", + (unsigned long long)vdesc->vd_ino, + (unsigned long long)vdesc->vd_cno, + (unsigned long long)vdesc->vd_offset, + (unsigned long long)vdesc->vd_blocknr, + (unsigned long long)vdesc->vd_vblocknr); + } + goto failed; + } + list_del_init(&bh->b_assoc_buffers); + bh->b_private = NULL; + brelse(bh); + } + return nmembs; + + failed: + list_for_each_entry_safe(bh, n, &buffers, b_assoc_buffers) { + list_del_init(&bh->b_assoc_buffers); + bh->b_private = NULL; + brelse(bh); + } + return ret; +} + +static inline int nilfs_ioctl_move_blocks(struct the_nilfs *nilfs, + struct nilfs_argv *argv, + int dir) +{ + return nilfs_ioctl_wrap_copy(nilfs, argv, dir, + nilfs_ioctl_do_move_blocks); +} + +static ssize_t +nilfs_ioctl_do_delete_checkpoints(struct the_nilfs *nilfs, int index, + int flags, void *buf, size_t size, + size_t nmembs) +{ + struct inode *cpfile = nilfs->ns_cpfile; + struct nilfs_period *periods = buf; + int ret, i; + + for (i = 0; i < nmembs; i++) { + ret = nilfs_cpfile_delete_checkpoints( + cpfile, periods[i].p_start, periods[i].p_end); + if (ret < 0) + return ret; + } + return nmembs; +} + +static inline int nilfs_ioctl_delete_checkpoints(struct the_nilfs *nilfs, + struct nilfs_argv *argv, + int dir) +{ + return nilfs_ioctl_wrap_copy(nilfs, argv, dir, + nilfs_ioctl_do_delete_checkpoints); +} + +static ssize_t +nilfs_ioctl_do_free_vblocknrs(struct the_nilfs *nilfs, int index, int flags, + void *buf, size_t size, size_t nmembs) +{ + int ret = nilfs_dat_freev(nilfs_dat_inode(nilfs), buf, nmembs); + + return (ret < 0) ? ret : nmembs; +} + +static inline int nilfs_ioctl_free_vblocknrs(struct the_nilfs *nilfs, + struct nilfs_argv *argv, + int dir) +{ + return nilfs_ioctl_wrap_copy(nilfs, argv, dir, + nilfs_ioctl_do_free_vblocknrs); +} + +static ssize_t +nilfs_ioctl_do_mark_blocks_dirty(struct the_nilfs *nilfs, int index, int flags, + void *buf, size_t size, size_t nmembs) +{ + struct inode *dat = nilfs_dat_inode(nilfs); + struct nilfs_bmap *bmap = NILFS_I(dat)->i_bmap; + struct nilfs_bdesc *bdescs = buf; + int ret, i; + + for (i = 0; i < nmembs; i++) { + /* XXX: use macro or inline func to check liveness */ + ret = nilfs_bmap_lookup_at_level(bmap, + bdescs[i].bd_offset, + bdescs[i].bd_level + 1, + &bdescs[i].bd_blocknr); + if (ret < 0) { + if (ret != -ENOENT) + return ret; + bdescs[i].bd_blocknr = 0; + } + if (bdescs[i].bd_blocknr != bdescs[i].bd_oblocknr) + /* skip dead block */ + continue; + if (bdescs[i].bd_level == 0) { + ret = nilfs_mdt_mark_block_dirty(dat, + bdescs[i].bd_offset); + if (ret < 0) { + BUG_ON(ret == -ENOENT); + return ret; + } + } else { + ret = nilfs_bmap_mark(bmap, bdescs[i].bd_offset, + bdescs[i].bd_level); + if (ret < 0) { + BUG_ON(ret == -ENOENT); + return ret; + } + } + } + return nmembs; +} + +static inline int nilfs_ioctl_mark_blocks_dirty(struct the_nilfs *nilfs, + struct nilfs_argv *argv, + int dir) +{ + return nilfs_ioctl_wrap_copy(nilfs, argv, dir, + nilfs_ioctl_do_mark_blocks_dirty); +} + +static ssize_t +nilfs_ioctl_do_free_segments(struct the_nilfs *nilfs, int index, int flags, + void *buf, size_t size, size_t nmembs) +{ + struct nilfs_sb_info *sbi = nilfs_get_writer(nilfs); + int ret; + + BUG_ON(!sbi); + ret = nilfs_segctor_add_segments_to_be_freed( + NILFS_SC(sbi), buf, nmembs); + nilfs_put_writer(nilfs); + + return (ret < 0) ? ret : nmembs; +} + +static inline int nilfs_ioctl_free_segments(struct the_nilfs *nilfs, + struct nilfs_argv *argv, + int dir) +{ + return nilfs_ioctl_wrap_copy(nilfs, argv, dir, + nilfs_ioctl_do_free_segments); +} + +int nilfs_ioctl_prepare_clean_segments(struct the_nilfs *nilfs, + void __user *argp) +{ + struct nilfs_argv argv[5]; + int dir, ret; + + if (copy_from_user(argv, argp, sizeof(argv))) + return -EFAULT; + + dir = _IOC_WRITE; + ret = nilfs_ioctl_move_blocks(nilfs, &argv[0], dir); + if (ret < 0) + goto out_move_blks; + ret = nilfs_ioctl_delete_checkpoints(nilfs, &argv[1], dir); + if (ret < 0) + goto out_del_cps; + ret = nilfs_ioctl_free_vblocknrs(nilfs, &argv[2], dir); + if (ret < 0) + goto out_free_vbns; + ret = nilfs_ioctl_mark_blocks_dirty(nilfs, &argv[3], dir); + if (ret < 0) + goto out_free_vbns; + ret = nilfs_ioctl_free_segments(nilfs, &argv[4], dir); + if (ret < 0) + goto out_free_segs; + + return 0; + + out_free_segs: + BUG(); /* XXX: not implemented yet */ + out_free_vbns: + BUG();/* XXX: not implemented yet */ + out_del_cps: + BUG();/* XXX: not implemented yet */ + out_move_blks: + nilfs_remove_all_gcinode(nilfs); + return ret; +} + +static int nilfs_ioctl_clean_segments(struct inode *inode, struct file *filp, + unsigned int cmd, void __user *argp) +{ + int ret; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + ret = nilfs_clean_segments(inode->i_sb, argp); + clear_nilfs_cond_nongc_write(NILFS_SB(inode->i_sb)->s_nilfs); + return ret; +} + +static int nilfs_ioctl_test_cond(struct the_nilfs *nilfs, int cond) +{ + return (cond & NILFS_TIMEDWAIT_SEG_WRITE) && + nilfs_cond_nongc_write(nilfs); +} + +static void nilfs_ioctl_clear_cond(struct the_nilfs *nilfs, int cond) +{ + if (cond & NILFS_TIMEDWAIT_SEG_WRITE) + clear_nilfs_cond_nongc_write(nilfs); +} + +static int nilfs_ioctl_timedwait(struct inode *inode, struct file *filp, + unsigned int cmd, void __user *argp) +{ + struct the_nilfs *nilfs = NILFS_SB(inode->i_sb)->s_nilfs; + struct nilfs_wait_cond wc; + long ret; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + if (copy_from_user(&wc, argp, sizeof(wc))) + return -EFAULT; + + unlock_kernel(); + ret = wc.wc_flags ? + wait_event_interruptible_timeout( + nilfs->ns_cleanerd_wq, + nilfs_ioctl_test_cond(nilfs, wc.wc_cond), + timespec_to_jiffies(&wc.wc_timeout)) : + wait_event_interruptible( + nilfs->ns_cleanerd_wq, + nilfs_ioctl_test_cond(nilfs, wc.wc_cond)); + lock_kernel(); + nilfs_ioctl_clear_cond(nilfs, wc.wc_cond); + + if (ret > 0) { + jiffies_to_timespec(ret, &wc.wc_timeout); + if (copy_to_user(argp, &wc, sizeof(wc))) + return -EFAULT; + return 0; + } + if (ret != 0) + return -EINTR; + + return wc.wc_flags ? -ETIME : 0; +} + +static int nilfs_ioctl_sync(struct inode *inode, struct file *filp, + unsigned int cmd, void __user *argp) +{ + __u64 cno; + int ret; + + ret = nilfs_construct_segment(inode->i_sb); + if (ret < 0) + return ret; + + if (argp != NULL) { + cno = NILFS_SB(inode->i_sb)->s_nilfs->ns_cno - 1; + if (copy_to_user(argp, &cno, sizeof(cno))) + return -EFAULT; + } + return 0; +} + +int nilfs_ioctl(struct inode *inode, struct file *filp, unsigned int cmd, + unsigned long arg) +{ + void __user *argp = (void * __user *)arg; + + switch (cmd) { + case NILFS_IOCTL_CHANGE_CPMODE: + return nilfs_ioctl_change_cpmode(inode, filp, cmd, argp); + case NILFS_IOCTL_DELETE_CHECKPOINT: + return nilfs_ioctl_delete_checkpoint(inode, filp, cmd, argp); + case NILFS_IOCTL_GET_CPINFO: + return nilfs_ioctl_get_cpinfo(inode, filp, cmd, argp); + case NILFS_IOCTL_GET_CPSTAT: + return nilfs_ioctl_get_cpstat(inode, filp, cmd, argp); + case NILFS_IOCTL_GET_SUINFO: + return nilfs_ioctl_get_suinfo(inode, filp, cmd, argp); + case NILFS_IOCTL_GET_SUSTAT: + return nilfs_ioctl_get_sustat(inode, filp, cmd, argp); + case NILFS_IOCTL_GET_VINFO: + /* XXX: rename to ??? */ + return nilfs_ioctl_get_vinfo(inode, filp, cmd, argp); + case NILFS_IOCTL_GET_BDESCS: + return nilfs_ioctl_get_bdescs(inode, filp, cmd, argp); + case NILFS_IOCTL_CLEAN_SEGMENTS: + return nilfs_ioctl_clean_segments(inode, filp, cmd, argp); + case NILFS_IOCTL_TIMEDWAIT: + return nilfs_ioctl_timedwait(inode, filp, cmd, argp); + case NILFS_IOCTL_SYNC: + return nilfs_ioctl_sync(inode, filp, cmd, argp); + default: + return -ENOTTY; + } +} + +/* compat_ioctl */ +#ifdef CONFIG_COMPAT +#include + +static int nilfs_compat_locked_ioctl(struct inode *inode, struct file *filp, + unsigned int cmd, unsigned long arg) +{ + int ret; + + lock_kernel(); + ret = nilfs_ioctl(inode, filp, cmd, arg); + unlock_kernel(); + return ret; +} + +static int +nilfs_compat_ioctl_uargv32_to_uargv(struct nilfs_argv32 __user *uargv32, + struct nilfs_argv __user *uargv) +{ + compat_uptr_t base; + compat_size_t nmembs, size; + compat_int_t index, flags; + + if (get_user(base, &uargv32->v_base) || + put_user(compat_ptr(base), &uargv->v_base) || + get_user(nmembs, &uargv32->v_nmembs) || + put_user(nmembs, &uargv->v_nmembs) || + get_user(size, &uargv32->v_size) || + put_user(size, &uargv->v_size) || + get_user(index, &uargv32->v_index) || + put_user(index, &uargv->v_index) || + get_user(flags, &uargv32->v_flags) || + put_user(flags, &uargv->v_flags)) + return -EFAULT; + return 0; +} + +static int +nilfs_compat_ioctl_uargv_to_uargv32(struct nilfs_argv __user *uargv, + struct nilfs_argv32 __user *uargv32) +{ + size_t nmembs; + + if (get_user(nmembs, &uargv->v_nmembs) || + put_user(nmembs, &uargv32->v_nmembs)) + return -EFAULT; + return 0; +} + +static int +nilfs_compat_ioctl_get_by_argv(struct inode *inode, struct file *filp, + unsigned int cmd, unsigned long arg) +{ + struct nilfs_argv __user *uargv; + struct nilfs_argv32 __user *uargv32; + int ret; + + uargv = compat_alloc_user_space(sizeof(struct nilfs_argv)); + uargv32 = compat_ptr(arg); + ret = nilfs_compat_ioctl_uargv32_to_uargv(uargv32, uargv); + if (ret < 0) + return ret; + + ret = nilfs_compat_locked_ioctl(inode, filp, cmd, (unsigned long)uargv); + if (ret < 0) + return ret; + + return nilfs_compat_ioctl_uargv_to_uargv32(uargv, uargv32); +} + +static int +nilfs_compat_ioctl_change_cpmode(struct inode *inode, struct file *filp, + unsigned int cmd, unsigned long arg) +{ + struct nilfs_cpmode __user *ucpmode; + struct nilfs_cpmode32 __user *ucpmode32; + int mode; + + ucpmode = compat_alloc_user_space(sizeof(struct nilfs_cpmode)); + ucpmode32 = compat_ptr(arg); + if (copy_in_user(&ucpmode->cm_cno, &ucpmode32->cm_cno, + sizeof(__u64)) || + get_user(mode, &ucpmode32->cm_mode) || + put_user(mode, &ucpmode->cm_mode)) + return -EFAULT; + + return nilfs_compat_locked_ioctl( + inode, filp, cmd, (unsigned long)ucpmode); +} + + +static inline int +nilfs_compat_ioctl_delete_checkpoint(struct inode *inode, struct file *filp, + unsigned int cmd, unsigned long arg) +{ + return nilfs_compat_locked_ioctl(inode, filp, cmd, arg); +} + +static inline int +nilfs_compat_ioctl_get_cpinfo(struct inode *inode, struct file *filp, + unsigned int cmd, unsigned long arg) +{ + return nilfs_compat_ioctl_get_by_argv(inode, filp, cmd, arg); +} + +static inline int +nilfs_compat_ioctl_get_cpstat(struct inode *inode, struct file *filp, + unsigned int cmd, unsigned long arg) +{ + return nilfs_compat_locked_ioctl(inode, filp, cmd, arg); +} + +static inline int +nilfs_compat_ioctl_get_suinfo(struct inode *inode, struct file *filp, + unsigned int cmd, unsigned long arg) +{ + return nilfs_compat_ioctl_get_by_argv(inode, filp, cmd, arg); +} + +static int +nilfs_compat_ioctl_get_sustat(struct inode *inode, struct file *filp, + unsigned int cmd, unsigned long arg) +{ + struct nilfs_sustat __user *usustat; + struct nilfs_sustat32 __user *usustat32; + time_t ctime, nongc_ctime; + int ret; + + usustat = compat_alloc_user_space(sizeof(struct nilfs_sustat)); + ret = nilfs_compat_locked_ioctl(inode, filp, cmd, + (unsigned long)usustat); + if (ret < 0) + return ret; + + usustat32 = compat_ptr(arg); + if (copy_in_user(&usustat32->ss_nsegs, &usustat->ss_nsegs, + sizeof(__u64)) || + copy_in_user(&usustat32->ss_ncleansegs, &usustat->ss_ncleansegs, + sizeof(__u64)) || + copy_in_user(&usustat32->ss_ndirtysegs, &usustat->ss_ndirtysegs, + sizeof(__u64)) || + get_user(ctime, &usustat->ss_ctime) || + put_user(ctime, &usustat32->ss_ctime) || + get_user(nongc_ctime, &usustat->ss_nongc_ctime) || + put_user(nongc_ctime, &usustat32->ss_nongc_ctime)) + return -EFAULT; + return 0; +} + +static inline int +nilfs_compat_ioctl_get_vinfo(struct inode *inode, struct file *filp, + unsigned int cmd, unsigned long arg) +{ + return nilfs_compat_ioctl_get_by_argv(inode, filp, cmd, arg); +} + +static inline int +nilfs_compat_ioctl_get_bdescs(struct inode *inode, struct file *filp, + unsigned int cmd, unsigned long arg) +{ + return nilfs_compat_ioctl_get_by_argv(inode, filp, cmd, arg); +} + +static int +nilfs_compat_ioctl_clean_segments(struct inode *inode, struct file *filp, + unsigned int cmd, unsigned long arg) +{ + struct nilfs_argv __user *uargv; + struct nilfs_argv32 __user *uargv32; + int i, ret; + + uargv = compat_alloc_user_space(sizeof(struct nilfs_argv) * 5); + uargv32 = compat_ptr(arg); + for (i = 0; i < 5; i++) { + ret = nilfs_compat_ioctl_uargv32_to_uargv(&uargv32[i], + &uargv[i]); + if (ret < 0) + return ret; + } + return nilfs_compat_locked_ioctl( + inode, filp, cmd, (unsigned long)uargv); +} + +static int +nilfs_compat_ioctl_timedwait(struct inode *inode, struct file *filp, + unsigned int cmd, unsigned long arg) +{ + struct nilfs_wait_cond __user *uwcond; + struct nilfs_wait_cond32 __user *uwcond32; + struct timespec ts; + int cond, flags, ret; + + uwcond = compat_alloc_user_space(sizeof(struct nilfs_wait_cond)); + uwcond32 = compat_ptr(arg); + if (get_user(cond, &uwcond32->wc_cond) || + put_user(cond, &uwcond->wc_cond) || + get_user(flags, &uwcond32->wc_flags) || + put_user(flags, &uwcond->wc_flags) || + get_user(ts.tv_sec, &uwcond32->wc_timeout.tv_sec) || + get_user(ts.tv_nsec, &uwcond32->wc_timeout.tv_nsec) || + put_user(ts.tv_sec, &uwcond->wc_timeout.tv_sec) || + put_user(ts.tv_nsec, &uwcond->wc_timeout.tv_nsec)) + return -EFAULT; + + ret = nilfs_compat_locked_ioctl(inode, filp, cmd, + (unsigned long)uwcond); + if (ret < 0) + return ret; + + if (get_user(ts.tv_sec, &uwcond->wc_timeout.tv_sec) || + get_user(ts.tv_nsec, &uwcond->wc_timeout.tv_nsec) || + put_user(ts.tv_sec, &uwcond32->wc_timeout.tv_sec) || + put_user(ts.tv_nsec, &uwcond32->wc_timeout.tv_nsec)) + return -EFAULT; + + return 0; +} + +static int nilfs_compat_ioctl_sync(struct inode *inode, struct file *filp, + unsigned int cmd, unsigned long arg) +{ + return nilfs_compat_locked_ioctl(inode, filp, cmd, arg); +} + +long nilfs_compat_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) +{ + struct inode *inode = filp->f_dentry->d_inode; + + switch (cmd) { + case NILFS_IOCTL32_CHANGE_CPMODE: + return nilfs_compat_ioctl_change_cpmode( + inode, filp, NILFS_IOCTL_CHANGE_CPMODE, arg); + case NILFS_IOCTL_DELETE_CHECKPOINT: + return nilfs_compat_ioctl_delete_checkpoint( + inode, filp, cmd, arg); + case NILFS_IOCTL32_GET_CPINFO: + return nilfs_compat_ioctl_get_cpinfo( + inode, filp, NILFS_IOCTL_GET_CPINFO, arg); + case NILFS_IOCTL_GET_CPSTAT: + return nilfs_compat_ioctl_get_cpstat(inode, filp, cmd, arg); + case NILFS_IOCTL32_GET_SUINFO: + return nilfs_compat_ioctl_get_suinfo( + inode, filp, NILFS_IOCTL_GET_SUINFO, arg); + case NILFS_IOCTL32_GET_SUSTAT: + return nilfs_compat_ioctl_get_sustat( + inode, filp, NILFS_IOCTL_GET_SUSTAT, arg); + case NILFS_IOCTL32_GET_VINFO: + return nilfs_compat_ioctl_get_vinfo( + inode, filp, NILFS_IOCTL_GET_VINFO, arg); + case NILFS_IOCTL32_GET_BDESCS: + return nilfs_compat_ioctl_get_bdescs( + inode, filp, NILFS_IOCTL_GET_BDESCS, arg); + case NILFS_IOCTL32_CLEAN_SEGMENTS: + return nilfs_compat_ioctl_clean_segments( + inode, filp, NILFS_IOCTL_CLEAN_SEGMENTS, arg); + case NILFS_IOCTL32_TIMEDWAIT: + return nilfs_compat_ioctl_timedwait( + inode, filp, NILFS_IOCTL_TIMEDWAIT, arg); + case NILFS_IOCTL_SYNC: + return nilfs_compat_ioctl_sync(inode, filp, cmd, arg); + default: + return -ENOIOCTLCMD; + } +} +#endif -- cgit v1.2.3 From 0c4fb877641c5c72d4a3ce8921a256bfe44055c0 Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Mon, 6 Apr 2009 19:01:41 -0700 Subject: nilfs2: update makefile and Kconfig This adds a Makefile for the nilfs2 file system, and updates the makefile and Kconfig file in the file system directory. Signed-off-by: Ryusuke Konishi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/nilfs2/Makefile | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 fs/nilfs2/Makefile (limited to 'fs/nilfs2') diff --git a/fs/nilfs2/Makefile b/fs/nilfs2/Makefile new file mode 100644 index 00000000000..df3e62c1ddc --- /dev/null +++ b/fs/nilfs2/Makefile @@ -0,0 +1,5 @@ +obj-$(CONFIG_NILFS2_FS) += nilfs2.o +nilfs2-y := inode.o file.o dir.o super.o namei.o page.o mdt.o \ + btnode.o bmap.o btree.o direct.o dat.o recovery.o \ + the_nilfs.o segbuf.o segment.o cpfile.o sufile.o \ + ifile.o alloc.o gcinode.o ioctl.o gcdat.o -- cgit v1.2.3 From 3358b4aaa84fd4c1cdd64391875e92cbb8afeb29 Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Mon, 6 Apr 2009 19:01:43 -0700 Subject: nilfs2: fix problems of memory allocation in ioctl This is another patch for fixing the following problems of a memory copy function in nilfs2 ioctl: (1) It tries to allocate 128KB size of memory even for small objects. (2) Though the function repeatedly tries large memory allocations while reducing the size, GFP_NOWAIT flag is not specified. This increases the possibility of system memory shortage. (3) During the retries of (2), verbose warnings are printed because _GFP_NOWARN flag is not used for the kmalloc calls. The first patch was still doing large allocations by kmalloc which are repeatedly tried while reducing the size. Andi Kleen told me that using copy_from_user for large memory is not good from the viewpoint of preempt latency: On Fri, 12 Dec 2008 21:24:11 +0100, Andi Kleen wrote: > > In the current interface, each data item is copied twice: one is to > > the allocated memory from user space (via copy_from_user), and another > > For such large copies it is better to use multiple smaller (e.g. 4K) > copy user, that gives better real time preempt latencies. Each cfu has a > cond_resched(), but only one, not multiple times in the inner loop. He also advised me that: On Sun, 14 Dec 2008 16:13:27 +0100, Andi Kleen wrote: > Better would be if you could go to PAGE_SIZE. order 0 allocations > are typically the fastest / least likely to stall. > > Also in this case it's a good idea to use __get_free_pages() > directly, kmalloc tends to be become less efficient at larger > sizes. For the function in question, the size of buffer memory can be reduced since the buffer is repeatedly used for a number of small objects. On the other hand, it may incur large preempt latencies for larger buffer because a copy_from_user (and a copy_to_user) was applied only once each cycle. With that, this revision uses the order 0 allocations with __get_free_pages() to fix the original problems. Cc: Andi Kleen Signed-off-by: Ryusuke Konishi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/nilfs2/ioctl.c | 20 ++++++++------------ 1 file changed, 8 insertions(+), 12 deletions(-) (limited to 'fs/nilfs2') diff --git a/fs/nilfs2/ioctl.c b/fs/nilfs2/ioctl.c index 35ba60ea961..02e91e167ca 100644 --- a/fs/nilfs2/ioctl.c +++ b/fs/nilfs2/ioctl.c @@ -34,9 +34,6 @@ #include "dat.h" -#define KMALLOC_SIZE_MIN 4096 /* 4KB */ -#define KMALLOC_SIZE_MAX 131072 /* 128 KB */ - static int nilfs_ioctl_wrap_copy(struct the_nilfs *nilfs, struct nilfs_argv *argv, int dir, ssize_t (*dofunc)(struct the_nilfs *, @@ -44,21 +41,20 @@ static int nilfs_ioctl_wrap_copy(struct the_nilfs *nilfs, void *, size_t, size_t)) { void *buf; - size_t ksize, maxmembs, total, n; + size_t maxmembs, total, n; ssize_t nr; int ret, i; if (argv->v_nmembs == 0) return 0; - for (ksize = KMALLOC_SIZE_MAX; ksize >= KMALLOC_SIZE_MIN; ksize /= 2) { - buf = kmalloc(ksize, GFP_NOFS); - if (buf != NULL) - break; - } - if (ksize < KMALLOC_SIZE_MIN) + if (argv->v_size > PAGE_SIZE) + return -EINVAL; + + buf = (void *)__get_free_pages(GFP_NOFS, 0); + if (unlikely(!buf)) return -ENOMEM; - maxmembs = ksize / argv->v_size; + maxmembs = PAGE_SIZE / argv->v_size; ret = 0; total = 0; @@ -89,7 +85,7 @@ static int nilfs_ioctl_wrap_copy(struct the_nilfs *nilfs, } argv->v_nmembs = total; - kfree(buf); + free_pages((unsigned long)buf, 0); return ret; } -- cgit v1.2.3 From a2e7d2df82cafb76f76809ddf6e2caa8afe4f75e Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Mon, 6 Apr 2009 19:01:44 -0700 Subject: nilfs2: cleanup nilfs_clear_inode This will remove the following unnecessary locks and cleanup code in nilfs_clear_inode(): - unnecessary protection using nilfs_transaction_begin() and nilfs_transaction_end(). - cleanup code of i_dirty list field which is never chained when this function is called. - spinlock used when releasing i_bh field. Signed-off-by: Ryusuke Konishi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/nilfs2/super.c | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) (limited to 'fs/nilfs2') diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c index 10e82c00aed..268b563d215 100644 --- a/fs/nilfs2/super.c +++ b/fs/nilfs2/super.c @@ -184,8 +184,6 @@ static inline void nilfs_destroy_inode_cache(void) static void nilfs_clear_inode(struct inode *inode) { struct nilfs_inode_info *ii = NILFS_I(inode); - struct nilfs_transaction_info ti; - struct nilfs_sb_info *sbi = NILFS_SB(inode->i_sb); #ifdef CONFIG_NILFS_POSIX_ACL if (ii->i_acl && ii->i_acl != NILFS_ACL_NOT_CACHED) { @@ -200,21 +198,14 @@ static void nilfs_clear_inode(struct inode *inode) /* * Free resources allocated in nilfs_read_inode(), here. */ - nilfs_transaction_begin(inode->i_sb, &ti, 0); - - spin_lock(&sbi->s_inode_lock); - if (!list_empty(&ii->i_dirty)) - list_del_init(&ii->i_dirty); + BUG_ON(!list_empty(&ii->i_dirty)); brelse(ii->i_bh); ii->i_bh = NULL; - spin_unlock(&sbi->s_inode_lock); if (test_bit(NILFS_I_BMAP, &ii->i_state)) nilfs_bmap_clear(ii->i_bmap); nilfs_btnode_cache_clear(&ii->i_btnode_cache); - - nilfs_transaction_end(inode->i_sb, 0); } /** -- cgit v1.2.3 From 47420c799830d4676e544dbec56b2a7f787528f5 Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Mon, 6 Apr 2009 19:01:45 -0700 Subject: nilfs2: avoid double error caused by nilfs_transaction_end Pekka Enberg pointed out that double error handlings found after nilfs_transaction_end() can be avoided by separating abort operation: OK, I don't understand this. The only way nilfs_transaction_end() can fail is if we have NILFS_TI_SYNC set and we fail to construct the segment. But why do we want to construct a segment if we don't commit? I guess what I'm asking is why don't we have a separate nilfs_transaction_abort() function that can't fail for the erroneous case to avoid this double error value tracking thing? This does the separation and renames nilfs_transaction_end() to nilfs_transaction_commit() for clarification. Since, some calls of these functions were used just for exclusion control against the segment constructor, they are replaced with semaphore operations. Acked-by: Pekka Enberg Signed-off-by: Ryusuke Konishi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/nilfs2/inode.c | 23 +++++++++------- fs/nilfs2/ioctl.c | 58 +++++++++++++++++++++++----------------- fs/nilfs2/mdt.c | 5 +++- fs/nilfs2/namei.c | 74 +++++++++++++++++++++++++++++++++++---------------- fs/nilfs2/nilfs.h | 3 ++- fs/nilfs2/segment.c | 43 +++++++++++++++++++----------- fs/nilfs2/the_nilfs.h | 4 +-- 7 files changed, 135 insertions(+), 75 deletions(-) (limited to 'fs/nilfs2') diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c index 289d1798dec..4bf1e2c5bac 100644 --- a/fs/nilfs2/inode.c +++ b/fs/nilfs2/inode.c @@ -77,7 +77,6 @@ int nilfs_get_block(struct inode *inode, sector_t blkoff, goto out; err = nilfs_bmap_insert(ii->i_bmap, (unsigned long)blkoff, (unsigned long)bh_result); - nilfs_transaction_end(inode->i_sb, !err); if (unlikely(err != 0)) { if (err == -EEXIST) { /* @@ -100,8 +99,10 @@ int nilfs_get_block(struct inode *inode, sector_t blkoff, inode->i_ino); err = -EIO; } + nilfs_transaction_abort(inode->i_sb); goto out; } + nilfs_transaction_commit(inode->i_sb); /* never fails */ /* Error handling should be detailed */ set_buffer_new(bh_result); map_bh(bh_result, inode->i_sb, 0); /* dbn must be changed @@ -203,7 +204,7 @@ static int nilfs_write_begin(struct file *file, struct address_space *mapping, err = block_write_begin(file, mapping, pos, len, flags, pagep, fsdata, nilfs_get_block); if (unlikely(err)) - nilfs_transaction_end(inode->i_sb, 0); + nilfs_transaction_abort(inode->i_sb); return err; } @@ -221,7 +222,7 @@ static int nilfs_write_end(struct file *file, struct address_space *mapping, copied = generic_write_end(file, mapping, pos, len, copied, page, fsdata); nilfs_set_file_dirty(NILFS_SB(inode->i_sb), inode, nr_dirty); - err = nilfs_transaction_end(inode->i_sb, 1); + err = nilfs_transaction_commit(inode->i_sb); return err ? : copied; } @@ -641,7 +642,7 @@ void nilfs_truncate(struct inode *inode) nilfs_set_transaction_flag(NILFS_TI_SYNC); nilfs_set_file_dirty(NILFS_SB(sb), inode, 0); - nilfs_transaction_end(sb, 1); + nilfs_transaction_commit(sb); /* May construct a logical segment and may fail in sync mode. But truncate has no return value. */ } @@ -669,7 +670,7 @@ void nilfs_delete_inode(struct inode *inode) /* nilfs_free_inode() marks inode buffer dirty */ if (IS_SYNC(inode)) nilfs_set_transaction_flag(NILFS_TI_SYNC); - nilfs_transaction_end(sb, 1); + nilfs_transaction_commit(sb); /* May construct a logical segment and may fail in sync mode. But delete_inode has no return value. */ } @@ -679,7 +680,7 @@ int nilfs_setattr(struct dentry *dentry, struct iattr *iattr) struct nilfs_transaction_info ti; struct inode *inode = dentry->d_inode; struct super_block *sb = inode->i_sb; - int err, err2; + int err; err = inode_change_ok(inode, iattr); if (err) @@ -691,8 +692,12 @@ int nilfs_setattr(struct dentry *dentry, struct iattr *iattr) err = inode_setattr(inode, iattr); if (!err && (iattr->ia_valid & ATTR_MODE)) err = nilfs_acl_chmod(inode); - err2 = nilfs_transaction_end(sb, 1); - return err ? : err2; + if (likely(!err)) + err = nilfs_transaction_commit(sb); + else + nilfs_transaction_abort(sb); + + return err; } int nilfs_load_inode_block(struct nilfs_sb_info *sbi, struct inode *inode, @@ -817,5 +822,5 @@ void nilfs_dirty_inode(struct inode *inode) nilfs_transaction_begin(inode->i_sb, &ti, 0); if (likely(inode->i_ino != NILFS_SKETCH_INO)) nilfs_mark_inode_dirty(inode); - nilfs_transaction_end(inode->i_sb, 1); /* never fails */ + nilfs_transaction_commit(inode->i_sb); /* never fails */ } diff --git a/fs/nilfs2/ioctl.c b/fs/nilfs2/ioctl.c index 02e91e167ca..5ce06a01c7e 100644 --- a/fs/nilfs2/ioctl.c +++ b/fs/nilfs2/ioctl.c @@ -105,7 +105,11 @@ static int nilfs_ioctl_change_cpmode(struct inode *inode, struct file *filp, nilfs_transaction_begin(inode->i_sb, &ti, 0); ret = nilfs_cpfile_change_cpmode( cpfile, cpmode.cm_cno, cpmode.cm_mode); - nilfs_transaction_end(inode->i_sb, !ret); + if (unlikely(ret < 0)) { + nilfs_transaction_abort(inode->i_sb); + return ret; + } + nilfs_transaction_commit(inode->i_sb); /* never fails */ return ret; } @@ -125,7 +129,11 @@ nilfs_ioctl_delete_checkpoint(struct inode *inode, struct file *filp, nilfs_transaction_begin(inode->i_sb, &ti, 0); ret = nilfs_cpfile_delete_checkpoint(cpfile, cno); - nilfs_transaction_end(inode->i_sb, !ret); + if (unlikely(ret < 0)) { + nilfs_transaction_abort(inode->i_sb); + return ret; + } + nilfs_transaction_commit(inode->i_sb); /* never fails */ return ret; } @@ -142,16 +150,17 @@ static int nilfs_ioctl_get_cpinfo(struct inode *inode, struct file *filp, { struct the_nilfs *nilfs = NILFS_SB(inode->i_sb)->s_nilfs; struct nilfs_argv argv; - struct nilfs_transaction_info ti; int ret; if (copy_from_user(&argv, argp, sizeof(argv))) return -EFAULT; - nilfs_transaction_begin(inode->i_sb, &ti, 0); + down_read(&nilfs->ns_segctor_sem); ret = nilfs_ioctl_wrap_copy(nilfs, &argv, _IOC_DIR(cmd), nilfs_ioctl_do_get_cpinfo); - nilfs_transaction_end(inode->i_sb, 0); + up_read(&nilfs->ns_segctor_sem); + if (ret < 0) + return ret; if (copy_to_user(argp, &argv, sizeof(argv))) ret = -EFAULT; @@ -161,14 +170,13 @@ static int nilfs_ioctl_get_cpinfo(struct inode *inode, struct file *filp, static int nilfs_ioctl_get_cpstat(struct inode *inode, struct file *filp, unsigned int cmd, void __user *argp) { - struct inode *cpfile = NILFS_SB(inode->i_sb)->s_nilfs->ns_cpfile; + struct the_nilfs *nilfs = NILFS_SB(inode->i_sb)->s_nilfs; struct nilfs_cpstat cpstat; - struct nilfs_transaction_info ti; int ret; - nilfs_transaction_begin(inode->i_sb, &ti, 0); - ret = nilfs_cpfile_get_stat(cpfile, &cpstat); - nilfs_transaction_end(inode->i_sb, 0); + down_read(&nilfs->ns_segctor_sem); + ret = nilfs_cpfile_get_stat(nilfs->ns_cpfile, &cpstat); + up_read(&nilfs->ns_segctor_sem); if (ret < 0) return ret; @@ -189,16 +197,17 @@ static int nilfs_ioctl_get_suinfo(struct inode *inode, struct file *filp, { struct the_nilfs *nilfs = NILFS_SB(inode->i_sb)->s_nilfs; struct nilfs_argv argv; - struct nilfs_transaction_info ti; int ret; if (copy_from_user(&argv, argp, sizeof(argv))) return -EFAULT; - nilfs_transaction_begin(inode->i_sb, &ti, 0); + down_read(&nilfs->ns_segctor_sem); ret = nilfs_ioctl_wrap_copy(nilfs, &argv, _IOC_DIR(cmd), nilfs_ioctl_do_get_suinfo); - nilfs_transaction_end(inode->i_sb, 0); + up_read(&nilfs->ns_segctor_sem); + if (ret < 0) + return ret; if (copy_to_user(argp, &argv, sizeof(argv))) ret = -EFAULT; @@ -208,14 +217,13 @@ static int nilfs_ioctl_get_suinfo(struct inode *inode, struct file *filp, static int nilfs_ioctl_get_sustat(struct inode *inode, struct file *filp, unsigned int cmd, void __user *argp) { - struct inode *sufile = NILFS_SB(inode->i_sb)->s_nilfs->ns_sufile; + struct the_nilfs *nilfs = NILFS_SB(inode->i_sb)->s_nilfs; struct nilfs_sustat sustat; - struct nilfs_transaction_info ti; int ret; - nilfs_transaction_begin(inode->i_sb, &ti, 0); - ret = nilfs_sufile_get_stat(sufile, &sustat); - nilfs_transaction_end(inode->i_sb, 0); + down_read(&nilfs->ns_segctor_sem); + ret = nilfs_sufile_get_stat(nilfs->ns_sufile, &sustat); + up_read(&nilfs->ns_segctor_sem); if (ret < 0) return ret; @@ -236,16 +244,17 @@ static int nilfs_ioctl_get_vinfo(struct inode *inode, struct file *filp, { struct the_nilfs *nilfs = NILFS_SB(inode->i_sb)->s_nilfs; struct nilfs_argv argv; - struct nilfs_transaction_info ti; int ret; if (copy_from_user(&argv, argp, sizeof(argv))) return -EFAULT; - nilfs_transaction_begin(inode->i_sb, &ti, 0); + down_read(&nilfs->ns_segctor_sem); ret = nilfs_ioctl_wrap_copy(nilfs, &argv, _IOC_DIR(cmd), nilfs_ioctl_do_get_vinfo); - nilfs_transaction_end(inode->i_sb, 0); + up_read(&nilfs->ns_segctor_sem); + if (ret < 0) + return ret; if (copy_to_user(argp, &argv, sizeof(argv))) ret = -EFAULT; @@ -280,16 +289,17 @@ static int nilfs_ioctl_get_bdescs(struct inode *inode, struct file *filp, { struct the_nilfs *nilfs = NILFS_SB(inode->i_sb)->s_nilfs; struct nilfs_argv argv; - struct nilfs_transaction_info ti; int ret; if (copy_from_user(&argv, argp, sizeof(argv))) return -EFAULT; - nilfs_transaction_begin(inode->i_sb, &ti, 0); + down_read(&nilfs->ns_segctor_sem); ret = nilfs_ioctl_wrap_copy(nilfs, &argv, _IOC_DIR(cmd), nilfs_ioctl_do_get_bdescs); - nilfs_transaction_end(inode->i_sb, 0); + up_read(&nilfs->ns_segctor_sem); + if (ret < 0) + return ret; if (copy_to_user(argp, &argv, sizeof(argv))) ret = -EFAULT; diff --git a/fs/nilfs2/mdt.c b/fs/nilfs2/mdt.c index 6ab84757861..e0a632b86fe 100644 --- a/fs/nilfs2/mdt.c +++ b/fs/nilfs2/mdt.c @@ -123,7 +123,10 @@ static int nilfs_mdt_create_block(struct inode *inode, unsigned long block, brelse(bh); failed_unlock: - nilfs_transaction_end(sb, !err); + if (likely(!err)) + err = nilfs_transaction_commit(sb); + else + nilfs_transaction_abort(sb); if (writer) nilfs_put_writer(nilfs); out: diff --git a/fs/nilfs2/namei.c b/fs/nilfs2/namei.c index 95d1b29bff3..df70dadb336 100644 --- a/fs/nilfs2/namei.c +++ b/fs/nilfs2/namei.c @@ -109,7 +109,7 @@ static int nilfs_create(struct inode *dir, struct dentry *dentry, int mode, { struct inode *inode; struct nilfs_transaction_info ti; - int err, err2; + int err; err = nilfs_transaction_begin(dir->i_sb, &ti, 1); if (err) @@ -123,8 +123,12 @@ static int nilfs_create(struct inode *dir, struct dentry *dentry, int mode, mark_inode_dirty(inode); err = nilfs_add_nondir(dentry, inode); } - err2 = nilfs_transaction_end(dir->i_sb, !err); - return err ? : err2; + if (!err) + err = nilfs_transaction_commit(dir->i_sb); + else + nilfs_transaction_abort(dir->i_sb); + + return err; } static int @@ -132,7 +136,7 @@ nilfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t rdev) { struct inode *inode; struct nilfs_transaction_info ti; - int err, err2; + int err; if (!new_valid_dev(rdev)) return -EINVAL; @@ -147,8 +151,12 @@ nilfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t rdev) mark_inode_dirty(inode); err = nilfs_add_nondir(dentry, inode); } - err2 = nilfs_transaction_end(dir->i_sb, !err); - return err ? : err2; + if (!err) + err = nilfs_transaction_commit(dir->i_sb); + else + nilfs_transaction_abort(dir->i_sb); + + return err; } static int nilfs_symlink(struct inode *dir, struct dentry *dentry, @@ -158,7 +166,7 @@ static int nilfs_symlink(struct inode *dir, struct dentry *dentry, struct super_block *sb = dir->i_sb; unsigned l = strlen(symname)+1; struct inode *inode; - int err, err2; + int err; if (l > sb->s_blocksize) return -ENAMETOOLONG; @@ -184,8 +192,12 @@ static int nilfs_symlink(struct inode *dir, struct dentry *dentry, err = nilfs_add_nondir(dentry, inode); out: - err2 = nilfs_transaction_end(dir->i_sb, !err); - return err ? : err2; + if (!err) + err = nilfs_transaction_commit(dir->i_sb); + else + nilfs_transaction_abort(dir->i_sb); + + return err; out_fail: inode_dec_link_count(inode); @@ -198,7 +210,7 @@ static int nilfs_link(struct dentry *old_dentry, struct inode *dir, { struct inode *inode = old_dentry->d_inode; struct nilfs_transaction_info ti; - int err, err2; + int err; if (inode->i_nlink >= NILFS_LINK_MAX) return -EMLINK; @@ -212,15 +224,19 @@ static int nilfs_link(struct dentry *old_dentry, struct inode *dir, atomic_inc(&inode->i_count); err = nilfs_add_nondir(dentry, inode); - err2 = nilfs_transaction_end(dir->i_sb, !err); - return err ? : err2; + if (!err) + err = nilfs_transaction_commit(dir->i_sb); + else + nilfs_transaction_abort(dir->i_sb); + + return err; } static int nilfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) { struct inode *inode; struct nilfs_transaction_info ti; - int err, err2; + int err; if (dir->i_nlink >= NILFS_LINK_MAX) return -EMLINK; @@ -252,8 +268,12 @@ static int nilfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) d_instantiate(dentry, inode); out: - err2 = nilfs_transaction_end(dir->i_sb, !err); - return err ? : err2; + if (!err) + err = nilfs_transaction_commit(dir->i_sb); + else + nilfs_transaction_abort(dir->i_sb); + + return err; out_fail: inode_dec_link_count(inode); @@ -270,7 +290,7 @@ static int nilfs_unlink(struct inode *dir, struct dentry *dentry) struct nilfs_dir_entry *de; struct page *page; struct nilfs_transaction_info ti; - int err, err2; + int err; err = nilfs_transaction_begin(dir->i_sb, &ti, 0); if (err) @@ -300,15 +320,19 @@ static int nilfs_unlink(struct inode *dir, struct dentry *dentry) inode_dec_link_count(inode); err = 0; out: - err2 = nilfs_transaction_end(dir->i_sb, !err); - return err ? : err2; + if (!err) + err = nilfs_transaction_commit(dir->i_sb); + else + nilfs_transaction_abort(dir->i_sb); + + return err; } static int nilfs_rmdir(struct inode *dir, struct dentry *dentry) { struct inode *inode = dentry->d_inode; struct nilfs_transaction_info ti; - int err, err2; + int err; err = nilfs_transaction_begin(dir->i_sb, &ti, 0); if (err) @@ -323,8 +347,12 @@ static int nilfs_rmdir(struct inode *dir, struct dentry *dentry) inode_dec_link_count(dir); } } - err2 = nilfs_transaction_end(dir->i_sb, !err); - return err ? : err2; + if (!err) + err = nilfs_transaction_commit(dir->i_sb); + else + nilfs_transaction_abort(dir->i_sb); + + return err; } static int nilfs_rename(struct inode *old_dir, struct dentry *old_dentry, @@ -404,7 +432,7 @@ static int nilfs_rename(struct inode *old_dir, struct dentry *old_dentry, inode_dec_link_count(old_dir); } - err = nilfs_transaction_end(old_dir->i_sb, 1); + err = nilfs_transaction_commit(old_dir->i_sb); return err; out_dir: @@ -416,7 +444,7 @@ out_old: kunmap(old_page); page_cache_release(old_page); out: - nilfs_transaction_end(old_dir->i_sb, 0); + nilfs_transaction_abort(old_dir->i_sb); return err; } diff --git a/fs/nilfs2/nilfs.h b/fs/nilfs2/nilfs.h index 17458ad4a80..48c070676cc 100644 --- a/fs/nilfs2/nilfs.h +++ b/fs/nilfs2/nilfs.h @@ -166,7 +166,8 @@ struct nilfs_transaction_info { int nilfs_transaction_begin(struct super_block *, struct nilfs_transaction_info *, int); -int nilfs_transaction_end(struct super_block *, int); +int nilfs_transaction_commit(struct super_block *); +void nilfs_transaction_abort(struct super_block *); static inline void nilfs_set_transaction_flag(unsigned int flag) { diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c index ad65a737aff..6d66c5cb7b5 100644 --- a/fs/nilfs2/segment.c +++ b/fs/nilfs2/segment.c @@ -163,8 +163,8 @@ static int nilfs_prepare_segment_lock(struct nilfs_transaction_info *ti) else { /* * If journal_info field is occupied by other FS, - * we save it and restore on nilfs_transaction_end(). - * But this should never happen. + * it is saved and will be restored on + * nilfs_transaction_commit(). */ printk(KERN_WARNING "NILFS warning: journal info from a different " @@ -195,7 +195,7 @@ static int nilfs_prepare_segment_lock(struct nilfs_transaction_info *ti) * * nilfs_transaction_begin() acquires a reader/writer semaphore, called * the segment semaphore, to make a segment construction and write tasks - * exclusive. The function is used with nilfs_transaction_end() in pairs. + * exclusive. The function is used with nilfs_transaction_commit() in pairs. * The region enclosed by these two functions can be nested. To avoid a * deadlock, the semaphore is only acquired or released in the outermost call. * @@ -212,8 +212,6 @@ static int nilfs_prepare_segment_lock(struct nilfs_transaction_info *ti) * * %-ENOMEM - Insufficient memory available. * - * %-ERESTARTSYS - Interrupted - * * %-ENOSPC - No space left on device */ int nilfs_transaction_begin(struct super_block *sb, @@ -248,16 +246,17 @@ int nilfs_transaction_begin(struct super_block *sb, } /** - * nilfs_transaction_end - end indivisible file operations. + * nilfs_transaction_commit - commit indivisible file operations. * @sb: super block - * @commit: commit flag (0 for no change) * - * nilfs_transaction_end() releases the read semaphore which is - * acquired by nilfs_transaction_begin(). Its releasing is only done - * in outermost call of this function. If the nilfs_transaction_info - * was allocated dynamically, it is given back to a slab cache. + * nilfs_transaction_commit() releases the read semaphore which is + * acquired by nilfs_transaction_begin(). This is only performed + * in outermost call of this function. If a commit flag is set, + * nilfs_transaction_commit() sets a timer to start the segment + * constructor. If a sync flag is set, it starts construction + * directly. */ -int nilfs_transaction_end(struct super_block *sb, int commit) +int nilfs_transaction_commit(struct super_block *sb) { struct nilfs_transaction_info *ti = current->journal_info; struct nilfs_sb_info *sbi; @@ -265,9 +264,7 @@ int nilfs_transaction_end(struct super_block *sb, int commit) int err = 0; BUG_ON(ti == NULL || ti->ti_magic != NILFS_TI_MAGIC); - - if (commit) - ti->ti_flags |= NILFS_TI_COMMIT; + ti->ti_flags |= NILFS_TI_COMMIT; if (ti->ti_count > 0) { ti->ti_count--; return 0; @@ -291,6 +288,22 @@ int nilfs_transaction_end(struct super_block *sb, int commit) return err; } +void nilfs_transaction_abort(struct super_block *sb) +{ + struct nilfs_transaction_info *ti = current->journal_info; + + BUG_ON(ti == NULL || ti->ti_magic != NILFS_TI_MAGIC); + if (ti->ti_count > 0) { + ti->ti_count--; + return; + } + up_read(&NILFS_SB(sb)->s_nilfs->ns_segctor_sem); + + current->journal_info = ti->ti_save; + if (ti->ti_flags & NILFS_TI_DYNAMIC_ALLOC) + kmem_cache_free(nilfs_transaction_cachep, ti); +} + void nilfs_relax_pressure_in_lock(struct super_block *sb) { struct nilfs_sb_info *sbi = NILFS_SB(sb); diff --git a/fs/nilfs2/the_nilfs.h b/fs/nilfs2/the_nilfs.h index dee8d83e054..9cd3c113f05 100644 --- a/fs/nilfs2/the_nilfs.h +++ b/fs/nilfs2/the_nilfs.h @@ -112,8 +112,8 @@ struct the_nilfs { /* * Following fields are dedicated to a writable FS-instance. * Except for the period seeking checkpoint, code outside the segment - * constructor must lock a segment semaphore with transaction_begin() - * and transaction_end(), when accessing these fields. + * constructor must lock a segment semaphore while accessing these + * fields. * The writable FS-instance is sole during a lifetime of the_nilfs. */ u64 ns_seg_seq; -- cgit v1.2.3 From 047180f2d7bf95ff5699ca04da639a7556e23435 Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Mon, 6 Apr 2009 19:01:45 -0700 Subject: nilfs2: insert explanations in gcinode file The file gcinode.c gives buffer cache functions for on-disk blocks moved in garbage collection. Joern Engel has suggested inserting its explanations in the source file (Message-ID: <20080917144146.GD8750@logfs.org> and <20080917224953.GB14644@logfs.org>). This follows the comment. Cc: Joern Engel Signed-off-by: Ryusuke Konishi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/nilfs2/gcinode.c | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) (limited to 'fs/nilfs2') diff --git a/fs/nilfs2/gcinode.c b/fs/nilfs2/gcinode.c index 00139526632..77615aabc7e 100644 --- a/fs/nilfs2/gcinode.c +++ b/fs/nilfs2/gcinode.c @@ -1,5 +1,5 @@ /* - * gcinode.c - NILFS memory inode for GC + * gcinode.c - dummy inodes to buffer blocks for garbage collection * * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation. * @@ -22,6 +22,25 @@ * Revised by Ryusuke Konishi . * */ +/* + * This file adds the cache of on-disk blocks to be moved in garbage + * collection. The disk blocks are held with dummy inodes (called + * gcinodes), and this file provides lookup function of the dummy + * inodes and their buffer read function. + * + * Since NILFS2 keeps up multiple checkpoints/snapshots accross GC, it + * has to treat blocks that belong to a same file but have different + * checkpoint numbers. To avoid interference among generations, dummy + * inodes are managed separatly from actual inodes, and their lookup + * function (nilfs_gc_iget) is designed to be specified with a + * checkpoint number argument as well as an inode number. + * + * Buffers and pages held by the dummy inodes will be released each + * time after they are copied to a new log. Dirty blocks made on the + * current generation and the blocks to be moved by GC never overlap + * because the dirty blocks make a new generation; they rather must be + * written individually. + */ #include #include -- cgit v1.2.3 From b028fcfc4cd198a6aa1ffcfb872073ccc1db3459 Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Mon, 6 Apr 2009 19:01:47 -0700 Subject: nilfs2: fix gc failure on volumes keeping numerous snapshots This resolves the following failure of nilfs2 cleaner daemon: nilfs_cleanerd[20670]: cannot clean segments: No such file or directory nilfs_cleanerd[20670]: shutdown When creating thousands of snapshots, the cleaner daemon had rarely died as above due to an error returned from the kernel code. After applying the recent patch which fixed memory allocation problems in ioctl (Message-Id: <20081215.155840.105124170.ryusuke@osrg.net>), the problem gets more frequent. It turned out to be a bug of nilfs_ioctl_wrap_copy function and one of its callback routines to read out information of snapshots; if the nilfs_ioctl_wrap_copy function divided a large read request into multiple requests, the second and later requests have failed since a restart position on snapshot meta data was not properly set forward. It's a deficiency of the callback interface that cannot pass the restart position among multiple requests. This patch fixes the issue by allowing nilfs_ioctl_wrap_copy and snapshot read functions to exchange a position argument. Signed-off-by: Ryusuke Konishi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/nilfs2/cpfile.c | 24 ++++++++++++++---------- fs/nilfs2/cpfile.h | 2 +- fs/nilfs2/ioctl.c | 38 +++++++++++++++++++++++--------------- 3 files changed, 38 insertions(+), 26 deletions(-) (limited to 'fs/nilfs2') diff --git a/fs/nilfs2/cpfile.c b/fs/nilfs2/cpfile.c index 82462acd06e..a4c9550fd77 100644 --- a/fs/nilfs2/cpfile.c +++ b/fs/nilfs2/cpfile.c @@ -422,20 +422,20 @@ static ssize_t nilfs_cpfile_do_get_cpinfo(struct inode *cpfile, __u64 cno, return ret; } -static ssize_t nilfs_cpfile_do_get_ssinfo(struct inode *cpfile, __u64 cno, +static ssize_t nilfs_cpfile_do_get_ssinfo(struct inode *cpfile, __u64 *cnop, struct nilfs_cpinfo *ci, size_t nci) { struct buffer_head *bh; struct nilfs_cpfile_header *header; struct nilfs_checkpoint *cp; - __u64 curr, next; + __u64 curr = *cnop, next; unsigned long curr_blkoff, next_blkoff; void *kaddr; int n, ret; down_read(&NILFS_MDT(cpfile)->mi_sem); - if (cno == 0) { + if (curr == 0) { ret = nilfs_cpfile_get_header_block(cpfile, &bh); if (ret < 0) goto out; @@ -448,8 +448,11 @@ static ssize_t nilfs_cpfile_do_get_ssinfo(struct inode *cpfile, __u64 cno, ret = 0; goto out; } - } else - curr = cno; + } else if (unlikely(curr == ~(__u64)0)) { + ret = 0; + goto out; + } + curr_blkoff = nilfs_cpfile_get_blkoff(cpfile, curr); ret = nilfs_cpfile_get_checkpoint_block(cpfile, curr, 0, &bh); if (ret < 0) @@ -461,7 +464,7 @@ static ssize_t nilfs_cpfile_do_get_ssinfo(struct inode *cpfile, __u64 cno, nilfs_cpfile_checkpoint_to_cpinfo(cpfile, cp, &ci[n]); next = le64_to_cpu(cp->cp_snapshot_list.ssl_next); if (next == 0) { - curr = next; + curr = ~(__u64)0; /* Terminator */ n++; break; } @@ -480,6 +483,7 @@ static ssize_t nilfs_cpfile_do_get_ssinfo(struct inode *cpfile, __u64 cno, } kunmap_atomic(kaddr, KM_USER0); brelse(bh); + *cnop = curr; ret = n; out: @@ -494,15 +498,15 @@ static ssize_t nilfs_cpfile_do_get_ssinfo(struct inode *cpfile, __u64 cno, * @ci: * @nci: */ -ssize_t nilfs_cpfile_get_cpinfo(struct inode *cpfile, - __u64 cno, int mode, + +ssize_t nilfs_cpfile_get_cpinfo(struct inode *cpfile, __u64 *cnop, int mode, struct nilfs_cpinfo *ci, size_t nci) { switch (mode) { case NILFS_CHECKPOINT: - return nilfs_cpfile_do_get_cpinfo(cpfile, cno, ci, nci); + return nilfs_cpfile_do_get_cpinfo(cpfile, *cnop, ci, nci); case NILFS_SNAPSHOT: - return nilfs_cpfile_do_get_ssinfo(cpfile, cno, ci, nci); + return nilfs_cpfile_do_get_ssinfo(cpfile, cnop, ci, nci); default: return -EINVAL; } diff --git a/fs/nilfs2/cpfile.h b/fs/nilfs2/cpfile.h index f097e90fcb2..1a8a1008c34 100644 --- a/fs/nilfs2/cpfile.h +++ b/fs/nilfs2/cpfile.h @@ -39,7 +39,7 @@ int nilfs_cpfile_delete_checkpoint(struct inode *, __u64); int nilfs_cpfile_change_cpmode(struct inode *, __u64, int); int nilfs_cpfile_is_snapshot(struct inode *, __u64); int nilfs_cpfile_get_stat(struct inode *, struct nilfs_cpstat *); -ssize_t nilfs_cpfile_get_cpinfo(struct inode *, __u64, int, +ssize_t nilfs_cpfile_get_cpinfo(struct inode *, __u64 *, int, struct nilfs_cpinfo *, size_t); #endif /* _NILFS_CPFILE_H */ diff --git a/fs/nilfs2/ioctl.c b/fs/nilfs2/ioctl.c index 5ce06a01c7e..d9e3990f958 100644 --- a/fs/nilfs2/ioctl.c +++ b/fs/nilfs2/ioctl.c @@ -37,13 +37,14 @@ static int nilfs_ioctl_wrap_copy(struct the_nilfs *nilfs, struct nilfs_argv *argv, int dir, ssize_t (*dofunc)(struct the_nilfs *, - int, int, + __u64 *, int, void *, size_t, size_t)) { void *buf; size_t maxmembs, total, n; ssize_t nr; int ret, i; + __u64 pos, ppos; if (argv->v_nmembs == 0) return 0; @@ -58,6 +59,7 @@ static int nilfs_ioctl_wrap_copy(struct the_nilfs *nilfs, ret = 0; total = 0; + pos = argv->v_index; for (i = 0; i < argv->v_nmembs; i += n) { n = (argv->v_nmembs - i < maxmembs) ? argv->v_nmembs - i : maxmembs; @@ -68,8 +70,9 @@ static int nilfs_ioctl_wrap_copy(struct the_nilfs *nilfs, ret = -EFAULT; break; } - nr = (*dofunc)(nilfs, argv->v_index + i, argv->v_flags, buf, - argv->v_size, n); + ppos = pos; + nr = (*dofunc)(nilfs, &pos, argv->v_flags, buf, argv->v_size, + n); if (nr < 0) { ret = nr; break; @@ -82,6 +85,10 @@ static int nilfs_ioctl_wrap_copy(struct the_nilfs *nilfs, break; } total += nr; + if ((size_t)nr < n) + break; + if (pos == ppos) + pos += n; } argv->v_nmembs = total; @@ -138,10 +145,10 @@ nilfs_ioctl_delete_checkpoint(struct inode *inode, struct file *filp, } static ssize_t -nilfs_ioctl_do_get_cpinfo(struct the_nilfs *nilfs, int index, int flags, +nilfs_ioctl_do_get_cpinfo(struct the_nilfs *nilfs, __u64 *posp, int flags, void *buf, size_t size, size_t nmembs) { - return nilfs_cpfile_get_cpinfo(nilfs->ns_cpfile, index, flags, buf, + return nilfs_cpfile_get_cpinfo(nilfs->ns_cpfile, posp, flags, buf, nmembs); } @@ -186,10 +193,10 @@ static int nilfs_ioctl_get_cpstat(struct inode *inode, struct file *filp, } static ssize_t -nilfs_ioctl_do_get_suinfo(struct the_nilfs *nilfs, int index, int flags, +nilfs_ioctl_do_get_suinfo(struct the_nilfs *nilfs, __u64 *posp, int flags, void *buf, size_t size, size_t nmembs) { - return nilfs_sufile_get_suinfo(nilfs->ns_sufile, index, buf, nmembs); + return nilfs_sufile_get_suinfo(nilfs->ns_sufile, *posp, buf, nmembs); } static int nilfs_ioctl_get_suinfo(struct inode *inode, struct file *filp, @@ -233,7 +240,7 @@ static int nilfs_ioctl_get_sustat(struct inode *inode, struct file *filp, } static ssize_t -nilfs_ioctl_do_get_vinfo(struct the_nilfs *nilfs, int index, int flags, +nilfs_ioctl_do_get_vinfo(struct the_nilfs *nilfs, __u64 *posp, int flags, void *buf, size_t size, size_t nmembs) { return nilfs_dat_get_vinfo(nilfs_dat_inode(nilfs), buf, nmembs); @@ -262,7 +269,7 @@ static int nilfs_ioctl_get_vinfo(struct inode *inode, struct file *filp, } static ssize_t -nilfs_ioctl_do_get_bdescs(struct the_nilfs *nilfs, int index, int flags, +nilfs_ioctl_do_get_bdescs(struct the_nilfs *nilfs, __u64 *posp, int flags, void *buf, size_t size, size_t nmembs) { struct inode *dat = nilfs_dat_inode(nilfs); @@ -341,7 +348,7 @@ static int nilfs_ioctl_move_inode_block(struct inode *inode, } static ssize_t -nilfs_ioctl_do_move_blocks(struct the_nilfs *nilfs, int index, int flags, +nilfs_ioctl_do_move_blocks(struct the_nilfs *nilfs, __u64 *posp, int flags, void *buf, size_t size, size_t nmembs) { struct inode *inode; @@ -413,7 +420,7 @@ static inline int nilfs_ioctl_move_blocks(struct the_nilfs *nilfs, } static ssize_t -nilfs_ioctl_do_delete_checkpoints(struct the_nilfs *nilfs, int index, +nilfs_ioctl_do_delete_checkpoints(struct the_nilfs *nilfs, __u64 *posp, int flags, void *buf, size_t size, size_t nmembs) { @@ -439,7 +446,7 @@ static inline int nilfs_ioctl_delete_checkpoints(struct the_nilfs *nilfs, } static ssize_t -nilfs_ioctl_do_free_vblocknrs(struct the_nilfs *nilfs, int index, int flags, +nilfs_ioctl_do_free_vblocknrs(struct the_nilfs *nilfs, __u64 *posp, int flags, void *buf, size_t size, size_t nmembs) { int ret = nilfs_dat_freev(nilfs_dat_inode(nilfs), buf, nmembs); @@ -456,8 +463,9 @@ static inline int nilfs_ioctl_free_vblocknrs(struct the_nilfs *nilfs, } static ssize_t -nilfs_ioctl_do_mark_blocks_dirty(struct the_nilfs *nilfs, int index, int flags, - void *buf, size_t size, size_t nmembs) +nilfs_ioctl_do_mark_blocks_dirty(struct the_nilfs *nilfs, __u64 *posp, + int flags, void *buf, size_t size, + size_t nmembs) { struct inode *dat = nilfs_dat_inode(nilfs); struct nilfs_bmap *bmap = NILFS_I(dat)->i_bmap; @@ -506,7 +514,7 @@ static inline int nilfs_ioctl_mark_blocks_dirty(struct the_nilfs *nilfs, } static ssize_t -nilfs_ioctl_do_free_segments(struct the_nilfs *nilfs, int index, int flags, +nilfs_ioctl_do_free_segments(struct the_nilfs *nilfs, __u64 *posp, int flags, void *buf, size_t size, size_t nmembs) { struct nilfs_sb_info *sbi = nilfs_get_writer(nilfs); -- cgit v1.2.3 From 7fa10d20012296300dfe645cb3e628a4e9a0d5ef Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Mon, 6 Apr 2009 19:01:48 -0700 Subject: nilfs2: fix improper return values of nilfs_get_cpinfo ioctl A few tool developers gave me requests for fixing inconvenient return value of nilfs_get_cpinfo() ioctl; if the requested mode is NILFS_SNAPSHOT and the specified start entry is not a snapshot, the ioctl unnaturally returns one as the number of acquired snapshot item. In addition, the ioctl function returns an ENOENT error for checkpoints within blocks deleted by garbage collection. These behaviors require corrections for programs which enumerate snapshots. This resolves the inconvenience by changing the return values to zero for the above cases. Signed-off-by: Ryusuke Konishi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/nilfs2/cpfile.c | 30 ++++++++++++++++++------------ 1 file changed, 18 insertions(+), 12 deletions(-) (limited to 'fs/nilfs2') diff --git a/fs/nilfs2/cpfile.c b/fs/nilfs2/cpfile.c index a4c9550fd77..50dff147744 100644 --- a/fs/nilfs2/cpfile.c +++ b/fs/nilfs2/cpfile.c @@ -431,7 +431,7 @@ static ssize_t nilfs_cpfile_do_get_ssinfo(struct inode *cpfile, __u64 *cnop, __u64 curr = *cnop, next; unsigned long curr_blkoff, next_blkoff; void *kaddr; - int n, ret; + int n = 0, ret; down_read(&NILFS_MDT(cpfile)->mi_sem); @@ -455,27 +455,33 @@ static ssize_t nilfs_cpfile_do_get_ssinfo(struct inode *cpfile, __u64 *cnop, curr_blkoff = nilfs_cpfile_get_blkoff(cpfile, curr); ret = nilfs_cpfile_get_checkpoint_block(cpfile, curr, 0, &bh); - if (ret < 0) + if (unlikely(ret < 0)) { + if (ret == -ENOENT) + ret = 0; /* No snapshots (started from a hole block) */ goto out; + } kaddr = kmap_atomic(bh->b_page, KM_USER0); - for (n = 0; n < nci; n++) { - cp = nilfs_cpfile_block_get_checkpoint( - cpfile, curr, bh, kaddr); - nilfs_cpfile_checkpoint_to_cpinfo(cpfile, cp, &ci[n]); - next = le64_to_cpu(cp->cp_snapshot_list.ssl_next); - if (next == 0) { - curr = ~(__u64)0; /* Terminator */ - n++; + while (n < nci) { + cp = nilfs_cpfile_block_get_checkpoint(cpfile, curr, bh, kaddr); + curr = ~(__u64)0; /* Terminator */ + if (unlikely(nilfs_checkpoint_invalid(cp) || + !nilfs_checkpoint_snapshot(cp))) break; - } + nilfs_cpfile_checkpoint_to_cpinfo(cpfile, cp, &ci[n++]); + next = le64_to_cpu(cp->cp_snapshot_list.ssl_next); + if (next == 0) + break; /* reach end of the snapshot list */ + next_blkoff = nilfs_cpfile_get_blkoff(cpfile, next); if (curr_blkoff != next_blkoff) { kunmap_atomic(kaddr, KM_USER0); brelse(bh); ret = nilfs_cpfile_get_checkpoint_block(cpfile, next, 0, &bh); - if (ret < 0) + if (unlikely(ret < 0)) { + WARN_ON(ret == -ENOENT); goto out; + } kaddr = kmap_atomic(bh->b_page, KM_USER0); } curr = next; -- cgit v1.2.3 From 8acfbf0939e98cc77dab94c24899c9930ddd1e13 Mon Sep 17 00:00:00 2001 From: Pekka Enberg Date: Mon, 6 Apr 2009 19:01:49 -0700 Subject: nilfs2: clean up indirect function calling conventions This cleans up the strange indirect function calling convention used in nilfs to follow the normal kernel coding style. Signed-off-by: Pekka Enberg Acked-by: Ryusuke Konishi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/nilfs2/bmap.c | 46 ++++++++++++++++++++-------------------- fs/nilfs2/btree.c | 62 +++++++++++++++++++++++++++--------------------------- fs/nilfs2/direct.c | 28 ++++++++++++------------ fs/nilfs2/ioctl.c | 2 +- 4 files changed, 69 insertions(+), 69 deletions(-) (limited to 'fs/nilfs2') diff --git a/fs/nilfs2/bmap.c b/fs/nilfs2/bmap.c index 6fe72addc9c..24638e059bf 100644 --- a/fs/nilfs2/bmap.c +++ b/fs/nilfs2/bmap.c @@ -38,11 +38,11 @@ int nilfs_bmap_lookup_at_level(struct nilfs_bmap *bmap, __u64 key, int level, int ret; down_read(&bmap->b_sem); - ret = (*bmap->b_ops->bop_lookup)(bmap, key, level, ptrp); + ret = bmap->b_ops->bop_lookup(bmap, key, level, ptrp); if (ret < 0) goto out; if (bmap->b_pops->bpop_translate != NULL) { - ret = (*bmap->b_pops->bpop_translate)(bmap, *ptrp, &ptr); + ret = bmap->b_pops->bpop_translate(bmap, *ptrp, &ptr); if (ret < 0) goto out; *ptrp = ptr; @@ -94,9 +94,9 @@ static int nilfs_bmap_do_insert(struct nilfs_bmap *bmap, __u64 key, __u64 ptr) int ret, n; if (bmap->b_ops->bop_check_insert != NULL) { - ret = (*bmap->b_ops->bop_check_insert)(bmap, key); + ret = bmap->b_ops->bop_check_insert(bmap, key); if (ret > 0) { - n = (*bmap->b_ops->bop_gather_data)( + n = bmap->b_ops->bop_gather_data( bmap, keys, ptrs, NILFS_BMAP_SMALL_HIGH + 1); if (n < 0) return n; @@ -111,7 +111,7 @@ static int nilfs_bmap_do_insert(struct nilfs_bmap *bmap, __u64 key, __u64 ptr) return ret; } - return (*bmap->b_ops->bop_insert)(bmap, key, ptr); + return bmap->b_ops->bop_insert(bmap, key, ptr); } /** @@ -151,9 +151,9 @@ static int nilfs_bmap_do_delete(struct nilfs_bmap *bmap, __u64 key) int ret, n; if (bmap->b_ops->bop_check_delete != NULL) { - ret = (*bmap->b_ops->bop_check_delete)(bmap, key); + ret = bmap->b_ops->bop_check_delete(bmap, key); if (ret > 0) { - n = (*bmap->b_ops->bop_gather_data)( + n = bmap->b_ops->bop_gather_data( bmap, keys, ptrs, NILFS_BMAP_LARGE_LOW + 1); if (n < 0) return n; @@ -168,7 +168,7 @@ static int nilfs_bmap_do_delete(struct nilfs_bmap *bmap, __u64 key) return ret; } - return (*bmap->b_ops->bop_delete)(bmap, key); + return bmap->b_ops->bop_delete(bmap, key); } int nilfs_bmap_last_key(struct nilfs_bmap *bmap, unsigned long *key) @@ -177,7 +177,7 @@ int nilfs_bmap_last_key(struct nilfs_bmap *bmap, unsigned long *key) int ret; down_read(&bmap->b_sem); - ret = (*bmap->b_ops->bop_last_key)(bmap, &lastkey); + ret = bmap->b_ops->bop_last_key(bmap, &lastkey); if (!ret) *key = lastkey; up_read(&bmap->b_sem); @@ -216,7 +216,7 @@ static int nilfs_bmap_do_truncate(struct nilfs_bmap *bmap, unsigned long key) __u64 lastkey; int ret; - ret = (*bmap->b_ops->bop_last_key)(bmap, &lastkey); + ret = bmap->b_ops->bop_last_key(bmap, &lastkey); if (ret < 0) { if (ret == -ENOENT) ret = 0; @@ -227,7 +227,7 @@ static int nilfs_bmap_do_truncate(struct nilfs_bmap *bmap, unsigned long key) ret = nilfs_bmap_do_delete(bmap, lastkey); if (ret < 0) return ret; - ret = (*bmap->b_ops->bop_last_key)(bmap, &lastkey); + ret = bmap->b_ops->bop_last_key(bmap, &lastkey); if (ret < 0) { if (ret == -ENOENT) ret = 0; @@ -272,7 +272,7 @@ void nilfs_bmap_clear(struct nilfs_bmap *bmap) { down_write(&bmap->b_sem); if (bmap->b_ops->bop_clear != NULL) - (*bmap->b_ops->bop_clear)(bmap); + bmap->b_ops->bop_clear(bmap); up_write(&bmap->b_sem); } @@ -296,7 +296,7 @@ int nilfs_bmap_propagate(struct nilfs_bmap *bmap, struct buffer_head *bh) int ret; down_write(&bmap->b_sem); - ret = (*bmap->b_ops->bop_propagate)(bmap, bh); + ret = bmap->b_ops->bop_propagate(bmap, bh); up_write(&bmap->b_sem); return ret; } @@ -310,7 +310,7 @@ void nilfs_bmap_lookup_dirty_buffers(struct nilfs_bmap *bmap, struct list_head *listp) { if (bmap->b_ops->bop_lookup_dirty_buffers != NULL) - (*bmap->b_ops->bop_lookup_dirty_buffers)(bmap, listp); + bmap->b_ops->bop_lookup_dirty_buffers(bmap, listp); } /** @@ -340,7 +340,7 @@ int nilfs_bmap_assign(struct nilfs_bmap *bmap, int ret; down_write(&bmap->b_sem); - ret = (*bmap->b_ops->bop_assign)(bmap, bh, blocknr, binfo); + ret = bmap->b_ops->bop_assign(bmap, bh, blocknr, binfo); up_write(&bmap->b_sem); return ret; } @@ -369,7 +369,7 @@ int nilfs_bmap_mark(struct nilfs_bmap *bmap, __u64 key, int level) return 0; down_write(&bmap->b_sem); - ret = (*bmap->b_ops->bop_mark)(bmap, key, level); + ret = bmap->b_ops->bop_mark(bmap, key, level); up_write(&bmap->b_sem); return ret; } @@ -572,12 +572,12 @@ int nilfs_bmap_prepare_update(struct nilfs_bmap *bmap, { int ret; - ret = (*bmap->b_pops->bpop_prepare_end_ptr)(bmap, oldreq); + ret = bmap->b_pops->bpop_prepare_end_ptr(bmap, oldreq); if (ret < 0) return ret; - ret = (*bmap->b_pops->bpop_prepare_alloc_ptr)(bmap, newreq); + ret = bmap->b_pops->bpop_prepare_alloc_ptr(bmap, newreq); if (ret < 0) - (*bmap->b_pops->bpop_abort_end_ptr)(bmap, oldreq); + bmap->b_pops->bpop_abort_end_ptr(bmap, oldreq); return ret; } @@ -586,16 +586,16 @@ void nilfs_bmap_commit_update(struct nilfs_bmap *bmap, union nilfs_bmap_ptr_req *oldreq, union nilfs_bmap_ptr_req *newreq) { - (*bmap->b_pops->bpop_commit_end_ptr)(bmap, oldreq); - (*bmap->b_pops->bpop_commit_alloc_ptr)(bmap, newreq); + bmap->b_pops->bpop_commit_end_ptr(bmap, oldreq); + bmap->b_pops->bpop_commit_alloc_ptr(bmap, newreq); } void nilfs_bmap_abort_update(struct nilfs_bmap *bmap, union nilfs_bmap_ptr_req *oldreq, union nilfs_bmap_ptr_req *newreq) { - (*bmap->b_pops->bpop_abort_end_ptr)(bmap, oldreq); - (*bmap->b_pops->bpop_abort_alloc_ptr)(bmap, newreq); + bmap->b_pops->bpop_abort_end_ptr(bmap, oldreq); + bmap->b_pops->bpop_abort_alloc_ptr(bmap, newreq); } static int nilfs_bmap_translate_v(const struct nilfs_bmap *bmap, __u64 ptr, diff --git a/fs/nilfs2/btree.c b/fs/nilfs2/btree.c index 893f0190a61..53f0d4c31cb 100644 --- a/fs/nilfs2/btree.c +++ b/fs/nilfs2/btree.c @@ -902,9 +902,9 @@ static int nilfs_btree_prepare_insert(struct nilfs_btree *btree, /* allocate a new ptr for data block */ if (btree->bt_ops->btop_find_target != NULL) path[level].bp_newreq.bpr_ptr = - (*btree->bt_ops->btop_find_target)(btree, path, key); + btree->bt_ops->btop_find_target(btree, path, key); - ret = (*btree->bt_bmap.b_pops->bpop_prepare_alloc_ptr)( + ret = btree->bt_bmap.b_pops->bpop_prepare_alloc_ptr( &btree->bt_bmap, &path[level].bp_newreq); if (ret < 0) goto err_out_data; @@ -965,7 +965,7 @@ static int nilfs_btree_prepare_insert(struct nilfs_btree *btree, /* split */ path[level].bp_newreq.bpr_ptr = path[level - 1].bp_newreq.bpr_ptr + 1; - ret = (*btree->bt_bmap.b_pops->bpop_prepare_alloc_ptr)( + ret = btree->bt_bmap.b_pops->bpop_prepare_alloc_ptr( &btree->bt_bmap, &path[level].bp_newreq); if (ret < 0) goto err_out_child_node; @@ -997,7 +997,7 @@ static int nilfs_btree_prepare_insert(struct nilfs_btree *btree, /* grow */ path[level].bp_newreq.bpr_ptr = path[level - 1].bp_newreq.bpr_ptr + 1; - ret = (*btree->bt_bmap.b_pops->bpop_prepare_alloc_ptr)( + ret = btree->bt_bmap.b_pops->bpop_prepare_alloc_ptr( &btree->bt_bmap, &path[level].bp_newreq); if (ret < 0) goto err_out_child_node; @@ -1026,17 +1026,17 @@ static int nilfs_btree_prepare_insert(struct nilfs_btree *btree, /* error */ err_out_curr_node: - (*btree->bt_bmap.b_pops->bpop_abort_alloc_ptr)(&btree->bt_bmap, - &path[level].bp_newreq); + btree->bt_bmap.b_pops->bpop_abort_alloc_ptr(&btree->bt_bmap, + &path[level].bp_newreq); err_out_child_node: for (level--; level > NILFS_BTREE_LEVEL_DATA; level--) { nilfs_bmap_delete_block(&btree->bt_bmap, path[level].bp_sib_bh); - (*btree->bt_bmap.b_pops->bpop_abort_alloc_ptr)( + btree->bt_bmap.b_pops->bpop_abort_alloc_ptr( &btree->bt_bmap, &path[level].bp_newreq); } - (*btree->bt_bmap.b_pops->bpop_abort_alloc_ptr)(&btree->bt_bmap, + btree->bt_bmap.b_pops->bpop_abort_alloc_ptr(&btree->bt_bmap, &path[level].bp_newreq); err_out_data: *levelp = level; @@ -1053,14 +1053,14 @@ static void nilfs_btree_commit_insert(struct nilfs_btree *btree, set_buffer_nilfs_volatile((struct buffer_head *)((unsigned long)ptr)); ptr = path[NILFS_BTREE_LEVEL_DATA].bp_newreq.bpr_ptr; if (btree->bt_ops->btop_set_target != NULL) - (*btree->bt_ops->btop_set_target)(btree, key, ptr); + btree->bt_ops->btop_set_target(btree, key, ptr); for (level = NILFS_BTREE_LEVEL_NODE_MIN; level <= maxlevel; level++) { if (btree->bt_bmap.b_pops->bpop_commit_alloc_ptr != NULL) { - (*btree->bt_bmap.b_pops->bpop_commit_alloc_ptr)( + btree->bt_bmap.b_pops->bpop_commit_alloc_ptr( &btree->bt_bmap, &path[level - 1].bp_newreq); } - (*path[level].bp_op)(btree, path, level, &key, &ptr); + path[level].bp_op(btree, path, level, &key, &ptr); } if (!nilfs_bmap_dirty(&btree->bt_bmap)) @@ -1304,7 +1304,7 @@ static int nilfs_btree_prepare_delete(struct nilfs_btree *btree, nilfs_btree_node_get_ptr(btree, node, path[level].bp_index); if (btree->bt_bmap.b_pops->bpop_prepare_end_ptr != NULL) { - ret = (*btree->bt_bmap.b_pops->bpop_prepare_end_ptr)( + ret = btree->bt_bmap.b_pops->bpop_prepare_end_ptr( &btree->bt_bmap, &path[level].bp_oldreq); if (ret < 0) goto err_out_child_node; @@ -1385,7 +1385,7 @@ static int nilfs_btree_prepare_delete(struct nilfs_btree *btree, path[level].bp_oldreq.bpr_ptr = nilfs_btree_node_get_ptr(btree, node, path[level].bp_index); if (btree->bt_bmap.b_pops->bpop_prepare_end_ptr != NULL) { - ret = (*btree->bt_bmap.b_pops->bpop_prepare_end_ptr)( + ret = btree->bt_bmap.b_pops->bpop_prepare_end_ptr( &btree->bt_bmap, &path[level].bp_oldreq); if (ret < 0) goto err_out_child_node; @@ -1402,13 +1402,13 @@ static int nilfs_btree_prepare_delete(struct nilfs_btree *btree, /* error */ err_out_curr_node: if (btree->bt_bmap.b_pops->bpop_abort_end_ptr != NULL) - (*btree->bt_bmap.b_pops->bpop_abort_end_ptr)( + btree->bt_bmap.b_pops->bpop_abort_end_ptr( &btree->bt_bmap, &path[level].bp_oldreq); err_out_child_node: for (level--; level >= NILFS_BTREE_LEVEL_NODE_MIN; level--) { nilfs_bmap_put_block(&btree->bt_bmap, path[level].bp_sib_bh); if (btree->bt_bmap.b_pops->bpop_abort_end_ptr != NULL) - (*btree->bt_bmap.b_pops->bpop_abort_end_ptr)( + btree->bt_bmap.b_pops->bpop_abort_end_ptr( &btree->bt_bmap, &path[level].bp_oldreq); } *levelp = level; @@ -1424,9 +1424,9 @@ static void nilfs_btree_commit_delete(struct nilfs_btree *btree, for (level = NILFS_BTREE_LEVEL_NODE_MIN; level <= maxlevel; level++) { if (btree->bt_bmap.b_pops->bpop_commit_end_ptr != NULL) - (*btree->bt_bmap.b_pops->bpop_commit_end_ptr)( + btree->bt_bmap.b_pops->bpop_commit_end_ptr( &btree->bt_bmap, &path[level].bp_oldreq); - (*path[level].bp_op)(btree, path, level, NULL, NULL); + path[level].bp_op(btree, path, level, NULL, NULL); } if (!nilfs_bmap_dirty(&btree->bt_bmap)) @@ -1589,8 +1589,8 @@ nilfs_btree_prepare_convert_and_insert(struct nilfs_bmap *bmap, __u64 key, /* cannot find near ptr */ if (btree->bt_ops->btop_find_target != NULL) dreq->bpr_ptr - = (*btree->bt_ops->btop_find_target)(btree, NULL, key); - ret = (*bmap->b_pops->bpop_prepare_alloc_ptr)(bmap, dreq); + = btree->bt_ops->btop_find_target(btree, NULL, key); + ret = bmap->b_pops->bpop_prepare_alloc_ptr(bmap, dreq); if (ret < 0) return ret; @@ -1598,7 +1598,7 @@ nilfs_btree_prepare_convert_and_insert(struct nilfs_bmap *bmap, __u64 key, stats->bs_nblocks++; if (nreq != NULL) { nreq->bpr_ptr = dreq->bpr_ptr + 1; - ret = (*bmap->b_pops->bpop_prepare_alloc_ptr)(bmap, nreq); + ret = bmap->b_pops->bpop_prepare_alloc_ptr(bmap, nreq); if (ret < 0) goto err_out_dreq; @@ -1615,9 +1615,9 @@ nilfs_btree_prepare_convert_and_insert(struct nilfs_bmap *bmap, __u64 key, /* error */ err_out_nreq: - (*bmap->b_pops->bpop_abort_alloc_ptr)(bmap, nreq); + bmap->b_pops->bpop_abort_alloc_ptr(bmap, nreq); err_out_dreq: - (*bmap->b_pops->bpop_abort_alloc_ptr)(bmap, dreq); + bmap->b_pops->bpop_abort_alloc_ptr(bmap, dreq); stats->bs_nblocks = 0; return ret; @@ -1638,7 +1638,7 @@ nilfs_btree_commit_convert_and_insert(struct nilfs_bmap *bmap, /* free resources */ if (bmap->b_ops->bop_clear != NULL) - (*bmap->b_ops->bop_clear)(bmap); + bmap->b_ops->bop_clear(bmap); /* ptr must be a pointer to a buffer head. */ set_buffer_nilfs_volatile((struct buffer_head *)((unsigned long)ptr)); @@ -1648,8 +1648,8 @@ nilfs_btree_commit_convert_and_insert(struct nilfs_bmap *bmap, nilfs_btree_init(bmap, low, high); if (nreq != NULL) { if (bmap->b_pops->bpop_commit_alloc_ptr != NULL) { - (*bmap->b_pops->bpop_commit_alloc_ptr)(bmap, dreq); - (*bmap->b_pops->bpop_commit_alloc_ptr)(bmap, nreq); + bmap->b_pops->bpop_commit_alloc_ptr(bmap, dreq); + bmap->b_pops->bpop_commit_alloc_ptr(bmap, nreq); } /* create child node at level 1 */ @@ -1673,7 +1673,7 @@ nilfs_btree_commit_convert_and_insert(struct nilfs_bmap *bmap, 2, 1, &keys[0], &tmpptr); } else { if (bmap->b_pops->bpop_commit_alloc_ptr != NULL) - (*bmap->b_pops->bpop_commit_alloc_ptr)(bmap, dreq); + bmap->b_pops->bpop_commit_alloc_ptr(bmap, dreq); /* create root node at level 1 */ node = nilfs_btree_get_root(btree); @@ -1686,7 +1686,7 @@ nilfs_btree_commit_convert_and_insert(struct nilfs_bmap *bmap, } if (btree->bt_ops->btop_set_target != NULL) - (*btree->bt_ops->btop_set_target)(btree, key, dreq->bpr_ptr); + btree->bt_ops->btop_set_target(btree, key, dreq->bpr_ptr); } /** @@ -1937,7 +1937,7 @@ static int nilfs_btree_propagate(const struct nilfs_bmap *bmap, goto out; } - ret = (*btree->bt_ops->btop_propagate)(btree, path, level, bh); + ret = btree->bt_ops->btop_propagate(btree, path, level, bh); out: nilfs_btree_clear_path(btree, path); @@ -2073,11 +2073,11 @@ static int nilfs_btree_assign_v(struct nilfs_btree *btree, ptr = nilfs_btree_node_get_ptr(btree, parent, path[level + 1].bp_index); req.bpr_ptr = ptr; - ret = (*btree->bt_bmap.b_pops->bpop_prepare_start_ptr)(&btree->bt_bmap, + ret = btree->bt_bmap.b_pops->bpop_prepare_start_ptr(&btree->bt_bmap, &req); if (ret < 0) return ret; - (*btree->bt_bmap.b_pops->bpop_commit_start_ptr)(&btree->bt_bmap, + btree->bt_bmap.b_pops->bpop_commit_start_ptr(&btree->bt_bmap, &req, blocknr); key = nilfs_btree_node_get_key(btree, parent, @@ -2121,7 +2121,7 @@ static int nilfs_btree_assign(struct nilfs_bmap *bmap, goto out; } - ret = (*btree->bt_ops->btop_assign)(btree, path, level, bh, + ret = btree->bt_ops->btop_assign(btree, path, level, bh, blocknr, binfo); out: diff --git a/fs/nilfs2/direct.c b/fs/nilfs2/direct.c index 303d7f1982f..e3ec2485008 100644 --- a/fs/nilfs2/direct.c +++ b/fs/nilfs2/direct.c @@ -91,8 +91,8 @@ static int nilfs_direct_prepare_insert(struct nilfs_direct *direct, int ret; if (direct->d_ops->dop_find_target != NULL) - req->bpr_ptr = (*direct->d_ops->dop_find_target)(direct, key); - ret = (*direct->d_bmap.b_pops->bpop_prepare_alloc_ptr)(&direct->d_bmap, + req->bpr_ptr = direct->d_ops->dop_find_target(direct, key); + ret = direct->d_bmap.b_pops->bpop_prepare_alloc_ptr(&direct->d_bmap, req); if (ret < 0) return ret; @@ -112,7 +112,7 @@ static void nilfs_direct_commit_insert(struct nilfs_direct *direct, set_buffer_nilfs_volatile(bh); if (direct->d_bmap.b_pops->bpop_commit_alloc_ptr != NULL) - (*direct->d_bmap.b_pops->bpop_commit_alloc_ptr)( + direct->d_bmap.b_pops->bpop_commit_alloc_ptr( &direct->d_bmap, req); nilfs_direct_set_ptr(direct, key, req->bpr_ptr); @@ -120,7 +120,7 @@ static void nilfs_direct_commit_insert(struct nilfs_direct *direct, nilfs_bmap_set_dirty(&direct->d_bmap); if (direct->d_ops->dop_set_target != NULL) - (*direct->d_ops->dop_set_target)(direct, key, req->bpr_ptr); + direct->d_ops->dop_set_target(direct, key, req->bpr_ptr); } static int nilfs_direct_insert(struct nilfs_bmap *bmap, __u64 key, __u64 ptr) @@ -154,7 +154,7 @@ static int nilfs_direct_prepare_delete(struct nilfs_direct *direct, if (direct->d_bmap.b_pops->bpop_prepare_end_ptr != NULL) { req->bpr_ptr = nilfs_direct_get_ptr(direct, key); - ret = (*direct->d_bmap.b_pops->bpop_prepare_end_ptr)( + ret = direct->d_bmap.b_pops->bpop_prepare_end_ptr( &direct->d_bmap, req); if (ret < 0) return ret; @@ -169,7 +169,7 @@ static void nilfs_direct_commit_delete(struct nilfs_direct *direct, __u64 key) { if (direct->d_bmap.b_pops->bpop_commit_end_ptr != NULL) - (*direct->d_bmap.b_pops->bpop_commit_end_ptr)( + direct->d_bmap.b_pops->bpop_commit_end_ptr( &direct->d_bmap, req); nilfs_direct_set_ptr(direct, key, NILFS_BMAP_INVALID_PTR); } @@ -255,13 +255,13 @@ int nilfs_direct_delete_and_convert(struct nilfs_bmap *bmap, /* no need to allocate any resource for conversion */ /* delete */ - ret = (*bmap->b_ops->bop_delete)(bmap, key); + ret = bmap->b_ops->bop_delete(bmap, key); if (ret < 0) return ret; /* free resources */ if (bmap->b_ops->bop_clear != NULL) - (*bmap->b_ops->bop_clear)(bmap); + bmap->b_ops->bop_clear(bmap); /* convert */ direct = (struct nilfs_direct *)bmap; @@ -314,7 +314,7 @@ static int nilfs_direct_propagate(const struct nilfs_bmap *bmap, direct = (struct nilfs_direct *)bmap; return (direct->d_ops->dop_propagate != NULL) ? - (*direct->d_ops->dop_propagate)(direct, bh) : + direct->d_ops->dop_propagate(direct, bh) : 0; } @@ -328,12 +328,12 @@ static int nilfs_direct_assign_v(struct nilfs_direct *direct, int ret; req.bpr_ptr = ptr; - ret = (*direct->d_bmap.b_pops->bpop_prepare_start_ptr)( + ret = direct->d_bmap.b_pops->bpop_prepare_start_ptr( &direct->d_bmap, &req); if (ret < 0) return ret; - (*direct->d_bmap.b_pops->bpop_commit_start_ptr)(&direct->d_bmap, - &req, blocknr); + direct->d_bmap.b_pops->bpop_commit_start_ptr(&direct->d_bmap, + &req, blocknr); binfo->bi_v.bi_vblocknr = nilfs_bmap_ptr_to_dptr(ptr); binfo->bi_v.bi_blkoff = nilfs_bmap_key_to_dkey(key); @@ -370,8 +370,8 @@ static int nilfs_direct_assign(struct nilfs_bmap *bmap, ptr = nilfs_direct_get_ptr(direct, key); BUG_ON(ptr == NILFS_BMAP_INVALID_PTR); - return (*direct->d_ops->dop_assign)(direct, key, ptr, bh, - blocknr, binfo); + return direct->d_ops->dop_assign(direct, key, ptr, bh, + blocknr, binfo); } static const struct nilfs_bmap_operations nilfs_direct_ops = { diff --git a/fs/nilfs2/ioctl.c b/fs/nilfs2/ioctl.c index d9e3990f958..9e4d9e64c8f 100644 --- a/fs/nilfs2/ioctl.c +++ b/fs/nilfs2/ioctl.c @@ -71,7 +71,7 @@ static int nilfs_ioctl_wrap_copy(struct the_nilfs *nilfs, break; } ppos = pos; - nr = (*dofunc)(nilfs, &pos, argv->v_flags, buf, argv->v_size, + nr = dofunc(nilfs, &pos, argv->v_flags, buf, argv->v_size, n); if (nr < 0) { ret = nr; -- cgit v1.2.3 From 76068c4ff1cc03d9d24d17fd9e6a1475bc2f6730 Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Mon, 6 Apr 2009 19:01:50 -0700 Subject: nilfs2: fix buggy behavior seen in enumerating checkpoints This will fix the weird behavior of lscp command in listing continuously created checkpoints; the output of lscp is rewinded regularly for the recent nilfs. As a result of debugging, a defect was found in nilfs_cpfile_do_get_cpinfo() function. Though the function can be repeatedly called to enumerate checkpoints and it can skip invalid checkpoint entries, the index value was not carried between successive calls. The bug has long been present, and came to surface after applying a bugfix nilfs2-fix-problems-of-memory-allocation-in-ioctl.patch, which increased frequency of calling the function. The similar bugfix was already applied for ``snapshots'' by nilfs2-fix-gc-failure-on-volumes-keeping-numerous-snapshots.patch. This fixes the problem by making the index argument bidirectional on the function. Signed-off-by: Ryusuke Konishi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/nilfs2/cpfile.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) (limited to 'fs/nilfs2') diff --git a/fs/nilfs2/cpfile.c b/fs/nilfs2/cpfile.c index 50dff147744..218b3441850 100644 --- a/fs/nilfs2/cpfile.c +++ b/fs/nilfs2/cpfile.c @@ -382,13 +382,13 @@ static void nilfs_cpfile_checkpoint_to_cpinfo(struct inode *cpfile, ci->ci_next = le64_to_cpu(cp->cp_snapshot_list.ssl_next); } -static ssize_t nilfs_cpfile_do_get_cpinfo(struct inode *cpfile, __u64 cno, +static ssize_t nilfs_cpfile_do_get_cpinfo(struct inode *cpfile, __u64 *cnop, struct nilfs_cpinfo *ci, size_t nci) { struct nilfs_checkpoint *cp; struct buffer_head *bh; size_t cpsz = NILFS_MDT(cpfile)->mi_entry_size; - __u64 cur_cno = nilfs_mdt_cno(cpfile); + __u64 cur_cno = nilfs_mdt_cno(cpfile), cno = *cnop; void *kaddr; int n, ret; int ncps, i; @@ -416,6 +416,8 @@ static ssize_t nilfs_cpfile_do_get_cpinfo(struct inode *cpfile, __u64 cno, } ret = n; + if (n > 0) + *cnop = ci[n - 1].ci_cno + 1; out: up_read(&NILFS_MDT(cpfile)->mi_sem); @@ -510,7 +512,7 @@ ssize_t nilfs_cpfile_get_cpinfo(struct inode *cpfile, __u64 *cnop, int mode, { switch (mode) { case NILFS_CHECKPOINT: - return nilfs_cpfile_do_get_cpinfo(cpfile, *cnop, ci, nci); + return nilfs_cpfile_do_get_cpinfo(cpfile, cnop, ci, nci); case NILFS_SNAPSHOT: return nilfs_cpfile_do_get_ssinfo(cpfile, cnop, ci, nci); default: @@ -526,13 +528,14 @@ ssize_t nilfs_cpfile_get_cpinfo(struct inode *cpfile, __u64 *cnop, int mode, int nilfs_cpfile_delete_checkpoint(struct inode *cpfile, __u64 cno) { struct nilfs_cpinfo ci; + __u64 tcno = cno; ssize_t nci; int ret; /* checkpoint number 0 is invalid */ if (cno == 0) return -ENOENT; - nci = nilfs_cpfile_do_get_cpinfo(cpfile, cno, &ci, 1); + nci = nilfs_cpfile_do_get_cpinfo(cpfile, &tcno, &ci, 1); if (nci < 0) return nci; else if (nci == 0 || ci.ci_cno != cno) -- cgit v1.2.3 From 1088dcf4c3a0a27fdad5214781d5084b11405238 Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Mon, 6 Apr 2009 19:01:51 -0700 Subject: nilfs2: remove timedwait ioctl command This removes NILFS_IOCTL_TIMEDWAIT command from ioctl interface along with the related flags and wait queue. The command is terrible because it just sleeps in the ioctl. I prefer to avoid this by devising means of event polling in userland program. By reconsidering the userland GC daemon, I found this is possible without changing behaviour of the daemon and sacrificing efficiency. Signed-off-by: Ryusuke Konishi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/nilfs2/ioctl.c | 95 +-------------------------------------------------- fs/nilfs2/segment.c | 5 +-- fs/nilfs2/the_nilfs.c | 1 - fs/nilfs2/the_nilfs.h | 6 ---- 4 files changed, 2 insertions(+), 105 deletions(-) (limited to 'fs/nilfs2') diff --git a/fs/nilfs2/ioctl.c b/fs/nilfs2/ioctl.c index 9e4d9e64c8f..85a291ccc1b 100644 --- a/fs/nilfs2/ioctl.c +++ b/fs/nilfs2/ioctl.c @@ -578,62 +578,9 @@ int nilfs_ioctl_prepare_clean_segments(struct the_nilfs *nilfs, static int nilfs_ioctl_clean_segments(struct inode *inode, struct file *filp, unsigned int cmd, void __user *argp) { - int ret; - if (!capable(CAP_SYS_ADMIN)) return -EPERM; - - ret = nilfs_clean_segments(inode->i_sb, argp); - clear_nilfs_cond_nongc_write(NILFS_SB(inode->i_sb)->s_nilfs); - return ret; -} - -static int nilfs_ioctl_test_cond(struct the_nilfs *nilfs, int cond) -{ - return (cond & NILFS_TIMEDWAIT_SEG_WRITE) && - nilfs_cond_nongc_write(nilfs); -} - -static void nilfs_ioctl_clear_cond(struct the_nilfs *nilfs, int cond) -{ - if (cond & NILFS_TIMEDWAIT_SEG_WRITE) - clear_nilfs_cond_nongc_write(nilfs); -} - -static int nilfs_ioctl_timedwait(struct inode *inode, struct file *filp, - unsigned int cmd, void __user *argp) -{ - struct the_nilfs *nilfs = NILFS_SB(inode->i_sb)->s_nilfs; - struct nilfs_wait_cond wc; - long ret; - - if (!capable(CAP_SYS_ADMIN)) - return -EPERM; - if (copy_from_user(&wc, argp, sizeof(wc))) - return -EFAULT; - - unlock_kernel(); - ret = wc.wc_flags ? - wait_event_interruptible_timeout( - nilfs->ns_cleanerd_wq, - nilfs_ioctl_test_cond(nilfs, wc.wc_cond), - timespec_to_jiffies(&wc.wc_timeout)) : - wait_event_interruptible( - nilfs->ns_cleanerd_wq, - nilfs_ioctl_test_cond(nilfs, wc.wc_cond)); - lock_kernel(); - nilfs_ioctl_clear_cond(nilfs, wc.wc_cond); - - if (ret > 0) { - jiffies_to_timespec(ret, &wc.wc_timeout); - if (copy_to_user(argp, &wc, sizeof(wc))) - return -EFAULT; - return 0; - } - if (ret != 0) - return -EINTR; - - return wc.wc_flags ? -ETIME : 0; + return nilfs_clean_segments(inode->i_sb, argp); } static int nilfs_ioctl_sync(struct inode *inode, struct file *filp, @@ -679,8 +626,6 @@ int nilfs_ioctl(struct inode *inode, struct file *filp, unsigned int cmd, return nilfs_ioctl_get_bdescs(inode, filp, cmd, argp); case NILFS_IOCTL_CLEAN_SEGMENTS: return nilfs_ioctl_clean_segments(inode, filp, cmd, argp); - case NILFS_IOCTL_TIMEDWAIT: - return nilfs_ioctl_timedwait(inode, filp, cmd, argp); case NILFS_IOCTL_SYNC: return nilfs_ioctl_sync(inode, filp, cmd, argp); default: @@ -871,41 +816,6 @@ nilfs_compat_ioctl_clean_segments(struct inode *inode, struct file *filp, inode, filp, cmd, (unsigned long)uargv); } -static int -nilfs_compat_ioctl_timedwait(struct inode *inode, struct file *filp, - unsigned int cmd, unsigned long arg) -{ - struct nilfs_wait_cond __user *uwcond; - struct nilfs_wait_cond32 __user *uwcond32; - struct timespec ts; - int cond, flags, ret; - - uwcond = compat_alloc_user_space(sizeof(struct nilfs_wait_cond)); - uwcond32 = compat_ptr(arg); - if (get_user(cond, &uwcond32->wc_cond) || - put_user(cond, &uwcond->wc_cond) || - get_user(flags, &uwcond32->wc_flags) || - put_user(flags, &uwcond->wc_flags) || - get_user(ts.tv_sec, &uwcond32->wc_timeout.tv_sec) || - get_user(ts.tv_nsec, &uwcond32->wc_timeout.tv_nsec) || - put_user(ts.tv_sec, &uwcond->wc_timeout.tv_sec) || - put_user(ts.tv_nsec, &uwcond->wc_timeout.tv_nsec)) - return -EFAULT; - - ret = nilfs_compat_locked_ioctl(inode, filp, cmd, - (unsigned long)uwcond); - if (ret < 0) - return ret; - - if (get_user(ts.tv_sec, &uwcond->wc_timeout.tv_sec) || - get_user(ts.tv_nsec, &uwcond->wc_timeout.tv_nsec) || - put_user(ts.tv_sec, &uwcond32->wc_timeout.tv_sec) || - put_user(ts.tv_nsec, &uwcond32->wc_timeout.tv_nsec)) - return -EFAULT; - - return 0; -} - static int nilfs_compat_ioctl_sync(struct inode *inode, struct file *filp, unsigned int cmd, unsigned long arg) { @@ -943,9 +853,6 @@ long nilfs_compat_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) case NILFS_IOCTL32_CLEAN_SEGMENTS: return nilfs_compat_ioctl_clean_segments( inode, filp, NILFS_IOCTL_CLEAN_SEGMENTS, arg); - case NILFS_IOCTL32_TIMEDWAIT: - return nilfs_compat_ioctl_timedwait( - inode, filp, NILFS_IOCTL_TIMEDWAIT, arg); case NILFS_IOCTL_SYNC: return nilfs_compat_ioctl_sync(inode, filp, cmd, arg); default: diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c index 6d66c5cb7b5..5db12d774a0 100644 --- a/fs/nilfs2/segment.c +++ b/fs/nilfs2/segment.c @@ -2114,11 +2114,8 @@ static void nilfs_segctor_complete_write(struct nilfs_sc_info *sci) nilfs_drop_collected_inodes(&sci->sc_gc_inodes); if (update_sr) nilfs_commit_gcdat_inode(nilfs); - } else { + } else nilfs->ns_nongc_ctime = sci->sc_seg_ctime; - set_nilfs_cond_nongc_write(nilfs); - wake_up(&nilfs->ns_cleanerd_wq); - } sci->sc_nblk_inc += sci->sc_nblk_this_inc; diff --git a/fs/nilfs2/the_nilfs.c b/fs/nilfs2/the_nilfs.c index 852e0bf3a3c..69b62558622 100644 --- a/fs/nilfs2/the_nilfs.c +++ b/fs/nilfs2/the_nilfs.c @@ -73,7 +73,6 @@ struct the_nilfs *alloc_nilfs(struct block_device *bdev) nilfs->ns_gc_inodes_h = NULL; INIT_LIST_HEAD(&nilfs->ns_used_segments); init_rwsem(&nilfs->ns_segctor_sem); - init_waitqueue_head(&nilfs->ns_cleanerd_wq); return nilfs; } diff --git a/fs/nilfs2/the_nilfs.h b/fs/nilfs2/the_nilfs.h index 9cd3c113f05..75da3730696 100644 --- a/fs/nilfs2/the_nilfs.h +++ b/fs/nilfs2/the_nilfs.h @@ -37,7 +37,6 @@ enum { THE_NILFS_LOADED, /* Roll-back/roll-forward has done and the latest checkpoint was loaded */ THE_NILFS_DISCONTINUED, /* 'next' pointer chain has broken */ - THE_NILFS_COND_NONGC_WRITE, /* Condition to wake up cleanerd */ }; /** @@ -74,7 +73,6 @@ enum { * @ns_gc_dat: shadow inode of the DAT file inode for GC * @ns_gc_inodes: dummy inodes to keep live blocks * @ns_gc_inodes_h: hash list to keep dummy inode holding live blocks - * @ns_cleanerd_wq: wait queue for cleanerd * @ns_blocksize_bits: bit length of block size * @ns_nsegments: number of segments in filesystem * @ns_blocks_per_segment: number of blocks per segment @@ -151,9 +149,6 @@ struct the_nilfs { struct list_head ns_gc_inodes; struct hlist_head *ns_gc_inodes_h; - /* cleanerd */ - wait_queue_head_t ns_cleanerd_wq; - /* Disk layout information (static) */ unsigned int ns_blocksize_bits; unsigned long ns_nsegments; @@ -186,7 +181,6 @@ static inline int nilfs_##name(struct the_nilfs *nilfs) \ THE_NILFS_FNS(INIT, init) THE_NILFS_FNS(LOADED, loaded) THE_NILFS_FNS(DISCONTINUED, discontinued) -THE_NILFS_FNS(COND_NONGC_WRITE, cond_nongc_write) void nilfs_set_last_segment(struct the_nilfs *, sector_t, u64, __u64); struct the_nilfs *alloc_nilfs(struct block_device *); -- cgit v1.2.3 From dc498d09be28172846cacded35ca2378222a8c7b Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Mon, 6 Apr 2009 19:01:52 -0700 Subject: nilfs2: use fixed sized types for ioctl structures Nilfs ioctl had structures not having fixed sized types such as: struct nilfs_argv { void *v_base; size_t v_nmembs; size_t v_size; int v_index; int v_flags; }; Further, some of them are wrongly aligned: e.g. struct nilfs_cpmode { __u64 cm_cno; int cm_mode; }; The size of wrongly aligned structures varies depending on architectures, and it breaks the identity of ioctl commands, which leads to arch dependent errors. Previously, these are compensated by using compat_ioctl. This fixes these problems and allows removal of compat ioctl. Since this will change sizes of those structures, binary compatibility for the past utilities will once break; new utilities have to be used instead. However, it would be helpful to avoid platform dependent problems in the long term. Signed-off-by: Ryusuke Konishi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/nilfs2/ioctl.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) (limited to 'fs/nilfs2') diff --git a/fs/nilfs2/ioctl.c b/fs/nilfs2/ioctl.c index 85a291ccc1b..7fbd9fe1d03 100644 --- a/fs/nilfs2/ioctl.c +++ b/fs/nilfs2/ioctl.c @@ -41,6 +41,7 @@ static int nilfs_ioctl_wrap_copy(struct the_nilfs *nilfs, void *, size_t, size_t)) { void *buf; + void __user *base = (void __user *)(unsigned long)argv->v_base; size_t maxmembs, total, n; ssize_t nr; int ret, i; @@ -64,9 +65,8 @@ static int nilfs_ioctl_wrap_copy(struct the_nilfs *nilfs, n = (argv->v_nmembs - i < maxmembs) ? argv->v_nmembs - i : maxmembs; if ((dir & _IOC_WRITE) && - copy_from_user(buf, - (void __user *)argv->v_base + argv->v_size * i, - argv->v_size * n)) { + copy_from_user(buf, base + argv->v_size * i, + argv->v_size * n)) { ret = -EFAULT; break; } @@ -78,9 +78,8 @@ static int nilfs_ioctl_wrap_copy(struct the_nilfs *nilfs, break; } if ((dir & _IOC_READ) && - copy_to_user( - (void __user *)argv->v_base + argv->v_size * i, - buf, argv->v_size * nr)) { + copy_to_user(base + argv->v_size * i, buf, + argv->v_size * nr)) { ret = -EFAULT; break; } -- cgit v1.2.3 From 8082d36aed26c4fb6ed43e4008303682eabf839e Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Mon, 6 Apr 2009 19:01:53 -0700 Subject: nilfs2: remove compat ioctl code This removes compat code from the nilfs ioctls and applies the same function for both .ioctl and .compat_ioctl file operations. Signed-off-by: Ryusuke Konishi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/nilfs2/dir.c | 2 +- fs/nilfs2/file.c | 2 +- fs/nilfs2/ioctl.c | 228 ------------------------------------------------------ fs/nilfs2/nilfs.h | 1 - 4 files changed, 2 insertions(+), 231 deletions(-) (limited to 'fs/nilfs2') diff --git a/fs/nilfs2/dir.c b/fs/nilfs2/dir.c index 1b7e6ddabbe..393316cd3ca 100644 --- a/fs/nilfs2/dir.c +++ b/fs/nilfs2/dir.c @@ -704,7 +704,7 @@ struct file_operations nilfs_dir_operations = { .readdir = nilfs_readdir, .ioctl = nilfs_ioctl, #ifdef CONFIG_COMPAT - .compat_ioctl = nilfs_compat_ioctl, + .compat_ioctl = nilfs_ioctl, #endif /* CONFIG_COMPAT */ .fsync = nilfs_sync_file, diff --git a/fs/nilfs2/file.c b/fs/nilfs2/file.c index cd38124372f..a2bd962ebd8 100644 --- a/fs/nilfs2/file.c +++ b/fs/nilfs2/file.c @@ -142,7 +142,7 @@ struct file_operations nilfs_file_operations = { .aio_write = generic_file_aio_write, .ioctl = nilfs_ioctl, #ifdef CONFIG_COMPAT - .compat_ioctl = nilfs_compat_ioctl, + .compat_ioctl = nilfs_ioctl, #endif /* CONFIG_COMPAT */ .mmap = nilfs_file_mmap, .open = generic_file_open, diff --git a/fs/nilfs2/ioctl.c b/fs/nilfs2/ioctl.c index 7fbd9fe1d03..33aff8842ce 100644 --- a/fs/nilfs2/ioctl.c +++ b/fs/nilfs2/ioctl.c @@ -631,231 +631,3 @@ int nilfs_ioctl(struct inode *inode, struct file *filp, unsigned int cmd, return -ENOTTY; } } - -/* compat_ioctl */ -#ifdef CONFIG_COMPAT -#include - -static int nilfs_compat_locked_ioctl(struct inode *inode, struct file *filp, - unsigned int cmd, unsigned long arg) -{ - int ret; - - lock_kernel(); - ret = nilfs_ioctl(inode, filp, cmd, arg); - unlock_kernel(); - return ret; -} - -static int -nilfs_compat_ioctl_uargv32_to_uargv(struct nilfs_argv32 __user *uargv32, - struct nilfs_argv __user *uargv) -{ - compat_uptr_t base; - compat_size_t nmembs, size; - compat_int_t index, flags; - - if (get_user(base, &uargv32->v_base) || - put_user(compat_ptr(base), &uargv->v_base) || - get_user(nmembs, &uargv32->v_nmembs) || - put_user(nmembs, &uargv->v_nmembs) || - get_user(size, &uargv32->v_size) || - put_user(size, &uargv->v_size) || - get_user(index, &uargv32->v_index) || - put_user(index, &uargv->v_index) || - get_user(flags, &uargv32->v_flags) || - put_user(flags, &uargv->v_flags)) - return -EFAULT; - return 0; -} - -static int -nilfs_compat_ioctl_uargv_to_uargv32(struct nilfs_argv __user *uargv, - struct nilfs_argv32 __user *uargv32) -{ - size_t nmembs; - - if (get_user(nmembs, &uargv->v_nmembs) || - put_user(nmembs, &uargv32->v_nmembs)) - return -EFAULT; - return 0; -} - -static int -nilfs_compat_ioctl_get_by_argv(struct inode *inode, struct file *filp, - unsigned int cmd, unsigned long arg) -{ - struct nilfs_argv __user *uargv; - struct nilfs_argv32 __user *uargv32; - int ret; - - uargv = compat_alloc_user_space(sizeof(struct nilfs_argv)); - uargv32 = compat_ptr(arg); - ret = nilfs_compat_ioctl_uargv32_to_uargv(uargv32, uargv); - if (ret < 0) - return ret; - - ret = nilfs_compat_locked_ioctl(inode, filp, cmd, (unsigned long)uargv); - if (ret < 0) - return ret; - - return nilfs_compat_ioctl_uargv_to_uargv32(uargv, uargv32); -} - -static int -nilfs_compat_ioctl_change_cpmode(struct inode *inode, struct file *filp, - unsigned int cmd, unsigned long arg) -{ - struct nilfs_cpmode __user *ucpmode; - struct nilfs_cpmode32 __user *ucpmode32; - int mode; - - ucpmode = compat_alloc_user_space(sizeof(struct nilfs_cpmode)); - ucpmode32 = compat_ptr(arg); - if (copy_in_user(&ucpmode->cm_cno, &ucpmode32->cm_cno, - sizeof(__u64)) || - get_user(mode, &ucpmode32->cm_mode) || - put_user(mode, &ucpmode->cm_mode)) - return -EFAULT; - - return nilfs_compat_locked_ioctl( - inode, filp, cmd, (unsigned long)ucpmode); -} - - -static inline int -nilfs_compat_ioctl_delete_checkpoint(struct inode *inode, struct file *filp, - unsigned int cmd, unsigned long arg) -{ - return nilfs_compat_locked_ioctl(inode, filp, cmd, arg); -} - -static inline int -nilfs_compat_ioctl_get_cpinfo(struct inode *inode, struct file *filp, - unsigned int cmd, unsigned long arg) -{ - return nilfs_compat_ioctl_get_by_argv(inode, filp, cmd, arg); -} - -static inline int -nilfs_compat_ioctl_get_cpstat(struct inode *inode, struct file *filp, - unsigned int cmd, unsigned long arg) -{ - return nilfs_compat_locked_ioctl(inode, filp, cmd, arg); -} - -static inline int -nilfs_compat_ioctl_get_suinfo(struct inode *inode, struct file *filp, - unsigned int cmd, unsigned long arg) -{ - return nilfs_compat_ioctl_get_by_argv(inode, filp, cmd, arg); -} - -static int -nilfs_compat_ioctl_get_sustat(struct inode *inode, struct file *filp, - unsigned int cmd, unsigned long arg) -{ - struct nilfs_sustat __user *usustat; - struct nilfs_sustat32 __user *usustat32; - time_t ctime, nongc_ctime; - int ret; - - usustat = compat_alloc_user_space(sizeof(struct nilfs_sustat)); - ret = nilfs_compat_locked_ioctl(inode, filp, cmd, - (unsigned long)usustat); - if (ret < 0) - return ret; - - usustat32 = compat_ptr(arg); - if (copy_in_user(&usustat32->ss_nsegs, &usustat->ss_nsegs, - sizeof(__u64)) || - copy_in_user(&usustat32->ss_ncleansegs, &usustat->ss_ncleansegs, - sizeof(__u64)) || - copy_in_user(&usustat32->ss_ndirtysegs, &usustat->ss_ndirtysegs, - sizeof(__u64)) || - get_user(ctime, &usustat->ss_ctime) || - put_user(ctime, &usustat32->ss_ctime) || - get_user(nongc_ctime, &usustat->ss_nongc_ctime) || - put_user(nongc_ctime, &usustat32->ss_nongc_ctime)) - return -EFAULT; - return 0; -} - -static inline int -nilfs_compat_ioctl_get_vinfo(struct inode *inode, struct file *filp, - unsigned int cmd, unsigned long arg) -{ - return nilfs_compat_ioctl_get_by_argv(inode, filp, cmd, arg); -} - -static inline int -nilfs_compat_ioctl_get_bdescs(struct inode *inode, struct file *filp, - unsigned int cmd, unsigned long arg) -{ - return nilfs_compat_ioctl_get_by_argv(inode, filp, cmd, arg); -} - -static int -nilfs_compat_ioctl_clean_segments(struct inode *inode, struct file *filp, - unsigned int cmd, unsigned long arg) -{ - struct nilfs_argv __user *uargv; - struct nilfs_argv32 __user *uargv32; - int i, ret; - - uargv = compat_alloc_user_space(sizeof(struct nilfs_argv) * 5); - uargv32 = compat_ptr(arg); - for (i = 0; i < 5; i++) { - ret = nilfs_compat_ioctl_uargv32_to_uargv(&uargv32[i], - &uargv[i]); - if (ret < 0) - return ret; - } - return nilfs_compat_locked_ioctl( - inode, filp, cmd, (unsigned long)uargv); -} - -static int nilfs_compat_ioctl_sync(struct inode *inode, struct file *filp, - unsigned int cmd, unsigned long arg) -{ - return nilfs_compat_locked_ioctl(inode, filp, cmd, arg); -} - -long nilfs_compat_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) -{ - struct inode *inode = filp->f_dentry->d_inode; - - switch (cmd) { - case NILFS_IOCTL32_CHANGE_CPMODE: - return nilfs_compat_ioctl_change_cpmode( - inode, filp, NILFS_IOCTL_CHANGE_CPMODE, arg); - case NILFS_IOCTL_DELETE_CHECKPOINT: - return nilfs_compat_ioctl_delete_checkpoint( - inode, filp, cmd, arg); - case NILFS_IOCTL32_GET_CPINFO: - return nilfs_compat_ioctl_get_cpinfo( - inode, filp, NILFS_IOCTL_GET_CPINFO, arg); - case NILFS_IOCTL_GET_CPSTAT: - return nilfs_compat_ioctl_get_cpstat(inode, filp, cmd, arg); - case NILFS_IOCTL32_GET_SUINFO: - return nilfs_compat_ioctl_get_suinfo( - inode, filp, NILFS_IOCTL_GET_SUINFO, arg); - case NILFS_IOCTL32_GET_SUSTAT: - return nilfs_compat_ioctl_get_sustat( - inode, filp, NILFS_IOCTL_GET_SUSTAT, arg); - case NILFS_IOCTL32_GET_VINFO: - return nilfs_compat_ioctl_get_vinfo( - inode, filp, NILFS_IOCTL_GET_VINFO, arg); - case NILFS_IOCTL32_GET_BDESCS: - return nilfs_compat_ioctl_get_bdescs( - inode, filp, NILFS_IOCTL_GET_BDESCS, arg); - case NILFS_IOCTL32_CLEAN_SEGMENTS: - return nilfs_compat_ioctl_clean_segments( - inode, filp, NILFS_IOCTL_CLEAN_SEGMENTS, arg); - case NILFS_IOCTL_SYNC: - return nilfs_compat_ioctl_sync(inode, filp, cmd, arg); - default: - return -ENOIOCTLCMD; - } -} -#endif diff --git a/fs/nilfs2/nilfs.h b/fs/nilfs2/nilfs.h index 48c070676cc..f767644a724 100644 --- a/fs/nilfs2/nilfs.h +++ b/fs/nilfs2/nilfs.h @@ -243,7 +243,6 @@ extern int nilfs_sync_file(struct file *, struct dentry *, int); /* ioctl.c */ int nilfs_ioctl(struct inode *, struct file *, unsigned int, unsigned long); -long nilfs_compat_ioctl(struct file *, unsigned int, unsigned long); int nilfs_ioctl_prepare_clean_segments(struct the_nilfs *, void __user *); /* inode.c */ -- cgit v1.2.3 From 7a9461939a46345860622ea36ff267ee4446f00f Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Mon, 6 Apr 2009 19:01:53 -0700 Subject: nilfs2: use unlocked_ioctl Pekka Enberg suggested converting ->ioctl operations to use ->unlocked_ioctl to avoid BKL. The conversion was verified to be safe, so I will take it on this occasion. Cc: Pekka Enberg Signed-off-by: Ryusuke Konishi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/nilfs2/dir.c | 2 +- fs/nilfs2/file.c | 2 +- fs/nilfs2/ioctl.c | 4 ++-- fs/nilfs2/nilfs.h | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) (limited to 'fs/nilfs2') diff --git a/fs/nilfs2/dir.c b/fs/nilfs2/dir.c index 393316cd3ca..54100acc110 100644 --- a/fs/nilfs2/dir.c +++ b/fs/nilfs2/dir.c @@ -702,7 +702,7 @@ struct file_operations nilfs_dir_operations = { .llseek = generic_file_llseek, .read = generic_read_dir, .readdir = nilfs_readdir, - .ioctl = nilfs_ioctl, + .unlocked_ioctl = nilfs_ioctl, #ifdef CONFIG_COMPAT .compat_ioctl = nilfs_ioctl, #endif /* CONFIG_COMPAT */ diff --git a/fs/nilfs2/file.c b/fs/nilfs2/file.c index a2bd962ebd8..6bd84a0d823 100644 --- a/fs/nilfs2/file.c +++ b/fs/nilfs2/file.c @@ -140,7 +140,7 @@ struct file_operations nilfs_file_operations = { .write = do_sync_write, .aio_read = generic_file_aio_read, .aio_write = generic_file_aio_write, - .ioctl = nilfs_ioctl, + .unlocked_ioctl = nilfs_ioctl, #ifdef CONFIG_COMPAT .compat_ioctl = nilfs_ioctl, #endif /* CONFIG_COMPAT */ diff --git a/fs/nilfs2/ioctl.c b/fs/nilfs2/ioctl.c index 33aff8842ce..cfb27892ffe 100644 --- a/fs/nilfs2/ioctl.c +++ b/fs/nilfs2/ioctl.c @@ -600,9 +600,9 @@ static int nilfs_ioctl_sync(struct inode *inode, struct file *filp, return 0; } -int nilfs_ioctl(struct inode *inode, struct file *filp, unsigned int cmd, - unsigned long arg) +long nilfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) { + struct inode *inode = filp->f_dentry->d_inode; void __user *argp = (void * __user *)arg; switch (cmd) { diff --git a/fs/nilfs2/nilfs.h b/fs/nilfs2/nilfs.h index f767644a724..d08fb1ce501 100644 --- a/fs/nilfs2/nilfs.h +++ b/fs/nilfs2/nilfs.h @@ -242,7 +242,7 @@ extern void nilfs_set_link(struct inode *, struct nilfs_dir_entry *, extern int nilfs_sync_file(struct file *, struct dentry *, int); /* ioctl.c */ -int nilfs_ioctl(struct inode *, struct file *, unsigned int, unsigned long); +long nilfs_ioctl(struct file *, unsigned int, unsigned long); int nilfs_ioctl_prepare_clean_segments(struct the_nilfs *, void __user *); /* inode.c */ -- cgit v1.2.3 From 2c2e52fc4fca251e68f90821c9ff5cb18be4df58 Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Mon, 6 Apr 2009 19:01:54 -0700 Subject: nilfs2: extend nilfs_sustat ioctl struct This adds a new argument to the nilfs_sustat structure. The extended field allows to delete volatile active state of segments, which was needed to protect freshly-created segments from garbage collection but has confused code dealing with segments. This extension alleviates the mess and gives room for further simplifications. The volatile active flag is not persistent, so it's eliminable on this occasion without affecting compatibility other than the ioctl change. Signed-off-by: Ryusuke Konishi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/nilfs2/recovery.c | 32 +++++++++++++++++--------------- fs/nilfs2/segment.c | 39 +++++++++------------------------------ fs/nilfs2/sufile.c | 8 ++++++-- fs/nilfs2/super.c | 4 +++- fs/nilfs2/the_nilfs.c | 18 ------------------ fs/nilfs2/the_nilfs.h | 5 ++--- 6 files changed, 37 insertions(+), 69 deletions(-) (limited to 'fs/nilfs2') diff --git a/fs/nilfs2/recovery.c b/fs/nilfs2/recovery.c index 877dc1ba23f..a4253f34e13 100644 --- a/fs/nilfs2/recovery.c +++ b/fs/nilfs2/recovery.c @@ -416,6 +416,7 @@ static int nilfs_prepare_segment_for_recovery(struct the_nilfs *nilfs, struct nilfs_segment_entry *ent, *n; struct inode *sufile = nilfs->ns_sufile; __u64 segnum[4]; + time_t mtime; int err; int i; @@ -442,9 +443,9 @@ static int nilfs_prepare_segment_for_recovery(struct the_nilfs *nilfs, /* * Collecting segments written after the latest super root. - * These are marked volatile active, and won't be reallocated in - * the next construction. + * These are marked dirty to avoid being reallocated in the next write. */ + mtime = get_seconds(); list_for_each_entry_safe(ent, n, head, list) { if (ent->segnum == segnum[0]) { list_del(&ent->list); @@ -454,17 +455,16 @@ static int nilfs_prepare_segment_for_recovery(struct the_nilfs *nilfs, err = nilfs_open_segment_entry(ent, sufile); if (unlikely(err)) goto failed; - if (nilfs_segment_usage_clean(ent->raw_su)) { - nilfs_segment_usage_set_volatile_active(ent->raw_su); - /* Keep it open */ - } else { - /* Removing duplicated entries */ - list_del(&ent->list); - nilfs_close_segment_entry(ent, sufile); - nilfs_free_segment_entry(ent); + if (!nilfs_segment_usage_dirty(ent->raw_su)) { + /* make the segment garbage */ + ent->raw_su->su_nblocks = cpu_to_le32(0); + ent->raw_su->su_lastmod = cpu_to_le32(mtime); + nilfs_segment_usage_set_dirty(ent->raw_su); } + list_del(&ent->list); + nilfs_close_segment_entry(ent, sufile); + nilfs_free_segment_entry(ent); } - list_splice_init(head, nilfs->ns_used_segments.prev); /* * The segment having the latest super root is active, and @@ -882,10 +882,12 @@ int nilfs_search_super_root(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi, if (scan_newer) ri->ri_need_recovery = NILFS_RECOVERY_SR_UPDATED; - else if (nilfs->ns_mount_state & NILFS_VALID_FS) - goto super_root_found; - - scan_newer = 1; + else { + nilfs->ns_prot_seq = ssi.seg_seq; + if (nilfs->ns_mount_state & NILFS_VALID_FS) + goto super_root_found; + scan_newer = 1; + } /* reset region for roll-forward */ pseg_start += ssi.nblocks; diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c index 5db12d774a0..24d0fbd4271 100644 --- a/fs/nilfs2/segment.c +++ b/fs/nilfs2/segment.c @@ -2229,13 +2229,6 @@ static void nilfs_segctor_reactivate_segments(struct nilfs_sc_info *sci, nilfs_segment_usage_set_active(ent->raw_su); nilfs_close_segment_entry(ent, sufile); } - - down_write(&nilfs->ns_sem); - head = &nilfs->ns_used_segments; - list_for_each_entry(ent, head, list) { - nilfs_segment_usage_set_volatile_active(ent->raw_su); - } - up_write(&nilfs->ns_sem); } static int nilfs_segctor_deactivate_segments(struct nilfs_sc_info *sci, @@ -2244,7 +2237,6 @@ static int nilfs_segctor_deactivate_segments(struct nilfs_sc_info *sci, struct nilfs_segment_buffer *segbuf, *last; struct nilfs_segment_entry *ent; struct inode *sufile = nilfs->ns_sufile; - struct list_head *head; int err; last = NILFS_LAST_SEGBUF(&sci->sc_segbufs); @@ -2265,22 +2257,13 @@ static int nilfs_segctor_deactivate_segments(struct nilfs_sc_info *sci, BUG_ON(!buffer_dirty(ent->bh_su)); } - head = &sci->sc_active_segments; - list_for_each_entry(ent, head, list) { + list_for_each_entry(ent, &sci->sc_active_segments, list) { err = nilfs_open_segment_entry(ent, sufile); if (unlikely(err)) goto failed; nilfs_segment_usage_clear_active(ent->raw_su); BUG_ON(!buffer_dirty(ent->bh_su)); } - - down_write(&nilfs->ns_sem); - head = &nilfs->ns_used_segments; - list_for_each_entry(ent, head, list) { - /* clear volatile active for segments of older generations */ - nilfs_segment_usage_clear_volatile_active(ent->raw_su); - } - up_write(&nilfs->ns_sem); return 0; failed: @@ -2304,19 +2287,15 @@ static void nilfs_segctor_bead_completed_segments(struct nilfs_sc_info *sci) } } -static void -__nilfs_segctor_commit_deactivate_segments(struct nilfs_sc_info *sci, - struct the_nilfs *nilfs) - +static void nilfs_segctor_commit_deactivate_segments(struct nilfs_sc_info *sci, + struct the_nilfs *nilfs) { - struct nilfs_segment_entry *ent; - - list_splice_init(&sci->sc_active_segments, - nilfs->ns_used_segments.prev); + struct nilfs_segment_entry *ent, *n; - list_for_each_entry(ent, &nilfs->ns_used_segments, list) { - nilfs_segment_usage_set_volatile_active(ent->raw_su); - /* These segments are kept open */ + list_for_each_entry_safe(ent, n, &sci->sc_active_segments, list) { + list_del(&ent->list); + nilfs_close_segment_entry(ent, nilfs->ns_sufile); + nilfs_free_segment_entry(ent); } } @@ -2405,8 +2384,8 @@ static int nilfs_segctor_do_construct(struct nilfs_sc_info *sci, int mode) if (has_sr) { down_write(&nilfs->ns_sem); nilfs_update_last_segment(sbi, 1); - __nilfs_segctor_commit_deactivate_segments(sci, nilfs); up_write(&nilfs->ns_sem); + nilfs_segctor_commit_deactivate_segments(sci, nilfs); nilfs_segctor_commit_free_segments(sci); nilfs_segctor_clear_metadata_dirty(sci); } diff --git a/fs/nilfs2/sufile.c b/fs/nilfs2/sufile.c index b3674a8162a..cc714c72b13 100644 --- a/fs/nilfs2/sufile.c +++ b/fs/nilfs2/sufile.c @@ -446,6 +446,7 @@ int nilfs_sufile_get_stat(struct inode *sufile, struct nilfs_sustat *sustat) { struct buffer_head *header_bh; struct nilfs_sufile_header *header; + struct the_nilfs *nilfs = NILFS_MDT(sufile)->mi_nilfs; void *kaddr; int ret; @@ -460,8 +461,11 @@ int nilfs_sufile_get_stat(struct inode *sufile, struct nilfs_sustat *sustat) sustat->ss_nsegs = nilfs_sufile_get_nsegments(sufile); sustat->ss_ncleansegs = le64_to_cpu(header->sh_ncleansegs); sustat->ss_ndirtysegs = le64_to_cpu(header->sh_ndirtysegs); - sustat->ss_ctime = NILFS_MDT(sufile)->mi_nilfs->ns_ctime; - sustat->ss_nongc_ctime = NILFS_MDT(sufile)->mi_nilfs->ns_nongc_ctime; + sustat->ss_ctime = nilfs->ns_ctime; + sustat->ss_nongc_ctime = nilfs->ns_nongc_ctime; + spin_lock(&nilfs->ns_last_segment_lock); + sustat->ss_prot_seq = nilfs->ns_prot_seq; + spin_unlock(&nilfs->ns_last_segment_lock); kunmap_atomic(kaddr, KM_USER0); brelse(header_bh); diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c index 268b563d215..2f0e9f7bf15 100644 --- a/fs/nilfs2/super.c +++ b/fs/nilfs2/super.c @@ -262,8 +262,10 @@ static int nilfs_sync_super(struct nilfs_sb_info *sbi) printk(KERN_ERR "NILFS: unable to write superblock (err=%d)\n", err); else { - nilfs_dispose_used_segments(nilfs); clear_nilfs_discontinued(nilfs); + spin_lock(&nilfs->ns_last_segment_lock); + nilfs->ns_prot_seq = le64_to_cpu(nilfs->ns_sbp->s_last_seq); + spin_unlock(&nilfs->ns_last_segment_lock); } return err; diff --git a/fs/nilfs2/the_nilfs.c b/fs/nilfs2/the_nilfs.c index 69b62558622..661ab762d76 100644 --- a/fs/nilfs2/the_nilfs.c +++ b/fs/nilfs2/the_nilfs.c @@ -71,7 +71,6 @@ struct the_nilfs *alloc_nilfs(struct block_device *bdev) INIT_LIST_HEAD(&nilfs->ns_supers); spin_lock_init(&nilfs->ns_last_segment_lock); nilfs->ns_gc_inodes_h = NULL; - INIT_LIST_HEAD(&nilfs->ns_used_segments); init_rwsem(&nilfs->ns_segctor_sem); return nilfs; @@ -95,7 +94,6 @@ void put_nilfs(struct the_nilfs *nilfs) */ might_sleep(); if (nilfs_loaded(nilfs)) { - nilfs_dispose_used_segments(nilfs); nilfs_mdt_clear(nilfs->ns_sufile); nilfs_mdt_destroy(nilfs->ns_sufile); nilfs_mdt_clear(nilfs->ns_cpfile); @@ -463,22 +461,6 @@ int nilfs_count_free_blocks(struct the_nilfs *nilfs, sector_t *nblocks) return err; } -void nilfs_dispose_used_segments(struct the_nilfs *nilfs) -{ - struct nilfs_segment_entry *ent, *n; - - /* nilfs->sem must be locked by the caller. */ - if (!nilfs_loaded(nilfs)) - return; - - list_for_each_entry_safe(ent, n, &nilfs->ns_used_segments, list) { - list_del_init(&ent->list); - nilfs_segment_usage_clear_volatile_active(ent->raw_su); - nilfs_close_segment_entry(ent, nilfs->ns_sufile); - nilfs_free_segment_entry(ent); - } -} - int nilfs_near_disk_full(struct the_nilfs *nilfs) { struct inode *sufile = nilfs->ns_sufile; diff --git a/fs/nilfs2/the_nilfs.h b/fs/nilfs2/the_nilfs.h index 75da3730696..af566e78f7a 100644 --- a/fs/nilfs2/the_nilfs.h +++ b/fs/nilfs2/the_nilfs.h @@ -51,7 +51,6 @@ enum { * @ns_writer_refcount: number of referrers on ns_writer * @ns_sbh: buffer head of the on-disk super block * @ns_sbp: pointer to the super block data - * @ns_used_segments: list of full segments in volatile active state * @ns_supers: list of nilfs super block structs * @ns_seg_seq: segment sequence counter * @ns_segnum: index number of the latest full segment. @@ -65,6 +64,7 @@ enum { * @ns_last_pseg: start block number of the latest segment * @ns_last_seq: sequence value of the latest segment * @ns_last_cno: checkpoint number of the latest segment + * @ns_prot_seq: least sequence number of segments which must not be reclaimed * @ns_free_segments_count: counter of free segments * @ns_segctor_sem: segment constructor semaphore * @ns_dat: DAT file inode @@ -103,7 +103,6 @@ struct the_nilfs { */ struct buffer_head *ns_sbh; struct nilfs_super_block *ns_sbp; - struct list_head ns_used_segments; unsigned ns_mount_state; struct list_head ns_supers; @@ -132,6 +131,7 @@ struct the_nilfs { sector_t ns_last_pseg; u64 ns_last_seq; __u64 ns_last_cno; + u64 ns_prot_seq; unsigned long ns_free_segments_count; struct rw_semaphore ns_segctor_sem; @@ -188,7 +188,6 @@ void put_nilfs(struct the_nilfs *); int init_nilfs(struct the_nilfs *, struct nilfs_sb_info *, char *); int load_nilfs(struct the_nilfs *, struct nilfs_sb_info *); int nilfs_count_free_blocks(struct the_nilfs *, sector_t *); -void nilfs_dispose_used_segments(struct the_nilfs *); int nilfs_checkpoint_is_mounted(struct the_nilfs *, __u64, int); int nilfs_near_disk_full(struct the_nilfs *); -- cgit v1.2.3 From 1f5abe7e7dbcd83e73212c6cb135a6106cea6a0b Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Mon, 6 Apr 2009 19:01:55 -0700 Subject: nilfs2: replace BUG_ON and BUG calls triggerable from ioctl Pekka Enberg advised me: > It would be nice if BUG(), BUG_ON(), and panic() calls would be > converted to proper error handling using WARN_ON() calls. The BUG() > call in nilfs_cpfile_delete_checkpoints(), for example, looks to be > triggerable from user-space via the ioctl() system call. This will follow the comment and keep them to a minimum. Acked-by: Pekka Enberg Signed-off-by: Ryusuke Konishi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/nilfs2/btree.c | 27 +++++++----------- fs/nilfs2/cpfile.c | 38 +++++++++++++------------ fs/nilfs2/dat.c | 15 +++++----- fs/nilfs2/direct.c | 13 +++++++-- fs/nilfs2/inode.c | 19 ++++--------- fs/nilfs2/ioctl.c | 63 ++++++++++++++++++++++++++++-------------- fs/nilfs2/mdt.c | 4 +-- fs/nilfs2/nilfs.h | 1 - fs/nilfs2/page.c | 10 +++---- fs/nilfs2/recovery.c | 3 -- fs/nilfs2/segment.c | 78 ++++++++++++++++------------------------------------ fs/nilfs2/sufile.c | 25 ++++++++++------- fs/nilfs2/super.c | 8 ++++-- 13 files changed, 144 insertions(+), 160 deletions(-) (limited to 'fs/nilfs2') diff --git a/fs/nilfs2/btree.c b/fs/nilfs2/btree.c index 53f0d4c31cb..6b37a276729 100644 --- a/fs/nilfs2/btree.c +++ b/fs/nilfs2/btree.c @@ -425,7 +425,6 @@ static int nilfs_btree_node_lookup(const struct nilfs_btree *btree, index++; out: - BUG_ON(indexp == NULL); *indexp = index; return s == 0; @@ -477,8 +476,6 @@ static int nilfs_btree_do_lookup(const struct nilfs_btree *btree, __u64 ptr; int level, index, found, ret; - BUG_ON(minlevel <= NILFS_BTREE_LEVEL_DATA); - node = nilfs_btree_get_root(btree); level = nilfs_btree_node_get_level(btree, node); if ((level < minlevel) || @@ -505,7 +502,7 @@ static int nilfs_btree_do_lookup(const struct nilfs_btree *btree, if (index < nilfs_btree_node_nchildren_max(btree, node)) ptr = nilfs_btree_node_get_ptr(btree, node, index); else { - BUG_ON(found || level != NILFS_BTREE_LEVEL_NODE_MIN); + WARN_ON(found || level != NILFS_BTREE_LEVEL_NODE_MIN); /* insert */ ptr = NILFS_BMAP_INVALID_PTR; } @@ -1366,7 +1363,7 @@ static int nilfs_btree_prepare_delete(struct nilfs_btree *btree, } else { /* no siblings */ /* the only child of the root node */ - BUG_ON(level != nilfs_btree_height(btree) - 2); + WARN_ON(level != nilfs_btree_height(btree) - 2); if (nilfs_btree_node_get_nchildren(btree, node) - 1 <= NILFS_BTREE_ROOT_NCHILDREN_MAX) { path[level].bp_op = nilfs_btree_shrink; @@ -1543,7 +1540,7 @@ static int nilfs_btree_gather_data(struct nilfs_bmap *bmap, break; case 3: nchildren = nilfs_btree_node_get_nchildren(btree, root); - BUG_ON(nchildren > 1); + WARN_ON(nchildren > 1); ptr = nilfs_btree_node_get_ptr(btree, root, nchildren - 1); ret = nilfs_bmap_get_block(bmap, ptr, &bh); if (ret < 0) @@ -1552,7 +1549,7 @@ static int nilfs_btree_gather_data(struct nilfs_bmap *bmap, break; default: node = NULL; - BUG(); + return -EINVAL; } nchildren = nilfs_btree_node_get_nchildren(btree, node); @@ -1833,14 +1830,13 @@ static int nilfs_btree_prepare_propagate_v(struct nilfs_btree *btree, while ((++level < nilfs_btree_height(btree) - 1) && !buffer_dirty(path[level].bp_bh)) { - BUG_ON(buffer_nilfs_volatile(path[level].bp_bh)); + WARN_ON(buffer_nilfs_volatile(path[level].bp_bh)); ret = nilfs_btree_prepare_update_v(btree, path, level); if (ret < 0) goto out; } /* success */ - BUG_ON(maxlevelp == NULL); *maxlevelp = level - 1; return 0; @@ -1909,7 +1905,7 @@ static int nilfs_btree_propagate(const struct nilfs_bmap *bmap, __u64 key; int level, ret; - BUG_ON(!buffer_dirty(bh)); + WARN_ON(!buffer_dirty(bh)); btree = (struct nilfs_btree *)bmap; path = nilfs_btree_alloc_path(btree); @@ -1928,12 +1924,9 @@ static int nilfs_btree_propagate(const struct nilfs_bmap *bmap, ret = nilfs_btree_do_lookup(btree, path, key, NULL, level + 1); if (ret < 0) { - /* BUG_ON(ret == -ENOENT); */ - if (ret == -ENOENT) { + if (unlikely(ret == -ENOENT)) printk(KERN_CRIT "%s: key = %llu, level == %d\n", __func__, (unsigned long long)key, level); - BUG(); - } goto out; } @@ -2117,7 +2110,7 @@ static int nilfs_btree_assign(struct nilfs_bmap *bmap, ret = nilfs_btree_do_lookup(btree, path, key, NULL, level + 1); if (ret < 0) { - BUG_ON(ret == -ENOENT); + WARN_ON(ret == -ENOENT); goto out; } @@ -2175,12 +2168,12 @@ static int nilfs_btree_mark(struct nilfs_bmap *bmap, __u64 key, int level) ret = nilfs_btree_do_lookup(btree, path, key, &ptr, level + 1); if (ret < 0) { - BUG_ON(ret == -ENOENT); + WARN_ON(ret == -ENOENT); goto out; } ret = nilfs_bmap_get_block(&btree->bt_bmap, ptr, &bh); if (ret < 0) { - BUG_ON(ret == -ENOENT); + WARN_ON(ret == -ENOENT); goto out; } diff --git a/fs/nilfs2/cpfile.c b/fs/nilfs2/cpfile.c index 218b3441850..e90b60dfced 100644 --- a/fs/nilfs2/cpfile.c +++ b/fs/nilfs2/cpfile.c @@ -40,10 +40,7 @@ nilfs_cpfile_checkpoints_per_block(const struct inode *cpfile) static unsigned long nilfs_cpfile_get_blkoff(const struct inode *cpfile, __u64 cno) { - __u64 tcno; - - BUG_ON(cno == 0); /* checkpoint number 0 is invalid */ - tcno = cno + NILFS_MDT(cpfile)->mi_first_entry_offset - 1; + __u64 tcno = cno + NILFS_MDT(cpfile)->mi_first_entry_offset - 1; do_div(tcno, nilfs_cpfile_checkpoints_per_block(cpfile)); return (unsigned long)tcno; } @@ -96,7 +93,7 @@ nilfs_cpfile_block_sub_valid_checkpoints(const struct inode *cpfile, struct nilfs_checkpoint *cp = kaddr + bh_offset(bh); unsigned int count; - BUG_ON(le32_to_cpu(cp->cp_checkpoints_count) < n); + WARN_ON(le32_to_cpu(cp->cp_checkpoints_count) < n); count = le32_to_cpu(cp->cp_checkpoints_count) - n; cp->cp_checkpoints_count = cpu_to_le32(count); return count; @@ -178,6 +175,8 @@ static inline int nilfs_cpfile_delete_checkpoint_block(struct inode *cpfile, * %-ENOMEM - Insufficient amount of memory available. * * %-ENOENT - No such checkpoint. + * + * %-EINVAL - invalid checkpoint. */ int nilfs_cpfile_get_checkpoint(struct inode *cpfile, __u64 cno, @@ -191,8 +190,9 @@ int nilfs_cpfile_get_checkpoint(struct inode *cpfile, void *kaddr; int ret; - BUG_ON(cno < 1 || cno > nilfs_mdt_cno(cpfile) || - (cno < nilfs_mdt_cno(cpfile) && create)); + if (unlikely(cno < 1 || cno > nilfs_mdt_cno(cpfile) || + (cno < nilfs_mdt_cno(cpfile) && create))) + return -EINVAL; down_write(&NILFS_MDT(cpfile)->mi_sem); @@ -288,12 +288,11 @@ int nilfs_cpfile_delete_checkpoints(struct inode *cpfile, unsigned long tnicps; int ret, ncps, nicps, count, i; - if ((start == 0) || (start > end)) { - printk(KERN_CRIT "%s: start = %llu, end = %llu\n", - __func__, - (unsigned long long)start, - (unsigned long long)end); - BUG(); + if (unlikely(start == 0 || start > end)) { + printk(KERN_ERR "%s: invalid range of checkpoint numbers: " + "[%llu, %llu)\n", __func__, + (unsigned long long)start, (unsigned long long)end); + return -EINVAL; } /* cannot delete the latest checkpoint */ @@ -323,7 +322,7 @@ int nilfs_cpfile_delete_checkpoints(struct inode *cpfile, cpfile, cno, cp_bh, kaddr); nicps = 0; for (i = 0; i < ncps; i++, cp = (void *)cp + cpsz) { - BUG_ON(nilfs_checkpoint_snapshot(cp)); + WARN_ON(nilfs_checkpoint_snapshot(cp)); if (!nilfs_checkpoint_invalid(cp)) { nilfs_checkpoint_set_invalid(cp); nicps++; @@ -393,6 +392,8 @@ static ssize_t nilfs_cpfile_do_get_cpinfo(struct inode *cpfile, __u64 *cnop, int n, ret; int ncps, i; + if (cno == 0) + return -ENOENT; /* checkpoint number 0 is invalid */ down_read(&NILFS_MDT(cpfile)->mi_sem); for (n = 0; cno < cur_cno && n < nci; cno += ncps) { @@ -532,9 +533,6 @@ int nilfs_cpfile_delete_checkpoint(struct inode *cpfile, __u64 cno) ssize_t nci; int ret; - /* checkpoint number 0 is invalid */ - if (cno == 0) - return -ENOENT; nci = nilfs_cpfile_do_get_cpinfo(cpfile, &tcno, &ci, 1); if (nci < 0) return nci; @@ -582,6 +580,8 @@ static int nilfs_cpfile_set_snapshot(struct inode *cpfile, __u64 cno) void *kaddr; int ret; + if (cno == 0) + return -ENOENT; /* checkpoint number 0 is invalid */ down_write(&NILFS_MDT(cpfile)->mi_sem); ret = nilfs_cpfile_get_checkpoint_block(cpfile, cno, 0, &cp_bh); @@ -698,6 +698,8 @@ static int nilfs_cpfile_clear_snapshot(struct inode *cpfile, __u64 cno) void *kaddr; int ret; + if (cno == 0) + return -ENOENT; /* checkpoint number 0 is invalid */ down_write(&NILFS_MDT(cpfile)->mi_sem); ret = nilfs_cpfile_get_checkpoint_block(cpfile, cno, 0, &cp_bh); @@ -813,6 +815,8 @@ int nilfs_cpfile_is_snapshot(struct inode *cpfile, __u64 cno) void *kaddr; int ret; + if (cno == 0) + return -ENOENT; /* checkpoint number 0 is invalid */ down_read(&NILFS_MDT(cpfile)->mi_sem); ret = nilfs_cpfile_get_checkpoint_block(cpfile, cno, 0, &bh); diff --git a/fs/nilfs2/dat.c b/fs/nilfs2/dat.c index 9360920f7d3..bb8a5818e7f 100644 --- a/fs/nilfs2/dat.c +++ b/fs/nilfs2/dat.c @@ -135,7 +135,7 @@ int nilfs_dat_prepare_start(struct inode *dat, struct nilfs_palloc_req *req) int ret; ret = nilfs_dat_prepare_entry(dat, req, 0); - BUG_ON(ret == -ENOENT); + WARN_ON(ret == -ENOENT); return ret; } @@ -157,7 +157,6 @@ void nilfs_dat_commit_start(struct inode *dat, struct nilfs_palloc_req *req, (unsigned long long)le64_to_cpu(entry->de_start), (unsigned long long)le64_to_cpu(entry->de_end), (unsigned long long)le64_to_cpu(entry->de_blocknr)); - BUG(); } entry->de_blocknr = cpu_to_le64(blocknr); kunmap_atomic(kaddr, KM_USER0); @@ -180,7 +179,7 @@ int nilfs_dat_prepare_end(struct inode *dat, struct nilfs_palloc_req *req) ret = nilfs_dat_prepare_entry(dat, req, 0); if (ret < 0) { - BUG_ON(ret == -ENOENT); + WARN_ON(ret == -ENOENT); return ret; } @@ -216,7 +215,7 @@ void nilfs_dat_commit_end(struct inode *dat, struct nilfs_palloc_req *req, end = start = le64_to_cpu(entry->de_start); if (!dead) { end = nilfs_mdt_cno(dat); - BUG_ON(start > end); + WARN_ON(start > end); } entry->de_end = cpu_to_le64(end); blocknr = le64_to_cpu(entry->de_blocknr); @@ -324,14 +323,16 @@ int nilfs_dat_move(struct inode *dat, __u64 vblocknr, sector_t blocknr) return ret; kaddr = kmap_atomic(entry_bh->b_page, KM_USER0); entry = nilfs_palloc_block_get_entry(dat, vblocknr, entry_bh, kaddr); - if (entry->de_blocknr == cpu_to_le64(0)) { + if (unlikely(entry->de_blocknr == cpu_to_le64(0))) { printk(KERN_CRIT "%s: vbn = %llu, [%llu, %llu)\n", __func__, (unsigned long long)vblocknr, (unsigned long long)le64_to_cpu(entry->de_start), (unsigned long long)le64_to_cpu(entry->de_end)); - BUG(); + kunmap_atomic(kaddr, KM_USER0); + brelse(entry_bh); + return -EINVAL; } - BUG_ON(blocknr == 0); + WARN_ON(blocknr == 0); entry->de_blocknr = cpu_to_le64(blocknr); kunmap_atomic(kaddr, KM_USER0); diff --git a/fs/nilfs2/direct.c b/fs/nilfs2/direct.c index e3ec2485008..c6379e48278 100644 --- a/fs/nilfs2/direct.c +++ b/fs/nilfs2/direct.c @@ -210,7 +210,6 @@ static int nilfs_direct_last_key(const struct nilfs_bmap *bmap, __u64 *keyp) if (lastkey == NILFS_DIRECT_KEY_MAX + 1) return -ENOENT; - BUG_ON(keyp == NULL); *keyp = lastkey; return 0; @@ -366,9 +365,17 @@ static int nilfs_direct_assign(struct nilfs_bmap *bmap, direct = (struct nilfs_direct *)bmap; key = nilfs_bmap_data_get_key(bmap, *bh); - BUG_ON(key > NILFS_DIRECT_KEY_MAX); + if (unlikely(key > NILFS_DIRECT_KEY_MAX)) { + printk(KERN_CRIT "%s: invalid key: %llu\n", __func__, + (unsigned long long)key); + return -EINVAL; + } ptr = nilfs_direct_get_ptr(direct, key); - BUG_ON(ptr == NILFS_BMAP_INVALID_PTR); + if (unlikely(ptr == NILFS_BMAP_INVALID_PTR)) { + printk(KERN_CRIT "%s: invalid pointer: %llu\n", __func__, + (unsigned long long)ptr); + return -EINVAL; + } return direct->d_ops->dop_assign(direct, key, ptr, bh, blocknr, binfo); diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c index 4bf1e2c5bac..b6536bb2a32 100644 --- a/fs/nilfs2/inode.c +++ b/fs/nilfs2/inode.c @@ -61,12 +61,6 @@ int nilfs_get_block(struct inode *inode, sector_t blkoff, map_bh(bh_result, inode->i_sb, blknum); goto out; } - if (unlikely(ret == 1)) { - printk(KERN_ERR "nilfs_get_block: bmap_lookup returns " - "buffer_head pointer (blkoff=%llu, blknum=%lu)\n", - (unsigned long long)blkoff, blknum); - BUG(); - } /* data block was not found */ if (ret == -ENOENT && create) { struct nilfs_transaction_info ti; @@ -85,14 +79,14 @@ int nilfs_get_block(struct inode *inode, sector_t blkoff, * However, the page having this block must * be locked in this case. */ - printk(KERN_ERR + printk(KERN_WARNING "nilfs_get_block: a race condition " "while inserting a data block. " "(inode number=%lu, file block " "offset=%llu)\n", inode->i_ino, (unsigned long long)blkoff); - BUG(); + err = 0; } else if (err == -EINVAL) { nilfs_error(inode->i_sb, __func__, "broken bmap (inode=%lu)\n", @@ -621,7 +615,6 @@ void nilfs_truncate(struct inode *inode) struct nilfs_transaction_info ti; struct super_block *sb = inode->i_sb; struct nilfs_inode_info *ii = NILFS_I(inode); - int ret; if (!test_bit(NILFS_I_BMAP, &ii->i_state)) return; @@ -630,8 +623,7 @@ void nilfs_truncate(struct inode *inode) blocksize = sb->s_blocksize; blkoff = (inode->i_size + blocksize - 1) >> sb->s_blocksize_bits; - ret = nilfs_transaction_begin(sb, &ti, 0); - BUG_ON(ret); + nilfs_transaction_begin(sb, &ti, 0); /* never fails */ block_truncate_page(inode->i_mapping, inode->i_size, nilfs_get_block); @@ -652,7 +644,6 @@ void nilfs_delete_inode(struct inode *inode) struct nilfs_transaction_info ti; struct super_block *sb = inode->i_sb; struct nilfs_inode_info *ii = NILFS_I(inode); - int err; if (unlikely(is_bad_inode(inode))) { if (inode->i_data.nrpages) @@ -660,8 +651,8 @@ void nilfs_delete_inode(struct inode *inode) clear_inode(inode); return; } - err = nilfs_transaction_begin(sb, &ti, 0); - BUG_ON(err); + nilfs_transaction_begin(sb, &ti, 0); /* never fails */ + if (inode->i_data.nrpages) truncate_inode_pages(&inode->i_data, 0); diff --git a/fs/nilfs2/ioctl.c b/fs/nilfs2/ioctl.c index cfb27892ffe..108d281ebca 100644 --- a/fs/nilfs2/ioctl.c +++ b/fs/nilfs2/ioctl.c @@ -489,14 +489,14 @@ nilfs_ioctl_do_mark_blocks_dirty(struct the_nilfs *nilfs, __u64 *posp, ret = nilfs_mdt_mark_block_dirty(dat, bdescs[i].bd_offset); if (ret < 0) { - BUG_ON(ret == -ENOENT); + WARN_ON(ret == -ENOENT); return ret; } } else { ret = nilfs_bmap_mark(bmap, bdescs[i].bd_offset, bdescs[i].bd_level); if (ret < 0) { - BUG_ON(ret == -ENOENT); + WARN_ON(ret == -ENOENT); return ret; } } @@ -519,7 +519,8 @@ nilfs_ioctl_do_free_segments(struct the_nilfs *nilfs, __u64 *posp, int flags, struct nilfs_sb_info *sbi = nilfs_get_writer(nilfs); int ret; - BUG_ON(!sbi); + if (unlikely(!sbi)) + return -EROFS; ret = nilfs_segctor_add_segments_to_be_freed( NILFS_SC(sbi), buf, nmembs); nilfs_put_writer(nilfs); @@ -539,6 +540,7 @@ int nilfs_ioctl_prepare_clean_segments(struct the_nilfs *nilfs, void __user *argp) { struct nilfs_argv argv[5]; + const char *msg; int dir, ret; if (copy_from_user(argv, argp, sizeof(argv))) @@ -546,31 +548,50 @@ int nilfs_ioctl_prepare_clean_segments(struct the_nilfs *nilfs, dir = _IOC_WRITE; ret = nilfs_ioctl_move_blocks(nilfs, &argv[0], dir); - if (ret < 0) - goto out_move_blks; + if (ret < 0) { + msg = "cannot read source blocks"; + goto failed; + } ret = nilfs_ioctl_delete_checkpoints(nilfs, &argv[1], dir); - if (ret < 0) - goto out_del_cps; + if (ret < 0) { + /* + * can safely abort because checkpoints can be removed + * independently. + */ + msg = "cannot delete checkpoints"; + goto failed; + } ret = nilfs_ioctl_free_vblocknrs(nilfs, &argv[2], dir); - if (ret < 0) - goto out_free_vbns; + if (ret < 0) { + /* + * can safely abort because DAT file is updated atomically + * using a copy-on-write technique. + */ + msg = "cannot delete virtual blocks from DAT file"; + goto failed; + } ret = nilfs_ioctl_mark_blocks_dirty(nilfs, &argv[3], dir); - if (ret < 0) - goto out_free_vbns; + if (ret < 0) { + /* + * can safely abort because the operation is nondestructive. + */ + msg = "cannot mark copying blocks dirty"; + goto failed; + } ret = nilfs_ioctl_free_segments(nilfs, &argv[4], dir); - if (ret < 0) - goto out_free_segs; - + if (ret < 0) { + /* + * can safely abort because this operation is atomic. + */ + msg = "cannot set segments to be freed"; + goto failed; + } return 0; - out_free_segs: - BUG(); /* XXX: not implemented yet */ - out_free_vbns: - BUG();/* XXX: not implemented yet */ - out_del_cps: - BUG();/* XXX: not implemented yet */ - out_move_blks: + failed: nilfs_remove_all_gcinode(nilfs); + printk(KERN_ERR "NILFS: GC failed during preparation: %s: err=%d\n", + msg, ret); return ret; } diff --git a/fs/nilfs2/mdt.c b/fs/nilfs2/mdt.c index e0a632b86fe..47dd815433f 100644 --- a/fs/nilfs2/mdt.c +++ b/fs/nilfs2/mdt.c @@ -154,10 +154,8 @@ nilfs_mdt_submit_block(struct inode *inode, unsigned long blkoff, ret = -EBUSY; goto failed_bh; } - } else { - BUG_ON(mode != READ); + } else /* mode == READ */ lock_buffer(bh); - } if (buffer_uptodate(bh)) { unlock_buffer(bh); diff --git a/fs/nilfs2/nilfs.h b/fs/nilfs2/nilfs.h index d08fb1ce501..a7f5bc724e3 100644 --- a/fs/nilfs2/nilfs.h +++ b/fs/nilfs2/nilfs.h @@ -173,7 +173,6 @@ static inline void nilfs_set_transaction_flag(unsigned int flag) { struct nilfs_transaction_info *ti = current->journal_info; - BUG_ON(!ti); ti->ti_flags |= flag; } diff --git a/fs/nilfs2/page.c b/fs/nilfs2/page.c index 7b18be8cd47..1bfbba9c0e9 100644 --- a/fs/nilfs2/page.c +++ b/fs/nilfs2/page.c @@ -417,7 +417,7 @@ repeat: dpage = find_lock_page(dmap, offset); if (dpage) { /* override existing page on the destination cache */ - BUG_ON(PageDirty(dpage)); + WARN_ON(PageDirty(dpage)); nilfs_copy_page(dpage, page, 0); unlock_page(dpage); page_cache_release(dpage); @@ -427,17 +427,15 @@ repeat: /* move the page to the destination cache */ spin_lock_irq(&smap->tree_lock); page2 = radix_tree_delete(&smap->page_tree, offset); - if (unlikely(page2 != page)) - NILFS_PAGE_BUG(page, "page removal failed " - "(offset=%lu, page2=%p)", - offset, page2); + WARN_ON(page2 != page); + smap->nrpages--; spin_unlock_irq(&smap->tree_lock); spin_lock_irq(&dmap->tree_lock); err = radix_tree_insert(&dmap->page_tree, offset, page); if (unlikely(err < 0)) { - BUG_ON(err == -EEXIST); + WARN_ON(err == -EEXIST); page->mapping = NULL; page_cache_release(page); /* for cache */ } else { diff --git a/fs/nilfs2/recovery.c b/fs/nilfs2/recovery.c index a4253f34e13..ef387b19682 100644 --- a/fs/nilfs2/recovery.c +++ b/fs/nilfs2/recovery.c @@ -92,9 +92,6 @@ static int nilfs_warn_segment_error(int err) printk(KERN_WARNING "NILFS warning: No super root in the last segment\n"); break; - case NILFS_SEG_VALID: - default: - BUG(); } return -EINVAL; } diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c index 24d0fbd4271..9a87410985b 100644 --- a/fs/nilfs2/segment.c +++ b/fs/nilfs2/segment.c @@ -334,8 +334,7 @@ static void nilfs_transaction_lock(struct nilfs_sb_info *sbi, { struct nilfs_transaction_info *cur_ti = current->journal_info; - BUG_ON(cur_ti); - BUG_ON(!ti); + WARN_ON(cur_ti); ti->ti_flags = NILFS_TI_WRITER; ti->ti_count = 0; ti->ti_save = cur_ti; @@ -546,8 +545,6 @@ static int nilfs_collect_file_data(struct nilfs_sc_info *sci, { int err; - /* BUG_ON(!buffer_dirty(bh)); */ - /* excluded by scan_dirty_data_buffers() */ err = nilfs_bmap_propagate(NILFS_I(inode)->i_bmap, bh); if (unlikely(err < 0)) return nilfs_handle_bmap_error(err, __func__, inode, @@ -566,8 +563,6 @@ static int nilfs_collect_file_node(struct nilfs_sc_info *sci, { int err; - /* BUG_ON(!buffer_dirty(bh)); */ - /* excluded by scan_dirty_node_buffers() */ err = nilfs_bmap_propagate(NILFS_I(inode)->i_bmap, bh); if (unlikely(err < 0)) return nilfs_handle_bmap_error(err, __func__, inode, @@ -579,7 +574,7 @@ static int nilfs_collect_file_bmap(struct nilfs_sc_info *sci, struct buffer_head *bh, struct inode *inode) { - BUG_ON(!buffer_dirty(bh)); + WARN_ON(!buffer_dirty(bh)); return nilfs_segctor_add_file_block(sci, bh, inode, sizeof(__le64)); } @@ -628,7 +623,7 @@ static int nilfs_collect_dat_data(struct nilfs_sc_info *sci, static int nilfs_collect_dat_bmap(struct nilfs_sc_info *sci, struct buffer_head *bh, struct inode *inode) { - BUG_ON(!buffer_dirty(bh)); + WARN_ON(!buffer_dirty(bh)); return nilfs_segctor_add_file_block(sci, bh, inode, sizeof(struct nilfs_binfo_dat)); } @@ -862,9 +857,9 @@ static int nilfs_segctor_create_checkpoint(struct nilfs_sc_info *sci) nilfs_mdt_mark_dirty(nilfs->ns_cpfile); nilfs_cpfile_put_checkpoint( nilfs->ns_cpfile, nilfs->ns_cno, bh_cp); - } else { - BUG_ON(err == -EINVAL || err == -ENOENT); - } + } else + WARN_ON(err == -EINVAL || err == -ENOENT); + return err; } @@ -879,7 +874,7 @@ static int nilfs_segctor_fill_in_checkpoint(struct nilfs_sc_info *sci) err = nilfs_cpfile_get_checkpoint(nilfs->ns_cpfile, nilfs->ns_cno, 0, &raw_cp, &bh_cp); if (unlikely(err)) { - BUG_ON(err == -EINVAL || err == -ENOENT); + WARN_ON(err == -EINVAL || err == -ENOENT); goto failed_ibh; } raw_cp->cp_snapshot_list.ssl_next = 0; @@ -944,7 +939,6 @@ static void nilfs_fill_in_super_root_crc(struct buffer_head *bh_sr, u32 seed) (struct nilfs_super_root *)bh_sr->b_data; u32 crc; - BUG_ON(NILFS_SR_BYTES > bh_sr->b_size); crc = crc32_le(seed, (unsigned char *)raw_sr + sizeof(raw_sr->sr_sum), NILFS_SR_BYTES - sizeof(raw_sr->sr_sum)); @@ -1022,8 +1016,7 @@ static void nilfs_segctor_cancel_free_segments(struct nilfs_sc_info *sci, if (!(ent->flags & NILFS_SLH_FREED)) break; err = nilfs_sufile_cancel_free(sufile, ent->segnum); - BUG_ON(err); - + WARN_ON(err); /* do not happen */ ent->flags &= ~NILFS_SLH_FREED; } } @@ -1472,7 +1465,7 @@ static int nilfs_segctor_extend_segments(struct nilfs_sc_info *sci, failed: list_for_each_entry_safe(segbuf, n, &list, sb_list) { ret = nilfs_sufile_free(sufile, segbuf->sb_nextnum); - BUG_ON(ret); + WARN_ON(ret); /* never fails */ list_del_init(&segbuf->sb_list); nilfs_segbuf_free(segbuf); } @@ -1488,7 +1481,7 @@ static void nilfs_segctor_free_incomplete_segments(struct nilfs_sc_info *sci, segbuf = NILFS_FIRST_SEGBUF(&sci->sc_segbufs); if (nilfs->ns_nextnum != segbuf->sb_nextnum) { ret = nilfs_sufile_free(nilfs->ns_sufile, segbuf->sb_nextnum); - BUG_ON(ret); + WARN_ON(ret); /* never fails */ } if (segbuf->sb_io_error) { /* Case 1: The first segment failed */ @@ -1504,7 +1497,7 @@ static void nilfs_segctor_free_incomplete_segments(struct nilfs_sc_info *sci, list_for_each_entry_continue(segbuf, &sci->sc_segbufs, sb_list) { ret = nilfs_sufile_free(nilfs->ns_sufile, segbuf->sb_nextnum); - BUG_ON(ret); + WARN_ON(ret); /* never fails */ if (!done && segbuf->sb_io_error) { if (segbuf->sb_segnum != nilfs->ns_nextnum) /* Case 2: extended segment (!= next) failed */ @@ -1558,7 +1551,7 @@ static void nilfs_segctor_update_segusage(struct nilfs_sc_info *sci, list_for_each_entry(segbuf, &sci->sc_segbufs, sb_list) { ret = nilfs_sufile_get_segment_usage(sufile, segbuf->sb_segnum, &raw_su, &bh_su); - BUG_ON(ret); /* always succeed because bh_su is dirty */ + WARN_ON(ret); /* always succeed because bh_su is dirty */ live_blocks = segbuf->sb_sum.nblocks + (segbuf->sb_pseg_start - segbuf->sb_fseg_start); raw_su->su_lastmod = cpu_to_le64(sci->sc_seg_ctime); @@ -1579,7 +1572,7 @@ static void nilfs_segctor_cancel_segusage(struct nilfs_sc_info *sci, segbuf = NILFS_FIRST_SEGBUF(&sci->sc_segbufs); ret = nilfs_sufile_get_segment_usage(sufile, segbuf->sb_segnum, &raw_su, &bh_su); - BUG_ON(ret); /* always succeed because bh_su is dirty */ + WARN_ON(ret); /* always succeed because bh_su is dirty */ raw_su->su_nblocks = cpu_to_le32(segbuf->sb_pseg_start - segbuf->sb_fseg_start); nilfs_sufile_put_segment_usage(sufile, segbuf->sb_segnum, bh_su); @@ -1587,7 +1580,7 @@ static void nilfs_segctor_cancel_segusage(struct nilfs_sc_info *sci, list_for_each_entry_continue(segbuf, &sci->sc_segbufs, sb_list) { ret = nilfs_sufile_get_segment_usage(sufile, segbuf->sb_segnum, &raw_su, &bh_su); - BUG_ON(ret); /* always succeed */ + WARN_ON(ret); /* always succeed */ raw_su->su_nblocks = 0; nilfs_sufile_put_segment_usage(sufile, segbuf->sb_segnum, bh_su); @@ -1606,7 +1599,7 @@ static void nilfs_segctor_truncate_segments(struct nilfs_sc_info *sci, list_del_init(&segbuf->sb_list); sci->sc_segbuf_nblocks -= segbuf->sb_rest_blocks; ret = nilfs_sufile_free(sufile, segbuf->sb_nextnum); - BUG_ON(ret); + WARN_ON(ret); nilfs_segbuf_free(segbuf); } } @@ -1923,7 +1916,6 @@ static int nilfs_page_has_uncleared_buffer(struct page *page) static void __nilfs_end_page_io(struct page *page, int err) { - /* BUG_ON(err > 0); */ if (!err) { if (!nilfs_page_buffers_clean(page)) __set_page_dirty_nobuffers(page); @@ -2262,7 +2254,7 @@ static int nilfs_segctor_deactivate_segments(struct nilfs_sc_info *sci, if (unlikely(err)) goto failed; nilfs_segment_usage_clear_active(ent->raw_su); - BUG_ON(!buffer_dirty(ent->bh_su)); + WARN_ON(!buffer_dirty(ent->bh_su)); } return 0; @@ -2340,7 +2332,6 @@ static int nilfs_segctor_do_construct(struct nilfs_sc_info *sci, int mode) /* Avoid empty segment */ if (sci->sc_stage.scnt == NILFS_ST_DONE && NILFS_SEG_EMPTY(&sci->sc_curseg->sb_sum)) { - BUG_ON(mode == SC_LSEG_SR); nilfs_segctor_end_construction(sci, nilfs, 1); goto out; } @@ -2479,9 +2470,8 @@ int nilfs_segctor_add_segments_to_be_freed(struct nilfs_sc_info *sci, struct inode *sufile = nilfs->ns_sufile; LIST_HEAD(list); __u64 *pnum; - const char *flag_name; size_t i; - int err, err2 = 0; + int err; for (pnum = segnum, i = 0; i < nsegs; pnum++, i++) { ent = nilfs_alloc_segment_entry(*pnum); @@ -2495,32 +2485,12 @@ int nilfs_segctor_add_segments_to_be_freed(struct nilfs_sc_info *sci, if (unlikely(err)) goto failed; - if (unlikely(le32_to_cpu(ent->raw_su->su_flags) != - (1UL << NILFS_SEGMENT_USAGE_DIRTY))) { - if (nilfs_segment_usage_clean(ent->raw_su)) - flag_name = "clean"; - else if (nilfs_segment_usage_active(ent->raw_su)) - flag_name = "active"; - else if (nilfs_segment_usage_volatile_active( - ent->raw_su)) - flag_name = "volatile active"; - else if (!nilfs_segment_usage_dirty(ent->raw_su)) - flag_name = "non-dirty"; - else - flag_name = "erroneous"; - - printk(KERN_ERR - "NILFS: %s segment is requested to be cleaned " - "(segnum=%llu)\n", - flag_name, (unsigned long long)ent->segnum); - err2 = -EINVAL; - } + if (unlikely(!nilfs_segment_usage_dirty(ent->raw_su))) + printk(KERN_WARNING "NILFS: unused segment is " + "requested to be cleaned (segnum=%llu)\n", + (unsigned long long)ent->segnum); nilfs_close_segment_entry(ent, sufile); } - if (unlikely(err2)) { - err = err2; - goto failed; - } list_splice(&list, sci->sc_cleaning_segments.prev); return 0; @@ -2705,8 +2675,6 @@ struct nilfs_segctor_req { static void nilfs_segctor_accept(struct nilfs_sc_info *sci, struct nilfs_segctor_req *req) { - BUG_ON(!sci); - req->sc_err = req->sb_err = 0; spin_lock(&sci->sc_state_lock); req->seq_accepted = sci->sc_seq_request; @@ -3107,7 +3075,7 @@ static void nilfs_segctor_destroy(struct nilfs_sc_info *sci) if (flag || nilfs_segctor_confirm(sci)) nilfs_segctor_write_out(sci); - BUG_ON(!list_empty(&sci->sc_copied_buffers)); + WARN_ON(!list_empty(&sci->sc_copied_buffers)); if (!list_empty(&sci->sc_dirty_files)) { nilfs_warning(sbi->s_super, __func__, @@ -3120,7 +3088,7 @@ static void nilfs_segctor_destroy(struct nilfs_sc_info *sci) if (!list_empty(&sci->sc_cleaning_segments)) nilfs_dispose_segment_list(&sci->sc_cleaning_segments); - BUG_ON(!list_empty(&sci->sc_segbufs)); + WARN_ON(!list_empty(&sci->sc_segbufs)); if (sci->sc_sketch_inode) { iput(sci->sc_sketch_inode); diff --git a/fs/nilfs2/sufile.c b/fs/nilfs2/sufile.c index cc714c72b13..4cf47e03a3a 100644 --- a/fs/nilfs2/sufile.c +++ b/fs/nilfs2/sufile.c @@ -231,10 +231,11 @@ int nilfs_sufile_cancel_free(struct inode *sufile, __u64 segnum) kaddr = kmap_atomic(su_bh->b_page, KM_USER0); su = nilfs_sufile_block_get_segment_usage( sufile, segnum, su_bh, kaddr); - if (!nilfs_segment_usage_clean(su)) { - printk(KERN_CRIT "%s: segment %llu must be clean\n", + if (unlikely(!nilfs_segment_usage_clean(su))) { + printk(KERN_WARNING "%s: segment %llu must be clean\n", __func__, (unsigned long long)segnum); - BUG(); + kunmap_atomic(kaddr, KM_USER0); + goto out_su_bh; } nilfs_segment_usage_set_dirty(su); kunmap_atomic(kaddr, KM_USER0); @@ -249,11 +250,10 @@ int nilfs_sufile_cancel_free(struct inode *sufile, __u64 segnum) nilfs_mdt_mark_buffer_dirty(su_bh); nilfs_mdt_mark_dirty(sufile); + out_su_bh: brelse(su_bh); - out_header: brelse(header_bh); - out_sem: up_write(&NILFS_MDT(sufile)->mi_sem); return ret; @@ -317,7 +317,7 @@ int nilfs_sufile_freev(struct inode *sufile, __u64 *segnum, size_t nsegs) kaddr = kmap_atomic(su_bh[i]->b_page, KM_USER0); su = nilfs_sufile_block_get_segment_usage( sufile, segnum[i], su_bh[i], kaddr); - BUG_ON(nilfs_segment_usage_error(su)); + WARN_ON(nilfs_segment_usage_error(su)); nilfs_segment_usage_set_clean(su); kunmap_atomic(kaddr, KM_USER0); nilfs_mdt_mark_buffer_dirty(su_bh[i]); @@ -385,8 +385,8 @@ int nilfs_sufile_get_segment_usage(struct inode *sufile, __u64 segnum, int ret; /* segnum is 0 origin */ - BUG_ON(segnum >= nilfs_sufile_get_nsegments(sufile)); - + if (segnum >= nilfs_sufile_get_nsegments(sufile)) + return -EINVAL; down_write(&NILFS_MDT(sufile)->mi_sem); ret = nilfs_sufile_get_segment_usage_block(sufile, segnum, 1, &bh); if (ret < 0) @@ -515,6 +515,8 @@ int nilfs_sufile_get_ncleansegs(struct inode *sufile, unsigned long *nsegsp) * %-EIO - I/O error. * * %-ENOMEM - Insufficient amount of memory available. + * + * %-EINVAL - Invalid segment usage number. */ int nilfs_sufile_set_error(struct inode *sufile, __u64 segnum) { @@ -524,8 +526,11 @@ int nilfs_sufile_set_error(struct inode *sufile, __u64 segnum) void *kaddr; int ret; - BUG_ON(segnum >= nilfs_sufile_get_nsegments(sufile)); - + if (unlikely(segnum >= nilfs_sufile_get_nsegments(sufile))) { + printk(KERN_WARNING "%s: invalid segment number: %llu\n", + __func__, (unsigned long long)segnum); + return -EINVAL; + } down_write(&NILFS_MDT(sufile)->mi_sem); ret = nilfs_sufile_get_header_block(sufile, &header_bh); diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c index 2f0e9f7bf15..d0639a6aae9 100644 --- a/fs/nilfs2/super.c +++ b/fs/nilfs2/super.c @@ -841,8 +841,11 @@ nilfs_fill_super(struct super_block *sb, void *data, int silent, if (sb->s_flags & MS_RDONLY) { if (nilfs_test_opt(sbi, SNAPSHOT)) { - if (!nilfs_cpfile_is_snapshot(nilfs->ns_cpfile, - sbi->s_snapshot_cno)) { + err = nilfs_cpfile_is_snapshot(nilfs->ns_cpfile, + sbi->s_snapshot_cno); + if (err < 0) + goto failed_sbi; + if (!err) { printk(KERN_ERR "NILFS: The specified checkpoint is " "not a snapshot " @@ -1163,7 +1166,6 @@ nilfs_get_sb(struct file_system_type *fs_type, int flags, } else { struct nilfs_sb_info *sbi = NILFS_SB(s); - BUG_ON(!sbi || !sbi->s_nilfs); /* * s_umount protects super_block from unmount process; * It covers pointers of nilfs_sb_info and the_nilfs. -- cgit v1.2.3 From e62687468588f47c32256e3c8c36157c40111b6e Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Mon, 6 Apr 2009 19:01:55 -0700 Subject: nilfs2: super block operations fix endian bug This adds a missing endian conversion of checksum field in the super block. This fixes compatibility issue on big endian machines which will come to surface after supporting recovery of super block. Signed-off-by: Ryusuke Konishi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/nilfs2/super.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'fs/nilfs2') diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c index d0639a6aae9..b7519c327ba 100644 --- a/fs/nilfs2/super.c +++ b/fs/nilfs2/super.c @@ -287,9 +287,9 @@ int nilfs_commit_super(struct nilfs_sb_info *sbi) sbp->s_free_blocks_count = cpu_to_le64(nfreeblocks); sbp->s_wtime = cpu_to_le64(get_seconds()); sbp->s_sum = 0; - sbp->s_sum = crc32_le(nilfs->ns_crc_seed, (unsigned char *)sbp, - le16_to_cpu(sbp->s_bytes)); - + sbp->s_sum = cpu_to_le32(crc32_le(nilfs->ns_crc_seed, + (unsigned char *)sbp, + le16_to_cpu(sbp->s_bytes))); sbi->s_super->s_dirt = 0; return nilfs_sync_super(sbi); } -- cgit v1.2.3 From 458c5b0822a669d170fdb7bb16c9145f652ebe06 Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Mon, 6 Apr 2009 19:01:56 -0700 Subject: nilfs2: clean up sketch file The sketch file is a file to mark checkpoints with user data. It was experimentally introduced in the original implementation, and now obsolete. The file was handled differently with regular files; the file size got truncated when a checkpoint was created. This stops the special treatment and will treat it as a regular file. Most users are not affected because mkfs.nilfs2 no longer makes this file. Signed-off-by: Ryusuke Konishi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/nilfs2/inode.c | 35 ++--------------------------------- fs/nilfs2/segment.c | 49 +------------------------------------------------ fs/nilfs2/segment.h | 8 -------- 3 files changed, 3 insertions(+), 89 deletions(-) (limited to 'fs/nilfs2') diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c index b6536bb2a32..a1922b17662 100644 --- a/fs/nilfs2/inode.c +++ b/fs/nilfs2/inode.c @@ -418,30 +418,6 @@ int nilfs_read_inode_common(struct inode *inode, return 0; } -static int nilfs_read_sketch_inode(struct inode *inode) -{ - struct nilfs_sb_info *sbi = NILFS_SB(inode->i_sb); - int err = 0; - - if (sbi->s_snapshot_cno) { - struct the_nilfs *nilfs = sbi->s_nilfs; - struct buffer_head *bh_cp; - struct nilfs_checkpoint *raw_cp; - - err = nilfs_cpfile_get_checkpoint( - nilfs->ns_cpfile, sbi->s_snapshot_cno, 0, &raw_cp, - &bh_cp); - if (likely(!err)) { - if (!nilfs_checkpoint_sketch(raw_cp)) - inode->i_size = 0; - nilfs_cpfile_put_checkpoint( - nilfs->ns_cpfile, sbi->s_snapshot_cno, bh_cp); - } - inode->i_flags |= S_NOCMTIME; - } - return err; -} - static int __nilfs_read_inode(struct super_block *sb, unsigned long ino, struct inode *inode) { @@ -469,11 +445,6 @@ static int __nilfs_read_inode(struct super_block *sb, unsigned long ino, inode->i_op = &nilfs_file_inode_operations; inode->i_fop = &nilfs_file_operations; inode->i_mapping->a_ops = &nilfs_aops; - if (unlikely(inode->i_ino == NILFS_SKETCH_INO)) { - err = nilfs_read_sketch_inode(inode); - if (unlikely(err)) - goto failed_unmap; - } } else if (S_ISDIR(inode->i_mode)) { inode->i_op = &nilfs_dir_inode_operations; inode->i_fop = &nilfs_dir_operations; @@ -742,8 +713,7 @@ int nilfs_set_file_dirty(struct nilfs_sb_info *sbi, struct inode *inode, atomic_add(nr_dirty, &sbi->s_nilfs->ns_ndirtyblks); - if (test_and_set_bit(NILFS_I_DIRTY, &ii->i_state) || - unlikely(inode->i_ino == NILFS_SKETCH_INO)) + if (test_and_set_bit(NILFS_I_DIRTY, &ii->i_state)) return 0; spin_lock(&sbi->s_inode_lock); @@ -811,7 +781,6 @@ void nilfs_dirty_inode(struct inode *inode) return; } nilfs_transaction_begin(inode->i_sb, &ti, 0); - if (likely(inode->i_ino != NILFS_SKETCH_INO)) - nilfs_mark_inode_dirty(inode); + nilfs_mark_inode_dirty(inode); nilfs_transaction_commit(inode->i_sb); /* never fails */ } diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c index 9a87410985b..981c34a0cd6 100644 --- a/fs/nilfs2/segment.c +++ b/fs/nilfs2/segment.c @@ -67,7 +67,6 @@ enum { NILFS_ST_INIT = 0, NILFS_ST_GC, /* Collecting dirty blocks for GC */ NILFS_ST_FILE, - NILFS_ST_SKETCH, NILFS_ST_IFILE, NILFS_ST_CPFILE, NILFS_ST_SUFILE, @@ -887,8 +886,7 @@ static int nilfs_segctor_fill_in_checkpoint(struct nilfs_sc_info *sci) cpu_to_le64(sci->sc_nblk_inc + sci->sc_nblk_this_inc); raw_cp->cp_create = cpu_to_le64(sci->sc_seg_ctime); raw_cp->cp_cno = cpu_to_le64(nilfs->ns_cno); - if (sci->sc_sketch_inode && i_size_read(sci->sc_sketch_inode) > 0) - nilfs_checkpoint_set_sketch(raw_cp); + nilfs_write_inode_common(sbi->s_ifile, &raw_cp->cp_ifile_inode, 1); nilfs_cpfile_put_checkpoint(nilfs->ns_cpfile, nilfs->ns_cno, bh_cp); return 0; @@ -923,11 +921,6 @@ static void nilfs_segctor_fill_in_file_bmap(struct nilfs_sc_info *sci, nilfs_fill_in_file_bmap(ifile, ii); set_bit(NILFS_I_COLLECTED, &ii->i_state); } - if (sci->sc_sketch_inode) { - ii = NILFS_I(sci->sc_sketch_inode); - if (test_bit(NILFS_I_DIRTY, &ii->i_state)) - nilfs_fill_in_file_bmap(ifile, ii); - } } /* @@ -1228,26 +1221,6 @@ static int nilfs_segctor_collect_blocks(struct nilfs_sc_info *sci, int mode) sci->sc_stage.scnt = NILFS_ST_DONE; return 0; } - sci->sc_stage.scnt++; /* Fall through */ - case NILFS_ST_SKETCH: - if (mode == SC_LSEG_SR && sci->sc_sketch_inode) { - ii = NILFS_I(sci->sc_sketch_inode); - if (test_bit(NILFS_I_DIRTY, &ii->i_state)) { - sci->sc_sketch_inode->i_ctime.tv_sec - = sci->sc_seg_ctime; - sci->sc_sketch_inode->i_mtime.tv_sec - = sci->sc_seg_ctime; - err = nilfs_mark_inode_dirty( - sci->sc_sketch_inode); - if (unlikely(err)) - goto break_or_fail; - } - err = nilfs_segctor_scan_file(sci, - sci->sc_sketch_inode, - &nilfs_sc_file_ops); - if (unlikely(err)) - goto break_or_fail; - } sci->sc_stage.scnt++; sci->sc_stage.flags |= NILFS_CF_IFILE_STARTED; /* Fall through */ @@ -2385,13 +2358,6 @@ static int nilfs_segctor_do_construct(struct nilfs_sc_info *sci, int mode) } while (sci->sc_stage.scnt != NILFS_ST_DONE); - /* Clearing sketch data */ - if (has_sr && sci->sc_sketch_inode) { - if (i_size_read(sci->sc_sketch_inode) == 0) - clear_bit(NILFS_I_DIRTY, - &NILFS_I(sci->sc_sketch_inode)->i_state); - i_size_write(sci->sc_sketch_inode, 0); - } out: nilfs_segctor_destroy_segment_buffers(sci); nilfs_segctor_check_out_files(sci, sbi); @@ -2971,11 +2937,6 @@ static int nilfs_segctor_init(struct nilfs_sc_info *sci, struct nilfs_recovery_info *ri) { int err; - struct inode *inode = nilfs_iget(sci->sc_super, NILFS_SKETCH_INO); - - sci->sc_sketch_inode = IS_ERR(inode) ? NULL : inode; - if (sci->sc_sketch_inode) - i_size_write(sci->sc_sketch_inode, 0); sci->sc_seq_done = sci->sc_seq_request; if (ri) @@ -2987,10 +2948,6 @@ static int nilfs_segctor_init(struct nilfs_sc_info *sci, if (ri) list_splice_init(&sci->sc_active_segments, ri->ri_used_segments.prev); - if (sci->sc_sketch_inode) { - iput(sci->sc_sketch_inode); - sci->sc_sketch_inode = NULL; - } } return err; } @@ -3090,10 +3047,6 @@ static void nilfs_segctor_destroy(struct nilfs_sc_info *sci) WARN_ON(!list_empty(&sci->sc_segbufs)); - if (sci->sc_sketch_inode) { - iput(sci->sc_sketch_inode); - sci->sc_sketch_inode = NULL; - } down_write(&sbi->s_nilfs->ns_segctor_sem); kfree(sci); diff --git a/fs/nilfs2/segment.h b/fs/nilfs2/segment.h index 2dd39da9f38..fbd162d7170 100644 --- a/fs/nilfs2/segment.h +++ b/fs/nilfs2/segment.h @@ -108,7 +108,6 @@ struct nilfs_segsum_pointer { * @sc_nblk_this_inc: Number of blocks included in the current logical segment * @sc_seg_ctime: Creation time * @sc_flags: Internal flags - * @sc_sketch_inode: Inode of the sketch file * @sc_state_lock: spinlock for sc_state and so on * @sc_state: Segctord state flags * @sc_flush_request: inode bitmap of metadata files to be flushed @@ -158,13 +157,6 @@ struct nilfs_sc_info { unsigned long sc_flags; - /* - * Pointer to an inode of the sketch. - * This pointer is kept only while it contains data. - * We protect it with a semaphore of the segment constructor. - */ - struct inode *sc_sketch_inode; - spinlock_t sc_state_lock; unsigned long sc_state; unsigned long sc_flush_request; -- cgit v1.2.3 From c96fa464a567a2a8796009af0e79bc68af73f485 Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Mon, 6 Apr 2009 19:01:57 -0700 Subject: nilfs2: mark minor flag for checkpoint created by internal operation Nilfs creates checkpoints even for garbage collection or metadata updates such as checkpoint mode change. So, user often sees checkpoints created only by such internal operations. This is inconvenient in some situations. For example, application that monitors checkpoints and changes them to snapshots, will fall into an infinite loop because it cannot distinguish internally created checkpoints. This patch solves this sort of problem by adding a flag to checkpoint for identification. Signed-off-by: Ryusuke Konishi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/nilfs2/segment.c | 9 +++++++++ fs/nilfs2/segment.h | 3 +++ 2 files changed, 12 insertions(+) (limited to 'fs/nilfs2') diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c index 981c34a0cd6..2879704509f 100644 --- a/fs/nilfs2/segment.c +++ b/fs/nilfs2/segment.c @@ -462,6 +462,9 @@ static void nilfs_segctor_begin_finfo(struct nilfs_sc_info *sci, sci->sc_binfo_ptr = sci->sc_finfo_ptr; nilfs_segctor_map_segsum_entry( sci, &sci->sc_binfo_ptr, sizeof(struct nilfs_finfo)); + + if (inode->i_sb && !test_bit(NILFS_SC_HAVE_DELTA, &sci->sc_flags)) + set_bit(NILFS_SC_HAVE_DELTA, &sci->sc_flags); /* skip finfo */ } @@ -887,6 +890,11 @@ static int nilfs_segctor_fill_in_checkpoint(struct nilfs_sc_info *sci) raw_cp->cp_create = cpu_to_le64(sci->sc_seg_ctime); raw_cp->cp_cno = cpu_to_le64(nilfs->ns_cno); + if (test_bit(NILFS_SC_HAVE_DELTA, &sci->sc_flags)) + nilfs_checkpoint_clear_minor(raw_cp); + else + nilfs_checkpoint_set_minor(raw_cp); + nilfs_write_inode_common(sbi->s_ifile, &raw_cp->cp_ifile_inode, 1); nilfs_cpfile_put_checkpoint(nilfs->ns_cpfile, nilfs->ns_cno, bh_cp); return 0; @@ -2091,6 +2099,7 @@ static void nilfs_segctor_complete_write(struct nilfs_sc_info *sci) nilfs_set_last_segment(nilfs, segbuf->sb_pseg_start, segbuf->sb_sum.seg_seq, nilfs->ns_cno); + clear_bit(NILFS_SC_HAVE_DELTA, &sci->sc_flags); clear_bit(NILFS_SC_DIRTY, &sci->sc_flags); set_bit(NILFS_SC_SUPER_ROOT, &sci->sc_flags); } else diff --git a/fs/nilfs2/segment.h b/fs/nilfs2/segment.h index fbd162d7170..bb7d417fec6 100644 --- a/fs/nilfs2/segment.h +++ b/fs/nilfs2/segment.h @@ -185,6 +185,9 @@ enum { NILFS_SC_SUPER_ROOT, /* The latest segment has a super root */ NILFS_SC_PRIOR_FLUSH, /* Requesting immediate flush without making a checkpoint */ + NILFS_SC_HAVE_DELTA, /* Next checkpoint will have update of files + other than DAT, cpfile, sufile, or files + moved by GC */ }; /* sc_state */ -- cgit v1.2.3 From cece552074c591970353ad48308d65f110aeaf28 Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Mon, 6 Apr 2009 19:01:58 -0700 Subject: nilfs2: simplify handling of active state of segments will reduce some lines of segment constructor. Previously, the state was complexly controlled through a list of segments in order to keep consistency in meta data of usage state of segments. Instead, this presents ``calculated'' active flags to userland cleaner program and stop maintaining its real flag on disk. Only by this fake flag, the cleaner cannot exactly know if each segment is reclaimable or not. However, the recent extension of nilfs_sustat ioctl struct (nilfs2-extend-nilfs_sustat-ioctl-struct.patch) can prevent the cleaner from reclaiming in-use segment wrongly. So, now I can apply this for simplification. Signed-off-by: Ryusuke Konishi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/nilfs2/recovery.c | 12 +--- fs/nilfs2/segbuf.c | 24 +------ fs/nilfs2/segbuf.h | 6 +- fs/nilfs2/segment.c | 173 ++++---------------------------------------------- fs/nilfs2/segment.h | 5 +- fs/nilfs2/sufile.c | 8 ++- fs/nilfs2/super.c | 4 +- fs/nilfs2/the_nilfs.h | 5 ++ 8 files changed, 29 insertions(+), 208 deletions(-) (limited to 'fs/nilfs2') diff --git a/fs/nilfs2/recovery.c b/fs/nilfs2/recovery.c index ef387b19682..6ab4c8fc5e9 100644 --- a/fs/nilfs2/recovery.c +++ b/fs/nilfs2/recovery.c @@ -463,16 +463,6 @@ static int nilfs_prepare_segment_for_recovery(struct the_nilfs *nilfs, nilfs_free_segment_entry(ent); } - /* - * The segment having the latest super root is active, and - * should be deactivated on the next construction for recovery. - */ - err = -ENOMEM; - ent = nilfs_alloc_segment_entry(segnum[0]); - if (unlikely(!ent)) - goto failed; - list_add_tail(&ent->list, &ri->ri_used_segments); - /* Allocate new segments for recovery */ err = nilfs_sufile_alloc(sufile, &segnum[0]); if (unlikely(err)) @@ -757,7 +747,7 @@ int nilfs_recover_logical_segments(struct the_nilfs *nilfs, goto failed; } - err = nilfs_attach_segment_constructor(sbi, ri); + err = nilfs_attach_segment_constructor(sbi); if (unlikely(err)) goto failed; diff --git a/fs/nilfs2/segbuf.c b/fs/nilfs2/segbuf.c index 3d3ea8351f6..1e68821b4a9 100644 --- a/fs/nilfs2/segbuf.c +++ b/fs/nilfs2/segbuf.c @@ -64,27 +64,17 @@ struct nilfs_segment_buffer *nilfs_segbuf_new(struct super_block *sb) INIT_LIST_HEAD(&segbuf->sb_list); INIT_LIST_HEAD(&segbuf->sb_segsum_buffers); INIT_LIST_HEAD(&segbuf->sb_payload_buffers); - segbuf->sb_segent = NULL; return segbuf; } void nilfs_segbuf_free(struct nilfs_segment_buffer *segbuf) { - struct nilfs_segment_entry *ent = segbuf->sb_segent; - - if (ent != NULL && list_empty(&ent->list)) { - /* free isolated segment list head */ - nilfs_free_segment_entry(segbuf->sb_segent); - segbuf->sb_segent = NULL; - } kmem_cache_free(nilfs_segbuf_cachep, segbuf); } -int nilfs_segbuf_map(struct nilfs_segment_buffer *segbuf, __u64 segnum, +void nilfs_segbuf_map(struct nilfs_segment_buffer *segbuf, __u64 segnum, unsigned long offset, struct the_nilfs *nilfs) { - struct nilfs_segment_entry *ent; - segbuf->sb_segnum = segnum; nilfs_get_segment_range(nilfs, segnum, &segbuf->sb_fseg_start, &segbuf->sb_fseg_end); @@ -92,18 +82,6 @@ int nilfs_segbuf_map(struct nilfs_segment_buffer *segbuf, __u64 segnum, segbuf->sb_pseg_start = segbuf->sb_fseg_start + offset; segbuf->sb_rest_blocks = segbuf->sb_fseg_end - segbuf->sb_pseg_start + 1; - - /* Attach a segment list head */ - ent = segbuf->sb_segent; - if (ent == NULL) { - segbuf->sb_segent = nilfs_alloc_segment_entry(segnum); - if (unlikely(!segbuf->sb_segent)) - return -ENOMEM; - } else { - BUG_ON(ent->bh_su || !list_empty(&ent->list)); - ent->segnum = segnum; - } - return 0; } void nilfs_segbuf_set_next_segnum(struct nilfs_segment_buffer *segbuf, diff --git a/fs/nilfs2/segbuf.h b/fs/nilfs2/segbuf.h index 25f2a5faa48..0c3076f4e59 100644 --- a/fs/nilfs2/segbuf.h +++ b/fs/nilfs2/segbuf.h @@ -68,7 +68,6 @@ struct nilfs_segsum_info { * struct nilfs_segment_buffer - Segment buffer * @sb_super: back pointer to a superblock struct * @sb_list: List head to chain this structure - * @sb_segent: Pointer for attaching a segment entry * @sb_sum: On-memory segment summary * @sb_segnum: Index number of the full segment * @sb_nextnum: Index number of the next full segment @@ -83,7 +82,6 @@ struct nilfs_segsum_info { struct nilfs_segment_buffer { struct super_block *sb_super; struct list_head sb_list; - struct nilfs_segment_entry *sb_segent; /* Segment information */ struct nilfs_segsum_info sb_sum; @@ -125,8 +123,8 @@ int __init nilfs_init_segbuf_cache(void); void nilfs_destroy_segbuf_cache(void); struct nilfs_segment_buffer *nilfs_segbuf_new(struct super_block *); void nilfs_segbuf_free(struct nilfs_segment_buffer *); -int nilfs_segbuf_map(struct nilfs_segment_buffer *, __u64, unsigned long, - struct the_nilfs *); +void nilfs_segbuf_map(struct nilfs_segment_buffer *, __u64, unsigned long, + struct the_nilfs *); void nilfs_segbuf_set_next_segnum(struct nilfs_segment_buffer *, __u64, struct the_nilfs *); int nilfs_segbuf_reset(struct nilfs_segment_buffer *, unsigned, time_t); diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c index 2879704509f..e43558d50e7 100644 --- a/fs/nilfs2/segment.c +++ b/fs/nilfs2/segment.c @@ -1304,25 +1304,6 @@ static int nilfs_segctor_collect_blocks(struct nilfs_sc_info *sci, int mode) return err; } -static int nilfs_segctor_terminate_segment(struct nilfs_sc_info *sci, - struct nilfs_segment_buffer *segbuf, - struct inode *sufile) -{ - struct nilfs_segment_entry *ent = segbuf->sb_segent; - int err; - - err = nilfs_open_segment_entry(ent, sufile); - if (unlikely(err)) - return err; - nilfs_mdt_mark_buffer_dirty(ent->bh_su); - nilfs_mdt_mark_dirty(sufile); - nilfs_close_segment_entry(ent, sufile); - - list_add_tail(&ent->list, &sci->sc_active_segments); - segbuf->sb_segent = NULL; - return 0; -} - static int nilfs_touch_segusage(struct inode *sufile, __u64 segnum) { struct buffer_head *bh_su; @@ -1342,7 +1323,6 @@ static int nilfs_segctor_begin_construction(struct nilfs_sc_info *sci, struct the_nilfs *nilfs) { struct nilfs_segment_buffer *segbuf, *n; - struct inode *sufile = nilfs->ns_sufile; __u64 nextnum; int err; @@ -1354,28 +1334,22 @@ static int nilfs_segctor_begin_construction(struct nilfs_sc_info *sci, } else segbuf = NILFS_FIRST_SEGBUF(&sci->sc_segbufs); - err = nilfs_segbuf_map(segbuf, nilfs->ns_segnum, - nilfs->ns_pseg_offset, nilfs); - if (unlikely(err)) - return err; + nilfs_segbuf_map(segbuf, nilfs->ns_segnum, nilfs->ns_pseg_offset, + nilfs); if (segbuf->sb_rest_blocks < NILFS_PSEG_MIN_BLOCKS) { - err = nilfs_segctor_terminate_segment(sci, segbuf, sufile); - if (unlikely(err)) - return err; - nilfs_shift_to_next_segment(nilfs); - err = nilfs_segbuf_map(segbuf, nilfs->ns_segnum, 0, nilfs); + nilfs_segbuf_map(segbuf, nilfs->ns_segnum, 0, nilfs); } sci->sc_segbuf_nblocks = segbuf->sb_rest_blocks; - err = nilfs_touch_segusage(sufile, segbuf->sb_segnum); + err = nilfs_touch_segusage(nilfs->ns_sufile, segbuf->sb_segnum); if (unlikely(err)) return err; if (nilfs->ns_segnum == nilfs->ns_nextnum) { /* Start from the head of a new full segment */ - err = nilfs_sufile_alloc(sufile, &nextnum); + err = nilfs_sufile_alloc(nilfs->ns_sufile, &nextnum); if (unlikely(err)) return err; } else @@ -1390,7 +1364,7 @@ static int nilfs_segctor_begin_construction(struct nilfs_sc_info *sci, list_del_init(&segbuf->sb_list); nilfs_segbuf_free(segbuf); } - return err; + return 0; } static int nilfs_segctor_extend_segments(struct nilfs_sc_info *sci, @@ -1421,10 +1395,7 @@ static int nilfs_segctor_extend_segments(struct nilfs_sc_info *sci, goto failed; /* map this buffer to region of segment on-disk */ - err = nilfs_segbuf_map(segbuf, prev->sb_nextnum, 0, nilfs); - if (unlikely(err)) - goto failed_segbuf; - + nilfs_segbuf_map(segbuf, prev->sb_nextnum, 0, nilfs); sci->sc_segbuf_nblocks += segbuf->sb_rest_blocks; /* allocate the next next full segment */ @@ -2177,102 +2148,6 @@ static void nilfs_segctor_check_out_files(struct nilfs_sc_info *sci, spin_unlock(&sbi->s_inode_lock); } -/* - * Nasty routines to manipulate active flags on sufile. - * These would be removed in a future release. - */ -static void nilfs_segctor_reactivate_segments(struct nilfs_sc_info *sci, - struct the_nilfs *nilfs) -{ - struct nilfs_segment_buffer *segbuf, *last; - struct nilfs_segment_entry *ent, *n; - struct inode *sufile = nilfs->ns_sufile; - struct list_head *head; - - last = NILFS_LAST_SEGBUF(&sci->sc_segbufs); - nilfs_for_each_segbuf_before(segbuf, last, &sci->sc_segbufs) { - ent = segbuf->sb_segent; - if (!ent) - break; /* ignore unmapped segments (should check it?)*/ - nilfs_segment_usage_set_active(ent->raw_su); - nilfs_close_segment_entry(ent, sufile); - } - - head = &sci->sc_active_segments; - list_for_each_entry_safe(ent, n, head, list) { - nilfs_segment_usage_set_active(ent->raw_su); - nilfs_close_segment_entry(ent, sufile); - } -} - -static int nilfs_segctor_deactivate_segments(struct nilfs_sc_info *sci, - struct the_nilfs *nilfs) -{ - struct nilfs_segment_buffer *segbuf, *last; - struct nilfs_segment_entry *ent; - struct inode *sufile = nilfs->ns_sufile; - int err; - - last = NILFS_LAST_SEGBUF(&sci->sc_segbufs); - nilfs_for_each_segbuf_before(segbuf, last, &sci->sc_segbufs) { - /* - * Deactivate ongoing full segments. The last segment is kept - * active because it is a start point of recovery, and is not - * relocatable until the super block points to a newer - * checkpoint. - */ - ent = segbuf->sb_segent; - if (!ent) - break; /* ignore unmapped segments (should check it?)*/ - err = nilfs_open_segment_entry(ent, sufile); - if (unlikely(err)) - goto failed; - nilfs_segment_usage_clear_active(ent->raw_su); - BUG_ON(!buffer_dirty(ent->bh_su)); - } - - list_for_each_entry(ent, &sci->sc_active_segments, list) { - err = nilfs_open_segment_entry(ent, sufile); - if (unlikely(err)) - goto failed; - nilfs_segment_usage_clear_active(ent->raw_su); - WARN_ON(!buffer_dirty(ent->bh_su)); - } - return 0; - - failed: - nilfs_segctor_reactivate_segments(sci, nilfs); - return err; -} - -static void nilfs_segctor_bead_completed_segments(struct nilfs_sc_info *sci) -{ - struct nilfs_segment_buffer *segbuf, *last; - struct nilfs_segment_entry *ent; - - /* move each segbuf->sb_segent to the list of used active segments */ - last = NILFS_LAST_SEGBUF(&sci->sc_segbufs); - nilfs_for_each_segbuf_before(segbuf, last, &sci->sc_segbufs) { - ent = segbuf->sb_segent; - if (!ent) - break; /* ignore unmapped segments (should check it?)*/ - list_add_tail(&ent->list, &sci->sc_active_segments); - segbuf->sb_segent = NULL; - } -} - -static void nilfs_segctor_commit_deactivate_segments(struct nilfs_sc_info *sci, - struct the_nilfs *nilfs) -{ - struct nilfs_segment_entry *ent, *n; - - list_for_each_entry_safe(ent, n, &sci->sc_active_segments, list) { - list_del(&ent->list); - nilfs_close_segment_entry(ent, nilfs->ns_sufile); - nilfs_free_segment_entry(ent); - } -} - /* * Main procedure of segment constructor */ @@ -2322,11 +2197,6 @@ static int nilfs_segctor_do_construct(struct nilfs_sc_info *sci, int mode) if (unlikely(err)) goto failed; - if (has_sr) { - err = nilfs_segctor_deactivate_segments(sci, nilfs); - if (unlikely(err)) - goto failed; - } if (sci->sc_stage.flags & NILFS_CF_IFILE_STARTED) nilfs_segctor_fill_in_file_bmap(sci, sbi->s_ifile); @@ -2353,12 +2223,10 @@ static int nilfs_segctor_do_construct(struct nilfs_sc_info *sci, int mode) nilfs_segctor_complete_write(sci); /* Commit segments */ - nilfs_segctor_bead_completed_segments(sci); if (has_sr) { down_write(&nilfs->ns_sem); nilfs_update_last_segment(sbi, 1); up_write(&nilfs->ns_sem); - nilfs_segctor_commit_deactivate_segments(sci, nilfs); nilfs_segctor_commit_free_segments(sci); nilfs_segctor_clear_metadata_dirty(sci); } @@ -2379,8 +2247,6 @@ static int nilfs_segctor_do_construct(struct nilfs_sc_info *sci, int mode) failed_to_make_up: if (sci->sc_stage.flags & NILFS_CF_IFILE_STARTED) nilfs_redirty_inodes(&sci->sc_dirty_files); - if (has_sr) - nilfs_segctor_reactivate_segments(sci, nilfs); failed: if (nilfs_doing_gc()) @@ -2942,23 +2808,11 @@ static void nilfs_segctor_kill_thread(struct nilfs_sc_info *sci) } } -static int nilfs_segctor_init(struct nilfs_sc_info *sci, - struct nilfs_recovery_info *ri) +static int nilfs_segctor_init(struct nilfs_sc_info *sci) { - int err; - sci->sc_seq_done = sci->sc_seq_request; - if (ri) - list_splice_init(&ri->ri_used_segments, - sci->sc_active_segments.prev); - err = nilfs_segctor_start_thread(sci); - if (err) { - if (ri) - list_splice_init(&sci->sc_active_segments, - ri->ri_used_segments.prev); - } - return err; + return nilfs_segctor_start_thread(sci); } /* @@ -2982,7 +2836,6 @@ static struct nilfs_sc_info *nilfs_segctor_new(struct nilfs_sb_info *sbi) INIT_LIST_HEAD(&sci->sc_dirty_files); INIT_LIST_HEAD(&sci->sc_segbufs); INIT_LIST_HEAD(&sci->sc_gc_inodes); - INIT_LIST_HEAD(&sci->sc_active_segments); INIT_LIST_HEAD(&sci->sc_cleaning_segments); INIT_LIST_HEAD(&sci->sc_copied_buffers); @@ -3048,8 +2901,6 @@ static void nilfs_segctor_destroy(struct nilfs_sc_info *sci) "dirty file(s) after the final construction\n"); nilfs_dispose_list(sbi, &sci->sc_dirty_files, 1); } - if (!list_empty(&sci->sc_active_segments)) - nilfs_dispose_segment_list(&sci->sc_active_segments); if (!list_empty(&sci->sc_cleaning_segments)) nilfs_dispose_segment_list(&sci->sc_cleaning_segments); @@ -3064,7 +2915,6 @@ static void nilfs_segctor_destroy(struct nilfs_sc_info *sci) /** * nilfs_attach_segment_constructor - attach a segment constructor * @sbi: nilfs_sb_info - * @ri: nilfs_recovery_info * * nilfs_attach_segment_constructor() allocates a struct nilfs_sc_info, * initilizes it, and starts the segment constructor. @@ -3074,8 +2924,7 @@ static void nilfs_segctor_destroy(struct nilfs_sc_info *sci) * * %-ENOMEM - Insufficient memory available. */ -int nilfs_attach_segment_constructor(struct nilfs_sb_info *sbi, - struct nilfs_recovery_info *ri) +int nilfs_attach_segment_constructor(struct nilfs_sb_info *sbi) { struct the_nilfs *nilfs = sbi->s_nilfs; int err; @@ -3087,7 +2936,7 @@ int nilfs_attach_segment_constructor(struct nilfs_sb_info *sbi, return -ENOMEM; nilfs_attach_writer(nilfs, sbi); - err = nilfs_segctor_init(NILFS_SC(sbi), ri); + err = nilfs_segctor_init(NILFS_SC(sbi)); if (err) { nilfs_detach_writer(nilfs, sbi); kfree(sbi->s_sc_info); diff --git a/fs/nilfs2/segment.h b/fs/nilfs2/segment.h index bb7d417fec6..4a64eb82f1f 100644 --- a/fs/nilfs2/segment.h +++ b/fs/nilfs2/segment.h @@ -90,7 +90,6 @@ struct nilfs_segsum_pointer { * @sc_nblk_inc: Block count of current generation * @sc_dirty_files: List of files to be written * @sc_gc_inodes: List of GC inodes having blocks to be written - * @sc_active_segments: List of active segments that were already written out * @sc_cleaning_segments: List of segments to be freed through construction * @sc_copied_buffers: List of copied buffers (buffer heads) to freeze data * @sc_dsync_inode: inode whose data pages are written for a sync operation @@ -132,7 +131,6 @@ struct nilfs_sc_info { struct list_head sc_dirty_files; struct list_head sc_gc_inodes; - struct list_head sc_active_segments; struct list_head sc_cleaning_segments; struct list_head sc_copied_buffers; @@ -232,8 +230,7 @@ extern int nilfs_segctor_add_segments_to_be_freed(struct nilfs_sc_info *, __u64 *, size_t); extern void nilfs_segctor_clear_segments_to_be_freed(struct nilfs_sc_info *); -extern int nilfs_attach_segment_constructor(struct nilfs_sb_info *, - struct nilfs_recovery_info *); +extern int nilfs_attach_segment_constructor(struct nilfs_sb_info *); extern void nilfs_detach_segment_constructor(struct nilfs_sb_info *); /* recovery.c */ diff --git a/fs/nilfs2/sufile.c b/fs/nilfs2/sufile.c index 4cf47e03a3a..c774cf397e2 100644 --- a/fs/nilfs2/sufile.c +++ b/fs/nilfs2/sufile.c @@ -158,7 +158,6 @@ int nilfs_sufile_alloc(struct inode *sufile, __u64 *segnump) if (!nilfs_segment_usage_clean(su)) continue; /* found a clean segment */ - nilfs_segment_usage_set_active(su); nilfs_segment_usage_set_dirty(su); kunmap_atomic(kaddr, KM_USER0); @@ -591,6 +590,7 @@ ssize_t nilfs_sufile_get_suinfo(struct inode *sufile, __u64 segnum, struct buffer_head *su_bh; struct nilfs_segment_usage *su; size_t susz = NILFS_MDT(sufile)->mi_entry_size; + struct the_nilfs *nilfs = NILFS_MDT(sufile)->mi_nilfs; void *kaddr; unsigned long nsegs, segusages_per_block; ssize_t n; @@ -623,7 +623,11 @@ ssize_t nilfs_sufile_get_suinfo(struct inode *sufile, __u64 segnum, for (j = 0; j < n; j++, su = (void *)su + susz) { si[i + j].sui_lastmod = le64_to_cpu(su->su_lastmod); si[i + j].sui_nblocks = le32_to_cpu(su->su_nblocks); - si[i + j].sui_flags = le32_to_cpu(su->su_flags); + si[i + j].sui_flags = le32_to_cpu(su->su_flags) & + ~(1UL << NILFS_SEGMENT_USAGE_ACTIVE); + if (nilfs_segment_is_active(nilfs, segnum + i + j)) + si[i + j].sui_flags |= + (1UL << NILFS_SEGMENT_USAGE_ACTIVE); } kunmap_atomic(kaddr, KM_USER0); brelse(su_bh); diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c index b7519c327ba..ef31e9a51c8 100644 --- a/fs/nilfs2/super.c +++ b/fs/nilfs2/super.c @@ -868,7 +868,7 @@ nilfs_fill_super(struct super_block *sb, void *data, int silent, } if (!(sb->s_flags & MS_RDONLY)) { - err = nilfs_attach_segment_constructor(sbi, NULL); + err = nilfs_attach_segment_constructor(sbi); if (err) goto failed_checkpoint; } @@ -1001,7 +1001,7 @@ static int nilfs_remount(struct super_block *sb, int *flags, char *data) nilfs_clear_opt(sbi, SNAPSHOT); sbi->s_snapshot_cno = 0; - err = nilfs_attach_segment_constructor(sbi, NULL); + err = nilfs_attach_segment_constructor(sbi); if (err) goto rw_remount_failed; diff --git a/fs/nilfs2/the_nilfs.h b/fs/nilfs2/the_nilfs.h index af566e78f7a..d750e48257c 100644 --- a/fs/nilfs2/the_nilfs.h +++ b/fs/nilfs2/the_nilfs.h @@ -280,4 +280,9 @@ static inline __u64 nilfs_last_cno(struct the_nilfs *nilfs) return cno; } +static inline int nilfs_segment_is_active(struct the_nilfs *nilfs, __u64 n) +{ + return n == nilfs->ns_segnum || n == nilfs->ns_nextnum; +} + #endif /* _THE_NILFS_H */ -- cgit v1.2.3 From e339ad31f59925b48a92ee3947692fdf9758b8c7 Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Mon, 6 Apr 2009 19:01:59 -0700 Subject: nilfs2: introduce secondary super block The former versions didn't have extra super blocks. This improves the weak point by introducing another super block at unused region in tail of the partition. This doesn't break disk format compatibility; older versions just ingore the secondary super block, and new versions just recover it if it doesn't exist. The partition created by an old mkfs may not have unused region, but in that case, the secondary super block will not be added. This doesn't make more redundant copies of the super block; it is a future work. Signed-off-by: Ryusuke Konishi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/nilfs2/nilfs.h | 7 +- fs/nilfs2/recovery.c | 1 - fs/nilfs2/segment.c | 8 +- fs/nilfs2/segment.h | 2 - fs/nilfs2/super.c | 229 +++++++++++++++++++++----------------------------- fs/nilfs2/the_nilfs.c | 180 +++++++++++++++++++++++++++++++++------ fs/nilfs2/the_nilfs.h | 18 +++- 7 files changed, 270 insertions(+), 175 deletions(-) (limited to 'fs/nilfs2') diff --git a/fs/nilfs2/nilfs.h b/fs/nilfs2/nilfs.h index a7f5bc724e3..19af5ab8627 100644 --- a/fs/nilfs2/nilfs.h +++ b/fs/nilfs2/nilfs.h @@ -275,13 +275,10 @@ extern void nilfs_error(struct super_block *, const char *, const char *, ...) extern void nilfs_warning(struct super_block *, const char *, const char *, ...) __attribute__ ((format (printf, 3, 4))); extern struct nilfs_super_block * -nilfs_load_super_block(struct super_block *, struct buffer_head **); -extern struct nilfs_super_block * -nilfs_reload_super_block(struct super_block *, struct buffer_head **, int); +nilfs_read_super_block(struct super_block *, u64, int, struct buffer_head **); extern int nilfs_store_magic_and_option(struct super_block *, struct nilfs_super_block *, char *); -extern void nilfs_update_last_segment(struct nilfs_sb_info *, int); -extern int nilfs_commit_super(struct nilfs_sb_info *); +extern int nilfs_commit_super(struct nilfs_sb_info *, int); extern int nilfs_attach_checkpoint(struct nilfs_sb_info *, __u64); extern void nilfs_detach_checkpoint(struct nilfs_sb_info *); diff --git a/fs/nilfs2/recovery.c b/fs/nilfs2/recovery.c index 6ab4c8fc5e9..6ade0963fc1 100644 --- a/fs/nilfs2/recovery.c +++ b/fs/nilfs2/recovery.c @@ -870,7 +870,6 @@ int nilfs_search_super_root(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi, if (scan_newer) ri->ri_need_recovery = NILFS_RECOVERY_SR_UPDATED; else { - nilfs->ns_prot_seq = ssi.seg_seq; if (nilfs->ns_mount_state & NILFS_VALID_FS) goto super_root_found; scan_newer = 1; diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c index e43558d50e7..fb70ec3be20 100644 --- a/fs/nilfs2/segment.c +++ b/fs/nilfs2/segment.c @@ -2068,7 +2068,8 @@ static void nilfs_segctor_complete_write(struct nilfs_sc_info *sci) if (update_sr) { nilfs_set_last_segment(nilfs, segbuf->sb_pseg_start, - segbuf->sb_sum.seg_seq, nilfs->ns_cno); + segbuf->sb_sum.seg_seq, nilfs->ns_cno++); + sbi->s_super->s_dirt = 1; clear_bit(NILFS_SC_HAVE_DELTA, &sci->sc_flags); clear_bit(NILFS_SC_DIRTY, &sci->sc_flags); @@ -2224,9 +2225,6 @@ static int nilfs_segctor_do_construct(struct nilfs_sc_info *sci, int mode) /* Commit segments */ if (has_sr) { - down_write(&nilfs->ns_sem); - nilfs_update_last_segment(sbi, 1); - up_write(&nilfs->ns_sem); nilfs_segctor_commit_free_segments(sci); nilfs_segctor_clear_metadata_dirty(sci); } @@ -2564,7 +2562,7 @@ static int nilfs_segctor_construct(struct nilfs_sc_info *sci, if (test_bit(NILFS_SC_SUPER_ROOT, &sci->sc_flags) && nilfs_discontinued(nilfs)) { down_write(&nilfs->ns_sem); - req->sb_err = nilfs_commit_super(sbi); + req->sb_err = nilfs_commit_super(sbi, 0); up_write(&nilfs->ns_sem); } } diff --git a/fs/nilfs2/segment.h b/fs/nilfs2/segment.h index 4a64eb82f1f..a98fc1ed0bb 100644 --- a/fs/nilfs2/segment.h +++ b/fs/nilfs2/segment.h @@ -206,8 +206,6 @@ enum { logical segment with a super root */ #define NILFS_SC_DEFAULT_SR_FREQ 30 /* Maximum frequency of super root creation */ -#define NILFS_SC_DEFAULT_SB_FREQ 30 /* Minimum interval of periodical - update of superblock (reserved) */ /* * The default threshold amount of data, in block counts. diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c index ef31e9a51c8..e2ced824c62 100644 --- a/fs/nilfs2/super.c +++ b/fs/nilfs2/super.c @@ -103,8 +103,9 @@ void nilfs_error(struct super_block *sb, const char *function, down_write(&nilfs->ns_sem); if (!(nilfs->ns_mount_state & NILFS_ERROR_FS)) { nilfs->ns_mount_state |= NILFS_ERROR_FS; - nilfs->ns_sbp->s_state |= cpu_to_le16(NILFS_ERROR_FS); - nilfs_commit_super(sbi); + nilfs->ns_sbp[0]->s_state |= + cpu_to_le16(NILFS_ERROR_FS); + nilfs_commit_super(sbi, 1); } up_write(&nilfs->ns_sem); @@ -208,90 +209,106 @@ static void nilfs_clear_inode(struct inode *inode) nilfs_btnode_cache_clear(&ii->i_btnode_cache); } -/** - * nilfs_update_last_segment - change pointer to the latest segment - * @sbi: nilfs_sb_info - * @update_cno: flag whether to update checkpoint number. - * - * nilfs_update_last_segment() changes information in the super block - * after a partial segment is written out successfully. The super - * block is marked dirty. It will be written out at the next VFS sync - * operations such as sync_supers() and generic_shutdown_super(). - */ -void nilfs_update_last_segment(struct nilfs_sb_info *sbi, int update_cno) -{ - struct the_nilfs *nilfs = sbi->s_nilfs; - struct nilfs_super_block *sbp = nilfs->ns_sbp; - - /* nilfs->sem must be locked by the caller. */ - spin_lock(&nilfs->ns_last_segment_lock); - if (update_cno) - nilfs->ns_last_cno = nilfs->ns_cno++; - sbp->s_last_seq = cpu_to_le64(nilfs->ns_last_seq); - sbp->s_last_pseg = cpu_to_le64(nilfs->ns_last_pseg); - sbp->s_last_cno = cpu_to_le64(nilfs->ns_last_cno); - spin_unlock(&nilfs->ns_last_segment_lock); - - sbi->s_super->s_dirt = 1; /* must be set if delaying the call of - nilfs_commit_super() */ -} - -static int nilfs_sync_super(struct nilfs_sb_info *sbi) +static int nilfs_sync_super(struct nilfs_sb_info *sbi, int dupsb) { struct the_nilfs *nilfs = sbi->s_nilfs; int err; int barrier_done = 0; if (nilfs_test_opt(sbi, BARRIER)) { - set_buffer_ordered(nilfs->ns_sbh); + set_buffer_ordered(nilfs->ns_sbh[0]); barrier_done = 1; } retry: - set_buffer_dirty(nilfs->ns_sbh); - err = sync_dirty_buffer(nilfs->ns_sbh); + set_buffer_dirty(nilfs->ns_sbh[0]); + err = sync_dirty_buffer(nilfs->ns_sbh[0]); if (err == -EOPNOTSUPP && barrier_done) { nilfs_warning(sbi->s_super, __func__, "barrier-based sync failed. " "disabling barriers\n"); nilfs_clear_opt(sbi, BARRIER); barrier_done = 0; - clear_buffer_ordered(nilfs->ns_sbh); + clear_buffer_ordered(nilfs->ns_sbh[0]); goto retry; } - if (unlikely(err)) + if (unlikely(err)) { printk(KERN_ERR "NILFS: unable to write superblock (err=%d)\n", err); - else { + if (err == -EIO && nilfs->ns_sbh[1]) { + nilfs_fall_back_super_block(nilfs); + goto retry; + } + } else { + struct nilfs_super_block *sbp = nilfs->ns_sbp[0]; + + /* + * The latest segment becomes trailable from the position + * written in superblock. + */ clear_nilfs_discontinued(nilfs); - spin_lock(&nilfs->ns_last_segment_lock); - nilfs->ns_prot_seq = le64_to_cpu(nilfs->ns_sbp->s_last_seq); - spin_unlock(&nilfs->ns_last_segment_lock); + + /* update GC protection for recent segments */ + if (nilfs->ns_sbh[1]) { + sbp = NULL; + if (dupsb) { + set_buffer_dirty(nilfs->ns_sbh[1]); + if (!sync_dirty_buffer(nilfs->ns_sbh[1])) + sbp = nilfs->ns_sbp[1]; + } + } + if (sbp) { + spin_lock(&nilfs->ns_last_segment_lock); + nilfs->ns_prot_seq = le64_to_cpu(sbp->s_last_seq); + spin_unlock(&nilfs->ns_last_segment_lock); + } } return err; } -int nilfs_commit_super(struct nilfs_sb_info *sbi) +int nilfs_commit_super(struct nilfs_sb_info *sbi, int dupsb) { struct the_nilfs *nilfs = sbi->s_nilfs; - struct nilfs_super_block *sbp = nilfs->ns_sbp; + struct nilfs_super_block **sbp = nilfs->ns_sbp; sector_t nfreeblocks; + time_t t; int err; /* nilfs->sem must be locked by the caller. */ + if (sbp[0]->s_magic != NILFS_SUPER_MAGIC) { + if (sbp[1] && sbp[1]->s_magic == NILFS_SUPER_MAGIC) + nilfs_swap_super_block(nilfs); + else { + printk(KERN_CRIT "NILFS: superblock broke on dev %s\n", + sbi->s_super->s_id); + return -EIO; + } + } err = nilfs_count_free_blocks(nilfs, &nfreeblocks); if (unlikely(err)) { printk(KERN_ERR "NILFS: failed to count free blocks\n"); return err; } - sbp->s_free_blocks_count = cpu_to_le64(nfreeblocks); - sbp->s_wtime = cpu_to_le64(get_seconds()); - sbp->s_sum = 0; - sbp->s_sum = cpu_to_le32(crc32_le(nilfs->ns_crc_seed, - (unsigned char *)sbp, - le16_to_cpu(sbp->s_bytes))); + spin_lock(&nilfs->ns_last_segment_lock); + sbp[0]->s_last_seq = cpu_to_le64(nilfs->ns_last_seq); + sbp[0]->s_last_pseg = cpu_to_le64(nilfs->ns_last_pseg); + sbp[0]->s_last_cno = cpu_to_le64(nilfs->ns_last_cno); + spin_unlock(&nilfs->ns_last_segment_lock); + + t = get_seconds(); + nilfs->ns_sbwtime[0] = t; + sbp[0]->s_free_blocks_count = cpu_to_le64(nfreeblocks); + sbp[0]->s_wtime = cpu_to_le64(t); + sbp[0]->s_sum = 0; + sbp[0]->s_sum = cpu_to_le32(crc32_le(nilfs->ns_crc_seed, + (unsigned char *)sbp[0], + nilfs->ns_sbsize)); + if (dupsb && sbp[1]) { + memcpy(sbp[1], sbp[0], nilfs->ns_sbsize); + nilfs->ns_sbwtime[1] = t; + } sbi->s_super->s_dirt = 0; - return nilfs_sync_super(sbi); + return nilfs_sync_super(sbi, dupsb); } static void nilfs_put_super(struct super_block *sb) @@ -303,8 +320,8 @@ static void nilfs_put_super(struct super_block *sb) if (!(sb->s_flags & MS_RDONLY)) { down_write(&nilfs->ns_sem); - nilfs->ns_sbp->s_state = cpu_to_le16(nilfs->ns_mount_state); - nilfs_commit_super(sbi); + nilfs->ns_sbp[0]->s_state = cpu_to_le16(nilfs->ns_mount_state); + nilfs_commit_super(sbi, 1); up_write(&nilfs->ns_sem); } @@ -330,7 +347,7 @@ static void nilfs_put_super(struct super_block *sb) * 2. down_write(&nilfs->ns_sem) * * Inside NILFS, locking ns_sem is enough to protect s_dirt and the buffer - * of the super block (nilfs->ns_sbp). + * of the super block (nilfs->ns_sbp[]). * * In most cases, VFS functions call lock_super() before calling these * methods. So we must be careful not to bring on deadlocks when using @@ -346,8 +363,19 @@ static void nilfs_write_super(struct super_block *sb) struct the_nilfs *nilfs = sbi->s_nilfs; down_write(&nilfs->ns_sem); - if (!(sb->s_flags & MS_RDONLY)) - nilfs_commit_super(sbi); + if (!(sb->s_flags & MS_RDONLY)) { + struct nilfs_super_block **sbp = nilfs->ns_sbp; + u64 t = get_seconds(); + int dupsb; + + if (!nilfs_discontinued(nilfs) && t >= nilfs->ns_sbwtime[0] && + t < nilfs->ns_sbwtime[0] + NILFS_SB_FREQ) { + up_write(&nilfs->ns_sem); + return; + } + dupsb = sbp[1] && t > nilfs->ns_sbwtime[1] + NILFS_ALTSB_FREQ; + nilfs_commit_super(sbi, dupsb); + } sb->s_dirt = 0; up_write(&nilfs->ns_sem); } @@ -436,7 +464,7 @@ static int nilfs_mark_recovery_complete(struct nilfs_sb_info *sbi) down_write(&nilfs->ns_sem); if (!(nilfs->ns_mount_state & NILFS_VALID_FS)) { nilfs->ns_mount_state |= NILFS_VALID_FS; - err = nilfs_commit_super(sbi); + err = nilfs_commit_super(sbi, 1); if (likely(!err)) printk(KERN_INFO "NILFS: recovery complete.\n"); } @@ -652,7 +680,7 @@ nilfs_set_default_options(struct nilfs_sb_info *sbi, static int nilfs_setup_super(struct nilfs_sb_info *sbi) { struct the_nilfs *nilfs = sbi->s_nilfs; - struct nilfs_super_block *sbp = nilfs->ns_sbp; + struct nilfs_super_block *sbp = nilfs->ns_sbp[0]; int max_mnt_count = le16_to_cpu(sbp->s_max_mnt_count); int mnt_count = le16_to_cpu(sbp->s_mnt_count); @@ -674,88 +702,29 @@ static int nilfs_setup_super(struct nilfs_sb_info *sbi) sbp->s_mnt_count = cpu_to_le16(mnt_count + 1); sbp->s_state = cpu_to_le16(le16_to_cpu(sbp->s_state) & ~NILFS_VALID_FS); sbp->s_mtime = cpu_to_le64(get_seconds()); - return nilfs_commit_super(sbi); + return nilfs_commit_super(sbi, 1); } -struct nilfs_super_block * -nilfs_load_super_block(struct super_block *sb, struct buffer_head **pbh) +struct nilfs_super_block *nilfs_read_super_block(struct super_block *sb, + u64 pos, int blocksize, + struct buffer_head **pbh) { - int blocksize; - unsigned long offset, sb_index; - - /* - * Adjusting block size - * Blocksize will be enlarged when it is smaller than hardware - * sector size. - * Disk format of superblock does not change. - */ - blocksize = sb_min_blocksize(sb, BLOCK_SIZE); - if (!blocksize) { - printk(KERN_ERR - "NILFS: unable to set blocksize of superblock\n"); - return NULL; - } - sb_index = NILFS_SB_OFFSET_BYTES / blocksize; - offset = NILFS_SB_OFFSET_BYTES % blocksize; + unsigned long long sb_index = pos; + unsigned long offset; + offset = do_div(sb_index, blocksize); *pbh = sb_bread(sb, sb_index); - if (!*pbh) { - printk(KERN_ERR "NILFS: unable to read superblock\n"); + if (!*pbh) return NULL; - } return (struct nilfs_super_block *)((char *)(*pbh)->b_data + offset); } -struct nilfs_super_block * -nilfs_reload_super_block(struct super_block *sb, struct buffer_head **pbh, - int blocksize) -{ - struct nilfs_super_block *sbp; - unsigned long offset, sb_index; - int hw_blocksize = bdev_hardsect_size(sb->s_bdev); - - if (blocksize < hw_blocksize) { - printk(KERN_ERR - "NILFS: blocksize %d too small for device " - "(sector-size = %d).\n", - blocksize, hw_blocksize); - goto failed_sbh; - } - brelse(*pbh); - sb_set_blocksize(sb, blocksize); - - sb_index = NILFS_SB_OFFSET_BYTES / blocksize; - offset = NILFS_SB_OFFSET_BYTES % blocksize; - - *pbh = sb_bread(sb, sb_index); - if (!*pbh) { - printk(KERN_ERR - "NILFS: cannot read superblock on 2nd try.\n"); - goto failed; - } - - sbp = (struct nilfs_super_block *)((char *)(*pbh)->b_data + offset); - if (sbp->s_magic != cpu_to_le16(NILFS_SUPER_MAGIC)) { - printk(KERN_ERR - "NILFS: !? Magic mismatch on 2nd try.\n"); - goto failed_sbh; - } - return sbp; - - failed_sbh: - brelse(*pbh); - - failed: - return NULL; -} - int nilfs_store_magic_and_option(struct super_block *sb, struct nilfs_super_block *sbp, char *data) { struct nilfs_sb_info *sbi = NILFS_SB(sb); - /* trying to fill super (1st stage) */ sb->s_magic = le16_to_cpu(sbp->s_magic); /* FS independent flags */ @@ -763,11 +732,6 @@ int nilfs_store_magic_and_option(struct super_block *sb, sb->s_flags |= MS_NOATIME; #endif - if (sb->s_magic != NILFS_SUPER_MAGIC) { - printk("NILFS: Can't find nilfs on dev %s.\n", sb->s_id); - return -EINVAL; - } - nilfs_set_default_options(sbi, sbp); sbi->s_resuid = le16_to_cpu(sbp->s_def_resuid); @@ -775,10 +739,7 @@ int nilfs_store_magic_and_option(struct super_block *sb, sbi->s_interval = le32_to_cpu(sbp->s_c_interval); sbi->s_watermark = le32_to_cpu(sbp->s_c_block_max); - if (!parse_options(data, sb)) - return -EINVAL; - - return 0; + return !parse_options(data, sb) ? -EINVAL : 0 ; } /** @@ -967,12 +928,12 @@ static int nilfs_remount(struct super_block *sb, int *flags, char *data) * the RDONLY flag and then mark the partition as valid again. */ down_write(&nilfs->ns_sem); - sbp = nilfs->ns_sbp; + sbp = nilfs->ns_sbp[0]; if (!(sbp->s_state & le16_to_cpu(NILFS_VALID_FS)) && (nilfs->ns_mount_state & NILFS_VALID_FS)) sbp->s_state = cpu_to_le16(nilfs->ns_mount_state); sbp->s_mtime = cpu_to_le64(get_seconds()); - nilfs_commit_super(sbi); + nilfs_commit_super(sbi, 1); up_write(&nilfs->ns_sem); } else { /* diff --git a/fs/nilfs2/the_nilfs.c b/fs/nilfs2/the_nilfs.c index 661ab762d76..33400cf0bbe 100644 --- a/fs/nilfs2/the_nilfs.c +++ b/fs/nilfs2/the_nilfs.c @@ -25,6 +25,7 @@ #include #include #include +#include #include "nilfs.h" #include "segment.h" #include "alloc.h" @@ -105,7 +106,8 @@ void put_nilfs(struct the_nilfs *nilfs) } if (nilfs_init(nilfs)) { nilfs_destroy_gccache(nilfs); - brelse(nilfs->ns_sbh); + brelse(nilfs->ns_sbh[0]); + brelse(nilfs->ns_sbh[1]); } kfree(nilfs); } @@ -115,6 +117,7 @@ static int nilfs_load_super_root(struct the_nilfs *nilfs, { struct buffer_head *bh_sr; struct nilfs_super_root *raw_sr; + struct nilfs_super_block **sbp = nilfs->ns_sbp; unsigned dat_entry_size, segment_usage_size, checkpoint_size; unsigned inode_size; int err; @@ -124,9 +127,9 @@ static int nilfs_load_super_root(struct the_nilfs *nilfs, return err; down_read(&nilfs->ns_sem); - dat_entry_size = le16_to_cpu(nilfs->ns_sbp->s_dat_entry_size); - checkpoint_size = le16_to_cpu(nilfs->ns_sbp->s_checkpoint_size); - segment_usage_size = le16_to_cpu(nilfs->ns_sbp->s_segment_usage_size); + dat_entry_size = le16_to_cpu(sbp[0]->s_dat_entry_size); + checkpoint_size = le16_to_cpu(sbp[0]->s_checkpoint_size); + segment_usage_size = le16_to_cpu(sbp[0]->s_segment_usage_size); up_read(&nilfs->ns_sem); inode_size = nilfs->ns_inode_size; @@ -270,11 +273,8 @@ int load_nilfs(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi) nilfs_mdt_destroy(nilfs->ns_dat); goto failed; } - if (ri.ri_need_recovery == NILFS_RECOVERY_SR_UPDATED) { - down_write(&nilfs->ns_sem); - nilfs_update_last_segment(sbi, 0); - up_write(&nilfs->ns_sem); - } + if (ri.ri_need_recovery == NILFS_RECOVERY_SR_UPDATED) + sbi->s_super->s_dirt = 1; } set_nilfs_loaded(nilfs); @@ -296,9 +296,8 @@ static unsigned long long nilfs_max_size(unsigned int blkbits) return res; } -static int -nilfs_store_disk_layout(struct the_nilfs *nilfs, struct super_block *sb, - struct nilfs_super_block *sbp) +static int nilfs_store_disk_layout(struct the_nilfs *nilfs, + struct nilfs_super_block *sbp) { if (le32_to_cpu(sbp->s_rev_level) != NILFS_CURRENT_REV) { printk(KERN_ERR "NILFS: revision mismatch " @@ -309,6 +308,10 @@ nilfs_store_disk_layout(struct the_nilfs *nilfs, struct super_block *sb, NILFS_CURRENT_REV, NILFS_MINOR_REV); return -EINVAL; } + nilfs->ns_sbsize = le16_to_cpu(sbp->s_bytes); + if (nilfs->ns_sbsize > BLOCK_SIZE) + return -EINVAL; + nilfs->ns_inode_size = le16_to_cpu(sbp->s_inode_size); nilfs->ns_first_ino = le32_to_cpu(sbp->s_first_ino); @@ -330,6 +333,122 @@ nilfs_store_disk_layout(struct the_nilfs *nilfs, struct super_block *sb, return 0; } +static int nilfs_valid_sb(struct nilfs_super_block *sbp) +{ + static unsigned char sum[4]; + const int sumoff = offsetof(struct nilfs_super_block, s_sum); + size_t bytes; + u32 crc; + + if (!sbp || le16_to_cpu(sbp->s_magic) != NILFS_SUPER_MAGIC) + return 0; + bytes = le16_to_cpu(sbp->s_bytes); + if (bytes > BLOCK_SIZE) + return 0; + crc = crc32_le(le32_to_cpu(sbp->s_crc_seed), (unsigned char *)sbp, + sumoff); + crc = crc32_le(crc, sum, 4); + crc = crc32_le(crc, (unsigned char *)sbp + sumoff + 4, + bytes - sumoff - 4); + return crc == le32_to_cpu(sbp->s_sum); +} + +static int nilfs_sb2_bad_offset(struct nilfs_super_block *sbp, u64 offset) +{ + return offset < ((le64_to_cpu(sbp->s_nsegments) * + le32_to_cpu(sbp->s_blocks_per_segment)) << + (le32_to_cpu(sbp->s_log_block_size) + 10)); +} + +static void nilfs_release_super_block(struct the_nilfs *nilfs) +{ + int i; + + for (i = 0; i < 2; i++) { + if (nilfs->ns_sbp[i]) { + brelse(nilfs->ns_sbh[i]); + nilfs->ns_sbh[i] = NULL; + nilfs->ns_sbp[i] = NULL; + } + } +} + +void nilfs_fall_back_super_block(struct the_nilfs *nilfs) +{ + brelse(nilfs->ns_sbh[0]); + nilfs->ns_sbh[0] = nilfs->ns_sbh[1]; + nilfs->ns_sbp[0] = nilfs->ns_sbp[1]; + nilfs->ns_sbh[1] = NULL; + nilfs->ns_sbp[1] = NULL; +} + +void nilfs_swap_super_block(struct the_nilfs *nilfs) +{ + struct buffer_head *tsbh = nilfs->ns_sbh[0]; + struct nilfs_super_block *tsbp = nilfs->ns_sbp[0]; + + nilfs->ns_sbh[0] = nilfs->ns_sbh[1]; + nilfs->ns_sbp[0] = nilfs->ns_sbp[1]; + nilfs->ns_sbh[1] = tsbh; + nilfs->ns_sbp[1] = tsbp; +} + +static int nilfs_load_super_block(struct the_nilfs *nilfs, + struct super_block *sb, int blocksize, + struct nilfs_super_block **sbpp) +{ + struct nilfs_super_block **sbp = nilfs->ns_sbp; + struct buffer_head **sbh = nilfs->ns_sbh; + u64 sb2off = NILFS_SB2_OFFSET_BYTES(nilfs->ns_bdev->bd_inode->i_size); + int valid[2], swp = 0; + + sbp[0] = nilfs_read_super_block(sb, NILFS_SB_OFFSET_BYTES, blocksize, + &sbh[0]); + sbp[1] = nilfs_read_super_block(sb, sb2off, blocksize, &sbh[1]); + + if (!sbp[0]) { + if (!sbp[1]) { + printk(KERN_ERR "NILFS: unable to read superblock\n"); + return -EIO; + } + printk(KERN_WARNING + "NILFS warning: unable to read primary superblock\n"); + } else if (!sbp[1]) + printk(KERN_WARNING + "NILFS warning: unable to read secondary superblock\n"); + + valid[0] = nilfs_valid_sb(sbp[0]); + valid[1] = nilfs_valid_sb(sbp[1]); + swp = valid[1] && + (!valid[0] || + le64_to_cpu(sbp[1]->s_wtime) > le64_to_cpu(sbp[0]->s_wtime)); + + if (valid[swp] && nilfs_sb2_bad_offset(sbp[swp], sb2off)) { + brelse(sbh[1]); + sbh[1] = NULL; + sbp[1] = NULL; + swp = 0; + } + if (!valid[swp]) { + nilfs_release_super_block(nilfs); + printk(KERN_ERR "NILFS: Can't find nilfs on dev %s.\n", + sb->s_id); + return -EINVAL; + } + + if (swp) { + printk(KERN_WARNING "NILFS warning: broken superblock. " + "using spare superblock.\n"); + nilfs_swap_super_block(nilfs); + } + + nilfs->ns_sbwtime[0] = le64_to_cpu(sbp[0]->s_wtime); + nilfs->ns_sbwtime[1] = valid[!swp] ? le64_to_cpu(sbp[1]->s_wtime) : 0; + nilfs->ns_prot_seq = le64_to_cpu(sbp[valid[1] & !swp]->s_last_seq); + *sbpp = sbp[0]; + return 0; +} + /** * init_nilfs - initialize a NILFS instance. * @nilfs: the_nilfs structure @@ -352,16 +471,15 @@ nilfs_store_disk_layout(struct the_nilfs *nilfs, struct super_block *sb, int init_nilfs(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi, char *data) { struct super_block *sb = sbi->s_super; - struct buffer_head *sbh; struct nilfs_super_block *sbp; struct backing_dev_info *bdi; int blocksize; - int err = 0; + int err; down_write(&nilfs->ns_sem); if (nilfs_init(nilfs)) { /* Load values from existing the_nilfs */ - sbp = nilfs->ns_sbp; + sbp = nilfs->ns_sbp[0]; err = nilfs_store_magic_and_option(sb, sbp, data); if (err) goto out; @@ -377,36 +495,50 @@ int init_nilfs(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi, char *data) goto out; } - sbp = nilfs_load_super_block(sb, &sbh); - if (!sbp) { + blocksize = sb_min_blocksize(sb, BLOCK_SIZE); + if (!blocksize) { + printk(KERN_ERR "NILFS: unable to set blocksize\n"); err = -EINVAL; goto out; } + err = nilfs_load_super_block(nilfs, sb, blocksize, &sbp); + if (err) + goto out; + err = nilfs_store_magic_and_option(sb, sbp, data); if (err) goto failed_sbh; blocksize = BLOCK_SIZE << le32_to_cpu(sbp->s_log_block_size); if (sb->s_blocksize != blocksize) { - sbp = nilfs_reload_super_block(sb, &sbh, blocksize); - if (!sbp) { + int hw_blocksize = bdev_hardsect_size(sb->s_bdev); + + if (blocksize < hw_blocksize) { + printk(KERN_ERR + "NILFS: blocksize %d too small for device " + "(sector-size = %d).\n", + blocksize, hw_blocksize); err = -EINVAL; + goto failed_sbh; + } + nilfs_release_super_block(nilfs); + sb_set_blocksize(sb, blocksize); + + err = nilfs_load_super_block(nilfs, sb, blocksize, &sbp); + if (err) goto out; /* not failed_sbh; sbh is released automatically when reloading fails. */ - } } nilfs->ns_blocksize_bits = sb->s_blocksize_bits; - err = nilfs_store_disk_layout(nilfs, sb, sbp); + err = nilfs_store_disk_layout(nilfs, sbp); if (err) goto failed_sbh; sb->s_maxbytes = nilfs_max_size(sb->s_blocksize_bits); nilfs->ns_mount_state = le16_to_cpu(sbp->s_state); - nilfs->ns_sbh = sbh; - nilfs->ns_sbp = sbp; bdi = nilfs->ns_bdev->bd_inode_backing_dev_info; if (!bdi) @@ -443,7 +575,7 @@ int init_nilfs(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi, char *data) return err; failed_sbh: - brelse(sbh); + nilfs_release_super_block(nilfs); goto out; } diff --git a/fs/nilfs2/the_nilfs.h b/fs/nilfs2/the_nilfs.h index d750e48257c..30fe58778d0 100644 --- a/fs/nilfs2/the_nilfs.h +++ b/fs/nilfs2/the_nilfs.h @@ -49,8 +49,10 @@ enum { * @ns_sem: semaphore for shared states * @ns_writer_mutex: mutex protecting ns_writer attach/detach * @ns_writer_refcount: number of referrers on ns_writer - * @ns_sbh: buffer head of the on-disk super block - * @ns_sbp: pointer to the super block data + * @ns_sbh: buffer heads of on-disk super blocks + * @ns_sbp: pointers to super block data + * @ns_sbwtime: previous write time of super blocks + * @ns_sbsize: size of valid data in super block * @ns_supers: list of nilfs super block structs * @ns_seg_seq: segment sequence counter * @ns_segnum: index number of the latest full segment. @@ -101,8 +103,10 @@ struct the_nilfs { * - protecting s_dirt in the super_block struct * (see nilfs_write_super) and the following fields. */ - struct buffer_head *ns_sbh; - struct nilfs_super_block *ns_sbp; + struct buffer_head *ns_sbh[2]; + struct nilfs_super_block *ns_sbp[2]; + time_t ns_sbwtime[2]; + unsigned ns_sbsize; unsigned ns_mount_state; struct list_head ns_supers; @@ -182,6 +186,10 @@ THE_NILFS_FNS(INIT, init) THE_NILFS_FNS(LOADED, loaded) THE_NILFS_FNS(DISCONTINUED, discontinued) +/* Minimum interval of periodical update of superblocks (in seconds) */ +#define NILFS_SB_FREQ 10 +#define NILFS_ALTSB_FREQ 60 /* spare superblock */ + void nilfs_set_last_segment(struct the_nilfs *, sector_t, u64, __u64); struct the_nilfs *alloc_nilfs(struct block_device *); void put_nilfs(struct the_nilfs *); @@ -190,6 +198,8 @@ int load_nilfs(struct the_nilfs *, struct nilfs_sb_info *); int nilfs_count_free_blocks(struct the_nilfs *, sector_t *); int nilfs_checkpoint_is_mounted(struct the_nilfs *, __u64, int); int nilfs_near_disk_full(struct the_nilfs *); +void nilfs_fall_back_super_block(struct the_nilfs *); +void nilfs_swap_super_block(struct the_nilfs *); static inline void get_nilfs(struct the_nilfs *nilfs) -- cgit v1.2.3 From 612392307cb09e49051225092cbbd7049bd8db93 Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Mon, 6 Apr 2009 19:02:00 -0700 Subject: nilfs2: support nanosecond timestamp After a review of user's feedback for finding out other compatibility issues, I found nilfs improperly initializes timestamps in inode; CURRENT_TIME was used there instead of CURRENT_TIME_SEC even though nilfs didn't have nanosecond timestamps on disk. A few users gave us the report that the tar program sometimes failed to expand symbolic links on nilfs, and it turned out to be the cause. Instead of applying the above displacement, I've decided to support nanosecond timestamps on this occation. Fortunetaly, a needless 64-bit field was in the nilfs_inode struct, and I found it's available for this purpose without impact for the users. So, this will do the enhancement and resolve the tar problem. Signed-off-by: Ryusuke Konishi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/nilfs2/gcinode.c | 1 - fs/nilfs2/inode.c | 13 ++++++------- fs/nilfs2/nilfs.h | 1 - fs/nilfs2/super.c | 1 + 4 files changed, 7 insertions(+), 9 deletions(-) (limited to 'fs/nilfs2') diff --git a/fs/nilfs2/gcinode.c b/fs/nilfs2/gcinode.c index 77615aabc7e..19d2102b6a6 100644 --- a/fs/nilfs2/gcinode.c +++ b/fs/nilfs2/gcinode.c @@ -226,7 +226,6 @@ static struct inode *alloc_gcinode(struct the_nilfs *nilfs, ino_t ino, ii->i_flags = 0; ii->i_state = 1 << NILFS_I_GCINODE; ii->i_bh = NULL; - ii->i_dtime = 0; nilfs_bmap_init_gc(ii->i_bmap); return inode; diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c index a1922b17662..49ab4a49bb4 100644 --- a/fs/nilfs2/inode.c +++ b/fs/nilfs2/inode.c @@ -306,7 +306,6 @@ struct inode *nilfs_new_inode(struct inode *dir, int mode) /* ii->i_file_acl = 0; */ /* ii->i_dir_acl = 0; */ - ii->i_dtime = 0; ii->i_dir_start_lookup = 0; #ifdef CONFIG_NILFS_FS_POSIX_ACL ii->i_acl = NULL; @@ -390,11 +389,10 @@ int nilfs_read_inode_common(struct inode *inode, inode->i_atime.tv_sec = le64_to_cpu(raw_inode->i_mtime); inode->i_ctime.tv_sec = le64_to_cpu(raw_inode->i_ctime); inode->i_mtime.tv_sec = le64_to_cpu(raw_inode->i_mtime); - inode->i_atime.tv_nsec = 0; - inode->i_ctime.tv_nsec = 0; - inode->i_mtime.tv_nsec = 0; - ii->i_dtime = le64_to_cpu(raw_inode->i_dtime); - if (inode->i_nlink == 0 && (inode->i_mode == 0 || ii->i_dtime)) + inode->i_atime.tv_nsec = le32_to_cpu(raw_inode->i_mtime_nsec); + inode->i_ctime.tv_nsec = le32_to_cpu(raw_inode->i_ctime_nsec); + inode->i_mtime.tv_nsec = le32_to_cpu(raw_inode->i_mtime_nsec); + if (inode->i_nlink == 0 && inode->i_mode == 0) return -EINVAL; /* this inode is deleted */ inode->i_blocks = le64_to_cpu(raw_inode->i_blocks); @@ -505,9 +503,10 @@ void nilfs_write_inode_common(struct inode *inode, raw_inode->i_size = cpu_to_le64(inode->i_size); raw_inode->i_ctime = cpu_to_le64(inode->i_ctime.tv_sec); raw_inode->i_mtime = cpu_to_le64(inode->i_mtime.tv_sec); + raw_inode->i_ctime_nsec = cpu_to_le32(inode->i_ctime.tv_nsec); + raw_inode->i_mtime_nsec = cpu_to_le32(inode->i_mtime.tv_nsec); raw_inode->i_blocks = cpu_to_le64(inode->i_blocks); - raw_inode->i_dtime = cpu_to_le64(ii->i_dtime); raw_inode->i_flags = cpu_to_le32(ii->i_flags); raw_inode->i_generation = cpu_to_le32(inode->i_generation); diff --git a/fs/nilfs2/nilfs.h b/fs/nilfs2/nilfs.h index 19af5ab8627..7558c977db0 100644 --- a/fs/nilfs2/nilfs.h +++ b/fs/nilfs2/nilfs.h @@ -48,7 +48,6 @@ struct nilfs_inode_info { struct nilfs_bmap *i_bmap; union nilfs_bmap_union i_bmap_union; __u64 i_xattr; /* sector_t ??? */ - __u32 i_dtime; __u32 i_dir_start_lookup; __u64 i_cno; /* check point number for GC inode */ struct address_space i_btnode_cache; diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c index e2ced824c62..e117e1ea9bf 100644 --- a/fs/nilfs2/super.c +++ b/fs/nilfs2/super.c @@ -792,6 +792,7 @@ nilfs_fill_super(struct super_block *sb, void *data, int silent, sb->s_op = &nilfs_sops; sb->s_export_op = &nilfs_export_ops; sb->s_root = NULL; + sb->s_time_gran = 1; if (!nilfs_loaded(nilfs)) { err = load_nilfs(nilfs, sbi); -- cgit v1.2.3 From c306af23e19d3c94c9229263c39fe487e915e774 Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Thu, 26 Mar 2009 10:16:57 +0900 Subject: nilfs2: return f_fsid for statfs2 This follows the change of Coly Li's series ("fs: return f_fsid for statfs(2)"), and make nilfs2 return f_fsid info for statfs(2). Acked-by: Coly Li Signed-off-by: Ryusuke Konishi --- fs/nilfs2/super.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'fs/nilfs2') diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c index e117e1ea9bf..8a965f9523a 100644 --- a/fs/nilfs2/super.c +++ b/fs/nilfs2/super.c @@ -476,11 +476,12 @@ static int nilfs_statfs(struct dentry *dentry, struct kstatfs *buf) { struct super_block *sb = dentry->d_sb; struct nilfs_sb_info *sbi = NILFS_SB(sb); + struct the_nilfs *nilfs = sbi->s_nilfs; + u64 id = huge_encode_dev(sb->s_bdev->bd_dev); unsigned long long blocks; unsigned long overhead; unsigned long nrsvblocks; sector_t nfreeblocks; - struct the_nilfs *nilfs = sbi->s_nilfs; int err; /* @@ -514,6 +515,9 @@ static int nilfs_statfs(struct dentry *dentry, struct kstatfs *buf) buf->f_files = atomic_read(&sbi->s_inodes_count); buf->f_ffree = 0; /* nilfs_count_free_inodes(sb); */ buf->f_namelen = NILFS_NAME_LEN; + buf->f_fsid.val[0] = (u32)id; + buf->f_fsid.val[1] = (u32)(id >> 32); + return 0; } -- cgit v1.2.3 From bcb48891b05b4179edc86298d3dccb2ce90d5413 Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Fri, 27 Mar 2009 02:51:39 +0900 Subject: nilfs2: fix lockdep recursive locking warning on bmap The bmap semaphore of DAT file can be held while a bmap of other files is locked. This has caused the following false detection of lockdep check: mount.nilfs2/4667 is trying to acquire lock: (&bmap->b_sem){..--}, at: [] nilfs_bmap_lookup_at_level+0x1a/0x74 [nilfs2] but task is already holding lock: (&bmap->b_sem){..--}, at: [] nilfs_bmap_lookup_at_level+0x1a/0x74 [nilfs2] This will fix the false detection by distinguishing semaphores of the DAT and other files. Signed-off-by: Ryusuke Konishi --- fs/nilfs2/bmap.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'fs/nilfs2') diff --git a/fs/nilfs2/bmap.c b/fs/nilfs2/bmap.c index 24638e059bf..064279e33bb 100644 --- a/fs/nilfs2/bmap.c +++ b/fs/nilfs2/bmap.c @@ -688,6 +688,8 @@ static const struct nilfs_bmap_ptr_operations nilfs_bmap_ptr_ops_gc = { .bpop_translate = NULL, }; +static struct lock_class_key nilfs_bmap_dat_lock_key; + /** * nilfs_bmap_read - read a bmap from an inode * @bmap: bmap @@ -715,6 +717,7 @@ int nilfs_bmap_read(struct nilfs_bmap *bmap, struct nilfs_inode *raw_inode) bmap->b_pops = &nilfs_bmap_ptr_ops_p; bmap->b_last_allocated_key = 0; /* XXX: use macro */ bmap->b_last_allocated_ptr = NILFS_BMAP_NEW_PTR_INIT; + lockdep_set_class(&bmap->b_sem, &nilfs_bmap_dat_lock_key); break; case NILFS_CPFILE_INO: case NILFS_SUFILE_INO: @@ -772,6 +775,7 @@ void nilfs_bmap_init_gcdat(struct nilfs_bmap *gcbmap, struct nilfs_bmap *bmap) { memcpy(gcbmap, bmap, sizeof(union nilfs_bmap_union)); init_rwsem(&gcbmap->b_sem); + lockdep_set_class(&bmap->b_sem, &nilfs_bmap_dat_lock_key); gcbmap->b_inode = &NILFS_BMAP_I(gcbmap)->vfs_inode; } @@ -779,5 +783,6 @@ void nilfs_bmap_commit_gcdat(struct nilfs_bmap *gcbmap, struct nilfs_bmap *bmap) { memcpy(bmap, gcbmap, sizeof(union nilfs_bmap_union)); init_rwsem(&bmap->b_sem); + lockdep_set_class(&bmap->b_sem, &nilfs_bmap_dat_lock_key); bmap->b_inode = &NILFS_BMAP_I(bmap)->vfs_inode; } -- cgit v1.2.3 From c2698e50e304cd29a7836f05452359a3306a405e Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Fri, 27 Mar 2009 02:53:12 +0900 Subject: nilfs2: fix lockdep recursive locking warning on meta data files This fixes the following false detection of lockdep against nilfs meta data files: ============================================= [ INFO: possible recursive locking detected ] 2.6.29 #26 --------------------------------------------- mount.nilfs2/4185 is trying to acquire lock: (&mi->mi_sem){----}, at: [] nilfs_sufile_get_stat+0x1e/0x105 [nilfs2] but task is already holding lock: (&mi->mi_sem){----}, at: [] nilfs_count_free_blocks+0x48/0x84 [nilfs2] Signed-off-by: Ryusuke Konishi --- fs/nilfs2/the_nilfs.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'fs/nilfs2') diff --git a/fs/nilfs2/the_nilfs.c b/fs/nilfs2/the_nilfs.c index 33400cf0bbe..7f65b3be4aa 100644 --- a/fs/nilfs2/the_nilfs.c +++ b/fs/nilfs2/the_nilfs.c @@ -115,6 +115,7 @@ void put_nilfs(struct the_nilfs *nilfs) static int nilfs_load_super_root(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi, sector_t sr_block) { + static struct lock_class_key dat_lock_key; struct buffer_head *bh_sr; struct nilfs_super_root *raw_sr; struct nilfs_super_block **sbp = nilfs->ns_sbp; @@ -163,6 +164,9 @@ static int nilfs_load_super_root(struct the_nilfs *nilfs, if (unlikely(err)) goto failed_sufile; + lockdep_set_class(&NILFS_MDT(nilfs->ns_dat)->mi_sem, &dat_lock_key); + lockdep_set_class(&NILFS_MDT(nilfs->ns_gc_dat)->mi_sem, &dat_lock_key); + nilfs_mdt_set_shadow(nilfs->ns_dat, nilfs->ns_gc_dat); nilfs_mdt_set_entry_size(nilfs->ns_cpfile, checkpoint_size, sizeof(struct nilfs_cpfile_header)); -- cgit v1.2.3 From e7a7402c0d392dcadc74cae8922f8fae4667605a Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Fri, 27 Mar 2009 10:49:11 +0900 Subject: nilfs2: remove module version A MODULE_VERSION() macro has been used in out-of-tree nilfs modules, but it's needless and not updated in tree. So, this removes it along with the version declaration. Signed-off-by: Ryusuke Konishi --- fs/nilfs2/nilfs.h | 5 ----- fs/nilfs2/super.c | 1 - 2 files changed, 6 deletions(-) (limited to 'fs/nilfs2') diff --git a/fs/nilfs2/nilfs.h b/fs/nilfs2/nilfs.h index 7558c977db0..3d0c18a16db 100644 --- a/fs/nilfs2/nilfs.h +++ b/fs/nilfs2/nilfs.h @@ -34,11 +34,6 @@ #include "bmap.h" #include "bmap_union.h" -/* - * NILFS filesystem version - */ -#define NILFS_VERSION "2.0.5" - /* * nilfs inode data in memory */ diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c index 8a965f9523a..6989b03e97a 100644 --- a/fs/nilfs2/super.c +++ b/fs/nilfs2/super.c @@ -63,7 +63,6 @@ MODULE_AUTHOR("NTT Corp."); MODULE_DESCRIPTION("A New Implementation of the Log-structured Filesystem " "(NILFS)"); -MODULE_VERSION(NILFS_VERSION); MODULE_LICENSE("GPL"); static int nilfs_remount(struct super_block *sb, int *flags, char *data); -- cgit v1.2.3 From 3efb55b496952e0d29a9ec66d0ceaab175c4e8ca Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Mon, 30 Mar 2009 00:50:19 +0900 Subject: nilfs2: simplify handling of active state of segments fix This fixes a bug of ("nilfs2: simplify handling of active state of segments") patch. The patch did not take account that a base index is increased in nilfs_sufile_get_suinfo() function if requested entries go across block boundary on sufile. Due to this bug, the active flag sometimes appears on wrong segments and has induced malfunction of garbage collection. Signed-off-by: Ryusuke Konishi --- fs/nilfs2/sufile.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/nilfs2') diff --git a/fs/nilfs2/sufile.c b/fs/nilfs2/sufile.c index c774cf397e2..1ef2b4d9d79 100644 --- a/fs/nilfs2/sufile.c +++ b/fs/nilfs2/sufile.c @@ -625,7 +625,7 @@ ssize_t nilfs_sufile_get_suinfo(struct inode *sufile, __u64 segnum, si[i + j].sui_nblocks = le32_to_cpu(su->su_nblocks); si[i + j].sui_flags = le32_to_cpu(su->su_flags) & ~(1UL << NILFS_SEGMENT_USAGE_ACTIVE); - if (nilfs_segment_is_active(nilfs, segnum + i + j)) + if (nilfs_segment_is_active(nilfs, segnum + j)) si[i + j].sui_flags |= (1UL << NILFS_SEGMENT_USAGE_ACTIVE); } -- cgit v1.2.3 From 88072faf9a32c92f37c15065496bb6eb309aebe3 Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Sun, 5 Apr 2009 15:03:16 +0900 Subject: nilfs2: fix wrong accounting and duplicate brelse in nilfs_sufile_set_error The nilfs_sufile_set_error() function wrongly adjusts the number of dirty segments instead of the number of clean segments. In addition, the function calls brelse() twice for the same buffer head. This fixes these bugs. Signed-off-by: Ryusuke Konishi --- fs/nilfs2/sufile.c | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) (limited to 'fs/nilfs2') diff --git a/fs/nilfs2/sufile.c b/fs/nilfs2/sufile.c index 1ef2b4d9d79..8b2f93ca1e1 100644 --- a/fs/nilfs2/sufile.c +++ b/fs/nilfs2/sufile.c @@ -523,7 +523,7 @@ int nilfs_sufile_set_error(struct inode *sufile, __u64 segnum) struct nilfs_segment_usage *su; struct nilfs_sufile_header *header; void *kaddr; - int ret; + int suclean, ret; if (unlikely(segnum >= nilfs_sufile_get_nsegments(sufile))) { printk(KERN_WARNING "%s: invalid segment number: %llu\n", @@ -546,16 +546,19 @@ int nilfs_sufile_set_error(struct inode *sufile, __u64 segnum) brelse(su_bh); goto out_header; } + suclean = nilfs_segment_usage_clean(su); nilfs_segment_usage_set_error(su); kunmap_atomic(kaddr, KM_USER0); - brelse(su_bh); - kaddr = kmap_atomic(header_bh->b_page, KM_USER0); - header = nilfs_sufile_block_get_header(sufile, header_bh, kaddr); - le64_add_cpu(&header->sh_ndirtysegs, -1); - kunmap_atomic(kaddr, KM_USER0); - nilfs_mdt_mark_buffer_dirty(header_bh); + if (suclean) { + kaddr = kmap_atomic(header_bh->b_page, KM_USER0); + header = nilfs_sufile_block_get_header(sufile, header_bh, + kaddr); + le64_add_cpu(&header->sh_ncleansegs, -1); + kunmap_atomic(kaddr, KM_USER0); + nilfs_mdt_mark_buffer_dirty(header_bh); + } nilfs_mdt_mark_buffer_dirty(su_bh); nilfs_mdt_mark_dirty(sufile); brelse(su_bh); -- cgit v1.2.3 From a703018f7bbec8109419318f5d51f235fdce5155 Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Sun, 5 Apr 2009 18:24:11 +0900 Subject: nilfs2: segment usage file cleanups This will simplify sufile.c by sharing common code which repeatedly appears in routines updating a segment usage entry; a wrapper function nilfs_sufile_update() is introduced for the purpose, and counter modifications are integrated to a new function nilfs_sufile_mod_counter(). This is a preparation for the successive bugfix patch ("nilfs2: fix possible mismatch of sufile counters on recovery"). Signed-off-by: Ryusuke Konishi --- fs/nilfs2/sufile.c | 268 +++++++++++++++-------------------------------------- fs/nilfs2/sufile.h | 67 +++++++++++++- 2 files changed, 140 insertions(+), 195 deletions(-) (limited to 'fs/nilfs2') diff --git a/fs/nilfs2/sufile.c b/fs/nilfs2/sufile.c index 8b2f93ca1e1..07013f58dfe 100644 --- a/fs/nilfs2/sufile.c +++ b/fs/nilfs2/sufile.c @@ -93,6 +93,52 @@ nilfs_sufile_get_segment_usage_block(struct inode *sufile, __u64 segnum, create, NULL, bhp); } +static void nilfs_sufile_mod_counter(struct buffer_head *header_bh, + u64 ncleanadd, u64 ndirtyadd) +{ + struct nilfs_sufile_header *header; + void *kaddr; + + kaddr = kmap_atomic(header_bh->b_page, KM_USER0); + header = kaddr + bh_offset(header_bh); + le64_add_cpu(&header->sh_ncleansegs, ncleanadd); + le64_add_cpu(&header->sh_ndirtysegs, ndirtyadd); + kunmap_atomic(kaddr, KM_USER0); + + nilfs_mdt_mark_buffer_dirty(header_bh); +} + +int nilfs_sufile_update(struct inode *sufile, __u64 segnum, int create, + void (*dofunc)(struct inode *, __u64, + struct buffer_head *, + struct buffer_head *)) +{ + struct buffer_head *header_bh, *bh; + int ret; + + if (unlikely(segnum >= nilfs_sufile_get_nsegments(sufile))) { + printk(KERN_WARNING "%s: invalid segment number: %llu\n", + __func__, (unsigned long long)segnum); + return -EINVAL; + } + down_write(&NILFS_MDT(sufile)->mi_sem); + + ret = nilfs_sufile_get_header_block(sufile, &header_bh); + if (ret < 0) + goto out_sem; + + ret = nilfs_sufile_get_segment_usage_block(sufile, segnum, create, &bh); + if (!ret) { + dofunc(sufile, segnum, header_bh, bh); + brelse(bh); + } + brelse(header_bh); + + out_sem: + up_write(&NILFS_MDT(sufile)->mi_sem); + return ret; +} + /** * nilfs_sufile_alloc - allocate a segment * @sufile: inode of segment usage file @@ -113,7 +159,6 @@ nilfs_sufile_get_segment_usage_block(struct inode *sufile, __u64 segnum, int nilfs_sufile_alloc(struct inode *sufile, __u64 *segnump) { struct buffer_head *header_bh, *su_bh; - struct the_nilfs *nilfs; struct nilfs_sufile_header *header; struct nilfs_segment_usage *su; size_t susz = NILFS_MDT(sufile)->mi_entry_size; @@ -124,8 +169,6 @@ int nilfs_sufile_alloc(struct inode *sufile, __u64 *segnump) down_write(&NILFS_MDT(sufile)->mi_sem); - nilfs = NILFS_MDT(sufile)->mi_nilfs; - ret = nilfs_sufile_get_header_block(sufile, &header_bh); if (ret < 0) goto out_sem; @@ -192,165 +235,55 @@ int nilfs_sufile_alloc(struct inode *sufile, __u64 *segnump) return ret; } -/** - * nilfs_sufile_cancel_free - - * @sufile: inode of segment usage file - * @segnum: segment number - * - * Description: - * - * Return Value: On success, 0 is returned. On error, one of the following - * negative error codes is returned. - * - * %-EIO - I/O error. - * - * %-ENOMEM - Insufficient amount of memory available. - */ -int nilfs_sufile_cancel_free(struct inode *sufile, __u64 segnum) +void nilfs_sufile_do_cancel_free(struct inode *sufile, __u64 segnum, + struct buffer_head *header_bh, + struct buffer_head *su_bh) { - struct buffer_head *header_bh, *su_bh; - struct the_nilfs *nilfs; - struct nilfs_sufile_header *header; struct nilfs_segment_usage *su; void *kaddr; - int ret; - - down_write(&NILFS_MDT(sufile)->mi_sem); - - nilfs = NILFS_MDT(sufile)->mi_nilfs; - - ret = nilfs_sufile_get_header_block(sufile, &header_bh); - if (ret < 0) - goto out_sem; - - ret = nilfs_sufile_get_segment_usage_block(sufile, segnum, 0, &su_bh); - if (ret < 0) - goto out_header; kaddr = kmap_atomic(su_bh->b_page, KM_USER0); - su = nilfs_sufile_block_get_segment_usage( - sufile, segnum, su_bh, kaddr); + su = nilfs_sufile_block_get_segment_usage(sufile, segnum, su_bh, kaddr); if (unlikely(!nilfs_segment_usage_clean(su))) { printk(KERN_WARNING "%s: segment %llu must be clean\n", __func__, (unsigned long long)segnum); kunmap_atomic(kaddr, KM_USER0); - goto out_su_bh; + return; } nilfs_segment_usage_set_dirty(su); kunmap_atomic(kaddr, KM_USER0); - kaddr = kmap_atomic(header_bh->b_page, KM_USER0); - header = nilfs_sufile_block_get_header(sufile, header_bh, kaddr); - le64_add_cpu(&header->sh_ncleansegs, -1); - le64_add_cpu(&header->sh_ndirtysegs, 1); - kunmap_atomic(kaddr, KM_USER0); - - nilfs_mdt_mark_buffer_dirty(header_bh); + nilfs_sufile_mod_counter(header_bh, -1, 1); nilfs_mdt_mark_buffer_dirty(su_bh); nilfs_mdt_mark_dirty(sufile); - - out_su_bh: - brelse(su_bh); - out_header: - brelse(header_bh); - out_sem: - up_write(&NILFS_MDT(sufile)->mi_sem); - return ret; } -/** - * nilfs_sufile_freev - free segments - * @sufile: inode of segment usage file - * @segnum: array of segment numbers - * @nsegs: number of segments - * - * Description: nilfs_sufile_freev() frees segments specified by @segnum and - * @nsegs, which must have been returned by a previous call to - * nilfs_sufile_alloc(). - * - * Return Value: On success, 0 is returned. On error, one of the following - * negative error codes is returned. - * - * %-EIO - I/O error. - * - * %-ENOMEM - Insufficient amount of memory available. - */ -#define NILFS_SUFILE_FREEV_PREALLOC 16 -int nilfs_sufile_freev(struct inode *sufile, __u64 *segnum, size_t nsegs) +void nilfs_sufile_do_free(struct inode *sufile, __u64 segnum, + struct buffer_head *header_bh, + struct buffer_head *su_bh) { - struct buffer_head *header_bh, **su_bh, - *su_bh_prealloc[NILFS_SUFILE_FREEV_PREALLOC]; - struct the_nilfs *nilfs; - struct nilfs_sufile_header *header; struct nilfs_segment_usage *su; void *kaddr; - int ret, i; + int sudirty; - down_write(&NILFS_MDT(sufile)->mi_sem); - - nilfs = NILFS_MDT(sufile)->mi_nilfs; - - /* prepare resources */ - if (nsegs <= NILFS_SUFILE_FREEV_PREALLOC) - su_bh = su_bh_prealloc; - else { - su_bh = kmalloc(sizeof(*su_bh) * nsegs, GFP_NOFS); - if (su_bh == NULL) { - ret = -ENOMEM; - goto out_sem; - } - } - - ret = nilfs_sufile_get_header_block(sufile, &header_bh); - if (ret < 0) - goto out_su_bh; - for (i = 0; i < nsegs; i++) { - ret = nilfs_sufile_get_segment_usage_block(sufile, segnum[i], - 0, &su_bh[i]); - if (ret < 0) - goto out_bh; - } - - /* free segments */ - for (i = 0; i < nsegs; i++) { - kaddr = kmap_atomic(su_bh[i]->b_page, KM_USER0); - su = nilfs_sufile_block_get_segment_usage( - sufile, segnum[i], su_bh[i], kaddr); - WARN_ON(nilfs_segment_usage_error(su)); - nilfs_segment_usage_set_clean(su); + kaddr = kmap_atomic(su_bh->b_page, KM_USER0); + su = nilfs_sufile_block_get_segment_usage(sufile, segnum, su_bh, kaddr); + if (nilfs_segment_usage_clean(su)) { + printk(KERN_WARNING "%s: segment %llu is already clean\n", + __func__, (unsigned long long)segnum); kunmap_atomic(kaddr, KM_USER0); - nilfs_mdt_mark_buffer_dirty(su_bh[i]); + return; } - kaddr = kmap_atomic(header_bh->b_page, KM_USER0); - header = nilfs_sufile_block_get_header(sufile, header_bh, kaddr); - le64_add_cpu(&header->sh_ncleansegs, nsegs); - le64_add_cpu(&header->sh_ndirtysegs, -(u64)nsegs); - kunmap_atomic(kaddr, KM_USER0); - nilfs_mdt_mark_buffer_dirty(header_bh); - nilfs_mdt_mark_dirty(sufile); - - out_bh: - for (i--; i >= 0; i--) - brelse(su_bh[i]); - brelse(header_bh); + WARN_ON(nilfs_segment_usage_error(su)); + WARN_ON(!nilfs_segment_usage_dirty(su)); - out_su_bh: - if (su_bh != su_bh_prealloc) - kfree(su_bh); - - out_sem: - up_write(&NILFS_MDT(sufile)->mi_sem); - return ret; -} + sudirty = nilfs_segment_usage_dirty(su); + nilfs_segment_usage_set_clean(su); + kunmap_atomic(kaddr, KM_USER0); + nilfs_mdt_mark_buffer_dirty(su_bh); -/** - * nilfs_sufile_free - - * @sufile: - * @segnum: - */ -int nilfs_sufile_free(struct inode *sufile, __u64 segnum) -{ - return nilfs_sufile_freev(sufile, &segnum, 1); + nilfs_sufile_mod_counter(header_bh, 1, sudirty ? (u64)-1 : 0); + nilfs_mdt_mark_dirty(sufile); } /** @@ -500,75 +433,28 @@ int nilfs_sufile_get_ncleansegs(struct inode *sufile, unsigned long *nsegsp) return ret; } -/** - * nilfs_sufile_set_error - mark a segment as erroneous - * @sufile: inode of segment usage file - * @segnum: segment number - * - * Description: nilfs_sufile_set_error() marks the segment specified by - * @segnum as erroneous. The error segment will never be used again. - * - * Return Value: On success, 0 is returned. On error, one of the following - * negative error codes is returned. - * - * %-EIO - I/O error. - * - * %-ENOMEM - Insufficient amount of memory available. - * - * %-EINVAL - Invalid segment usage number. - */ -int nilfs_sufile_set_error(struct inode *sufile, __u64 segnum) +void nilfs_sufile_do_set_error(struct inode *sufile, __u64 segnum, + struct buffer_head *header_bh, + struct buffer_head *su_bh) { - struct buffer_head *header_bh, *su_bh; struct nilfs_segment_usage *su; - struct nilfs_sufile_header *header; void *kaddr; - int suclean, ret; - - if (unlikely(segnum >= nilfs_sufile_get_nsegments(sufile))) { - printk(KERN_WARNING "%s: invalid segment number: %llu\n", - __func__, (unsigned long long)segnum); - return -EINVAL; - } - down_write(&NILFS_MDT(sufile)->mi_sem); - - ret = nilfs_sufile_get_header_block(sufile, &header_bh); - if (ret < 0) - goto out_sem; - ret = nilfs_sufile_get_segment_usage_block(sufile, segnum, 0, &su_bh); - if (ret < 0) - goto out_header; + int suclean; kaddr = kmap_atomic(su_bh->b_page, KM_USER0); su = nilfs_sufile_block_get_segment_usage(sufile, segnum, su_bh, kaddr); if (nilfs_segment_usage_error(su)) { kunmap_atomic(kaddr, KM_USER0); - brelse(su_bh); - goto out_header; + return; } suclean = nilfs_segment_usage_clean(su); - nilfs_segment_usage_set_error(su); kunmap_atomic(kaddr, KM_USER0); - if (suclean) { - kaddr = kmap_atomic(header_bh->b_page, KM_USER0); - header = nilfs_sufile_block_get_header(sufile, header_bh, - kaddr); - le64_add_cpu(&header->sh_ncleansegs, -1); - kunmap_atomic(kaddr, KM_USER0); - nilfs_mdt_mark_buffer_dirty(header_bh); - } + if (suclean) + nilfs_sufile_mod_counter(header_bh, -1, 0); nilfs_mdt_mark_buffer_dirty(su_bh); nilfs_mdt_mark_dirty(sufile); - brelse(su_bh); - - out_header: - brelse(header_bh); - - out_sem: - up_write(&NILFS_MDT(sufile)->mi_sem); - return ret; } /** diff --git a/fs/nilfs2/sufile.h b/fs/nilfs2/sufile.h index d595f33a768..449a6e2671b 100644 --- a/fs/nilfs2/sufile.h +++ b/fs/nilfs2/sufile.h @@ -36,9 +36,6 @@ static inline unsigned long nilfs_sufile_get_nsegments(struct inode *sufile) } int nilfs_sufile_alloc(struct inode *, __u64 *); -int nilfs_sufile_cancel_free(struct inode *, __u64); -int nilfs_sufile_freev(struct inode *, __u64 *, size_t); -int nilfs_sufile_free(struct inode *, __u64); int nilfs_sufile_get_segment_usage(struct inode *, __u64, struct nilfs_segment_usage **, struct buffer_head **); @@ -46,9 +43,71 @@ void nilfs_sufile_put_segment_usage(struct inode *, __u64, struct buffer_head *); int nilfs_sufile_get_stat(struct inode *, struct nilfs_sustat *); int nilfs_sufile_get_ncleansegs(struct inode *, unsigned long *); -int nilfs_sufile_set_error(struct inode *, __u64); ssize_t nilfs_sufile_get_suinfo(struct inode *, __u64, struct nilfs_suinfo *, size_t); +int nilfs_sufile_update(struct inode *, __u64, int, + void (*dofunc)(struct inode *, __u64, + struct buffer_head *, + struct buffer_head *)); +void nilfs_sufile_do_cancel_free(struct inode *, __u64, struct buffer_head *, + struct buffer_head *); +void nilfs_sufile_do_free(struct inode *, __u64, struct buffer_head *, + struct buffer_head *); +void nilfs_sufile_do_set_error(struct inode *, __u64, struct buffer_head *, + struct buffer_head *); + +/** + * nilfs_sufile_cancel_free - + * @sufile: inode of segment usage file + * @segnum: segment number + * + * Description: + * + * Return Value: On success, 0 is returned. On error, one of the following + * negative error codes is returned. + * + * %-EIO - I/O error. + * + * %-ENOMEM - Insufficient amount of memory available. + */ +static inline int nilfs_sufile_cancel_free(struct inode *sufile, __u64 segnum) +{ + return nilfs_sufile_update(sufile, segnum, 0, + nilfs_sufile_do_cancel_free); +} + +/** + * nilfs_sufile_free - free segment + * @sufile: inode of segment usage file + * @segnum: segment number to be freed + */ +static inline int nilfs_sufile_free(struct inode *sufile, __u64 segnum) +{ + return nilfs_sufile_update(sufile, segnum, 0, nilfs_sufile_do_free); +} + +/** + * nilfs_sufile_set_error - mark a segment as erroneous + * @sufile: inode of segment usage file + * @segnum: segment number + * + * Description: nilfs_sufile_set_error() marks the segment specified by + * @segnum as erroneous. The error segment will never be used again. + * + * Return Value: On success, 0 is returned. On error, one of the following + * negative error codes is returned. + * + * %-EIO - I/O error. + * + * %-ENOMEM - Insufficient amount of memory available. + * + * %-EINVAL - Invalid segment usage number. + */ +static inline int nilfs_sufile_set_error(struct inode *sufile, __u64 segnum) +{ + return nilfs_sufile_update(sufile, segnum, 0, + nilfs_sufile_do_set_error); +} #endif /* _NILFS_SUFILE_H */ -- cgit v1.2.3 From c85399c2da8b86de8f6877980294fa1a4a88a5a4 Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Sun, 5 Apr 2009 18:30:58 +0900 Subject: nilfs2: fix possible mismatch of sufile counters on recovery On-disk counters ndirtysegs and ncleansegs of sufile, can go wrong after roll-forward recovery because nilfs_prepare_segment_for_recovery() function marks segments dirty without adjusting value of these counters. This fixes the problem by adding a function to sufile which does the operation adjusting the counters, and by letting the recovery function use it. Signed-off-by: Ryusuke Konishi --- fs/nilfs2/recovery.c | 20 ++++---------------- fs/nilfs2/sufile.c | 29 +++++++++++++++++++++++++++++ fs/nilfs2/sufile.h | 12 ++++++++++++ 3 files changed, 45 insertions(+), 16 deletions(-) (limited to 'fs/nilfs2') diff --git a/fs/nilfs2/recovery.c b/fs/nilfs2/recovery.c index 6ade0963fc1..4fc081e47d7 100644 --- a/fs/nilfs2/recovery.c +++ b/fs/nilfs2/recovery.c @@ -413,7 +413,6 @@ static int nilfs_prepare_segment_for_recovery(struct the_nilfs *nilfs, struct nilfs_segment_entry *ent, *n; struct inode *sufile = nilfs->ns_sufile; __u64 segnum[4]; - time_t mtime; int err; int i; @@ -442,24 +441,13 @@ static int nilfs_prepare_segment_for_recovery(struct the_nilfs *nilfs, * Collecting segments written after the latest super root. * These are marked dirty to avoid being reallocated in the next write. */ - mtime = get_seconds(); list_for_each_entry_safe(ent, n, head, list) { - if (ent->segnum == segnum[0]) { - list_del(&ent->list); - nilfs_free_segment_entry(ent); - continue; - } - err = nilfs_open_segment_entry(ent, sufile); - if (unlikely(err)) - goto failed; - if (!nilfs_segment_usage_dirty(ent->raw_su)) { - /* make the segment garbage */ - ent->raw_su->su_nblocks = cpu_to_le32(0); - ent->raw_su->su_lastmod = cpu_to_le32(mtime); - nilfs_segment_usage_set_dirty(ent->raw_su); + if (ent->segnum != segnum[0]) { + err = nilfs_sufile_scrap(sufile, ent->segnum); + if (unlikely(err)) + goto failed; } list_del(&ent->list); - nilfs_close_segment_entry(ent, sufile); nilfs_free_segment_entry(ent); } diff --git a/fs/nilfs2/sufile.c b/fs/nilfs2/sufile.c index 07013f58dfe..98e68677f04 100644 --- a/fs/nilfs2/sufile.c +++ b/fs/nilfs2/sufile.c @@ -258,6 +258,35 @@ void nilfs_sufile_do_cancel_free(struct inode *sufile, __u64 segnum, nilfs_mdt_mark_dirty(sufile); } +void nilfs_sufile_do_scrap(struct inode *sufile, __u64 segnum, + struct buffer_head *header_bh, + struct buffer_head *su_bh) +{ + struct nilfs_segment_usage *su; + void *kaddr; + int clean, dirty; + + kaddr = kmap_atomic(su_bh->b_page, KM_USER0); + su = nilfs_sufile_block_get_segment_usage(sufile, segnum, su_bh, kaddr); + if (su->su_flags == cpu_to_le32(1UL << NILFS_SEGMENT_USAGE_DIRTY) && + su->su_nblocks == cpu_to_le32(0)) { + kunmap_atomic(kaddr, KM_USER0); + return; + } + clean = nilfs_segment_usage_clean(su); + dirty = nilfs_segment_usage_dirty(su); + + /* make the segment garbage */ + su->su_lastmod = cpu_to_le64(0); + su->su_nblocks = cpu_to_le32(0); + su->su_flags = cpu_to_le32(1UL << NILFS_SEGMENT_USAGE_DIRTY); + kunmap_atomic(kaddr, KM_USER0); + + nilfs_sufile_mod_counter(header_bh, clean ? (u64)-1 : 0, dirty ? 0 : 1); + nilfs_mdt_mark_buffer_dirty(su_bh); + nilfs_mdt_mark_dirty(sufile); +} + void nilfs_sufile_do_free(struct inode *sufile, __u64 segnum, struct buffer_head *header_bh, struct buffer_head *su_bh) diff --git a/fs/nilfs2/sufile.h b/fs/nilfs2/sufile.h index 449a6e2671b..a2e2efd4ade 100644 --- a/fs/nilfs2/sufile.h +++ b/fs/nilfs2/sufile.h @@ -52,6 +52,8 @@ int nilfs_sufile_update(struct inode *, __u64, int, struct buffer_head *)); void nilfs_sufile_do_cancel_free(struct inode *, __u64, struct buffer_head *, struct buffer_head *); +void nilfs_sufile_do_scrap(struct inode *, __u64, struct buffer_head *, + struct buffer_head *); void nilfs_sufile_do_free(struct inode *, __u64, struct buffer_head *, struct buffer_head *); void nilfs_sufile_do_set_error(struct inode *, __u64, struct buffer_head *, @@ -77,6 +79,16 @@ static inline int nilfs_sufile_cancel_free(struct inode *sufile, __u64 segnum) nilfs_sufile_do_cancel_free); } +/** + * nilfs_sufile_scrap - make a segment garbage + * @sufile: inode of segment usage file + * @segnum: segment number to be freed + */ +static inline int nilfs_sufile_scrap(struct inode *sufile, __u64 segnum) +{ + return nilfs_sufile_update(sufile, segnum, 1, nilfs_sufile_do_scrap); +} + /** * nilfs_sufile_free - free segment * @sufile: inode of segment usage file -- cgit v1.2.3