From fd98496f467b3d26d05ab1498f41718b5ef13de5 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Mon, 5 Jan 2009 21:34:13 -0500 Subject: jbd2: Add barrier not supported test to journal_wait_on_commit_record Xen doesn't report that barriers are not supported until buffer I/O is reported as completed, instead of when the buffer I/O is submitted. Add a check and a fallback codepath to journal_wait_on_commit_record() to detect this case, so that attempts to mount ext4 filesystems on LVM/devicemapper devices on Xen guests don't blow up with an "Aborting journal on device XXX"; "Remounting filesystem read-only" error. Thanks to Andreas Sundstrom for reporting this issue. Signed-off-by: "Theodore Ts'o" Cc: stable@kernel.org --- fs/jbd2/commit.c | 27 +++++++++++++++++++++++++-- 1 file changed, 25 insertions(+), 2 deletions(-) (limited to 'fs/jbd2') diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c index ebc667bc54a..6393fd0d804 100644 --- a/fs/jbd2/commit.c +++ b/fs/jbd2/commit.c @@ -25,6 +25,7 @@ #include #include #include +#include /* * Default IO end handler for temporary BJ_IO buffer_heads. @@ -168,12 +169,34 @@ static int journal_submit_commit_record(journal_t *journal, * This function along with journal_submit_commit_record * allows to write the commit record asynchronously. */ -static int journal_wait_on_commit_record(struct buffer_head *bh) +static int journal_wait_on_commit_record(journal_t *journal, + struct buffer_head *bh) { int ret = 0; +retry: clear_buffer_dirty(bh); wait_on_buffer(bh); + if (buffer_eopnotsupp(bh) && (journal->j_flags & JBD2_BARRIER)) { + printk(KERN_WARNING + "JBD2: wait_on_commit_record: sync failed on %s - " + "disabling barriers\n", journal->j_devname); + spin_lock(&journal->j_state_lock); + journal->j_flags &= ~JBD2_BARRIER; + spin_unlock(&journal->j_state_lock); + + lock_buffer(bh); + clear_buffer_dirty(bh); + set_buffer_uptodate(bh); + bh->b_end_io = journal_end_buffer_io_sync; + + ret = submit_bh(WRITE_SYNC, bh); + if (ret) { + unlock_buffer(bh); + return ret; + } + goto retry; + } if (unlikely(!buffer_uptodate(bh))) ret = -EIO; @@ -799,7 +822,7 @@ wait_for_iobuf: __jbd2_journal_abort_hard(journal); } if (!err && !is_journal_aborted(journal)) - err = journal_wait_on_commit_record(cbh); + err = journal_wait_on_commit_record(journal, cbh); if (err) jbd2_journal_abort(journal, err); -- cgit v1.2.3 From e07f7183a486cf9783d1f8c9d2997b5b39eeb2d4 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Wed, 26 Nov 2008 01:14:26 -0500 Subject: jbd2: improve jbd2 fsync batching This patch removes the static sleep time in favor of a more self optimizing approach where we measure the average amount of time it takes to commit a transaction to disk and the ammount of time a transaction has been running. If somebody does a sync write or an fsync() traditionally we would sleep for 1 jiffies, which depending on the value of HZ could be a significant amount of time compared to how long it takes to commit a transaction to the underlying storage. With this patch instead of sleeping for a jiffie, we check to see if the amount of time this transaction has been running is less than the average commit time, and if it is we sleep for the delta using schedule_hrtimeout to give us a higher precision sleep time. This greatly benefits high end storage where you could end up sleeping for longer than it takes to commit the transaction and therefore sitting idle instead of allowing the transaction to be committed by keeping the sleep time to a minimum so you are sure to always be doing something. Signed-off-by: Josef Bacik Signed-off-by: "Theodore Ts'o" --- fs/jbd2/commit.c | 14 +++++++++++++ fs/jbd2/transaction.c | 58 ++++++++++++++++++++++++++++++++++++++------------- 2 files changed, 58 insertions(+), 14 deletions(-) (limited to 'fs/jbd2') diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c index 6393fd0d804..f22d1828ea8 100644 --- a/fs/jbd2/commit.c +++ b/fs/jbd2/commit.c @@ -355,6 +355,8 @@ void jbd2_journal_commit_transaction(journal_t *journal) int flags; int err; unsigned long long blocknr; + ktime_t start_time; + u64 commit_time; char *tagp = NULL; journal_header_t *header; journal_block_tag_t *tag = NULL; @@ -481,6 +483,7 @@ void jbd2_journal_commit_transaction(journal_t *journal) commit_transaction->t_state = T_FLUSH; journal->j_committing_transaction = commit_transaction; journal->j_running_transaction = NULL; + start_time = ktime_get(); commit_transaction->t_log_start = journal->j_head; wake_up(&journal->j_wait_transaction_locked); spin_unlock(&journal->j_state_lock); @@ -995,6 +998,17 @@ restart_loop: J_ASSERT(commit_transaction == journal->j_committing_transaction); journal->j_commit_sequence = commit_transaction->t_tid; journal->j_committing_transaction = NULL; + commit_time = ktime_to_ns(ktime_sub(ktime_get(), start_time)); + + /* + * weight the commit time higher than the average time so we don't + * react too strongly to vast changes in the commit time + */ + if (likely(journal->j_average_commit_time)) + journal->j_average_commit_time = (commit_time + + journal->j_average_commit_time*3) / 4; + else + journal->j_average_commit_time = commit_time; spin_unlock(&journal->j_state_lock); if (journal->j_commit_callback) diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c index 39b7805a599..13dcbc990f4 100644 --- a/fs/jbd2/transaction.c +++ b/fs/jbd2/transaction.c @@ -25,6 +25,7 @@ #include #include #include +#include static void __jbd2_journal_temp_unlink_buffer(struct journal_head *jh); @@ -48,6 +49,7 @@ jbd2_get_transaction(journal_t *journal, transaction_t *transaction) { transaction->t_journal = journal; transaction->t_state = T_RUNNING; + transaction->t_start_time = ktime_get(); transaction->t_tid = journal->j_transaction_sequence++; transaction->t_expires = jiffies + journal->j_commit_interval; spin_lock_init(&transaction->t_handle_lock); @@ -1193,7 +1195,7 @@ int jbd2_journal_stop(handle_t *handle) { transaction_t *transaction = handle->h_transaction; journal_t *journal = transaction->t_journal; - int old_handle_count, err; + int err; pid_t pid; J_ASSERT(journal_current_handle() == handle); @@ -1216,24 +1218,52 @@ int jbd2_journal_stop(handle_t *handle) /* * Implement synchronous transaction batching. If the handle * was synchronous, don't force a commit immediately. Let's - * yield and let another thread piggyback onto this transaction. - * Keep doing that while new threads continue to arrive. - * It doesn't cost much - we're about to run a commit and sleep - * on IO anyway. Speeds up many-threaded, many-dir operations - * by 30x or more... + * yield and let another thread piggyback onto this + * transaction. Keep doing that while new threads continue to + * arrive. It doesn't cost much - we're about to run a commit + * and sleep on IO anyway. Speeds up many-threaded, many-dir + * operations by 30x or more... + * + * We try and optimize the sleep time against what the + * underlying disk can do, instead of having a static sleep + * time. This is useful for the case where our storage is so + * fast that it is more optimal to go ahead and force a flush + * and wait for the transaction to be committed than it is to + * wait for an arbitrary amount of time for new writers to + * join the transaction. We achieve this by measuring how + * long it takes to commit a transaction, and compare it with + * how long this transaction has been running, and if run time + * < commit time then we sleep for the delta and commit. This + * greatly helps super fast disks that would see slowdowns as + * more threads started doing fsyncs. * - * But don't do this if this process was the most recent one to - * perform a synchronous write. We do this to detect the case where a - * single process is doing a stream of sync writes. No point in waiting - * for joiners in that case. + * But don't do this if this process was the most recent one + * to perform a synchronous write. We do this to detect the + * case where a single process is doing a stream of sync + * writes. No point in waiting for joiners in that case. */ pid = current->pid; if (handle->h_sync && journal->j_last_sync_writer != pid) { + u64 commit_time, trans_time; + journal->j_last_sync_writer = pid; - do { - old_handle_count = transaction->t_handle_count; - schedule_timeout_uninterruptible(1); - } while (old_handle_count != transaction->t_handle_count); + + spin_lock(&journal->j_state_lock); + commit_time = journal->j_average_commit_time; + spin_unlock(&journal->j_state_lock); + + trans_time = ktime_to_ns(ktime_sub(ktime_get(), + transaction->t_start_time)); + + commit_time = min_t(u64, commit_time, + 1000*jiffies_to_usecs(1)); + + if (trans_time < commit_time) { + ktime_t expires = ktime_add_ns(ktime_get(), + commit_time); + set_current_state(TASK_UNINTERRUPTIBLE); + schedule_hrtimeout(&expires, HRTIMER_MODE_ABS); + } } current->journal_info = NULL; -- cgit v1.2.3 From d7cfa4684d82f58e5d7cb73b8a3c88c169937f25 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Wed, 17 Dec 2008 00:20:45 -0500 Subject: ext4: display average commit time Display the average commit time (which is used by the ext4 fsync batching patch) in /proc/fs/jbd2/*/info for performance tuning purposes. Signed-off-by: "Theodore Ts'o" --- fs/jbd2/journal.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'fs/jbd2') diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index e70d657a19f..74d87290381 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c @@ -40,6 +40,7 @@ #include #include +#include EXPORT_SYMBOL(jbd2_journal_start); EXPORT_SYMBOL(jbd2_journal_restart); @@ -824,6 +825,8 @@ static int jbd2_seq_info_show(struct seq_file *seq, void *v) jiffies_to_msecs(s->stats->u.run.rs_flushing / s->stats->ts_tid)); seq_printf(seq, " %ums logging transaction\n", jiffies_to_msecs(s->stats->u.run.rs_logging / s->stats->ts_tid)); + seq_printf(seq, " %luus average transaction commit time\n", + do_div(s->journal->j_average_commit_time, 1000)); seq_printf(seq, " %lu handles per transaction\n", s->stats->u.run.rs_handle_count / s->stats->ts_tid); seq_printf(seq, " %lu blocks per transaction\n", -- cgit v1.2.3 From 30773840c19cea60dcef39545960d541b1ac1cf8 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Sat, 3 Jan 2009 20:27:38 -0500 Subject: ext4: add fsync batch tuning knobs Add new mount options, min_batch_time and max_batch_time, which controls how long the jbd2 layer should wait for additional filesystem operations to get batched with a synchronous write transaction. Signed-off-by: "Theodore Ts'o" --- fs/jbd2/journal.c | 2 ++ fs/jbd2/transaction.c | 4 +++- 2 files changed, 5 insertions(+), 1 deletion(-) (limited to 'fs/jbd2') diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index 74d87290381..fd1d7557a09 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c @@ -964,6 +964,8 @@ static journal_t * journal_init_common (void) spin_lock_init(&journal->j_state_lock); journal->j_commit_interval = (HZ * JBD2_DEFAULT_MAX_COMMIT_AGE); + journal->j_min_batch_time = 0; + journal->j_max_batch_time = 15000; /* 15ms */ /* The journal is marked for error until we succeed with recovery! */ journal->j_flags = JBD2_ABORT; diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c index 13dcbc990f4..48c21bac5a5 100644 --- a/fs/jbd2/transaction.c +++ b/fs/jbd2/transaction.c @@ -1255,8 +1255,10 @@ int jbd2_journal_stop(handle_t *handle) trans_time = ktime_to_ns(ktime_sub(ktime_get(), transaction->t_start_time)); + commit_time = max_t(u64, commit_time, + 1000*journal->j_min_batch_time); commit_time = min_t(u64, commit_time, - 1000*jiffies_to_usecs(1)); + 1000*journal->j_max_batch_time); if (trans_time < commit_time) { ktime_t expires = ktime_add_ns(ktime_get(), -- cgit v1.2.3 From 1a0d3786dd57dbd74f340322054c3d618b999dcf Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Wed, 5 Nov 2008 00:09:22 -0500 Subject: jbd2: Remove a large array of bh's from the stack of the checkpoint routine jbd2_log_do_checkpoint()n is one of the kernel's largest stack users. Move the array of buffer head's from the stack of jbd2_log_do_checkpoint() to the in-core journal structure. Signed-off-by: "Theodore Ts'o" --- fs/jbd2/checkpoint.c | 22 +++++++++------------- fs/jbd2/journal.c | 2 ++ 2 files changed, 11 insertions(+), 13 deletions(-) (limited to 'fs/jbd2') diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c index 9497718fe92..adc08ec875e 100644 --- a/fs/jbd2/checkpoint.c +++ b/fs/jbd2/checkpoint.c @@ -249,16 +249,14 @@ restart: return ret; } -#define NR_BATCH 64 - static void -__flush_batch(journal_t *journal, struct buffer_head **bhs, int *batch_count) +__flush_batch(journal_t *journal, int *batch_count) { int i; - ll_rw_block(SWRITE, *batch_count, bhs); + ll_rw_block(SWRITE, *batch_count, journal->j_chkpt_bhs); for (i = 0; i < *batch_count; i++) { - struct buffer_head *bh = bhs[i]; + struct buffer_head *bh = journal->j_chkpt_bhs[i]; clear_buffer_jwrite(bh); BUFFER_TRACE(bh, "brelse"); __brelse(bh); @@ -277,8 +275,7 @@ __flush_batch(journal_t *journal, struct buffer_head **bhs, int *batch_count) * Called under jbd_lock_bh_state(jh2bh(jh)), and drops it */ static int __process_buffer(journal_t *journal, struct journal_head *jh, - struct buffer_head **bhs, int *batch_count, - transaction_t *transaction) + int *batch_count, transaction_t *transaction) { struct buffer_head *bh = jh2bh(jh); int ret = 0; @@ -325,14 +322,14 @@ static int __process_buffer(journal_t *journal, struct journal_head *jh, get_bh(bh); J_ASSERT_BH(bh, !buffer_jwrite(bh)); set_buffer_jwrite(bh); - bhs[*batch_count] = bh; + journal->j_chkpt_bhs[*batch_count] = bh; __buffer_relink_io(jh); jbd_unlock_bh_state(bh); transaction->t_chp_stats.cs_written++; (*batch_count)++; - if (*batch_count == NR_BATCH) { + if (*batch_count == JBD2_NR_BATCH) { spin_unlock(&journal->j_list_lock); - __flush_batch(journal, bhs, batch_count); + __flush_batch(journal, batch_count); ret = 1; } } @@ -388,7 +385,6 @@ restart: if (journal->j_checkpoint_transactions == transaction && transaction->t_tid == this_tid) { int batch_count = 0; - struct buffer_head *bhs[NR_BATCH]; struct journal_head *jh; int retry = 0, err; @@ -402,7 +398,7 @@ restart: retry = 1; break; } - retry = __process_buffer(journal, jh, bhs, &batch_count, + retry = __process_buffer(journal, jh, &batch_count, transaction); if (retry < 0 && !result) result = retry; @@ -419,7 +415,7 @@ restart: spin_unlock(&journal->j_list_lock); retry = 1; } - __flush_batch(journal, bhs, &batch_count); + __flush_batch(journal, &batch_count); } if (retry) { diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index fd1d7557a09..34ef9805720 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c @@ -1477,7 +1477,9 @@ int jbd2_journal_destroy(journal_t *journal) spin_lock(&journal->j_list_lock); while (journal->j_checkpoint_transactions != NULL) { spin_unlock(&journal->j_list_lock); + mutex_lock(&journal->j_checkpoint_mutex); jbd2_log_do_checkpoint(journal); + mutex_unlock(&journal->j_checkpoint_mutex); spin_lock(&journal->j_list_lock); } -- cgit v1.2.3 From fb68407b0d9efba962c03f55009c797e22f024bc Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Thu, 6 Nov 2008 17:50:21 -0500 Subject: jbd2: Call journal commit callback without holding j_list_lock Avoid freeing the transaction in __jbd2_journal_drop_transaction() so the journal commit callback can run without holding j_list_lock, to avoid lock contention on this spinlock. Signed-off-by: Aneesh Kumar K.V Signed-off-by: "Theodore Ts'o" --- fs/jbd2/checkpoint.c | 2 +- fs/jbd2/commit.c | 13 ++++++++----- 2 files changed, 9 insertions(+), 6 deletions(-) (limited to 'fs/jbd2') diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c index adc08ec875e..17159cacbd9 100644 --- a/fs/jbd2/checkpoint.c +++ b/fs/jbd2/checkpoint.c @@ -682,6 +682,7 @@ int __jbd2_journal_remove_checkpoint(struct journal_head *jh) safely remove this transaction from the log */ __jbd2_journal_drop_transaction(journal, transaction); + kfree(transaction); /* Just in case anybody was waiting for more transactions to be checkpointed... */ @@ -756,5 +757,4 @@ void __jbd2_journal_drop_transaction(journal_t *journal, transaction_t *transact J_ASSERT(journal->j_running_transaction != transaction); jbd_debug(1, "Dropping transaction %d, all done\n", transaction->t_tid); - kfree(transaction); } diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c index f22d1828ea8..0ad84162c42 100644 --- a/fs/jbd2/commit.c +++ b/fs/jbd2/commit.c @@ -363,7 +363,7 @@ void jbd2_journal_commit_transaction(journal_t *journal) int space_left = 0; int first_tag = 0; int tag_flag; - int i; + int i, to_free = 0; int tag_bytes = journal_tag_bytes(journal); struct buffer_head *cbh = NULL; /* For transactional checksums */ __u32 crc32_sum = ~0; @@ -1011,12 +1011,10 @@ restart_loop: journal->j_average_commit_time = commit_time; spin_unlock(&journal->j_state_lock); - if (journal->j_commit_callback) - journal->j_commit_callback(journal, commit_transaction); - if (commit_transaction->t_checkpoint_list == NULL && commit_transaction->t_checkpoint_io_list == NULL) { __jbd2_journal_drop_transaction(journal, commit_transaction); + to_free = 1; } else { if (journal->j_checkpoint_transactions == NULL) { journal->j_checkpoint_transactions = commit_transaction; @@ -1035,11 +1033,16 @@ restart_loop: } spin_unlock(&journal->j_list_lock); + if (journal->j_commit_callback) + journal->j_commit_callback(journal, commit_transaction); + trace_mark(jbd2_end_commit, "dev %s transaction %d head %d", - journal->j_devname, journal->j_commit_sequence, + journal->j_devname, commit_transaction->t_tid, journal->j_tail_sequence); jbd_debug(1, "JBD: commit %d complete, head %d\n", journal->j_commit_sequence, journal->j_tail_sequence); + if (to_free) + kfree(commit_transaction); wake_up(&journal->j_wait_done_commit); } -- cgit v1.2.3 From c31910672376dfb8d020e32afa7249763bcd924a Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Tue, 6 Jan 2009 11:14:25 -0500 Subject: ext4: Remove code to create the journal inode This code has been obsolete in quite some time, since the supported method for adding a journal inode is to use tune2fs (or to creating new filesystem with a journal via mke2fs or mkfs.ext4). Signed-off-by: "Theodore Ts'o" --- fs/jbd2/journal.c | 72 ------------------------------------------------------- 1 file changed, 72 deletions(-) (limited to 'fs/jbd2') diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index 34ef9805720..b10d7283ba5 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c @@ -66,7 +66,6 @@ EXPORT_SYMBOL(jbd2_journal_update_format); EXPORT_SYMBOL(jbd2_journal_check_used_features); EXPORT_SYMBOL(jbd2_journal_check_available_features); EXPORT_SYMBOL(jbd2_journal_set_features); -EXPORT_SYMBOL(jbd2_journal_create); EXPORT_SYMBOL(jbd2_journal_load); EXPORT_SYMBOL(jbd2_journal_destroy); EXPORT_SYMBOL(jbd2_journal_abort); @@ -1162,77 +1161,6 @@ static int journal_reset(journal_t *journal) return jbd2_journal_start_thread(journal); } -/** - * int jbd2_journal_create() - Initialise the new journal file - * @journal: Journal to create. This structure must have been initialised - * - * Given a journal_t structure which tells us which disk blocks we can - * use, create a new journal superblock and initialise all of the - * journal fields from scratch. - **/ -int jbd2_journal_create(journal_t *journal) -{ - unsigned long long blocknr; - struct buffer_head *bh; - journal_superblock_t *sb; - int i, err; - - if (journal->j_maxlen < JBD2_MIN_JOURNAL_BLOCKS) { - printk (KERN_ERR "Journal length (%d blocks) too short.\n", - journal->j_maxlen); - journal_fail_superblock(journal); - return -EINVAL; - } - - if (journal->j_inode == NULL) { - /* - * We don't know what block to start at! - */ - printk(KERN_EMERG - "%s: creation of journal on external device!\n", - __func__); - BUG(); - } - - /* Zero out the entire journal on disk. We cannot afford to - have any blocks on disk beginning with JBD2_MAGIC_NUMBER. */ - jbd_debug(1, "JBD: Zeroing out journal blocks...\n"); - for (i = 0; i < journal->j_maxlen; i++) { - err = jbd2_journal_bmap(journal, i, &blocknr); - if (err) - return err; - bh = __getblk(journal->j_dev, blocknr, journal->j_blocksize); - lock_buffer(bh); - memset (bh->b_data, 0, journal->j_blocksize); - BUFFER_TRACE(bh, "marking dirty"); - mark_buffer_dirty(bh); - BUFFER_TRACE(bh, "marking uptodate"); - set_buffer_uptodate(bh); - unlock_buffer(bh); - __brelse(bh); - } - - sync_blockdev(journal->j_dev); - jbd_debug(1, "JBD: journal cleared.\n"); - - /* OK, fill in the initial static fields in the new superblock */ - sb = journal->j_superblock; - - sb->s_header.h_magic = cpu_to_be32(JBD2_MAGIC_NUMBER); - sb->s_header.h_blocktype = cpu_to_be32(JBD2_SUPERBLOCK_V2); - - sb->s_blocksize = cpu_to_be32(journal->j_blocksize); - sb->s_maxlen = cpu_to_be32(journal->j_maxlen); - sb->s_first = cpu_to_be32(1); - - journal->j_transaction_sequence = 1; - - journal->j_flags &= ~JBD2_ABORT; - journal->j_format_version = 2; - - return journal_reset(journal); -} - /** * void jbd2_journal_update_superblock() - Update journal sb on disk. * @journal: The journal to update. -- cgit v1.2.3 From 4a9bf99b205448ec1f0cbdee1776a29f9c503ce4 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Sat, 3 Jan 2009 22:56:44 -0500 Subject: jbd2: Add pid and journal device name to the "kjournald2 starting" message Signed-off-by: "Theodore Ts'o" --- fs/jbd2/journal.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'fs/jbd2') diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index b10d7283ba5..fe20e40ee7c 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c @@ -131,8 +131,9 @@ static int kjournald2(void *arg) journal->j_task = current; wake_up(&journal->j_wait_done_commit); - printk(KERN_INFO "kjournald2 starting. Commit interval %ld seconds\n", - journal->j_commit_interval / HZ); + printk(KERN_INFO "kjournald2 starting: pid %d, dev %s, " + "commit interval %ld seconds\n", current->pid, + journal->j_devname, journal->j_commit_interval / HZ); /* * And now, wait forever for commit wakeup events. -- cgit v1.2.3 From 40a1984d22294ab202f616e432bb8d3481897675 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Sun, 4 Jan 2009 19:55:57 -0500 Subject: jbd2: Submit writes to the journal using WRITE_SYNC Since we will be waiting the write of the commit record to the journal to complete in journal_submit_commit_record(), submit it using WRITE_SYNC. Signed-off-by: "Theodore Ts'o" --- fs/jbd2/commit.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs/jbd2') diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c index 0ad84162c42..073124a29b8 100644 --- a/fs/jbd2/commit.c +++ b/fs/jbd2/commit.c @@ -138,7 +138,7 @@ static int journal_submit_commit_record(journal_t *journal, set_buffer_ordered(bh); barrier_done = 1; } - ret = submit_bh(WRITE, bh); + ret = submit_bh(WRITE_SYNC, bh); if (barrier_done) clear_buffer_ordered(bh); @@ -159,7 +159,7 @@ static int journal_submit_commit_record(journal_t *journal, lock_buffer(bh); set_buffer_uptodate(bh); clear_buffer_dirty(bh); - ret = submit_bh(WRITE, bh); + ret = submit_bh(WRITE_SYNC, bh); } *cbh = bh; return ret; -- cgit v1.2.3 From 4b905671d2ea09fd48fed72c581df17e40823f39 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Tue, 6 Jan 2009 14:53:35 -0500 Subject: jbd2: Fix oops in jbd2_journal_init_inode() on corrupted fs On 32-bit system with CONFIG_LBD getblk can fail because provided block number is too big. Add error checks so we fail gracefully if getblk() returns NULL (which can also happen on memory allocation failures). Thanks to David Maciejak from Fortinet's FortiGuard Global Security Research Team for reporting this bug. http://bugzilla.kernel.org/show_bug.cgi?id=12370 Signed-off-by: Jan Kara Signed-off-by: "Theodore Ts'o" cc: stable@kernel.org --- fs/jbd2/journal.c | 40 +++++++++++++++++++++++++++------------- 1 file changed, 27 insertions(+), 13 deletions(-) (limited to 'fs/jbd2') diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index fe20e40ee7c..2932c8f5519 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c @@ -632,6 +632,8 @@ struct journal_head *jbd2_journal_get_descriptor_buffer(journal_t *journal) return NULL; bh = __getblk(journal->j_dev, blocknr, journal->j_blocksize); + if (!bh) + return NULL; lock_buffer(bh); memset(bh->b_data, 0, journal->j_blocksize); set_buffer_uptodate(bh); @@ -1021,15 +1023,14 @@ journal_t * jbd2_journal_init_dev(struct block_device *bdev, /* journal descriptor can store up to n blocks -bzzz */ journal->j_blocksize = blocksize; + jbd2_stats_proc_init(journal); n = journal->j_blocksize / sizeof(journal_block_tag_t); journal->j_wbufsize = n; journal->j_wbuf = kmalloc(n * sizeof(struct buffer_head*), GFP_KERNEL); if (!journal->j_wbuf) { printk(KERN_ERR "%s: Cant allocate bhs for commit thread\n", __func__); - kfree(journal); - journal = NULL; - goto out; + goto out_err; } journal->j_dev = bdev; journal->j_fs_dev = fs_dev; @@ -1039,14 +1040,22 @@ journal_t * jbd2_journal_init_dev(struct block_device *bdev, p = journal->j_devname; while ((p = strchr(p, '/'))) *p = '!'; - jbd2_stats_proc_init(journal); bh = __getblk(journal->j_dev, start, journal->j_blocksize); - J_ASSERT(bh != NULL); + if (!bh) { + printk(KERN_ERR + "%s: Cannot get buffer for journal superblock\n", + __func__); + goto out_err; + } journal->j_sb_buffer = bh; journal->j_superblock = (journal_superblock_t *)bh->b_data; -out: + return journal; +out_err: + jbd2_stats_proc_exit(journal); + kfree(journal); + return NULL; } /** @@ -1094,9 +1103,7 @@ journal_t * jbd2_journal_init_inode (struct inode *inode) if (!journal->j_wbuf) { printk(KERN_ERR "%s: Cant allocate bhs for commit thread\n", __func__); - jbd2_stats_proc_exit(journal); - kfree(journal); - return NULL; + goto out_err; } err = jbd2_journal_bmap(journal, 0, &blocknr); @@ -1104,17 +1111,24 @@ journal_t * jbd2_journal_init_inode (struct inode *inode) if (err) { printk(KERN_ERR "%s: Cannnot locate journal superblock\n", __func__); - jbd2_stats_proc_exit(journal); - kfree(journal); - return NULL; + goto out_err; } bh = __getblk(journal->j_dev, blocknr, journal->j_blocksize); - J_ASSERT(bh != NULL); + if (!bh) { + printk(KERN_ERR + "%s: Cannot get buffer for journal superblock\n", + __func__); + goto out_err; + } journal->j_sb_buffer = bh; journal->j_superblock = (journal_superblock_t *)bh->b_data; return journal; +out_err: + jbd2_stats_proc_exit(journal); + kfree(journal); + return NULL; } /* -- cgit v1.2.3