aboutsummaryrefslogtreecommitdiff
path: root/fs/jbd2
diff options
context:
space:
mode:
Diffstat (limited to 'fs/jbd2')
-rw-r--r--fs/jbd2/checkpoint.c22
-rw-r--r--fs/jbd2/commit.c255
-rw-r--r--fs/jbd2/journal.c368
-rw-r--r--fs/jbd2/recovery.c151
-rw-r--r--fs/jbd2/revoke.c6
-rw-r--r--fs/jbd2/transaction.c34
6 files changed, 756 insertions, 80 deletions
diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c
index 3fccde7ba00..1b7f282c1ae 100644
--- a/fs/jbd2/checkpoint.c
+++ b/fs/jbd2/checkpoint.c
@@ -232,7 +232,8 @@ __flush_batch(journal_t *journal, struct buffer_head **bhs, int *batch_count)
* Called under jbd_lock_bh_state(jh2bh(jh)), and drops it
*/
static int __process_buffer(journal_t *journal, struct journal_head *jh,
- struct buffer_head **bhs, int *batch_count)
+ struct buffer_head **bhs, int *batch_count,
+ transaction_t *transaction)
{
struct buffer_head *bh = jh2bh(jh);
int ret = 0;
@@ -250,6 +251,7 @@ static int __process_buffer(journal_t *journal, struct journal_head *jh,
transaction_t *t = jh->b_transaction;
tid_t tid = t->t_tid;
+ transaction->t_chp_stats.cs_forced_to_close++;
spin_unlock(&journal->j_list_lock);
jbd_unlock_bh_state(bh);
jbd2_log_start_commit(journal, tid);
@@ -279,6 +281,7 @@ static int __process_buffer(journal_t *journal, struct journal_head *jh,
bhs[*batch_count] = bh;
__buffer_relink_io(jh);
jbd_unlock_bh_state(bh);
+ transaction->t_chp_stats.cs_written++;
(*batch_count)++;
if (*batch_count == NR_BATCH) {
spin_unlock(&journal->j_list_lock);
@@ -322,6 +325,8 @@ int jbd2_log_do_checkpoint(journal_t *journal)
if (!journal->j_checkpoint_transactions)
goto out;
transaction = journal->j_checkpoint_transactions;
+ if (transaction->t_chp_stats.cs_chp_time == 0)
+ transaction->t_chp_stats.cs_chp_time = jiffies;
this_tid = transaction->t_tid;
restart:
/*
@@ -346,7 +351,8 @@ restart:
retry = 1;
break;
}
- retry = __process_buffer(journal, jh, bhs,&batch_count);
+ retry = __process_buffer(journal, jh, bhs, &batch_count,
+ transaction);
if (!retry && lock_need_resched(&journal->j_list_lock)){
spin_unlock(&journal->j_list_lock);
retry = 1;
@@ -602,15 +608,15 @@ int __jbd2_journal_remove_checkpoint(struct journal_head *jh)
/*
* There is one special case to worry about: if we have just pulled the
- * buffer off a committing transaction's forget list, then even if the
- * checkpoint list is empty, the transaction obviously cannot be
- * dropped!
+ * buffer off a running or committing transaction's checkpoint list,
+ * then even if the checkpoint list is empty, the transaction obviously
+ * cannot be dropped!
*
- * The locking here around j_committing_transaction is a bit sleazy.
+ * The locking here around t_state is a bit sleazy.
* See the comment at the end of jbd2_journal_commit_transaction().
*/
- if (transaction == journal->j_committing_transaction) {
- JBUFFER_TRACE(jh, "belongs to committing transaction");
+ if (transaction->t_state != T_FINISHED) {
+ JBUFFER_TRACE(jh, "belongs to running/committing transaction");
goto out;
}
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
index 6986f334c64..da8d0eb3b7b 100644
--- a/fs/jbd2/commit.c
+++ b/fs/jbd2/commit.c
@@ -20,6 +20,8 @@
#include <linux/slab.h>
#include <linux/mm.h>
#include <linux/pagemap.h>
+#include <linux/jiffies.h>
+#include <linux/crc32.h>
/*
* Default IO end handler for temporary BJ_IO buffer_heads.
@@ -92,19 +94,23 @@ static int inverted_lock(journal_t *journal, struct buffer_head *bh)
return 1;
}
-/* Done it all: now write the commit record. We should have
+/*
+ * Done it all: now submit the commit record. We should have
* cleaned up our previous buffers by now, so if we are in abort
* mode we can now just skip the rest of the journal write
* entirely.
*
* Returns 1 if the journal needs to be aborted or 0 on success
*/
-static int journal_write_commit_record(journal_t *journal,
- transaction_t *commit_transaction)
+static int journal_submit_commit_record(journal_t *journal,
+ transaction_t *commit_transaction,
+ struct buffer_head **cbh,
+ __u32 crc32_sum)
{
struct journal_head *descriptor;
+ struct commit_header *tmp;
struct buffer_head *bh;
- int i, ret;
+ int ret;
int barrier_done = 0;
if (is_journal_aborted(journal))
@@ -116,21 +122,33 @@ static int journal_write_commit_record(journal_t *journal,
bh = jh2bh(descriptor);
- /* AKPM: buglet - add `i' to tmp! */
- for (i = 0; i < bh->b_size; i += 512) {
- journal_header_t *tmp = (journal_header_t*)bh->b_data;
- tmp->h_magic = cpu_to_be32(JBD2_MAGIC_NUMBER);
- tmp->h_blocktype = cpu_to_be32(JBD2_COMMIT_BLOCK);
- tmp->h_sequence = cpu_to_be32(commit_transaction->t_tid);
+ tmp = (struct commit_header *)bh->b_data;
+ tmp->h_magic = cpu_to_be32(JBD2_MAGIC_NUMBER);
+ tmp->h_blocktype = cpu_to_be32(JBD2_COMMIT_BLOCK);
+ tmp->h_sequence = cpu_to_be32(commit_transaction->t_tid);
+
+ if (JBD2_HAS_COMPAT_FEATURE(journal,
+ JBD2_FEATURE_COMPAT_CHECKSUM)) {
+ tmp->h_chksum_type = JBD2_CRC32_CHKSUM;
+ tmp->h_chksum_size = JBD2_CRC32_CHKSUM_SIZE;
+ tmp->h_chksum[0] = cpu_to_be32(crc32_sum);
}
- JBUFFER_TRACE(descriptor, "write commit block");
+ JBUFFER_TRACE(descriptor, "submit commit block");
+ lock_buffer(bh);
+
set_buffer_dirty(bh);
- if (journal->j_flags & JBD2_BARRIER) {
+ set_buffer_uptodate(bh);
+ bh->b_end_io = journal_end_buffer_io_sync;
+
+ if (journal->j_flags & JBD2_BARRIER &&
+ !JBD2_HAS_COMPAT_FEATURE(journal,
+ JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT)) {
set_buffer_ordered(bh);
barrier_done = 1;
}
- ret = sync_dirty_buffer(bh);
+ ret = submit_bh(WRITE, bh);
+
/* is it possible for another commit to fail at roughly
* the same time as this one? If so, we don't want to
* trust the barrier flag in the super, but instead want
@@ -151,14 +169,72 @@ static int journal_write_commit_record(journal_t *journal,
clear_buffer_ordered(bh);
set_buffer_uptodate(bh);
set_buffer_dirty(bh);
- ret = sync_dirty_buffer(bh);
+ ret = submit_bh(WRITE, bh);
}
- put_bh(bh); /* One for getblk() */
- jbd2_journal_put_journal_head(descriptor);
+ *cbh = bh;
+ return ret;
+}
+
+/*
+ * Second half of an asynchronous commit-record write: wait for the
+ * commit block that journal_submit_commit_record() handed to submit_bh()
+ * and then drop the references taken on it.
+ *
+ * @bh: the commit block's buffer head; it is used unconditionally, so
+ *      callers must pass a valid pointer.
+ *
+ * Returns 0 on success, or -EIO if the buffer is not uptodate once the
+ * wait has finished.
+ */
+static int journal_wait_on_commit_record(struct buffer_head *bh)
+{
+ int ret = 0;
+
+ clear_buffer_dirty(bh);
+ wait_on_buffer(bh);
+
+ /* A buffer that is not uptodate after the wait is reported as -EIO. */
+ if (unlikely(!buffer_uptodate(bh)))
+ ret = -EIO;
+ put_bh(bh); /* One for getblk() */
+ jbd2_journal_put_journal_head(bh2jh(bh));
- return (ret == -EIO);
+ return ret;
}
+/*
+ * Wait for all submitted IO to complete.
+ *
+ * Drains commit_transaction->t_locked_list: each buffer is waited on if
+ * it is still locked and, when it is still a journal buffer filed on
+ * BJ_Locked, it is unfiled and its journal head removed.
+ *
+ * Must be entered with journal->j_list_lock held; the lock is dropped
+ * and re-taken around the sleeping wait, and is held again on return.
+ *
+ * Returns 0, or -EIO if any buffer that had to be waited on was not
+ * uptodate afterwards.
+ */
+static int journal_wait_on_locked_list(journal_t *journal,
+ transaction_t *commit_transaction)
+{
+ int ret = 0;
+ struct journal_head *jh;
+
+ while (commit_transaction->t_locked_list) {
+ struct buffer_head *bh;
+
+ jh = commit_transaction->t_locked_list->b_tprev;
+ bh = jh2bh(jh);
+ /* Pin the buffer for the duration of this iteration. */
+ get_bh(bh);
+ if (buffer_locked(bh)) {
+ /* wait_on_buffer() may sleep: release the spinlock first. */
+ spin_unlock(&journal->j_list_lock);
+ wait_on_buffer(bh);
+ if (unlikely(!buffer_uptodate(bh)))
+ ret = -EIO;
+ spin_lock(&journal->j_list_lock);
+ }
+ /*
+ * NOTE(review): j_list_lock is re-taken here, so inverted_lock()
+ * presumably returns 0 with j_list_lock dropped; its body is not
+ * visible in this hunk - confirm.
+ */
+ if (!inverted_lock(journal, bh)) {
+ put_bh(bh);
+ spin_lock(&journal->j_list_lock);
+ continue;
+ }
+ if (buffer_jbd(bh) && jh->b_jlist == BJ_Locked) {
+ __jbd2_journal_unfile_buffer(jh);
+ jbd_unlock_bh_state(bh);
+ jbd2_journal_remove_journal_head(bh);
+ /* NOTE(review): extra put, presumably the list's own reference - confirm. */
+ put_bh(bh);
+ } else {
+ jbd_unlock_bh_state(bh);
+ }
+ /* Pairs with the get_bh() at the top of the loop. */
+ put_bh(bh);
+ cond_resched_lock(&journal->j_list_lock);
+ }
+ return ret;
+ }
+
static void journal_do_submit_data(struct buffer_head **wbuf, int bufs)
{
int i;
@@ -274,7 +350,21 @@ write_out_data:
journal_do_submit_data(wbuf, bufs);
}
-static inline void write_tag_block(int tag_bytes, journal_block_tag_t *tag,
+/*
+ * Fold the contents of @bh into a running big-endian CRC32.
+ *
+ * @crc32_sum: checksum accumulated so far (seed for this block)
+ * @bh:        buffer whose b_size bytes of data are to be summed
+ *
+ * The buffer's page is mapped atomically for the duration of the CRC
+ * computation.  Returns the updated checksum.
+ */
+static __u32 jbd2_checksum_data(__u32 crc32_sum, struct buffer_head *bh)
+{
+	char *kaddr = kmap_atomic(bh->b_page, KM_USER0);
+	void *data = (void *)(kaddr + offset_in_page(bh->b_data));
+	__u32 sum = crc32_be(crc32_sum, data, bh->b_size);
+
+	kunmap_atomic(kaddr, KM_USER0);
+	return sum;
+}
+
+static void write_tag_block(int tag_bytes, journal_block_tag_t *tag,
unsigned long long block)
{
tag->t_blocknr = cpu_to_be32(block & (u32)~0);
@@ -290,6 +380,7 @@ static inline void write_tag_block(int tag_bytes, journal_block_tag_t *tag,
*/
void jbd2_journal_commit_transaction(journal_t *journal)
{
+ struct transaction_stats_s stats;
transaction_t *commit_transaction;
struct journal_head *jh, *new_jh, *descriptor;
struct buffer_head **wbuf = journal->j_wbuf;
@@ -305,6 +396,8 @@ void jbd2_journal_commit_transaction(journal_t *journal)
int tag_flag;
int i;
int tag_bytes = journal_tag_bytes(journal);
+ struct buffer_head *cbh = NULL; /* For transactional checksums */
+ __u32 crc32_sum = ~0;
/*
* First job: lock down the current transaction and wait for
@@ -337,6 +430,11 @@ void jbd2_journal_commit_transaction(journal_t *journal)
spin_lock(&journal->j_state_lock);
commit_transaction->t_state = T_LOCKED;
+ stats.u.run.rs_wait = commit_transaction->t_max_wait;
+ stats.u.run.rs_locked = jiffies;
+ stats.u.run.rs_running = jbd2_time_diff(commit_transaction->t_start,
+ stats.u.run.rs_locked);
+
spin_lock(&commit_transaction->t_handle_lock);
while (commit_transaction->t_updates) {
DEFINE_WAIT(wait);
@@ -407,6 +505,10 @@ void jbd2_journal_commit_transaction(journal_t *journal)
*/
jbd2_journal_switch_revoke_table(journal);
+ stats.u.run.rs_flushing = jiffies;
+ stats.u.run.rs_locked = jbd2_time_diff(stats.u.run.rs_locked,
+ stats.u.run.rs_flushing);
+
commit_transaction->t_state = T_FLUSH;
journal->j_committing_transaction = commit_transaction;
journal->j_running_transaction = NULL;
@@ -440,38 +542,15 @@ void jbd2_journal_commit_transaction(journal_t *journal)
journal_submit_data_buffers(journal, commit_transaction);
/*
- * Wait for all previously submitted IO to complete.
+ * Wait for all previously submitted IO to complete if commit
+ * record is to be written synchronously.
*/
spin_lock(&journal->j_list_lock);
- while (commit_transaction->t_locked_list) {
- struct buffer_head *bh;
+ if (!JBD2_HAS_INCOMPAT_FEATURE(journal,
+ JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT))
+ err = journal_wait_on_locked_list(journal,
+ commit_transaction);
- jh = commit_transaction->t_locked_list->b_tprev;
- bh = jh2bh(jh);
- get_bh(bh);
- if (buffer_locked(bh)) {
- spin_unlock(&journal->j_list_lock);
- wait_on_buffer(bh);
- if (unlikely(!buffer_uptodate(bh)))
- err = -EIO;
- spin_lock(&journal->j_list_lock);
- }
- if (!inverted_lock(journal, bh)) {
- put_bh(bh);
- spin_lock(&journal->j_list_lock);
- continue;
- }
- if (buffer_jbd(bh) && jh->b_jlist == BJ_Locked) {
- __jbd2_journal_unfile_buffer(jh);
- jbd_unlock_bh_state(bh);
- jbd2_journal_remove_journal_head(bh);
- put_bh(bh);
- } else {
- jbd_unlock_bh_state(bh);
- }
- put_bh(bh);
- cond_resched_lock(&journal->j_list_lock);
- }
spin_unlock(&journal->j_list_lock);
if (err)
@@ -498,6 +577,12 @@ void jbd2_journal_commit_transaction(journal_t *journal)
*/
commit_transaction->t_state = T_COMMIT;
+ stats.u.run.rs_logging = jiffies;
+ stats.u.run.rs_flushing = jbd2_time_diff(stats.u.run.rs_flushing,
+ stats.u.run.rs_logging);
+ stats.u.run.rs_blocks = commit_transaction->t_outstanding_credits;
+ stats.u.run.rs_blocks_logged = 0;
+
descriptor = NULL;
bufs = 0;
while (commit_transaction->t_buffers) {
@@ -639,6 +724,15 @@ void jbd2_journal_commit_transaction(journal_t *journal)
start_journal_io:
for (i = 0; i < bufs; i++) {
struct buffer_head *bh = wbuf[i];
+ /*
+ * Compute checksum.
+ */
+ if (JBD2_HAS_COMPAT_FEATURE(journal,
+ JBD2_FEATURE_COMPAT_CHECKSUM)) {
+ crc32_sum =
+ jbd2_checksum_data(crc32_sum, bh);
+ }
+
lock_buffer(bh);
clear_buffer_dirty(bh);
set_buffer_uptodate(bh);
@@ -646,6 +740,7 @@ start_journal_io:
submit_bh(WRITE, bh);
}
cond_resched();
+ stats.u.run.rs_blocks_logged += bufs;
/* Force a new descriptor to be generated next
time round the loop. */
@@ -654,6 +749,23 @@ start_journal_io:
}
}
+ /* Done it all: now write the commit record asynchronously. */
+
+ if (JBD2_HAS_INCOMPAT_FEATURE(journal,
+ JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT)) {
+ err = journal_submit_commit_record(journal, commit_transaction,
+ &cbh, crc32_sum);
+ if (err)
+ __jbd2_journal_abort_hard(journal);
+
+ spin_lock(&journal->j_list_lock);
+ err = journal_wait_on_locked_list(journal,
+ commit_transaction);
+ spin_unlock(&journal->j_list_lock);
+ if (err)
+ __jbd2_journal_abort_hard(journal);
+ }
+
/* Lo and behold: we have just managed to send a transaction to
the log. Before we can commit it, wait for the IO so far to
complete. Control buffers being written are on the
@@ -753,8 +865,14 @@ wait_for_iobuf:
jbd_debug(3, "JBD: commit phase 6\n");
- if (journal_write_commit_record(journal, commit_transaction))
- err = -EIO;
+ if (!JBD2_HAS_INCOMPAT_FEATURE(journal,
+ JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT)) {
+ err = journal_submit_commit_record(journal, commit_transaction,
+ &cbh, crc32_sum);
+ if (err)
+ __jbd2_journal_abort_hard(journal);
+ }
+ err = journal_wait_on_commit_record(cbh);
if (err)
jbd2_journal_abort(journal, err);
@@ -816,6 +934,7 @@ restart_loop:
cp_transaction = jh->b_cp_transaction;
if (cp_transaction) {
JBUFFER_TRACE(jh, "remove from old cp transaction");
+ cp_transaction->t_chp_stats.cs_dropped++;
__jbd2_journal_remove_checkpoint(jh);
}
@@ -867,10 +986,10 @@ restart_loop:
}
spin_unlock(&journal->j_list_lock);
/*
- * This is a bit sleazy. We borrow j_list_lock to protect
- * journal->j_committing_transaction in __jbd2_journal_remove_checkpoint.
- * Really, __jbd2_journal_remove_checkpoint should be using j_state_lock but
- * it's a bit hassle to hold that across __jbd2_journal_remove_checkpoint
+ * This is a bit sleazy. We use j_list_lock to protect transition
+ * of a transaction into T_FINISHED state and calling
+ * __jbd2_journal_drop_transaction(). Otherwise we could race with
+ * other checkpointing code processing the transaction...
*/
spin_lock(&journal->j_state_lock);
spin_lock(&journal->j_list_lock);
@@ -890,6 +1009,36 @@ restart_loop:
J_ASSERT(commit_transaction->t_state == T_COMMIT);
+ commit_transaction->t_start = jiffies;
+ stats.u.run.rs_logging = jbd2_time_diff(stats.u.run.rs_logging,
+ commit_transaction->t_start);
+
+ /*
+ * File the transaction for history
+ */
+ stats.ts_type = JBD2_STATS_RUN;
+ stats.ts_tid = commit_transaction->t_tid;
+ stats.u.run.rs_handle_count = commit_transaction->t_handle_count;
+ spin_lock(&journal->j_history_lock);
+ memcpy(journal->j_history + journal->j_history_cur, &stats,
+ sizeof(stats));
+ if (++journal->j_history_cur == journal->j_history_max)
+ journal->j_history_cur = 0;
+
+ /*
+ * Calculate overall stats
+ */
+ journal->j_stats.ts_tid++;
+ journal->j_stats.u.run.rs_wait += stats.u.run.rs_wait;
+ journal->j_stats.u.run.rs_running += stats.u.run.rs_running;
+ journal->j_stats.u.run.rs_locked += stats.u.run.rs_locked;
+ journal->j_stats.u.run.rs_flushing += stats.u.run.rs_flushing;
+ journal->j_stats.u.run.rs_logging += stats.u.run.rs_logging;
+ journal->j_stats.u.run.rs_handle_count += stats.u.run.rs_handle_count;
+ journal->j_stats.u.run.rs_blocks += stats.u.run.rs_blocks;
+ journal->j_stats.u.run.rs_blocks_logged += stats.u.run.rs_blocks_logged;
+ spin_unlock(&journal->j_history_lock);
+
commit_transaction->t_state = T_FINISHED;
J_ASSERT(commit_transaction == journal->j_committing_transaction);
journal->j_commit_sequence = commit_transaction->t_tid;
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index 6ddc5531587..96ba846992e 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -36,6 +36,7 @@
#include <linux/poison.h>
#include <linux/proc_fs.h>
#include <linux/debugfs.h>
+#include <linux/seq_file.h>
#include <asm/uaccess.h>
#include <asm/page.h>
@@ -640,6 +641,312 @@ struct journal_head *jbd2_journal_get_descriptor_buffer(journal_t *journal)
return jbd2_journal_add_journal_head(bh);
}
+/*
+ * Per-open state for the jbd2 /proc statistics files.  It is allocated
+ * in the open handlers, stored in seq_file->private and freed in the
+ * release handlers.
+ */
+struct jbd2_stats_proc_session {
+ journal_t *journal; /* owning journal (set by the "info" open handler) */
+ struct transaction_stats_s *stats; /* private copy of the stats taken at open time */
+ int start; /* "history" only: ring index at which iteration begins */
+ int max; /* "history" only: number of slots in the copied ring */
+};
+
+/*
+ * Starting at @ts, find the next used slot (ts_type != 0) in the copied
+ * history ring of session @s, wrapping from the end of the array back
+ * to its beginning.
+ *
+ * @first is non-zero only for the very first lookup of a walk; for all
+ * later lookups, arriving at the ring's start slot means the walk has
+ * gone full circle.
+ *
+ * Returns the slot found, or NULL when the walk is complete.
+ */
+static void *jbd2_history_skip_empty(struct jbd2_stats_proc_session *s,
+					struct transaction_stats_s *ts,
+					int first)
+{
+	struct transaction_stats_s *const ring_end = s->stats + s->max;
+	struct transaction_stats_s *const origin = s->stats + s->start;
+
+	if (ts == ring_end)
+		ts = s->stats;
+	if (ts == origin && !first)
+		return NULL;
+
+	for (;;) {
+		if (ts->ts_type != 0)
+			return ts;
+		if (++ts == ring_end)
+			ts = s->stats;
+		if (ts == origin)
+			return NULL;
+	}
+}
+
+/*
+ * seq_file ->start for the "history" file.  Position 0 is the header
+ * line (SEQ_START_TOKEN); position N > 0 is the N-th used slot of the
+ * copied ring, counted from the session's start index.  Returns NULL
+ * when fewer than N used slots exist.
+ */
+static void *jbd2_seq_history_start(struct seq_file *seq, loff_t *pos)
+{
+	struct jbd2_stats_proc_session *s = seq->private;
+	struct transaction_stats_s *ts;
+	int remaining = *pos;
+
+	if (!remaining)
+		return SEQ_START_TOKEN;
+
+	ts = jbd2_history_skip_empty(s, s->stats + s->start, 1);
+	for (remaining--; ts && remaining; remaining--)
+		ts = jbd2_history_skip_empty(s, ts + 1, 0);
+
+	return ts;
+}
+
+/*
+ * seq_file ->next for the "history" file: advance the position and
+ * return the next used slot.  After the header token the walk begins at
+ * the session's start index; otherwise it continues with the slot
+ * following @v.
+ */
+static void *jbd2_seq_history_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+	struct jbd2_stats_proc_session *s = seq->private;
+	int first = (v == SEQ_START_TOKEN);
+	struct transaction_stats_s *from;
+
+	++*pos;
+	if (first)
+		from = s->stats + s->start;
+	else
+		from = (struct transaction_stats_s *)v + 1;
+
+	return jbd2_history_skip_empty(s, from, first);
+}
+
+/*
+ * seq_file ->show for the "history" file.
+ *
+ * Prints the column header for SEQ_START_TOKEN, an "R" row for a
+ * JBD2_STATS_RUN record, or a "C" row for a JBD2_STATS_CHECKPOINT
+ * record.  Time fields are stored in jiffies and printed in
+ * milliseconds.  Any other record type trips J_ASSERT().
+ * Always returns 0.
+ */
+static int jbd2_seq_history_show(struct seq_file *seq, void *v)
+{
+ struct transaction_stats_s *ts = v;
+ if (v == SEQ_START_TOKEN) {
+ seq_printf(seq, "%-4s %-5s %-5s %-5s %-5s %-5s %-5s %-6s %-5s "
+ "%-5s %-5s %-5s %-5s %-5s\n", "R/C", "tid",
+ "wait", "run", "lock", "flush", "log", "hndls",
+ "block", "inlog", "ctime", "write", "drop",
+ "close");
+ return 0;
+ }
+ if (ts->ts_type == JBD2_STATS_RUN)
+ seq_printf(seq, "%-4s %-5lu %-5u %-5u %-5u %-5u %-5u "
+ "%-6lu %-5lu %-5lu\n", "R", ts->ts_tid,
+ jiffies_to_msecs(ts->u.run.rs_wait),
+ jiffies_to_msecs(ts->u.run.rs_running),
+ jiffies_to_msecs(ts->u.run.rs_locked),
+ jiffies_to_msecs(ts->u.run.rs_flushing),
+ jiffies_to_msecs(ts->u.run.rs_logging),
+ ts->u.run.rs_handle_count,
+ ts->u.run.rs_blocks,
+ ts->u.run.rs_blocks_logged);
+ else if (ts->ts_type == JBD2_STATS_CHECKPOINT)
+ /* The 48-character blank pad skips the run-only columns. */
+ seq_printf(seq, "%-4s %-5lu %48s %-5u %-5lu %-5lu %-5lu\n",
+ "C", ts->ts_tid, " ",
+ jiffies_to_msecs(ts->u.chp.cs_chp_time),
+ ts->u.chp.cs_written, ts->u.chp.cs_dropped,
+ ts->u.chp.cs_forced_to_close);
+ else
+ J_ASSERT(0);
+ return 0;
+}
+
+/*
+ * seq_file ->stop for the "history" file.  Intentionally empty: the
+ * iterator walks a private copy made at open time, so there is nothing
+ * to unlock or release here.
+ */
+static void jbd2_seq_history_stop(struct seq_file *seq, void *v)
+{
+}
+
+/* seq_file iterator callbacks for the per-journal "history" proc file. */
+static struct seq_operations jbd2_seq_history_ops = {
+ .start = jbd2_seq_history_start,
+ .next = jbd2_seq_history_next,
+ .stop = jbd2_seq_history_stop,
+ .show = jbd2_seq_history_show,
+};
+
+/*
+ * open() handler for the per-journal "history" proc file.
+ *
+ * Takes a snapshot of the journal's history ring under j_history_lock
+ * so that the seq_file iterator can walk it without holding any lock.
+ * The snapshot is attached to seq_file->private and freed by
+ * jbd2_seq_history_release().
+ *
+ * Returns 0 on success, -ENOENT if the journal has no history ring,
+ * -ENOMEM on allocation failure, or the error returned by seq_open().
+ */
+static int jbd2_seq_history_open(struct inode *inode, struct file *file)
+{
+	journal_t *journal = PDE(inode)->data;
+	struct jbd2_stats_proc_session *s;
+	int rc, size;
+
+	/*
+	 * journal_init_stats() leaves j_history NULL and j_history_max 0
+	 * when statistics are unavailable.  Bail out early: the modulo
+	 * below would otherwise divide by zero.
+	 */
+	if (!journal->j_history || !journal->j_history_max)
+		return -ENOENT;
+
+	s = kmalloc(sizeof(*s), GFP_KERNEL);
+	if (s == NULL)
+		return -ENOMEM;
+	size = sizeof(struct transaction_stats_s) * journal->j_history_max;
+	s->stats = kmalloc(size, GFP_KERNEL);
+	if (s->stats == NULL) {
+		kfree(s);
+		return -ENOMEM;
+	}
+	/* Initialise every field, as the "info" open handler does. */
+	s->journal = journal;
+
+	spin_lock(&journal->j_history_lock);
+	memcpy(s->stats, journal->j_history, size);
+	s->max = journal->j_history_max;
+	s->start = journal->j_history_cur % s->max;
+	spin_unlock(&journal->j_history_lock);
+
+	rc = seq_open(file, &jbd2_seq_history_ops);
+	if (rc == 0) {
+		struct seq_file *m = file->private_data;
+		m->private = s;
+	} else {
+		kfree(s->stats);
+		kfree(s);
+	}
+	return rc;
+
+}
+
+/*
+ * release() handler for the "history" proc file: free the snapshot and
+ * session allocated at open time, then let seq_file tear itself down.
+ */
+static int jbd2_seq_history_release(struct inode *inode, struct file *file)
+{
+	struct seq_file *m = file->private_data;
+	struct jbd2_stats_proc_session *session = m->private;
+
+	kfree(session->stats);
+	kfree(session);
+
+	return seq_release(inode, file);
+}
+
+/*
+ * File operations for the "history" proc file: custom open/release
+ * wrapped around the generic seq_file read and llseek helpers.
+ */
+static struct file_operations jbd2_seq_history_fops = {
+ .owner = THIS_MODULE,
+ .open = jbd2_seq_history_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = jbd2_seq_history_release,
+};
+
+/*
+ * seq_file ->start for the "info" file.  The file consists of a single
+ * record, so only position 0 yields an element.
+ */
+static void *jbd2_seq_info_start(struct seq_file *seq, loff_t *pos)
+{
+	if (*pos)
+		return NULL;
+	return SEQ_START_TOKEN;
+}
+
+/*
+ * seq_file ->next for the "info" file.  There is never a second record,
+ * so this always returns NULL.  The position is still advanced, because
+ * the seq_file contract requires ->next to move *pos forward even when
+ * it reports end-of-sequence.
+ */
+static void *jbd2_seq_info_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+	++*pos;
+	return NULL;
+}
+
+/*
+ * seq_file ->show for the "info" file.
+ *
+ * Prints the number of transactions recorded in the snapshot and the
+ * journal's j_max_transaction_buffers, followed by per-transaction
+ * averages: each accumulated total divided by the transaction count
+ * (ts_tid).  Time totals are in jiffies and are printed in milliseconds.
+ * Always returns 0.
+ */
+static int jbd2_seq_info_show(struct seq_file *seq, void *v)
+{
+ struct jbd2_stats_proc_session *s = seq->private;
+
+ if (v != SEQ_START_TOKEN)
+ return 0;
+ seq_printf(seq, "%lu transaction, each upto %u blocks\n",
+ s->stats->ts_tid,
+ s->journal->j_max_transaction_buffers);
+ /* No transactions yet: skip the averages to avoid dividing by zero. */
+ if (s->stats->ts_tid == 0)
+ return 0;
+ seq_printf(seq, "average: \n %ums waiting for transaction\n",
+ jiffies_to_msecs(s->stats->u.run.rs_wait / s->stats->ts_tid));
+ seq_printf(seq, " %ums running transaction\n",
+ jiffies_to_msecs(s->stats->u.run.rs_running / s->stats->ts_tid));
+ seq_printf(seq, " %ums transaction was being locked\n",
+ jiffies_to_msecs(s->stats->u.run.rs_locked / s->stats->ts_tid));
+ seq_printf(seq, " %ums flushing data (in ordered mode)\n",
+ jiffies_to_msecs(s->stats->u.run.rs_flushing / s->stats->ts_tid));
+ seq_printf(seq, " %ums logging transaction\n",
+ jiffies_to_msecs(s->stats->u.run.rs_logging / s->stats->ts_tid));
+ seq_printf(seq, " %lu handles per transaction\n",
+ s->stats->u.run.rs_handle_count / s->stats->ts_tid);
+ seq_printf(seq, " %lu blocks per transaction\n",
+ s->stats->u.run.rs_blocks / s->stats->ts_tid);
+ seq_printf(seq, " %lu logged blocks per transaction\n",
+ s->stats->u.run.rs_blocks_logged / s->stats->ts_tid);
+ return 0;
+}
+
+/*
+ * seq_file ->stop for the "info" file.  Intentionally empty: ->show
+ * reads only the private copy made at open time.
+ */
+static void jbd2_seq_info_stop(struct seq_file *seq, void *v)
+{
+}
+
+/* seq_file iterator callbacks for the per-journal "info" proc file. */
+static struct seq_operations jbd2_seq_info_ops = {
+ .start = jbd2_seq_info_start,
+ .next = jbd2_seq_info_next,
+ .stop = jbd2_seq_info_stop,
+ .show = jbd2_seq_info_show,
+};
+
+/*
+ * open() handler for the per-journal "info" proc file.
+ *
+ * Copies the journal's cumulative statistics (j_stats) under
+ * j_history_lock into a private session, which is attached to
+ * seq_file->private and freed by jbd2_seq_info_release().
+ *
+ * Returns 0 on success, -ENOMEM on allocation failure, or the error
+ * returned by seq_open().
+ */
+static int jbd2_seq_info_open(struct inode *inode, struct file *file)
+{
+	journal_t *journal = PDE(inode)->data;
+	struct jbd2_stats_proc_session *session;
+	struct seq_file *m;
+	int rc = -ENOMEM;
+
+	session = kmalloc(sizeof(*session), GFP_KERNEL);
+	if (!session)
+		goto out;
+
+	session->stats = kmalloc(sizeof(struct transaction_stats_s),
+				 GFP_KERNEL);
+	if (!session->stats)
+		goto free_session;
+
+	spin_lock(&journal->j_history_lock);
+	memcpy(session->stats, &journal->j_stats,
+	       sizeof(struct transaction_stats_s));
+	session->journal = journal;
+	spin_unlock(&journal->j_history_lock);
+
+	rc = seq_open(file, &jbd2_seq_info_ops);
+	if (rc != 0)
+		goto free_stats;
+
+	m = file->private_data;
+	m->private = session;
+	return rc;
+
+free_stats:
+	kfree(session->stats);
+free_session:
+	kfree(session);
+out:
+	return rc;
+}
+
+/*
+ * release() handler for the "info" proc file: free the statistics copy
+ * and the session made at open time, then release the seq_file.
+ */
+static int jbd2_seq_info_release(struct inode *inode, struct file *file)
+{
+	struct seq_file *m = file->private_data;
+	struct jbd2_stats_proc_session *session = m->private;
+
+	kfree(session->stats);
+	kfree(session);
+
+	return seq_release(inode, file);
+}
+
+/*
+ * File operations for the "info" proc file: custom open/release wrapped
+ * around the generic seq_file read and llseek helpers.
+ */
+static struct file_operations jbd2_seq_info_fops = {
+ .owner = THIS_MODULE,
+ .open = jbd2_seq_info_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = jbd2_seq_info_release,
+};
+
+static struct proc_dir_entry *proc_jbd2_stats;
+
+/*
+ * Create the per-journal statistics directory, named after the journal
+ * block device, beneath proc_jbd2_stats and populate it with the
+ * "history" and "info" files.
+ *
+ * Failures are not reported: if the directory cannot be created,
+ * journal->j_proc_entry stays NULL and no files are created.  The two
+ * files are created independently of each other, matching
+ * jbd2_stats_proc_exit(), which removes both unconditionally.
+ */
+static void jbd2_stats_proc_init(journal_t *journal)
+{
+	char name[BDEVNAME_SIZE];
+	struct proc_dir_entry *p;
+
+	/*
+	 * bdevname() formats directly into the caller's buffer; there is
+	 * no need to copy the result back onto itself.
+	 */
+	bdevname(journal->j_dev, name);
+	journal->j_proc_entry = proc_mkdir(name, proc_jbd2_stats);
+	if (!journal->j_proc_entry)
+		return;
+
+	p = create_proc_entry("history", S_IRUGO, journal->j_proc_entry);
+	if (p) {
+		p->proc_fops = &jbd2_seq_history_fops;
+		p->data = journal;
+	}
+
+	p = create_proc_entry("info", S_IRUGO, journal->j_proc_entry);
+	if (p) {
+		p->proc_fops = &jbd2_seq_info_fops;
+		p->data = journal;
+	}
+}
+
+/*
+ * Remove the "info" and "history" files and then the per-journal
+ * directory created by jbd2_stats_proc_init().  The directory name is
+ * derived from the block device in exactly the same way as at creation.
+ */
+static void jbd2_stats_proc_exit(journal_t *journal)
+{
+	char name[BDEVNAME_SIZE];
+
+	bdevname(journal->j_dev, name);
+	remove_proc_entry("info", journal->j_proc_entry);
+	remove_proc_entry("history", journal->j_proc_entry);
+	remove_proc_entry(name, proc_jbd2_stats);
+}
+
+/*
+ * Set up the statistics state of a freshly allocated journal.
+ *
+ * j_history_lock is initialised unconditionally, because the commit
+ * path and the "info" open handler take it whether or not a history
+ * ring exists.  The ring itself (100 zeroed slots) is allocated only
+ * when the top-level jbd2 proc directory is present; if the allocation
+ * fails, j_history_max is reset to 0 and history stays disabled.
+ */
+static void journal_init_stats(journal_t *journal)
+{
+	int size;
+
+	spin_lock_init(&journal->j_history_lock);
+
+	if (!proc_jbd2_stats)
+		return;
+
+	journal->j_history_max = 100;
+	size = sizeof(struct transaction_stats_s) * journal->j_history_max;
+	journal->j_history = kzalloc(size, GFP_KERNEL);
+	if (!journal->j_history) {
+		journal->j_history_max = 0;
+		return;
+	}
+}
+
/*
* Management for journal control blocks: functions to create and
* destroy journal_t structures, and to initialise and read existing
@@ -681,6 +988,9 @@ static journal_t * journal_init_common (void)
kfree(journal);
goto fail;
}
+
+ journal_init_stats(journal);
+
return journal;
fail:
return NULL;
@@ -735,6 +1045,7 @@ journal_t * jbd2_journal_init_dev(struct block_device *bdev,
journal->j_fs_dev = fs_dev;
journal->j_blk_offset = start;
journal->j_maxlen = len;
+ jbd2_stats_proc_init(journal);
bh = __getblk(journal->j_dev, start, journal->j_blocksize);
J_ASSERT(bh != NULL);
@@ -773,6 +1084,7 @@ journal_t * jbd2_journal_init_inode (struct inode *inode)
journal->j_maxlen = inode->i_size >> inode->i_sb->s_blocksize_bits;
journal->j_blocksize = inode->i_sb->s_blocksize;
+ jbd2_stats_proc_init(journal);
/* journal descriptor can store up to n blocks -bzzz */
n = journal->j_blocksize / sizeof(journal_block_tag_t);
@@ -1153,6 +1465,8 @@ void jbd2_journal_destroy(journal_t *journal)
brelse(journal->j_sb_buffer);
}
+ if (journal->j_proc_entry)
+ jbd2_stats_proc_exit(journal);
if (journal->j_inode)
iput(journal->j_inode);
if (journal->j_revoke)
@@ -1264,6 +1578,32 @@ int jbd2_journal_set_features (journal_t *journal, unsigned long compat,
return 1;
}
+/**
+ * jbd2_journal_clear_features() - clear feature bits in the journal superblock
+ * @journal: journal to act on
+ * @compat: bitmask of compatible features to clear
+ * @ro: bitmask of read-only-compatible features to clear
+ * @incompat: bitmask of incompatible features to clear
+ *
+ * Masks the given bits out of the three feature words of the journal's
+ * in-memory superblock (journal->j_superblock).  This function does not
+ * itself write the superblock out.
+ */
+void jbd2_journal_clear_features(journal_t *journal, unsigned long compat,
+				unsigned long ro, unsigned long incompat)
+{
+	journal_superblock_t *sb = journal->j_superblock;
+
+	jbd_debug(1, "Clear features 0x%lx/0x%lx/0x%lx\n",
+		  compat, ro, incompat);
+
+	/* Feature words are stored big-endian: convert each mask first. */
+	sb->s_feature_compat    &= ~cpu_to_be32(compat);
+	sb->s_feature_ro_compat &= ~cpu_to_be32(ro);
+	sb->s_feature_incompat  &= ~cpu_to_be32(incompat);
+}
+EXPORT_SYMBOL(jbd2_journal_clear_features);
/**
* int jbd2_journal_update_format () - Update on-disk journal structure.
@@ -1633,7 +1973,7 @@ static int journal_init_jbd2_journal_head_cache(void)
jbd2_journal_head_cache = kmem_cache_create("jbd2_journal_head",
sizeof(struct journal_head),
0, /* offset */
- 0, /* flags */
+ SLAB_TEMPORARY, /* flags */
NULL); /* ctor */
retval = 0;
if (jbd2_journal_head_cache == 0) {
@@ -1900,6 +2240,28 @@ static void __exit jbd2_remove_debugfs_entry(void)
#endif
+#ifdef CONFIG_PROC_FS
+
+/* Path, relative to the proc root, of the top-level jbd2 statistics directory. */
+#define JBD2_STATS_PROC_NAME "fs/jbd2"
+
+/*
+ * Create the top-level statistics directory at module init.  The result
+ * is kept in proc_jbd2_stats; it is NULL if creation failed.
+ */
+static void __init jbd2_create_jbd_stats_proc_entry(void)
+{
+ proc_jbd2_stats = proc_mkdir(JBD2_STATS_PROC_NAME, NULL);
+}
+
+/* Remove the top-level statistics directory, if it was created. */
+static void __exit jbd2_remove_jbd_stats_proc_entry(void)
+{
+ if (proc_jbd2_stats)
+ remove_proc_entry(JBD2_STATS_PROC_NAME, NULL);
+}
+
+#else
+
+/* Without CONFIG_PROC_FS both helpers compile away to nothing. */
+#define jbd2_create_jbd_stats_proc_entry() do {} while (0)
+#define jbd2_remove_jbd_stats_proc_entry() do {} while (0)
+
+#endif
+
struct kmem_cache *jbd2_handle_cache;
static int __init journal_init_handle_cache(void)
@@ -1907,7 +2269,7 @@ static int __init journal_init_handle_cache(void)
jbd2_handle_cache = kmem_cache_create("jbd2_journal_handle",
sizeof(handle_t),
0, /* offset */
- 0, /* flags */
+ SLAB_TEMPORARY, /* flags */
NULL); /* ctor */
if (jbd2_handle_cache == NULL) {
printk(KERN_EMERG "JBD: failed to create handle cache\n");
@@ -1955,6 +2317,7 @@ static int __init journal_init(void)
if (ret != 0)
jbd2_journal_destroy_caches();
jbd2_create_debugfs_entry();
+ jbd2_create_jbd_stats_proc_entry();
return ret;
}
@@ -1966,6 +2329,7 @@ static void __exit journal_exit(void)
printk(KERN_EMERG "JBD: leaked %d journal_heads!\n", n);
#endif
jbd2_remove_debugfs_entry();
+ jbd2_remove_jbd_stats_proc_entry();
jbd2_journal_destroy_caches();
}
diff --git a/fs/jbd2/recovery.c b/fs/jbd2/recovery.c
index d0ce627539e..921680663fa 100644
--- a/fs/jbd2/recovery.c
+++ b/fs/jbd2/recovery.c
@@ -21,6 +21,7 @@
#include <linux/jbd2.h>
#include <linux/errno.h>
#include <linux/slab.h>
+#include <linux/crc32.h>
#endif
/*
@@ -316,6 +317,37 @@ static inline unsigned long long read_tag_block(int tag_bytes, journal_block_tag
return block;
}
+/*
+ * calc_chksums calculates the checksums for the blocks described in the
+ * descriptor block.
+ *
+ * @journal:        journal being recovered
+ * @bh:             the descriptor block
+ * @next_log_block: in/out log position; advanced (with wrap-around) past
+ *                  every data block that is read
+ * @crc32_sum:      in/out running CRC32; the descriptor block and then
+ *                  each data block it describes are folded into it
+ *
+ * Returns 0 on success, or 1 if a data block could not be read.
+ */
+static int calc_chksums(journal_t *journal, struct buffer_head *bh,
+			unsigned long *next_log_block, __u32 *crc32_sum)
+{
+	int i, num_blks, err;
+	unsigned long io_block;
+	struct buffer_head *obh;
+
+	num_blks = count_tags(journal, bh);
+	/* Calculate checksum of the descriptor block. */
+	*crc32_sum = crc32_be(*crc32_sum, (void *)bh->b_data, bh->b_size);
+
+	for (i = 0; i < num_blks; i++) {
+		io_block = (*next_log_block)++;
+		wrap(journal, *next_log_block);
+		err = jread(&obh, journal, io_block);
+		if (err) {
+			printk(KERN_ERR "JBD: IO error %d recovering block "
+				"%lu in log\n", err, io_block);
+			return 1;
+		}
+		*crc32_sum = crc32_be(*crc32_sum, (void *)obh->b_data,
+				      obh->b_size);
+		/*
+		 * Drop the reference jread() handed back; without this
+		 * every data block scanned leaks a buffer_head reference.
+		 */
+		put_bh(obh);
+	}
+	return 0;
+}
+
static int do_one_pass(journal_t *journal,
struct recovery_info *info, enum passtype pass)
{
@@ -328,6 +360,7 @@ static int do_one_pass(journal_t *journal,
unsigned int sequence;
int blocktype;
int tag_bytes = journal_tag_bytes(journal);
+ __u32 crc32_sum = ~0; /* Transactional Checksums */
/* Precompute the maximum metadata descriptors in a descriptor block */
int MAX_BLOCKS_PER_DESC;
@@ -419,12 +452,26 @@ static int do_one_pass(journal_t *journal,
switch(blocktype) {
case JBD2_DESCRIPTOR_BLOCK:
/* If it is a valid descriptor block, replay it
- * in pass REPLAY; otherwise, just skip over the
- * blocks it describes. */
+ * in pass REPLAY; if journal_checksums enabled, then
+ * calculate checksums in PASS_SCAN, otherwise,
+ * just skip over the blocks it describes. */
if (pass != PASS_REPLAY) {
+ if (pass == PASS_SCAN &&
+ JBD2_HAS_COMPAT_FEATURE(journal,
+ JBD2_FEATURE_COMPAT_CHECKSUM) &&
+ !info->end_transaction) {
+ if (calc_chksums(journal, bh,
+ &next_log_block,
+ &crc32_sum)) {
+ put_bh(bh);
+ break;
+ }
+ put_bh(bh);
+ continue;
+ }
next_log_block += count_tags(journal, bh);
wrap(journal, next_log_block);
- brelse(bh);
+ put_bh(bh);
continue;
}
@@ -516,9 +563,96 @@ static int do_one_pass(journal_t *journal,
continue;
case JBD2_COMMIT_BLOCK:
- /* Found an expected commit block: not much to
- * do other than move on to the next sequence
+ /* How to differentiate between interrupted commit
+ * and journal corruption ?
+ *
+ * {nth transaction}
+ * Checksum Verification Failed
+ * |
+ * ____________________
+ * | |
+ * async_commit sync_commit
+ * | |
+ * | GO TO NEXT "Journal Corruption"
+ * | TRANSACTION
+ * |
+ * {(n+1)th transaction}
+ * |
+ * _______|______________
+ * | |
+ * Commit block found Commit block not found
+ * | |
+ * "Journal Corruption" |
+ * _____________|_________
+ * | |
+ * nth trans corrupt OR nth trans
+ * and (n+1)th interrupted interrupted
+ * before commit block
+ * could reach the disk.
+ * (Cannot find the difference in above
+ * mentioned conditions. Hence assume
+ * "Interrupted Commit".)
+ */
+
+ /* Found an expected commit block: if checksums
+ * are present verify them in PASS_SCAN; else not
+ * much to do other than move on to the next sequence
* number. */
+ if (pass == PASS_SCAN &&
+ JBD2_HAS_COMPAT_FEATURE(journal,
+ JBD2_FEATURE_COMPAT_CHECKSUM)) {
+ int chksum_err, chksum_seen;
+ struct commit_header *cbh =
+ (struct commit_header *)bh->b_data;
+ unsigned found_chksum =
+ be32_to_cpu(cbh->h_chksum[0]);
+
+ chksum_err = chksum_seen = 0;
+
+ if (info->end_transaction) {
+ printk(KERN_ERR "JBD: Transaction %u "
+ "found to be corrupt.\n",
+ next_commit_ID - 1);
+ brelse(bh);
+ break;
+ }
+
+ if (crc32_sum == found_chksum &&
+ cbh->h_chksum_type == JBD2_CRC32_CHKSUM &&
+ cbh->h_chksum_size ==
+ JBD2_CRC32_CHKSUM_SIZE)
+ chksum_seen = 1;
+ else if (!(cbh->h_chksum_type == 0 &&
+ cbh->h_chksum_size == 0 &&
+ found_chksum == 0 &&
+ !chksum_seen))
+ /*
+ * If fs is mounted using an old kernel and then
+ * kernel with journal_chksum is used then we
+ * get a situation where the journal flag has
+ * checksum flag set but checksums are not
+ * present i.e chksum = 0, in the individual
+ * commit blocks.
+ * Hence to avoid checksum failures, in this
+ * situation, this extra check is added.
+ */
+ chksum_err = 1;
+
+ if (chksum_err) {
+ info->end_transaction = next_commit_ID;
+
+ if (!JBD2_HAS_COMPAT_FEATURE(journal,
+ JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT)){
+ printk(KERN_ERR
+ "JBD: Transaction %u "
+ "found to be corrupt.\n",
+ next_commit_ID);
+ brelse(bh);
+ break;
+ }
+ }
+ crc32_sum = ~0;
+ }
brelse(bh);
next_commit_ID++;
continue;
@@ -554,9 +688,10 @@ static int do_one_pass(journal_t *journal,
* transaction marks the end of the valid log.
*/
- if (pass == PASS_SCAN)
- info->end_transaction = next_commit_ID;
- else {
+ if (pass == PASS_SCAN) {
+ if (!info->end_transaction)
+ info->end_transaction = next_commit_ID;
+ } else {
/* It's really bad news if different passes end up at
* different places (but possible due to IO errors). */
if (info->end_transaction != next_commit_ID) {
diff --git a/fs/jbd2/revoke.c b/fs/jbd2/revoke.c
index 3595fd432d5..df36f42e19e 100644
--- a/fs/jbd2/revoke.c
+++ b/fs/jbd2/revoke.c
@@ -171,13 +171,15 @@ int __init jbd2_journal_init_revoke_caches(void)
{
jbd2_revoke_record_cache = kmem_cache_create("jbd2_revoke_record",
sizeof(struct jbd2_revoke_record_s),
- 0, SLAB_HWCACHE_ALIGN, NULL);
+ 0,
+ SLAB_HWCACHE_ALIGN|SLAB_TEMPORARY,
+ NULL);
if (jbd2_revoke_record_cache == 0)
return -ENOMEM;
jbd2_revoke_table_cache = kmem_cache_create("jbd2_revoke_table",
sizeof(struct jbd2_revoke_table_s),
- 0, 0, NULL);
+ 0, SLAB_TEMPORARY, NULL);
if (jbd2_revoke_table_cache == 0) {
kmem_cache_destroy(jbd2_revoke_record_cache);
jbd2_revoke_record_cache = NULL;
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index b1fcf2b3dca..b9b0b6f899b 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -54,11 +54,13 @@ jbd2_get_transaction(journal_t *journal, transaction_t *transaction)
spin_lock_init(&transaction->t_handle_lock);
/* Set up the commit timer for the new transaction. */
- journal->j_commit_timer.expires = transaction->t_expires;
+ journal->j_commit_timer.expires = round_jiffies(transaction->t_expires);
add_timer(&journal->j_commit_timer);
J_ASSERT(journal->j_running_transaction == NULL);
journal->j_running_transaction = transaction;
+ transaction->t_max_wait = 0;
+ transaction->t_start = jiffies;
return transaction;
}
@@ -85,6 +87,7 @@ static int start_this_handle(journal_t *journal, handle_t *handle)
int nblocks = handle->h_buffer_credits;
transaction_t *new_transaction = NULL;
int ret = 0;
+ unsigned long ts = jiffies;
if (nblocks > journal->j_max_transaction_buffers) {
printk(KERN_ERR "JBD: %s wants too many credits (%d > %d)\n",
@@ -217,6 +220,12 @@ repeat_locked:
/* OK, account for the buffers that this operation expects to
* use and add the handle to the running transaction. */
+ if (time_after(transaction->t_start, ts)) {
+ ts = jbd2_time_diff(ts, transaction->t_start);
+ if (ts > transaction->t_max_wait)
+ transaction->t_max_wait = ts;
+ }
+
handle->h_transaction = transaction;
transaction->t_outstanding_credits += nblocks;
transaction->t_updates++;
@@ -232,6 +241,8 @@ out:
return ret;
}
+static struct lock_class_key jbd2_handle_key;
+
/* Allocate a new handle. This should probably be in a slab... */
static handle_t *new_handle(int nblocks)
{
@@ -242,6 +253,9 @@ static handle_t *new_handle(int nblocks)
handle->h_buffer_credits = nblocks;
handle->h_ref = 1;
+ lockdep_init_map(&handle->h_lockdep_map, "jbd2_handle",
+ &jbd2_handle_key, 0);
+
return handle;
}
@@ -284,7 +298,11 @@ handle_t *jbd2_journal_start(journal_t *journal, int nblocks)
jbd2_free_handle(handle);
current->journal_info = NULL;
handle = ERR_PTR(err);
+ goto out;
}
+
+ lock_acquire(&handle->h_lockdep_map, 0, 0, 0, 2, _THIS_IP_);
+out:
return handle;
}
@@ -1164,7 +1182,7 @@ int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh)
}
/* That test should have eliminated the following case: */
- J_ASSERT_JH(jh, jh->b_frozen_data == 0);
+ J_ASSERT_JH(jh, jh->b_frozen_data == NULL);
JBUFFER_TRACE(jh, "file as BJ_Metadata");
spin_lock(&journal->j_list_lock);
@@ -1410,6 +1428,8 @@ int jbd2_journal_stop(handle_t *handle)
spin_unlock(&journal->j_state_lock);
}
+ lock_release(&handle->h_lockdep_map, 1, _THIS_IP_);
+
jbd2_free_handle(handle);
return err;
}
@@ -1512,7 +1532,7 @@ void __jbd2_journal_temp_unlink_buffer(struct journal_head *jh)
J_ASSERT_JH(jh, jh->b_jlist < BJ_Types);
if (jh->b_jlist != BJ_None)
- J_ASSERT_JH(jh, transaction != 0);
+ J_ASSERT_JH(jh, transaction != NULL);
switch (jh->b_jlist) {
case BJ_None:
@@ -1581,11 +1601,11 @@ __journal_try_to_free_buffer(journal_t *journal, struct buffer_head *bh)
if (buffer_locked(bh) || buffer_dirty(bh))
goto out;
- if (jh->b_next_transaction != 0)
+ if (jh->b_next_transaction != NULL)
goto out;
spin_lock(&journal->j_list_lock);
- if (jh->b_transaction != 0 && jh->b_cp_transaction == 0) {
+ if (jh->b_transaction != NULL && jh->b_cp_transaction == NULL) {
if (jh->b_jlist == BJ_SyncData || jh->b_jlist == BJ_Locked) {
/* A written-back ordered data buffer */
JBUFFER_TRACE(jh, "release data");
@@ -1593,7 +1613,7 @@ __journal_try_to_free_buffer(journal_t *journal, struct buffer_head *bh)
jbd2_journal_remove_journal_head(bh);
__brelse(bh);
}
- } else if (jh->b_cp_transaction != 0 && jh->b_transaction == 0) {
+ } else if (jh->b_cp_transaction != NULL && jh->b_transaction == NULL) {
/* written-back checkpointed metadata buffer */
if (jh->b_jlist == BJ_None) {
JBUFFER_TRACE(jh, "remove from checkpoint list");
@@ -1953,7 +1973,7 @@ void __jbd2_journal_file_buffer(struct journal_head *jh,
J_ASSERT_JH(jh, jh->b_jlist < BJ_Types);
J_ASSERT_JH(jh, jh->b_transaction == transaction ||
- jh->b_transaction == 0);
+ jh->b_transaction == NULL);
if (jh->b_transaction && jh->b_jlist == jlist)
return;