From e2566d577a90a1f4219c4420ef3b3596dad953f8 Mon Sep 17 00:00:00 2001 From: Bobi Jam Date: Wed, 8 Apr 2015 23:02:30 +0800 Subject: [PATCH] LU-4090 fsfilt: don't wait (forever) for stale tid In the case where an inode has a very stale transaction id (tid) in i_datasync_tid or i_sync_tid, it's possible that, after a very large (2**31) number of transactions, the tid number space might wrap, causing tid_geq()'s calculations to fail. fsfilt_ext3_commit_async() might call jbd2_log_start_commit() with a stale tid, and subsequently call jbd2_log_wait_commit() with the same stale tid, and then wait for a very long time. This is similar to the fix in Linux kernel commit d76a3a77113db020d9bb1e894822869410450bd9 --------------------------------------------------------------------- Another kernel patch from Linux kernel commit 3469a32a1e948c54204b5dd6f7476a7d11349e9e (v3.14-rc2) jbd2: don't hold j_state_lock while calling wake_up() The j_state_lock is one of the hottest locks in the jbd2 layer and thus one of its scalability bottlenecks. We don't need to be holding the j_state_lock while we are calling wake_up(&journal->j_wait_commit), so release the lock a little bit earlier. Signed-off-by: Bobi Jam Change-Id: I63d9a0e579bfadf30c74c1d273dc8c7bc6156eaf --- .../jbd2-log_wait_for_space-2.6-rhel5.patch | 38 ++++++++++++++++++++++ lustre/lvfs/fsfilt_ext3.c | 38 ++++++++++++++++------ 2 files changed, 66 insertions(+), 10 deletions(-) diff --git a/lustre/kernel_patches/patches/jbd2-log_wait_for_space-2.6-rhel5.patch b/lustre/kernel_patches/patches/jbd2-log_wait_for_space-2.6-rhel5.patch index 16db364..627a495 100644 --- a/lustre/kernel_patches/patches/jbd2-log_wait_for_space-2.6-rhel5.patch +++ b/lustre/kernel_patches/patches/jbd2-log_wait_for_space-2.6-rhel5.patch @@ -47,6 +47,20 @@ the odds of it happening. 
Signed-off-by: Paul Gortmaker Signed-off-by: "Theodore Ts'o" +Another patch from linux vanilla kernel commit +3469a32a1e948c54204b5dd6f7476a7d11349e9e (v3.14-rc2) + +jbd2: don't hold j_state_lock while calling wake_up() + +The j_state_lock is one of the hottest locks in the jbd2 layer and +thus one of its scalability bottlenecks. + +We don't need to be holding the j_state_lock while we are calling +wake_up(&journal->j_wait_commit), so release the lock a little bit +earlier. + +Signed-off-by: "Theodore Ts'o" + Index: linux-2.6.18-348.3.1.el5-b18/fs/jbd2/checkpoint.c =================================================================== --- linux-2.6.18-348.3.1.el5-b18.orig/fs/jbd2/checkpoint.c @@ -67,3 +81,27 @@ Index: linux-2.6.18-348.3.1.el5-b18/fs/jbd2/checkpoint.c } else { printk(KERN_ERR "%s: needed %d blocks and " "only had %d space available\n", +Index: linux-2.6.18-348.3.1.el5-b18/fs/jbd2/journal.c +=================================================================== +--- linux-2.6.18-348.3.1.el5-b18.orig/fs/jbd2/journal.c ++++ linux-2.6.18-348.3.1.el5-b18/fs/jbd2/journal.c +@@ -237,8 +237,8 @@ static void journal_kill_thread(journal_ + journal->j_flags |= JBD2_UNMOUNT; + + while (journal->j_task) { +- wake_up(&journal->j_wait_commit); + spin_unlock(&journal->j_state_lock); ++ wake_up(&journal->j_wait_commit); + wait_event(journal->j_wait_done_commit, journal->j_task == NULL); + spin_lock(&journal->j_state_lock); + } +@@ -561,8 +561,8 @@ int jbd2_log_wait_commit(journal_t *jour + while (tid_gt(tid, journal->j_commit_sequence)) { + jbd_debug(1, "JBD: want %d, j_commit_sequence=%d\n", + tid, journal->j_commit_sequence); +- wake_up(&journal->j_wait_commit); + spin_unlock(&journal->j_state_lock); ++ wake_up(&journal->j_wait_commit); + wait_event(journal->j_wait_done_commit, + !tid_gt(tid, journal->j_commit_sequence)); + spin_lock(&journal->j_state_lock); diff --git a/lustre/lvfs/fsfilt_ext3.c b/lustre/lvfs/fsfilt_ext3.c index 498115c..2b66311 100644 --- 
a/lustre/lvfs/fsfilt_ext3.c +++ b/lustre/lvfs/fsfilt_ext3.c @@ -542,13 +542,14 @@ static int fsfilt_ext3_commit(struct inode *inode, void *h, int force_sync) } static int fsfilt_ext3_commit_async(struct inode *inode, void *h, - void **wait_handle) + void **wait_handle) { - unsigned long tid; - transaction_t *transaction; - handle_t *handle = h; - journal_t *journal; - int rc; + unsigned long tid; + transaction_t *transaction; + handle_t *handle = h; + journal_t *journal; + int rc; + bool need_to_wait = true; LASSERT(current->journal_info == handle); @@ -562,11 +563,28 @@ static int fsfilt_ext3_commit_async(struct inode *inode, void *h, CERROR("error while stopping transaction: %d\n", rc); return rc; } - fsfilt_log_start_commit(journal, tid); - *wait_handle = (void *) tid; - CDEBUG(D_INODE, "commit async: %lu\n", (unsigned long) tid); - return 0; + spin_lock(&journal->j_state_lock); + if (journal->j_running_transaction && + journal->j_running_transaction->t_tid == tid) { + if (journal->j_commit_request != tid) { + /* transaction not yet started, so request it */ + spin_unlock(&journal->j_state_lock); + fsfilt_log_start_commit(journal, tid); + goto wait_commit; + } + } else if (!(journal->j_committing_transaction && + journal->j_committing_transaction->t_tid == tid)) { + need_to_wait = 0; + } + spin_unlock(&journal->j_state_lock); + if (!need_to_wait) + return 0; + +wait_commit: + *wait_handle = (void *) tid; + CDEBUG(D_INODE, "commit async: %lu\n", (unsigned long) tid); + return 0; } static int fsfilt_ext3_commit_wait(struct inode *inode, void *h) -- 2.3.2 (Apple Git-55)