From e2566d577a90a1f4219c4420ef3b3596dad953f8 Mon Sep 17 00:00:00 2001
From: Bobi Jam <bobijam.xu@intel.com>
Date: Wed, 8 Apr 2015 23:02:30 +0800
Subject: [PATCH] LU-4090 fsfilt: don't wait (forever) for stale tid

In the case where an inode has a very stale transaction id (tid) in
i_datasync_tid or i_sync_tid, it's possible that, after a very large
(2**31) number of transactions, the tid number space might wrap,
causing tid_geq()'s calculations to fail.

fsfilt_ext3_commit_async() might call jbd2_log_start_commit() with a
stale tid, and subsequently call jbd2_log_wait_commit() with the same
stale tid, and then wait for a very long time.

This fix is similar to Linux kernel commit
d76a3a77113db020d9bb1e894822869410450bd9.

---------------------------------------------------------------------
Another kernel patch, taken from Linux kernel commit
3469a32a1e948c54204b5dd6f7476a7d11349e9e (v3.14-rc2):

jbd2: don't hold j_state_lock while calling wake_up()

The j_state_lock is one of the hottest locks in the jbd2 layer and
thus one of its scalability bottlenecks.

We don't need to be holding the j_state_lock while we are calling
wake_up(&journal->j_wait_commit), so release the lock a little bit
earlier.

Signed-off-by: Bobi Jam <bobijam.xu@intel.com>
Change-Id: I63d9a0e579bfadf30c74c1d273dc8c7bc6156eaf
---
 .../jbd2-log_wait_for_space-2.6-rhel5.patch        | 38 ++++++++++++++++++++++
 lustre/lvfs/fsfilt_ext3.c                          | 38 ++++++++++++++++------
 2 files changed, 66 insertions(+), 10 deletions(-)

diff --git a/lustre/kernel_patches/patches/jbd2-log_wait_for_space-2.6-rhel5.patch b/lustre/kernel_patches/patches/jbd2-log_wait_for_space-2.6-rhel5.patch
index 16db364..627a495 100644
--- a/lustre/kernel_patches/patches/jbd2-log_wait_for_space-2.6-rhel5.patch
+++ b/lustre/kernel_patches/patches/jbd2-log_wait_for_space-2.6-rhel5.patch
@@ -47,6 +47,20 @@ the odds of it happening.
 Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
 Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
 
+Another patch from linux vanilla kernel commit
+3469a32a1e948c54204b5dd6f7476a7d11349e9e (v3.14-rc2)
+
+jbd2: don't hold j_state_lock while calling wake_up()
+
+The j_state_lock is one of the hottest locks in the jbd2 layer and
+thus one of its scalability bottlenecks.
+
+We don't need to be holding the j_state_lock while we are calling
+wake_up(&journal->j_wait_commit), so release the lock a little bit
+earlier.
+
+Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
+
 Index: linux-2.6.18-348.3.1.el5-b18/fs/jbd2/checkpoint.c
 ===================================================================
 --- linux-2.6.18-348.3.1.el5-b18.orig/fs/jbd2/checkpoint.c
@@ -67,3 +81,27 @@ Index: linux-2.6.18-348.3.1.el5-b18/fs/jbd2/checkpoint.c
  			} else {
  				printk(KERN_ERR "%s: needed %d blocks and "
  				       "only had %d space available\n",
+Index: linux-2.6.18-348.3.1.el5-b18/fs/jbd2/journal.c
+===================================================================
+--- linux-2.6.18-348.3.1.el5-b18.orig/fs/jbd2/journal.c
++++ linux-2.6.18-348.3.1.el5-b18/fs/jbd2/journal.c
+@@ -237,8 +237,8 @@ static void journal_kill_thread(journal_
+ 	journal->j_flags |= JBD2_UNMOUNT;
+ 
+ 	while (journal->j_task) {
+-		wake_up(&journal->j_wait_commit);
+ 		spin_unlock(&journal->j_state_lock);
++		wake_up(&journal->j_wait_commit);
+ 		wait_event(journal->j_wait_done_commit, journal->j_task == NULL);
+ 		spin_lock(&journal->j_state_lock);
+ 	}
+@@ -561,8 +561,8 @@ int jbd2_log_wait_commit(journal_t *jour
+ 	while (tid_gt(tid, journal->j_commit_sequence)) {
+ 		jbd_debug(1, "JBD: want %d, j_commit_sequence=%d\n",
+ 				  tid, journal->j_commit_sequence);
+-		wake_up(&journal->j_wait_commit);
+ 		spin_unlock(&journal->j_state_lock);
++		wake_up(&journal->j_wait_commit);
+ 		wait_event(journal->j_wait_done_commit,
+ 				!tid_gt(tid, journal->j_commit_sequence));
+ 		spin_lock(&journal->j_state_lock);
diff --git a/lustre/lvfs/fsfilt_ext3.c b/lustre/lvfs/fsfilt_ext3.c
index 498115c..2b66311 100644
--- a/lustre/lvfs/fsfilt_ext3.c
+++ b/lustre/lvfs/fsfilt_ext3.c
@@ -542,13 +542,14 @@ static int fsfilt_ext3_commit(struct inode *inode, void *h, int force_sync)
 }
 
 static int fsfilt_ext3_commit_async(struct inode *inode, void *h,
-                                    void **wait_handle)
+				    void **wait_handle)
 {
-        unsigned long tid;
-        transaction_t *transaction;
-        handle_t *handle = h;
-        journal_t *journal;
-        int rc;
+	unsigned long tid;
+	transaction_t *transaction;
+	handle_t *handle = h;
+	journal_t *journal;
+	int rc;
+	bool need_to_wait = true;
 
         LASSERT(current->journal_info == handle);
 
@@ -562,11 +563,28 @@ static int fsfilt_ext3_commit_async(struct inode *inode, void *h,
                 CERROR("error while stopping transaction: %d\n", rc);
                 return rc;
         }
-        fsfilt_log_start_commit(journal, tid);
 
-        *wait_handle = (void *) tid;
-        CDEBUG(D_INODE, "commit async: %lu\n", (unsigned long) tid);
-        return 0;
+	spin_lock(&journal->j_state_lock);
+	if (journal->j_running_transaction &&
+	    journal->j_running_transaction->t_tid == tid) {
+		if (journal->j_commit_request != tid) {
+			/* transaction not yet started, so request it */
+			spin_unlock(&journal->j_state_lock);
+			fsfilt_log_start_commit(journal, tid);
+			goto wait_commit;
+		}
+	} else if (!(journal->j_committing_transaction &&
+		     journal->j_committing_transaction->t_tid == tid)) {
+		need_to_wait = 0;
+	}
+	spin_unlock(&journal->j_state_lock);
+	if (!need_to_wait)
+		return 0;
+
+wait_commit:
+	*wait_handle = (void *) tid;
+	CDEBUG(D_INODE, "commit async: %lu\n", (unsigned long) tid);
+	return 0;
 }
 
 static int fsfilt_ext3_commit_wait(struct inode *inode, void *h)
-- 
2.3.2 (Apple Git-55)