From d78660288e166f2ce93980fb96963e072edb7392 Mon Sep 17 00:00:00 2001
From: root <alexey_lyashkov@xyratex.com>
Date: Thu, 26 Jan 2012 12:02:06 +0200
Subject: [PATCH] MRP-303 handle bulk IO errors correctly.

Don't panic (LBUG) when a bulk transfer fails; return -EIO instead.
Correctly handle a bulk transfer reordered after its reply.

Signed-off-by: root <alexey_lyashkov@xyratex.com>
---
 lustre/include/obd_support.h |    1 +
 lustre/ost/ost_handler.c     |   19 +++++++++++++++++--
 lustre/ptlrpc/client.c       |    4 ++--
 lustre/ptlrpc/events.c       |    6 ++++++
 lustre/tests/sanity.sh       |   20 ++++++++++++++++++++
 5 files changed, 46 insertions(+), 4 deletions(-)

diff --git a/lustre/include/obd_support.h b/lustre/include/obd_support.h
index 5cdebfd..c7269a2 100644
--- a/lustre/include/obd_support.h
+++ b/lustre/include/obd_support.h
@@ -347,6 +347,7 @@ int obd_alloc_fail(const void *ptr, const char *name, const char *type,
 #define OBD_FAIL_PTLRPC_HPREQ_NOTIMEOUT  0x512
 #define OBD_FAIL_PTLRPC_DROP_REQ_OPC     0x513
 #define OBD_FAIL_PTLRPC_FINISH_REPLAY    0x514
+#define OBD_FAIL_PTLRPC_CLIENT_BULK_CB2  0x515
 
 #define OBD_FAIL_OBD_PING_NET            0x600
 #define OBD_FAIL_OBD_LOG_CANCEL_NET      0x601
diff --git a/lustre/ost/ost_handler.c b/lustre/ost/ost_handler.c
index 8dc1cec..2a1326e 100644
--- a/lustre/ost/ost_handler.c
+++ b/lustre/ost/ost_handler.c
@@ -900,7 +900,8 @@ static int ost_brw_read(struct ptlrpc_request *req, struct obd_trans_info *oti)
         /* Check if client was evicted while we were doing i/o before touching
            network */
         if (rc == 0) {
-                rc = target_bulk_io(exp, desc, &lwi);
+                if (!CFS_FAIL_PRECHECK(OBD_FAIL_PTLRPC_CLIENT_BULK_CB2))
+                        rc = target_bulk_io(exp, desc, &lwi);
                 no_reply = rc != 0;
         }
 
@@ -920,7 +921,7 @@ out_lock:
 out_tls:
         ost_tls_put(req);
 out_bulk:
-        if (desc)
+        if (desc && !CFS_FAIL_PRECHECK(OBD_FAIL_PTLRPC_CLIENT_BULK_CB2))
                 ptlrpc_free_bulk(desc);
 out:
         LASSERT(rc <= 0);
@@ -944,6 +945,20 @@ out:
                       exp->exp_connection->c_remote_uuid.uuid,
                       libcfs_id2str(req->rq_peer));
         }
+        /* Send the bulk after the reply to simulate a network delay or
+         * reordering behind a router. */
+        if (CFS_FAIL_PRECHECK(OBD_FAIL_PTLRPC_CLIENT_BULK_CB2)) {
+                cfs_waitq_t              waitq;
+                struct l_wait_info       lwi1;
+
+                CDEBUG(D_INFO, "reoder BULK\n");
+                cfs_waitq_init(&waitq);
+
+                lwi1 = LWI_TIMEOUT_INTR(cfs_time_seconds(3), NULL, NULL, NULL);
+                l_wait_event(waitq, 0, &lwi1);
+                rc = target_bulk_io(exp, desc, &lwi);
+                ptlrpc_free_bulk(desc);
+        }
 
         RETURN(rc);
 }
diff --git a/lustre/ptlrpc/client.c b/lustre/ptlrpc/client.c
index 11697d6..e57b486 100644
--- a/lustre/ptlrpc/client.c
+++ b/lustre/ptlrpc/client.c
@@ -1675,7 +1675,7 @@ int ptlrpc_check_set(const struct lu_env *env, struct ptlrpc_request_set *set)
                          * process the reply. Similarly if the RPC returned
                          * an error, and therefore the bulk will never arrive.
                          */
-                        if (req->rq_bulk == NULL || req->rq_status != 0) {
+                        if (req->rq_bulk == NULL || req->rq_status < 0) {
                                 ptlrpc_rqphase_move(req, RQ_PHASE_INTERPRET);
                                 GOTO(interpret, req->rq_status);
                         }
@@ -1693,7 +1693,7 @@ int ptlrpc_check_set(const struct lu_env *env, struct ptlrpc_request_set *set)
                          * was good after getting the REPLY for her GET or
                          * the ACK for her PUT. */
                         DEBUG_REQ(D_ERROR, req, "bulk transfer failed");
-                        LBUG();
+                        req->rq_status = -EIO;
                 }
 
                 ptlrpc_rqphase_move(req, RQ_PHASE_INTERPRET);
diff --git a/lustre/ptlrpc/events.c b/lustre/ptlrpc/events.c
index 4165f88..cc124ce 100644
--- a/lustre/ptlrpc/events.c
+++ b/lustre/ptlrpc/events.c
@@ -190,6 +190,12 @@ void client_bulk_callback (lnet_event_t *ev)
                  ev->type == LNET_EVENT_UNLINK);
         LASSERT (ev->unlinked);
 
+        if (CFS_FAIL_CHECK_ORSET(OBD_FAIL_PTLRPC_CLIENT_BULK_CB, CFS_FAIL_ONCE))
+                ev->status = -EIO;
+
+        if (CFS_FAIL_CHECK_ORSET(OBD_FAIL_PTLRPC_CLIENT_BULK_CB2, CFS_FAIL_ONCE))
+                ev->status = -EIO;
+
         CDEBUG((ev->status == 0) ? D_NET : D_ERROR,
                "event type %d, status %d, desc %p\n",
                ev->type, ev->status, desc);
diff --git a/lustre/tests/sanity.sh b/lustre/tests/sanity.sh
index da07a92..15f6fe0 100644
--- a/lustre/tests/sanity.sh
+++ b/lustre/tests/sanity.sh
@@ -8338,6 +8338,26 @@ test_220() { #LU-325
 }
 run_test 220 "the preallocated objects in MDS still can be used if ENOSPC is returned by OST with enough disk space"
 
+test_221a() { # MRP-303: synced one-block write with a forced bulk error
+        #define OBD_FAIL_PTLRPC_CLIENT_BULK_CB   0x508
+        $LCTL set_param fail_loc=0x508 # client_bulk_callback sets status -EIO
+        dd if=/dev/zero of=$DIR/$tfile bs=4096 count=1 conv=fsync
+        $LCTL set_param fail_loc=0 # disarm the injection
+        df $DIR # NOTE(review): presumably checks the mount still responds
+}
+run_test 221a "MRP-303: don't panic on bulk IO fail"
+
+test_221b() { # MRP-303: read whose bulk is sent only after the reply
+        dd if=/dev/zero of=$DIR/$tfile bs=4096 count=1 # create the file to read
+        cancel_lru_locks osc # NOTE(review): presumably forces the read to the OST
+        #define OBD_FAIL_PTLRPC_CLIENT_BULK_CB2   0x515
+        $LCTL set_param fail_loc=0x515 # ost_brw_read sends the bulk after reply
+        dd of=/dev/null if=$DIR/$tfile bs=4096 count=1
+        $LCTL set_param fail_loc=0 # disarm the injection
+        df $DIR # NOTE(review): presumably checks the mount still responds
+}
+run_test 221b "MRP-303: don't panic on bulk IO sent after the reply"
+
 #
 # tests that do cleanup/setup should be run at the end
 #
-- 
1.7.1