<?xml version="1.0" encoding="UTF-8"?>
<!-- 
RSS generated by JIRA (9.4.14#940014-sha1:734e6822bbf0d45eff9af51f82432957f73aa32c) at Sat Feb 10 01:04:10 UTC 2024

It is possible to restrict the fields that are returned in this document by specifying the 'field' parameter in your request.
For example, to request only the issue key and summary append 'field=key&field=summary' to the URL of your request.
-->
<rss version="0.92">
<channel>
    <title>Whamcloud Community JIRA</title>
    <link>https://jira.whamcloud.com</link>
    <description>This file is an XML representation of an issue</description>
<language>en-us</language>
    <build-info>
        <version>9.4.14</version>
        <build-number>940014</build-number>
        <build-date>05-12-2023</build-date>
    </build-info>


<item>
            <title>[LU-142] system hang when running replay-single or replay-dual with three clients</title>
                <link>https://jira.whamcloud.com/browse/LU-142</link>
                <project id="10000" key="LU">Lustre</project>
                    <description>&lt;p&gt;System hangs there when running replay-single or replay-dual with three clients.&lt;/p&gt;

&lt;p&gt;MDT syslogs&lt;/p&gt;

&lt;p&gt;Lustre: DEBUG MARKER: == replay-dual test 3: |X| mkdir adir, mkdir adir/bdir =============================================== 15:25:29 (1300400729)&lt;br/&gt;
LustreError: 16978:0:(ldlm_request.c:88:ldlm_expired_completion_wait()) ### lock timed out (enqueued at 1300400765, 300s ago); not entering recovery in server code, just going back to sleep ns: mdt-ffff810634719000 lock: ffff810634d62000/0xc740c432497b7dd3 lrc: 3/0,1 mode: --/CW res: 64945/1800942150 bits 0x2 rrc: 6 type: IBT flags: 0x4004030 remote: 0x0 expref: -99 pid: 16978 timeout: 0&lt;br/&gt;
LustreError: 16860:0:(ldlm_request.c:88:ldlm_expired_completion_wait()) ### lock timed out (enqueued at 1300400765, 300s ago); not entering recovery in server code, just going back to sleep ns: mdt-ffff810634719000 lock: ffff81032829d000/0xc740c432497b7de8 lrc: 3/1,0 mode: --/PR res: 64945/1800942150 bits 0x2 rrc: 6 type: IBT flags: 0x4004000 remote: 0x0 expref: -99 pid: 16860 timeout: 0&lt;br/&gt;
LustreError: 16860:0:(ldlm_request.c:88:ldlm_expired_completion_wait()) Skipped 1 previous similar message&lt;br/&gt;
LustreError: dumping log to /tmp/lustre-log.1300401065.16859&lt;br/&gt;
Lustre: 16997:0:(service.c:1000:ptlrpc_at_send_early_reply()) @@@ Couldn&apos;t add any time (5/-175), not sending early reply&lt;br/&gt;
  req@ffff8102e0384800 x1363568821797227/t0(0) o-1-&amp;gt;8274223f-8957-01db-a6c1-85c4a27e713c@NET_0x50000c0a80405_UUID:0/0 lens 544/1016 e 4 to 0 dl 1300401545 ref 2 fl Interpret:/ffffffff/ffffffff rc 0/-1&lt;br/&gt;
Lustre: 16997:0:(service.c:1000:ptlrpc_at_send_early_reply()) @@@ Couldn&apos;t add any time (5/-175), not sending early reply&lt;br/&gt;
  req@ffff8102dd62d850 x1363568698065665/t0(0) o-1-&amp;gt;5765646f-b9c3-fbc2-d58c-fd81f8d1a4cf@NET_0x50000c0a80401_UUID:0/0 lens 544/1016 e 4 to 0 dl 1300401545 ref 2 fl Interpret:/ffffffff/ffffffff rc 0/-1&lt;br/&gt;
Lustre: 16912:0:(ldlm_lib.c:606:target_handle_reconnect()) lustre-MDT0000: 5765646f-b9c3-fbc2-d58c-fd81f8d1a4cf reconnecting&lt;br/&gt;
Lustre: 16981:0:(ldlm_lib.c:846:target_handle_connect()) lustre-MDT0000: refuse reconnection from e0ef2add-a061-7cd8-cd0f-2a288e5fab8a@192.168.4.2@o2ib to 0xffff810325d28000/1&lt;br/&gt;
LustreError: 16981:0:(ldlm_lib.c:2118:target_send_reply_msg()) @@@ processing error (-16)  req@ffff810636acc000 x1363568749446727/t0(0) o-1-&amp;gt;e0ef2add-a061-7cd8-cd0f-2a288e5fab8a@NET_0x50000c0a80402_UUID:0/0 lens 368/264 e 0 to 0 dl 1300401672 ref 1 fl Interpret:/ffffffff/ffffffff rc -16/-1&lt;br/&gt;
LustreError: 16981:0:(ldlm_lib.c:2118:target_send_reply_msg()) Skipped 34 previous similar messages&lt;br/&gt;
Lustre: 16912:0:(ldlm_lib.c:606:target_handle_reconnect()) Skipped 2 previous similar messages&lt;br/&gt;
Lustre: 16912:0:(ldlm_lib.c:846:target_handle_connect()) lustre-MDT0000: refuse reconnection from 5765646f-b9c3-fbc2-d58c-fd81f8d1a4cf@192.168.4.1@o2ib to 0xffff810325d28400/1&lt;br/&gt;
Lustre: 16912:0:(ldlm_lib.c:846:target_handle_connect()) Skipped 1 previous similar message&lt;br/&gt;
Lustre: 16981:0:(ldlm_lib.c:606:target_handle_reconnect()) lustre-MDT0000: 5765646f-b9c3-fbc2-d58c-fd81f8d1a4cf reconnecting&lt;br/&gt;
Lustre: 16912:0:(ldlm_lib.c:846:target_handle_connect()) lustre-MDT0000: refuse reconnection from e0ef2add-a061-7cd8-cd0f-2a288e5fab8a@192.168.4.2@o2ib to 0xffff810325d28000/1&lt;br/&gt;
Lustre: 16981:0:(ldlm_lib.c:606:target_handle_reconnect()) Skipped 2 previous similar messages&lt;br/&gt;
Lustre: 16997:0:(ldlm_lib.c:606:target_handle_reconnect()) lustre-MDT0000: 5765646f-b9c3-fbc2-d58c-fd81f8d1a4cf reconnecting&lt;br/&gt;
Lustre: 16981:0:(ldlm_lib.c:846:target_handle_connect()) lustre-MDT0000: refuse reconnection from e0ef2add-a061-7cd8-cd0f-2a288e5fab8a@192.168.4.2@o2ib to 0xffff810325d28000/1&lt;br/&gt;
Lustre: 16981:0:(ldlm_lib.c:846:target_handle_connect()) Skipped 2 previous similar messages&lt;br/&gt;
Lustre: 16997:0:(ldlm_lib.c:606:target_handle_reconnect()) Skipped 2 previous similar messages&lt;br/&gt;
Lustre: 16997:0:(ldlm_lib.c:606:target_handle_reconnect()) lustre-MDT0000: 5765646f-b9c3-fbc2-d58c-fd81f8d1a4cf reconnecting&lt;br/&gt;
Lustre: 16912:0:(ldlm_lib.c:846:target_handle_connect()) lustre-MDT0000: refuse reconnection from e0ef2add-a061-7cd8-cd0f-2a288e5fab8a@192.168.4.2@o2ib to 0xffff810325d28000/1&lt;br/&gt;
Lustre: 16912:0:(ldlm_lib.c:846:target_handle_connect()) Skipped 2 previous similar messages&lt;br/&gt;
Lustre: 16997:0:(ldlm_lib.c:606:target_handle_reconnect()) Skipped 2 previous similar messages&lt;br/&gt;
Lustre: 16912:0:(ldlm_lib.c:606:target_handle_reconnect()) lustre-MDT0000: 5765646f-b9c3-fbc2-d58c-fd81f8d1a4cf reconnecting&lt;br/&gt;
Lustre: 16997:0:(ldlm_lib.c:846:target_handle_connect()) lustre-MDT0000: refuse reconnection from e0ef2add-a061-7cd8-cd0f-2a288e5fab8a@192.168.4.2@o2ib to 0xffff810325d28000/1&lt;br/&gt;
Lustre: 16997:0:(ldlm_lib.c:846:target_handle_connect()) Skipped 2 previous similar messages&lt;br/&gt;
LustreError: 16997:0:(ldlm_lib.c:2118:target_send_reply_msg()) @@@ processing error (-16)  req@ffff8102bd915800 x1363568749446750/t0(0) o-1-&amp;gt;e0ef2add-a061-7cd8-cd0f-2a288e5fab8a@NET_0x50000c0a80402_UUID:0/0 lens 368/264 e 0 to 0 dl 1300401692 ref 1 fl Interpret:/ffffffff/ffffffff rc -16/-1&lt;br/&gt;
LustreError: 16997:0:(ldlm_lib.c:2118:target_send_reply_msg()) Skipped 11 previous similar messages&lt;br/&gt;
Lustre: 16912:0:(ldlm_lib.c:606:target_handle_reconnect()) Skipped 2 previous similar messages&lt;br/&gt;
Lustre: 16912:0:(ldlm_lib.c:606:target_handle_reconnect()) lustre-MDT0000: 5765646f-b9c3-fbc2-d58c-fd81f8d1a4cf reconnecting&lt;br/&gt;
Lustre: 16912:0:(ldlm_lib.c:606:target_handle_reconnect()) Skipped 2 previous similar messages&lt;br/&gt;
Lustre: 16997:0:(ldlm_lib.c:846:target_handle_connect()) lustre-MDT0000: refuse reconnection from 5765646f-b9c3-fbc2-d58c-fd81f8d1a4cf@192.168.4.1@o2ib to 0xffff810325d28400/1&lt;br/&gt;
Lustre: 16997:0:(ldlm_lib.c:846:target_handle_connect()) Skipped 7 previous similar messages&lt;br/&gt;
Lustre: 16912:0:(ldlm_lib.c:606:target_handle_reconnect()) lustre-MDT0000: 5765646f-b9c3-fbc2-d58c-fd81f8d1a4cf reconnecting&lt;br/&gt;
Lustre: 16912:0:(ldlm_lib.c:606:target_handle_reconnect()) Skipped 5 previous similar messages&lt;br/&gt;
Lustre: 16997:0:(ldlm_lib.c:846:target_handle_connect()) lustre-MDT0000: refuse reconnection from 5765646f-b9c3-fbc2-d58c-fd81f8d1a4cf@192.168.4.1@o2ib to 0xffff810325d28400/1&lt;br/&gt;
Lustre: 16997:0:(ldlm_lib.c:846:target_handle_connect()) Skipped 11 previous similar messages&lt;br/&gt;
Lustre: 16997:0:(ldlm_lib.c:606:target_handle_reconnect()) lustre-MDT0000: 5765646f-b9c3-fbc2-d58c-fd81f8d1a4cf reconnecting&lt;br/&gt;
Lustre: 16997:0:(ldlm_lib.c:606:target_handle_reconnect()) Skipped 11 previous similar messages&lt;br/&gt;
LustreError: 16981:0:(ldlm_lib.c:2118:target_send_reply_msg()) @@@ processing error (-16)  req@ffff81032271b400 x1363568698066991/t0(0) o-1-&amp;gt;5765646f-b9c3-fbc2-d58c-fd81f8d1a4cf@NET_0x50000c0a80401_UUID:0/0 lens 368/264 e 0 to 0 dl 1300401732 ref 1 fl Interpret:/ffffffff/ffffffff rc -16/-1&lt;br/&gt;
LustreError: 16981:0:(ldlm_lib.c:2118:target_send_reply_msg()) Skipped 25 previous similar messages&lt;br/&gt;
Lustre: 16912:0:(ldlm_lib.c:846:target_handle_connect()) lustre-MDT0000: refuse reconnection from 5765646f-b9c3-fbc2-d58c-fd81f8d1a4cf@192.168.4.1@o2ib to 0xffff810325d28400/1&lt;br/&gt;
Lustre: 16912:0:(ldlm_lib.c:846:target_handle_connect()) Skipped 20 previous similar messages&lt;br/&gt;
Lustre: 16912:0:(ldlm_lib.c:606:target_handle_reconnect()) lustre-MDT0000: 5765646f-b9c3-fbc2-d58c-fd81f8d1a4cf reconnecting&lt;br/&gt;
Lustre: 16912:0:(ldlm_lib.c:606:target_handle_reconnect()) Skipped 20 previous similar messages&lt;br/&gt;
LustreError: 16981:0:(ldlm_lib.c:2118:target_send_reply_msg()) @@@ processing error (-16)  req@ffff8102e5f07800 x1363568749446911/t0(0) o-1-&amp;gt;e0ef2add-a061-7cd8-cd0f-2a288e5fab8a@NET_0x50000c0a80402_UUID:0/0 lens 368/264 e 0 to 0 dl 1300401807 ref 1 fl Interpret:/ffffffff/ffffffff rc -16/-1&lt;br/&gt;
LustreError: 16981:0:(ldlm_lib.c:2118:target_send_reply_msg()) Skipped 44 previous similar messages&lt;br/&gt;
Lustre: 16981:0:(ldlm_lib.c:846:target_handle_connect()) lustre-MDT0000: refuse reconnection from 5765646f-b9c3-fbc2-d58c-fd81f8d1a4cf@192.168.4.1@o2ib to 0xffff810325d28400/1&lt;br/&gt;
Lustre: 16981:0:(ldlm_lib.c:846:target_handle_connect()) Skipped 38 previous similar messages&lt;br/&gt;
Lustre: 16981:0:(ldlm_lib.c:606:target_handle_reconnect()) lustre-MDT0000: 5765646f-b9c3-fbc2-d58c-fd81f8d1a4cf reconnecting&lt;br/&gt;
Lustre: 16981:0:(ldlm_lib.c:606:target_handle_reconnect()) Skipped 38 previous similar messages&lt;br/&gt;
Lustre: 16912:0:(ldlm_lib.c:846:target_handle_connect()) lustre-MDT0000: refuse reconnection from 5765646f-b9c3-fbc2-d58c-fd81f8d1a4cf@192.168.4.1@o2ib to 0xffff810325d28400/1&lt;br/&gt;
Lustre: 16912:0:(ldlm_lib.c:846:target_handle_connect()) Skipped 77 previous similar messages&lt;br/&gt;
Lustre: 16912:0:(ldlm_lib.c:606:target_handle_reconnect()) lustre-MDT0000: 5765646f-b9c3-fbc2-d58c-fd81f8d1a4cf reconnecting&lt;br/&gt;
Lustre: 16912:0:(ldlm_lib.c:606:target_handle_reconnect()) Skipped 77 previous similar messages&lt;br/&gt;
LustreError: 16912:0:(ldlm_lib.c:2118:target_send_reply_msg()) @@@ processing error (-16)  req@ffff810636367000 x1363568698067306/t0(0) o-1-&amp;gt;5765646f-b9c3-fbc2-d58c-fd81f8d1a4cf@NET_0x50000c0a80401_UUID:0/0 lens 368/264 e 0 to 0 dl 1300401957 ref 1 fl Interpret:/ffffffff/ffffffff rc -16/-1&lt;br/&gt;
LustreError: 16912:0:(ldlm_lib.c:2118:target_send_reply_msg()) Skipped 89 previous similar messages&lt;br/&gt;
Lustre: 16980:0:(ldlm_lib.c:846:target_handle_connect()) lustre-MDT0000: refuse reconnection from 5765646f-b9c3-fbc2-d58c-fd81f8d1a4cf@192.168.4.1@o2ib to 0xffff810325d28400/1&lt;br/&gt;
Lustre: 16980:0:(ldlm_lib.c:846:target_handle_connect()) Skipped 155 previous similar messages&lt;br/&gt;
Lustre: 16980:0:(ldlm_lib.c:606:target_handle_reconnect()) lustre-MDT0000: 5765646f-b9c3-fbc2-d58c-fd81f8d1a4cf reconnecting&lt;br/&gt;
Lustre: 16980:0:(ldlm_lib.c:606:target_handle_reconnect()) Skipped 155 previous similar messages&lt;br/&gt;
LustreError: 17037:0:(ldlm_lib.c:2118:target_send_reply_msg()) @@@ processing error (-16)  req@ffff8106378aa800 x1363568698067733/t0(0) o-1-&amp;gt;5765646f-b9c3-fbc2-d58c-fd81f8d1a4cf@NET_0x50000c0a80401_UUID:0/0 lens 368/264 e 0 to 0 dl 1300402262 ref 1 fl Interpret:/ffffffff/ffffffff rc -16/-1&lt;br/&gt;
LustreError: 17037:0:(ldlm_lib.c:2118:target_send_reply_msg()) Skipped 182 previous similar messages&lt;br/&gt;
Lustre: 16912:0:(ldlm_lib.c:846:target_handle_connect()) lustre-MDT0000: refuse reconnection from 5765646f-b9c3-fbc2-d58c-fd81f8d1a4cf@192.168.4.1@o2ib to 0xffff810325d28400/1&lt;br/&gt;
Lustre: 16912:0:(ldlm_lib.c:846:target_handle_connect()) Skipped 308 previous similar messages&lt;br/&gt;
Lustre: 16912:0:(ldlm_lib.c:606:target_handle_reconnect()) lustre-MDT0000: 5765646f-b9c3-fbc2-d58c-fd81f8d1a4cf reconnecting&lt;br/&gt;
Lustre: 16912:0:(ldlm_lib.c:606:target_handle_reconnect()) Skipped 306 previous similar messages&lt;br/&gt;
LustreError: 16981:0:(ldlm_lib.c:2118:target_send_reply_msg()) @@@ processing error (-16)  req@ffff810635532800 x1363568821800131/t0(0) o-1-&amp;gt;8274223f-8957-01db-a6c1-85c4a27e713c@NET_0x50000c0a80405_UUID:0/0 lens 368/264 e 0 to 0 dl 1300402862 ref 1 fl Interpret:/ffffffff/ffffffff rc -16/-1&lt;br/&gt;
LustreError: 16981:0:(ldlm_lib.c:2118:target_send_reply_msg()) Skipped 359 previous similar messages&lt;br/&gt;
Lustre: 16981:0:(ldlm_lib.c:846:target_handle_connect()) lustre-MDT0000: refuse reconnection from 8274223f-8957-01db-a6c1-85c4a27e713c@192.168.4.5@o2ib to 0xffff810325d28800/1&lt;br/&gt;
Lustre: 16981:0:(ldlm_lib.c:846:target_handle_connect()) Skipped 359 previous similar messages&lt;br/&gt;
Lustre: 16980:0:(ldlm_lib.c:606:target_handle_reconnect()) lustre-MDT0000: 8274223f-8957-01db-a6c1-85c4a27e713c reconnecting&lt;br/&gt;
Lustre: 16980:0:(ldlm_lib.c:606:target_handle_reconnect()) Skipped 359 previous similar messages&lt;br/&gt;
LustreError: 16912:0:(ldlm_lib.c:2118:target_send_reply_msg()) @@@ processing error (-16)  req@ffff810631ed9400 x1363568749449228/t0(0) o-1-&amp;gt;e0ef2add-a061-7cd8-cd0f-2a288e5fab8a@NET_0x50000c0a80402_UUID:0/0 lens 368/264 e 0 to 0 dl 1300403462 ref 1 fl Interpret:/ffffffff/ffffffff rc -16/-1&lt;/p&gt;

&lt;p&gt;client syslogs&lt;/p&gt;

&lt;p&gt;Lustre: DEBUG MARKER: == replay-dual test 3: |X| mkdir adir, mkdir adir/bdir =============================================== 15:25:29 (1300400729)&lt;br/&gt;
LustreError: 21048:0:(client.c:1057:ptlrpc_import_delay_req()) @@@ IMP_INVALID  req@ffff81032cb66c00 x1363568698065666/t0(0) o-1-&amp;gt;MGS@192.168.4.128@o2ib:26/25 lens 296/352 e 0 to 0 dl 0 ref 2 fl Rpc:/ffffffff/ffffffff rc 0/-1&lt;br/&gt;
LustreError: 21048:0:(client.c:1057:ptlrpc_import_delay_req()) Skipped 4 previous similar messages&lt;br/&gt;
LustreError: 21048:0:(client.c:1057:ptlrpc_import_delay_req()) @@@ IMP_INVALID  req@ffff81032cb66c00 x1363568698065673/t0(0) o-1-&amp;gt;MGS@192.168.4.128@o2ib:26/25 lens 296/352 e 0 to 0 dl 0 ref 2 fl Rpc:/ffffffff/ffffffff rc 0/-1&lt;br/&gt;
LustreError: 21048:0:(client.c:1057:ptlrpc_import_delay_req()) Skipped 4 previous similar messages&lt;br/&gt;
Lustre: 17200:0:(import.c:529:import_select_connection()) MGC192.168.4.128@o2ib: tried all connections, increasing latency to 11s&lt;br/&gt;
Lustre: 17199:0:(import.c:885:ptlrpc_connect_interpret()) MGS@192.168.4.128@o2ib changed server handle from 0xc740c432497b7838 to 0xc740c432497b7e27&lt;br/&gt;
Lustre: MGC192.168.4.128@o2ib: Reactivating import&lt;br/&gt;
Lustre: MGC192.168.4.128@o2ib: Connection restored to service MGS using nid 192.168.4.128@o2ib.&lt;br/&gt;
Lustre: Skipped 1 previous similar message&lt;br/&gt;
Lustre: 21742:0:(client.c:1772:ptlrpc_expire_one_request()) @@@ Request x1363568698065665 sent from lustre-MDT0000-mdc-ffff81032b0c5400 to NID 192.168.4.128@o2ib has timed out for slow reply: &lt;span class=&quot;error&quot;&gt;&amp;#91;sent 1300400729&amp;#93;&lt;/span&gt; &lt;span class=&quot;error&quot;&gt;&amp;#91;real_sent 1300400729&amp;#93;&lt;/span&gt; &lt;span class=&quot;error&quot;&gt;&amp;#91;current 1300401616&amp;#93;&lt;/span&gt; &lt;span class=&quot;error&quot;&gt;&amp;#91;deadline 887s&amp;#93;&lt;/span&gt; &lt;span class=&quot;error&quot;&gt;&amp;#91;delay 0s&amp;#93;&lt;/span&gt;  req@ffff810323b7a800 x1363568698065665/t0(0) o-1-&amp;gt;lustre-MDT0000_UUID@192.168.4.128@o2ib:12/10 lens 544/1016 e 4 to 1 dl 1300401616 ref 2 fl Rpc:X/ffffffff/ffffffff rc 0/-1&lt;br/&gt;
Lustre: 21742:0:(client.c:1772:ptlrpc_expire_one_request()) Skipped 9 previous similar messages&lt;br/&gt;
Lustre: lustre-MDT0000-mdc-ffff81032b0c5400: Connection to service lustre-MDT0000 via nid 192.168.4.128@o2ib was lost; in progress operations using this service will wait for recovery to complete.&lt;br/&gt;
Lustre: Skipped 1 previous similar message&lt;br/&gt;
LustreError: 11-0: an error occurred while communicating with 192.168.4.128@o2ib. The mds_connect operation failed with -16&lt;br/&gt;
LustreError: Skipped 3 previous similar messages&lt;br/&gt;
LustreError: 11-0: an error occurred while communicating with 192.168.4.128@o2ib. The mds_connect operation failed with -16&lt;br/&gt;
LustreError: Skipped 1 previous similar message&lt;br/&gt;
LustreError: 11-0: an error occurred while communicating with 192.168.4.128@o2ib. The mds_connect operation failed with -16&lt;br/&gt;
LustreError: Skipped 3 previous similar messages&lt;br/&gt;
LustreError: 11-0: an error occurred while communicating with 192.168.4.128@o2ib. The mds_connect operation failed with -16&lt;br/&gt;
LustreError: Skipped 6 previous similar messages&lt;br/&gt;
LustreError: 11-0: an error occurred while communicating with 192.168.4.128@o2ib. The mds_connect operation failed with -16&lt;br/&gt;
LustreError: Skipped 12 previous similar messages&lt;br/&gt;
LustreError: 11-0: an error occurred while communicating with 192.168.4.128@o2ib. The mds_connect operation failed with -16&lt;br/&gt;
LustreError: Skipped 25 previous similar messages&lt;br/&gt;
LustreError: 11-0: an error occurred while communicating with 192.168.4.128@o2ib. The mds_connect operation failed with -16&lt;br/&gt;
LustreError: Skipped 51 previous similar messages&lt;br/&gt;
LustreError: 11-0: an error occurred while communicating with 192.168.4.128@o2ib. The mds_connect operation failed with -16&lt;br/&gt;
LustreError: Skipped 102 previous similar messages&lt;br/&gt;
LustreError: 11-0: an error occurred while communicating with 192.168.4.128@o2ib. The mds_connect operation failed with -16&lt;br/&gt;
LustreError: Skipped 120 previous similar messages&lt;br/&gt;
LustreError: 11-0: an error occurred while communicating with 192.168.4.128@o2ib. The mds_connect operation failed with -16&lt;br/&gt;
LustreError: Skipped 120 previous similar messages&lt;br/&gt;
LustreError: 11-0: an error occurred while communicating with 192.168.4.128@o2ib. The mds_connect operation failed with -16&lt;br/&gt;
LustreError: Skipped 120 previous similar messages&lt;/p&gt;

&lt;p&gt;Here are the stack trace and debug log. With 1 client, it seems these tests can pass.&lt;/p&gt;</description>
                <environment></environment>
        <key id="10467">LU-142</key>
            <summary>system hang when running replay-single or replay-dual with three clients</summary>
                <type id="1" iconUrl="https://jira.whamcloud.com/secure/viewavatar?size=xsmall&amp;avatarId=11303&amp;avatarType=issuetype">Bug</type>
                                            <priority id="4" iconUrl="https://jira.whamcloud.com/images/icons/priorities/minor.svg">Minor</priority>
                        <status id="5" iconUrl="https://jira.whamcloud.com/images/icons/statuses/resolved.png" description="A resolution has been taken, and it is awaiting verification by reporter. From here issues are either reopened, or are closed.">Resolved</status>
                    <statusCategory id="3" key="done" colorName="success"/>
                                    <resolution id="3">Duplicate</resolution>
                                        <assignee username="yong.fan">nasf</assignee>
                                    <reporter username="sarah">Sarah Liu</reporter>
                        <labels>
                    </labels>
                <created>Thu, 17 Mar 2011 17:37:48 +0000</created>
                <updated>Fri, 25 Mar 2011 21:09:13 +0000</updated>
                            <resolved>Fri, 25 Mar 2011 21:09:10 +0000</resolved>
                                    <version>Lustre 2.1.0</version>
                                    <fixVersion>Lustre 2.1.0</fixVersion>
                                        <due></due>
                            <votes>0</votes>
                                    <watches>2</watches>
                                                                            <comments>
                            <comment id="11269" author="sarah" created="Mon, 21 Mar 2011 12:49:47 +0000"  >&lt;p&gt;got this issue again when running sanityn test_17 and test_41a on server:build #152, client:build #128.&lt;/p&gt;

&lt;p&gt;Lustre: DEBUG MARKER: == sanityn test 41a: pdirops: create vs mkdir ======================================================== 10:17:01 (1300727821)&lt;/p&gt;


&lt;p&gt;Lustre: 20849:0:(client.c:1772:ptlrpc_expire_one_request()) @@@ Request x1363911719814207 sent from lustre-MDT0000-mdc-ffff810328716000 to NID 192.168.4.128@o2ib has timed out for slow reply: &lt;span class=&quot;error&quot;&gt;&amp;#91;sent 1300727831&amp;#93;&lt;/span&gt; &lt;span class=&quot;error&quot;&gt;&amp;#91;real_sent 1300727831&amp;#93;&lt;/span&gt; &lt;span class=&quot;error&quot;&gt;&amp;#91;current 1300728951&amp;#93;&lt;/span&gt; &lt;span class=&quot;error&quot;&gt;&amp;#91;deadline 1120s&amp;#93;&lt;/span&gt; &lt;span class=&quot;error&quot;&gt;&amp;#91;delay 0s&amp;#93;&lt;/span&gt;  req@ffff810330606c00 x1363911719814207/t0(0) o-1-&amp;gt;lustre-MDT0000_UUID@192.168.4.128@o2ib:12/10 lens 544/1016 e 4 to 1 dl 1300728951 ref 2 fl Rpc:X/ffffffff/ffffffff rc 0/-1&lt;br/&gt;
Lustre: lustre-MDT0000-mdc-ffff8103298b7400: Connection to service lustre-MDT0000 via nid 192.168.4.128@o2ib was lost; in progress operations using this service will wait for recovery to complete.&lt;br/&gt;
Lustre: 20849:0:(client.c:1772:ptlrpc_expire_one_request()) Skipped 1 previous similar message&lt;br/&gt;
LustreError: 11-0: an error occurred while communicating with 192.168.4.128@o2ib. The mds_connect operation failed with -16&lt;br/&gt;
LustreError: 11-0: an error occurred while communicating with 192.168.4.128@o2ib. The mds_connect operation failed with -16&lt;br/&gt;
LustreError: Skipped 1 previous similar message&lt;br/&gt;
LustreError: 11-0: an error occurred while communicating with 192.168.4.128@o2ib. The mds_connect operation failed with -16&lt;br/&gt;
LustreError: Skipped 1 previous similar message&lt;/p&gt;

</comment>
                            <comment id="11360" author="yong.fan" created="Thu, 24 Mar 2011 22:39:09 +0000"  >&lt;p&gt;The patch is in inspection:&lt;/p&gt;

&lt;p&gt;&lt;a href=&quot;http://review.whamcloud.com/#change,365&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/#change,365&lt;/a&gt;&lt;/p&gt;</comment>
                            <comment id="11403" author="sarah" created="Fri, 25 Mar 2011 21:09:12 +0000"  >&lt;p&gt;duplicated with &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-161&quot; title=&quot;connectathon test on NFS v3 client failed&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-161&quot;&gt;&lt;del&gt;LU-161&lt;/del&gt;&lt;/a&gt;&lt;/p&gt;</comment>
                    </comments>
                    <attachments>
                            <attachment id="10148" name="lustre-log.1300401065.16859.bz2" size="592427" author="sarah" created="Thu, 17 Mar 2011 17:37:48 +0000"/>
                            <attachment id="10151" name="lustre-log.1300728064.6848" size="304903" author="sarah" created="Mon, 21 Mar 2011 12:51:09 +0000"/>
                            <attachment id="10147" name="stack_trace" size="247024" author="sarah" created="Thu, 17 Mar 2011 17:37:48 +0000"/>
                    </attachments>
                <subtasks>
                    </subtasks>
                <customfields>
                                                                                                                                                                                            <customfield id="customfield_10890" key="com.atlassian.jira.plugins.jira-development-integration-plugin:devsummary">
                        <customfieldname>Development</customfieldname>
                        <customfieldvalues>
                            
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        <customfield id="customfield_10390" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                        <customfieldname>Rank</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>1|hzvzxj:</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10090" key="com.pyxis.greenhopper.jira:gh-global-rank">
                        <customfieldname>Rank (Obsolete)</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>10067</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                                                                                            <customfield id="customfield_10060" key="com.atlassian.jira.plugin.system.customfieldtypes:select">
                        <customfieldname>Severity</customfieldname>
                        <customfieldvalues>
                                <customfieldvalue key="10022"><![CDATA[3]]></customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        </customfields>
    </item>
</channel>
</rss>