<?xml version="1.0" encoding="UTF-8"?>
<!-- 
RSS generated by JIRA (9.4.14#940014-sha1:734e6822bbf0d45eff9af51f82432957f73aa32c) at Sat Feb 10 02:56:17 UTC 2024

It is possible to restrict the fields that are returned in this document by specifying the 'field' parameter in your request.
For example, to request only the issue key and summary append 'field=key&field=summary' to the URL of your request.
-->
<rss version="0.92">
<channel>
    <title>Whamcloud Community JIRA</title>
    <link>https://jira.whamcloud.com</link>
    <description>This file is an XML representation of an issue</description>
    <language>en-us</language>
    <build-info>
        <version>9.4.14</version>
        <build-number>940014</build-number>
        <build-date>05-12-2023</build-date>
    </build-info>


<item>
            <title>[LU-12860] recovery-small test 55 fails with &apos;dd should be finished!&apos;</title>
                <link>https://jira.whamcloud.com/browse/LU-12860</link>
                <project id="10000" key="LU">Lustre</project>
                    <description>&lt;p&gt;recovery-small test_55 fails with &apos;dd should be finished!&apos; and has failed only twice this year with this error. See &lt;a href=&quot;https://testing.whamcloud.com/test_sets/63e23090-eb2c-11e9-a197-52540065bddc&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://testing.whamcloud.com/test_sets/63e23090-eb2c-11e9-a197-52540065bddc&lt;/a&gt; on 09 OCT 2019 and &lt;a href=&quot;https://testing.whamcloud.com/test_sets/5ad3e090-d5cc-11e9-a25b-52540065bddc&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://testing.whamcloud.com/test_sets/5ad3e090-d5cc-11e9-a25b-52540065bddc&lt;/a&gt; 12 SEPT 2019 both for b2_12. &lt;/p&gt;

&lt;p&gt;Looking at the suite_log for the second failure listed above, we see&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;== recovery-small test 55: ost_brw_read/write drops timed-out read/write request ===================== 12:07:50 (1570648070)
step1: testing ......
 recovery-small test_55: @@@@@@ FAIL: dd should be finished! 
  Trace dump:
  = /usr/lib64/lustre/tests/test-framework.sh:5864:error()
  = /usr/lib64/lustre/tests/recovery-small.sh:1336:test_55()
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;Looking at the OSS (vm5) console log, we see&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;[ 3438.494717] Lustre: DEBUG MARKER: == recovery-small test 55: ost_brw_read/write drops timed-out read/write request ===================== 12:07:50 (1570648070)
[ 3501.778339] LNet: Service thread pid 6405 was inactive for 40.04s. The thread might be hung, or it might only be slow and will resume later. Dumping the stack trace for debugging purposes:
[ 3501.781156] Pid: 6405, comm: ll_ost00_053 3.10.0-957.27.2.el7_lustre.x86_64 #1 SMP Tue Oct 8 13:52:18 UTC 2019
[ 3501.782806] Call Trace:
[ 3501.783449]  [&amp;lt;ffffffffc02e1cf5&amp;gt;] do_get_write_access+0x285/0x4d0 [jbd2]
[ 3501.784645]  [&amp;lt;ffffffffc02e1f67&amp;gt;] jbd2_journal_get_write_access+0x27/0x40 [jbd2]
[ 3501.785941]  [&amp;lt;ffffffffc0ebfc5b&amp;gt;] __ldiskfs_journal_get_write_access+0x3b/0x80 [ldiskfs]
[ 3501.787604]  [&amp;lt;ffffffffc0f75099&amp;gt;] osd_ldiskfs_write_record+0xd9/0x410 [osd_ldiskfs]
[ 3501.789090]  [&amp;lt;ffffffffc0f75510&amp;gt;] osd_write+0x140/0x5c0 [osd_ldiskfs]
[ 3501.790333]  [&amp;lt;ffffffffc09784b6&amp;gt;] dt_record_write+0x36/0x120 [obdclass]
[ 3501.791667]  [&amp;lt;ffffffffc0ca8614&amp;gt;] tgt_server_data_write+0x224/0x370 [ptlrpc]
[ 3501.793309]  [&amp;lt;ffffffffc0ca9d8e&amp;gt;] tgt_server_data_update+0x41e/0x510 [ptlrpc]
[ 3501.794591]  [&amp;lt;ffffffffc0cabd0d&amp;gt;] tgt_client_del+0x29d/0x6b0 [ptlrpc]
[ 3501.795814]  [&amp;lt;ffffffffc108e13c&amp;gt;] ofd_obd_disconnect+0x1ac/0x220 [ofd]
[ 3501.797038]  [&amp;lt;ffffffffc0c12077&amp;gt;] target_handle_disconnect+0xd7/0x450 [ptlrpc]
[ 3501.798431]  [&amp;lt;ffffffffc0cb2708&amp;gt;] tgt_disconnect+0x58/0x170 [ptlrpc]
[ 3501.799587]  [&amp;lt;ffffffffc0cb736a&amp;gt;] tgt_request_handle+0xaea/0x1580 [ptlrpc]
[ 3501.800860]  [&amp;lt;ffffffffc0c5e24b&amp;gt;] ptlrpc_server_handle_request+0x24b/0xab0 [ptlrpc]
[ 3501.802264]  [&amp;lt;ffffffffc0c61bac&amp;gt;] ptlrpc_main+0xb2c/0x1460 [ptlrpc]
[ 3501.803477]  [&amp;lt;ffffffffb7ec2e81&amp;gt;] kthread+0xd1/0xe0
[ 3501.804387]  [&amp;lt;ffffffffb8577c37&amp;gt;] ret_from_fork_nospec_end+0x0/0x39
[ 3501.805550]  [&amp;lt;ffffffffffffffff&amp;gt;] 0xffffffffffffffff
[ 3501.806504] LustreError: dumping log to /tmp/lustre-log.1570648140.6405
[ 3502.924108] Lustre: DEBUG MARKER: /usr/sbin/lctl mark  recovery-small test_55: @@@@@@ FAIL: dd should be finished! 
[ 3503.127572] Lustre: DEBUG MARKER: recovery-small test_55: @@@@@@ FAIL: dd should be finished!
[ 3503.376291] Lustre: DEBUG MARKER: /usr/sbin/lctl dk &amp;gt; /autotest/autotest2/2019-10-08/lustre-b2_12-el7_6-x86_64-vs-lustre-b2_12-sles12sp4-x86_64--failover--1_17__52___0455cc7e-bf9e-4287-89df-0e7b8e9d7341/recovery-small.test_55.debug_log.$(hostname -s).1570648135.log;
[ 3503.376291]          dmesg &amp;gt; /auto
[ 3503.570329] LNet: Service thread pid 6183 was inactive for 40.01s. The thread might be hung, or it might only be slow and will resume later. Dumping the stack trace for debugging purposes:
[ 3503.573227] Pid: 6183, comm: ll_ost00_023 3.10.0-957.27.2.el7_lustre.x86_64 #1 SMP Tue Oct 8 13:52:18 UTC 2019
[ 3503.574895] Call Trace:
[ 3503.575364]  [&amp;lt;ffffffffc02e1cf5&amp;gt;] do_get_write_access+0x285/0x4d0 [jbd2]
[ 3503.576655]  [&amp;lt;ffffffffc02e1f67&amp;gt;] jbd2_journal_get_write_access+0x27/0x40 [jbd2]
[ 3503.578032]  [&amp;lt;ffffffffc0ebfc5b&amp;gt;] __ldiskfs_journal_get_write_access+0x3b/0x80 [ldiskfs]
[ 3503.579502]  [&amp;lt;ffffffffc0f75099&amp;gt;] osd_ldiskfs_write_record+0xd9/0x410 [osd_ldiskfs]
[ 3503.580837]  [&amp;lt;ffffffffc0f75510&amp;gt;] osd_write+0x140/0x5c0 [osd_ldiskfs]
[ 3503.581984]  [&amp;lt;ffffffffc09784b6&amp;gt;] dt_record_write+0x36/0x120 [obdclass]
[ 3503.583173]  [&amp;lt;ffffffffc0ca8614&amp;gt;] tgt_server_data_write+0x224/0x370 [ptlrpc]
[ 3503.584458]  [&amp;lt;ffffffffc0ca9d8e&amp;gt;] tgt_server_data_update+0x41e/0x510 [ptlrpc]
[ 3503.585736]  [&amp;lt;ffffffffc0cabd0d&amp;gt;] tgt_client_del+0x29d/0x6b0 [ptlrpc]
[ 3503.586885]  [&amp;lt;ffffffffc108e13c&amp;gt;] ofd_obd_disconnect+0x1ac/0x220 [ofd]
[ 3503.588272]  [&amp;lt;ffffffffc0c12077&amp;gt;] target_handle_disconnect+0xd7/0x450 [ptlrpc]
[ 3503.590003]  [&amp;lt;ffffffffc0cb2708&amp;gt;] tgt_disconnect+0x58/0x170 [ptlrpc]
[ 3503.591263]  [&amp;lt;ffffffffc0cb736a&amp;gt;] tgt_request_handle+0xaea/0x1580 [ptlrpc]
[ 3503.592500]  [&amp;lt;ffffffffc0c5e24b&amp;gt;] ptlrpc_server_handle_request+0x24b/0xab0 [ptlrpc]
[ 3503.594004]  [&amp;lt;ffffffffc0c61bac&amp;gt;] ptlrpc_main+0xb2c/0x1460 [ptlrpc]
[ 3503.595138]  [&amp;lt;ffffffffb7ec2e81&amp;gt;] kthread+0xd1/0xe0
[ 3503.596115]  [&amp;lt;ffffffffb8577c37&amp;gt;] ret_from_fork_nospec_end+0x0/0x39
[ 3503.597267]  [&amp;lt;ffffffffffffffff&amp;gt;] 0xffffffffffffffff
[ 3503.598221] LustreError: dumping log to /tmp/lustre-log.1570648142.6183
[ 3505.208266] Lustre: DEBUG MARKER: lctl set_param -n fail_loc=0 	    fail_val=0 2&amp;gt;/dev/null
[ 3514.343779] Lustre: 6183:0:(service.c:2165:ptlrpc_server_handle_request()) @@@ Request took longer than estimated (45:5s); client may timeout.  req@ffff9e61e0f0e050 x1646941249245056/t0(0) o9-&amp;gt;93068985-9bd0-151c-638d-51557c0cba70@10.9.5.186@tcp:507/0 lens 224/192 e 0 to 0 dl 1570648147 ref 1 fl Complete:/0/0 rc 0/0
[ 3514.344067] LNet: Service thread pid 6405 completed after 52.61s. This indicates the system was overloaded (too many service threads, or there were not enough hardware resources).
[ 3514.351372] Lustre: 6183:0:(service.c:2165:ptlrpc_server_handle_request()) Skipped 1 previous similar message
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;On the other OSS (vm6) console log, we see&lt;/p&gt;

&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;[ 7059.072359] Lustre: DEBUG MARKER: == recovery-small test 55: ost_brw_read/write drops timed-out read/write request ===================== 12:07:50 (1570648070)
[ 7122.391257] LNet: Service thread pid 20783 was inactive for 40.07s. The thread might be hung, or it might only be slow and will resume later. Dumping the stack trace for debugging purposes:
[ 7122.394181] Pid: 20783, comm: ll_ost00_001 3.10.0-957.27.2.el7_lustre.x86_64 #1 SMP Tue Oct 8 13:52:18 UTC 2019
[ 7122.395901] Call Trace:
[ 7122.396464]  [&amp;lt;ffffffffc02d1cf5&amp;gt;] do_get_write_access+0x285/0x4d0 [jbd2]
[ 7122.397807]  [&amp;lt;ffffffffc02d1f67&amp;gt;] jbd2_journal_get_write_access+0x27/0x40 [jbd2]
[ 7122.399208]  [&amp;lt;ffffffffc0eabc5b&amp;gt;] __ldiskfs_journal_get_write_access+0x3b/0x80 [ldiskfs]
[ 7122.400818]  [&amp;lt;ffffffffc0f61099&amp;gt;] osd_ldiskfs_write_record+0xd9/0x410 [osd_ldiskfs]
[ 7122.402240]  [&amp;lt;ffffffffc0f61510&amp;gt;] osd_write+0x140/0x5c0 [osd_ldiskfs]
[ 7122.403504]  [&amp;lt;ffffffffc09644b6&amp;gt;] dt_record_write+0x36/0x120 [obdclass]
[ 7122.404960]  [&amp;lt;ffffffffc0c93a37&amp;gt;] tgt_client_data_write.isra.18+0x167/0x180 [ptlrpc]
[ 7122.406800]  [&amp;lt;ffffffffc0c97229&amp;gt;] tgt_client_data_update+0x3a9/0x5e0 [ptlrpc]
[ 7122.408105]  [&amp;lt;ffffffffc0c97eb9&amp;gt;] tgt_client_del+0x449/0x6b0 [ptlrpc]
[ 7122.409465]  [&amp;lt;ffffffffc107a13c&amp;gt;] ofd_obd_disconnect+0x1ac/0x220 [ofd]
[ 7122.410677]  [&amp;lt;ffffffffc0bfe077&amp;gt;] target_handle_disconnect+0xd7/0x450 [ptlrpc]
[ 7122.411993]  [&amp;lt;ffffffffc0c9e708&amp;gt;] tgt_disconnect+0x58/0x170 [ptlrpc]
[ 7122.413292]  [&amp;lt;ffffffffc0ca336a&amp;gt;] tgt_request_handle+0xaea/0x1580 [ptlrpc]
[ 7122.414557]  [&amp;lt;ffffffffc0c4a24b&amp;gt;] ptlrpc_server_handle_request+0x24b/0xab0 [ptlrpc]
[ 7122.416009]  [&amp;lt;ffffffffc0c4dbac&amp;gt;] ptlrpc_main+0xb2c/0x1460 [ptlrpc]
[ 7122.417248]  [&amp;lt;ffffffffa28c2e81&amp;gt;] kthread+0xd1/0xe0
[ 7122.418162]  [&amp;lt;ffffffffa2f77c37&amp;gt;] ret_from_fork_nospec_end+0x0/0x39
[ 7122.419471]  [&amp;lt;ffffffffffffffff&amp;gt;] 0xffffffffffffffff
[ 7122.420442] LustreError: dumping log to /tmp/lustre-log.1570648139.20783
[ 7123.504819] Lustre: DEBUG MARKER: /usr/sbin/lctl mark  recovery-small test_55: @@@@@@ FAIL: dd should be finished! 
[ 7123.703794] Lustre: DEBUG MARKER: recovery-small test_55: @@@@@@ FAIL: dd should be finished!
[ 7123.955355] Lustre: DEBUG MARKER: /usr/sbin/lctl dk &amp;gt; /autotest/autotest2/2019-10-08/lustre-b2_12-el7_6-x86_64-vs-lustre-b2_12-sles12sp4-x86_64--failover--1_17__52___0455cc7e-bf9e-4287-89df-0e7b8e9d7341/recovery-small.test_55.debug_log.$(hostname -s).1570648135.log;
[ 7123.955355]          dmesg &amp;gt; /auto
[ 7125.207282] LNet: Service thread pid 24813 was inactive for 62.16s. The thread might be hung, or it might only be slow and will resume later. Dumping the stack trace for debugging purposes:
[ 7125.210221] Pid: 24813, comm: ll_ost_io00_008 3.10.0-957.27.2.el7_lustre.x86_64 #1 SMP Tue Oct 8 13:52:18 UTC 2019
[ 7125.211990] Call Trace:
[ 7125.212538]  [&amp;lt;ffffffffc0f47e75&amp;gt;] osd_trans_stop+0x265/0x8e0 [osd_ldiskfs]
[ 7125.213833]  [&amp;lt;ffffffffc1081c75&amp;gt;] ofd_trans_stop+0x25/0x60 [ofd]
[ 7125.215002]  [&amp;lt;ffffffffc1088c34&amp;gt;] ofd_commitrw_write+0x9d4/0x1d40 [ofd]
[ 7125.216398]  [&amp;lt;ffffffffc108d10c&amp;gt;] ofd_commitrw+0x48c/0x9e0 [ofd]
[ 7125.217505]  [&amp;lt;ffffffffc0ca760b&amp;gt;] tgt_brw_write+0x10cb/0x1cf0 [ptlrpc]
[ 7125.218795]  [&amp;lt;ffffffffc0ca336a&amp;gt;] tgt_request_handle+0xaea/0x1580 [ptlrpc]
[ 7125.220109]  [&amp;lt;ffffffffc0c4a24b&amp;gt;] ptlrpc_server_handle_request+0x24b/0xab0 [ptlrpc]
[ 7125.221611]  [&amp;lt;ffffffffc0c4dbac&amp;gt;] ptlrpc_main+0xb2c/0x1460 [ptlrpc]
[ 7125.222775]  [&amp;lt;ffffffffa28c2e81&amp;gt;] kthread+0xd1/0xe0
[ 7125.223719]  [&amp;lt;ffffffffa2f77c37&amp;gt;] ret_from_fork_nospec_end+0x0/0x39
[ 7125.224888]  [&amp;lt;ffffffffffffffff&amp;gt;] 0xffffffffffffffff
[ 7125.225920] LustreError: dumping log to /tmp/lustre-log.1570648142.24813
[ 7125.314852] Pid: 22522, comm: ll_ost_io00_003 3.10.0-957.27.2.el7_lustre.x86_64 #1 SMP Tue Oct 8 13:52:18 UTC 2019
[ 7125.317005] Call Trace:
[ 7125.317553]  [&amp;lt;ffffffffc0f47e75&amp;gt;] osd_trans_stop+0x265/0x8e0 [osd_ldiskfs]
[ 7125.319059]  [&amp;lt;ffffffffc1081c75&amp;gt;] ofd_trans_stop+0x25/0x60 [ofd]
[ 7125.320408]  [&amp;lt;ffffffffc1088c34&amp;gt;] ofd_commitrw_write+0x9d4/0x1d40 [ofd]
[ 7125.321779]  [&amp;lt;ffffffffc108d10c&amp;gt;] ofd_commitrw+0x48c/0x9e0 [ofd]
[ 7125.323114]  [&amp;lt;ffffffffc0ca760b&amp;gt;] tgt_brw_write+0x10cb/0x1cf0 [ptlrpc]
[ 7125.324597]  [&amp;lt;ffffffffc0ca336a&amp;gt;] tgt_request_handle+0xaea/0x1580 [ptlrpc]
[ 7125.326068]  [&amp;lt;ffffffffc0c4a24b&amp;gt;] ptlrpc_server_handle_request+0x24b/0xab0 [ptlrpc]
[ 7125.327744]  [&amp;lt;ffffffffc0c4dbac&amp;gt;] ptlrpc_main+0xb2c/0x1460 [ptlrpc]
[ 7125.329103]  [&amp;lt;ffffffffa28c2e81&amp;gt;] kthread+0xd1/0xe0
[ 7125.330273]  [&amp;lt;ffffffffa2f77c37&amp;gt;] ret_from_fork_nospec_end+0x0/0x39
[ 7125.331641]  [&amp;lt;ffffffffffffffff&amp;gt;] 0xffffffffffffffff
[ 7125.719257] Pid: 24812, comm: ll_ost_io00_007 3.10.0-957.27.2.el7_lustre.x86_64 #1 SMP Tue Oct 8 13:52:18 UTC 2019
[ 7125.721175] Call Trace:
[ 7125.721644]  [&amp;lt;ffffffffc0f47e75&amp;gt;] osd_trans_stop+0x265/0x8e0 [osd_ldiskfs]
[ 7125.722893]  [&amp;lt;ffffffffc1081c75&amp;gt;] ofd_trans_stop+0x25/0x60 [ofd]
[ 7125.723983]  [&amp;lt;ffffffffc1088c34&amp;gt;] ofd_commitrw_write+0x9d4/0x1d40 [ofd]
[ 7125.725196]  [&amp;lt;ffffffffc108d10c&amp;gt;] ofd_commitrw+0x48c/0x9e0 [ofd]
[ 7125.726663]  [&amp;lt;ffffffffc0ca760b&amp;gt;] tgt_brw_write+0x10cb/0x1cf0 [ptlrpc]
[ 7125.727903]  [&amp;lt;ffffffffc0ca336a&amp;gt;] tgt_request_handle+0xaea/0x1580 [ptlrpc]
[ 7125.729173]  [&amp;lt;ffffffffc0c4a24b&amp;gt;] ptlrpc_server_handle_request+0x24b/0xab0 [ptlrpc]
[ 7125.730564]  [&amp;lt;ffffffffc0c4dbac&amp;gt;] ptlrpc_main+0xb2c/0x1460 [ptlrpc]
[ 7125.731757]  [&amp;lt;ffffffffa28c2e81&amp;gt;] kthread+0xd1/0xe0
[ 7125.732671]  [&amp;lt;ffffffffa2f77c37&amp;gt;] ret_from_fork_nospec_end+0x0/0x39
[ 7125.733794]  [&amp;lt;ffffffffffffffff&amp;gt;] 0xffffffffffffffff
[ 7125.734725] LustreError: dumping log to /tmp/lustre-log.1570648143.24812
[ 7125.822770] Lustre: DEBUG MARKER: lctl set_param -n fail_loc=0 	    fail_val=0 2&amp;gt;/dev/null
[ 7126.231240] LNet: Service thread pid 24810 was inactive for 62.20s. The thread might be hung, or it might only be slow and will resume later. Dumping the stack trace for debugging purposes:
[ 7126.234161] LNet: Skipped 2 previous similar messages
[ 7126.235065] Pid: 24810, comm: ll_ost_io00_005 3.10.0-957.27.2.el7_lustre.x86_64 #1 SMP Tue Oct 8 13:52:18 UTC 2019
[ 7126.236828] Call Trace:
[ 7126.237412]  [&amp;lt;ffffffffc0f47e75&amp;gt;] osd_trans_stop+0x265/0x8e0 [osd_ldiskfs]
[ 7126.238787]  [&amp;lt;ffffffffc1081c75&amp;gt;] ofd_trans_stop+0x25/0x60 [ofd]
[ 7126.239919]  [&amp;lt;ffffffffc1088c34&amp;gt;] ofd_commitrw_write+0x9d4/0x1d40 [ofd]
[ 7126.241178]  [&amp;lt;ffffffffc108d10c&amp;gt;] ofd_commitrw+0x48c/0x9e0 [ofd]
[ 7126.242314]  [&amp;lt;ffffffffc0ca760b&amp;gt;] tgt_brw_write+0x10cb/0x1cf0 [ptlrpc]
[ 7126.243546]  [&amp;lt;ffffffffc0ca336a&amp;gt;] tgt_request_handle+0xaea/0x1580 [ptlrpc]
[ 7126.244851]  [&amp;lt;ffffffffc0c4a24b&amp;gt;] ptlrpc_server_handle_request+0x24b/0xab0 [ptlrpc]
[ 7126.246289]  [&amp;lt;ffffffffc0c4dbac&amp;gt;] ptlrpc_main+0xb2c/0x1460 [ptlrpc]
[ 7126.247487]  [&amp;lt;ffffffffa28c2e81&amp;gt;] kthread+0xd1/0xe0
[ 7126.248498]  [&amp;lt;ffffffffa2f77c37&amp;gt;] ret_from_fork_nospec_end+0x0/0x39
[ 7126.249669]  [&amp;lt;ffffffffffffffff&amp;gt;] 0xffffffffffffffff
[ 7126.250647] LNet: Service thread pid 24811 was inactive for 62.22s. Watchdog stack traces are limited to 3 per 300 seconds, skipping this one.
[ 7146.199252] LNet: Service thread pid 22011 was inactive for 62.05s. Watchdog stack traces are limited to 3 per 300 seconds, skipping this one.
[ 7146.201583] LNet: Skipped 3 previous similar messages
[ 7146.202481] LustreError: dumping log to /tmp/lustre-log.1570648163.22011
[ 7149.016236] LNet: Service thread pid 24817 was inactive for 62.03s. Watchdog stack traces are limited to 3 per 300 seconds, skipping this one.
[ 7149.018478] LustreError: dumping log to /tmp/lustre-log.1570648166.24817
[ 7157.692329] LNetError: 2982:0:(peer.c:3451:lnet_peer_ni_add_to_recoveryq_locked()) lpni 10.9.5.191@tcp added to recovery queue. Health = 0
[ 7157.694496] LNetError: 2982:0:(peer.c:3451:lnet_peer_ni_add_to_recoveryq_locked()) Skipped 2 previous similar messages
[ 7158.698091] Lustre: 22011:0:(service.c:2165:ptlrpc_server_handle_request()) @@@ Request took longer than estimated (6:69s); client may timeout.  req@ffff968afb207050 x1646941249245040/t0(0) o9-&amp;gt;93068985-9bd0-151c-638d-51557c0cba70@10.9.5.186@tcp:467/0 lens 224/192 e 0 to 0 dl 1570648107 ref 1 fl Complete:/0/0 rc 0/0
[ 7158.698315] LNet: Service thread pid 20783 completed after 76.38s. This indicates the system was overloaded (too many service threads, or there were not enough hardware resources).
[ 7158.705670] Lustre: 22011:0:(service.c:2165:ptlrpc_server_handle_request()) Skipped 1 previous similar message
[ 7171.117593] LNet: Service thread pid 24813 completed after 108.07s. This indicates the system was overloaded (too many service threads, or there were not enough hardware resources).
[ 7171.121595] LNet: Skipped 1 previous similar message
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;</description>
                <environment>failover testing</environment>
        <key id="57154">LU-12860</key>
            <summary>recovery-small test 55 fails with &apos;dd should be finished!&apos;</summary>
                <type id="1" iconUrl="https://jira.whamcloud.com/secure/viewavatar?size=xsmall&amp;avatarId=11303&amp;avatarType=issuetype">Bug</type>
                                            <priority id="4" iconUrl="https://jira.whamcloud.com/images/icons/priorities/minor.svg">Minor</priority>
                        <status id="1" iconUrl="https://jira.whamcloud.com/images/icons/statuses/open.png" description="The issue is open and ready for the assignee to start work on it.">Open</status>
                    <statusCategory id="2" key="new" colorName="default"/>
                                    <resolution id="-1">Unresolved</resolution>
                                        <assignee username="wc-triage">WC Triage</assignee>
                                    <reporter username="jamesanunez">James Nunez</reporter>
                        <labels>
                            <label>failover</label>
                    </labels>
                <created>Tue, 15 Oct 2019 03:23:27 +0000</created>
                <updated>Tue, 15 Oct 2019 14:49:37 +0000</updated>
                                            <version>Lustre 2.12.3</version>
                                                        <due></due>
                            <votes>0</votes>
                                    <watches>3</watches>
                                                                            <comments>
                            <comment id="256394" author="adilger" created="Tue, 15 Oct 2019 04:05:53 +0000"  >&lt;p&gt;It looks like the stuck threads eventually make progress:&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;[ 7125.207282] LNet: Service thread pid 24813 was inactive for 62.16s. The thread might be hung, or it might only be slow and will resume later. Dumping the stack trace for debugging purposes:
[ 7126.231240] LNet: Service thread pid 24810 was inactive for 62.20s. The thread might be hung, or it might only be slow and will resume later. Dumping the stack trace for debugging purposes:
[ 7171.117593] LNet: Service thread pid 24813 completed after 108.07s. This indicates the system was overloaded (too many service threads, or there were not enough hardware resources).
[ 7126.231240] LNet: Service thread pid 24810 was inactive for 62.20s. The thread might be hung, or it might only be slow and will resume later. Dumping the stack trace for debugging purposes:
[ 7126.234161] LNet: Skipped 2 previous similar messages
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;
&lt;p&gt;so it looks like this is not a permanent problem, just some kind of temporary slowdown?&lt;/p&gt;

&lt;p&gt;At this point it doesn&apos;t look like a fatal problem.&lt;/p&gt;</comment>
                    </comments>
                    <attachments>
                    </attachments>
                <subtasks>
                    </subtasks>
                <customfields>
                                                                                                                                                                                            <customfield id="customfield_10890" key="com.atlassian.jira.plugins.jira-development-integration-plugin:devsummary">
                        <customfieldname>Development</customfieldname>
                        <customfieldvalues>
                            
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        <customfield id="customfield_10390" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                        <customfieldname>Rank</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>1|i00o1b:</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10090" key="com.pyxis.greenhopper.jira:gh-global-rank">
                        <customfieldname>Rank (Obsolete)</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>9223372036854775807</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                                                                                            <customfield id="customfield_10060" key="com.atlassian.jira.plugin.system.customfieldtypes:select">
                        <customfieldname>Severity</customfieldname>
                        <customfieldvalues>
                                <customfieldvalue key="10022"><![CDATA[3]]></customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        </customfields>
    </item>
</channel>
</rss>