<!-- 
RSS generated by JIRA (9.4.14#940014-sha1:734e6822bbf0d45eff9af51f82432957f73aa32c) at Sat Feb 10 03:02:09 UTC 2024

It is possible to restrict the fields that are returned in this document by specifying the 'field' parameter in your request.
For example, to request only the issue key and summary append 'field=key&field=summary' to the URL of your request.
-->
<rss version="0.92" >
<channel>
    <title>Whamcloud Community JIRA</title>
    <link>https://jira.whamcloud.com</link>
    <description>This file is an XML representation of an issue</description>
    <language>en-us</language>    <build-info>
        <version>9.4.14</version>
        <build-number>940014</build-number>
        <build-date>05-12-2023</build-date>
    </build-info>


<item>
            <title>[LU-13540] MDS &quot;lock on destroyed export&quot;, &quot;lock timed out&quot;, client hangs</title>
                <link>https://jira.whamcloud.com/browse/LU-13540</link>
                <project id="10000" key="LU">Lustre</project>
                    <description>&lt;p&gt;We&apos;ve been struggling for several weeks with recurring issues where &lt;em&gt;at a high turnover rates of files&lt;/em&gt; I get MDS hangs like the following:&lt;/p&gt;

&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;May  8 08:36:16 mds12 kernel: LustreError: 161992:0:(ldlm_lockd.c:256:expired_lock_main()) ### lock callback timer expired after 100s: evicting client at 172.17.2.4@o2ib  ns: mdt-carbofs2-MDT0001_UUID lock: ffff90d827671b00/0xf489323b9f7cfa63 lrc: 3/0,0 mode: PR/PR res: [0x24000b480:0xc5b6:0x0].0x0 bits 0x40/0x0 rrc: 229 type: IBT flags: 0x60200400000020 nid: 172.17.2.4@o2ib remote: 0x819f729a7001b5e5 expref: 74 pid: 162427 timeout: 1597115 lvb_type: 0
May  8 08:36:16 mds12 kernel: LustreError: 162425:0:(ldlm_lockd.c:1348:ldlm_handle_enqueue0()) ### lock on destroyed export ffff90d7f7331c00 ns: mdt-carbofs2-MDT0001_UUID lock: ffff90d827673cc0/0xf489323b9f7cfdf8 lrc: 3/0,0 mode: PW/PW res: [0x24000b480:0xc5b6:0x0].0x0 bits 0x40/0x0 rrc: 207 type: IBT flags: 0x50200400000020 nid: 172.17.2.2@o2ib remote: 0x3006f447d61f79eb expref: 26 pid: 162425 timeout: 0 lvb_type: 0
May  8 08:36:16 mds12 kernel: LustreError: 162425:0:(ldlm_lockd.c:1348:ldlm_handle_enqueue0()) Skipped 195 previous similar messages
May  8 08:36:16 mds12 kernel: Lustre: carbofs2-MDT0001: Connection restored to ab1e3886-7712-33ee-ea50-ce69a69a491d (at 172.17.2.4@o2ib)
May  8 08:36:16 mds12 kernel: Lustre: Skipped 454 previous similar messages
May  8 08:38:04 mds12 kernel: LustreError: 161992:0:(ldlm_lockd.c:256:expired_lock_main()) ### lock callback timer expired after 99s: evicting client at 172.17.1.221@o2ib  ns: mdt-carbofs2-MDT0001_UUID lock: ffff90d7f6fbee40/0xf489323b9f82b37e lrc: 3/0,0 mode: PR/PR res: [0x24000b480:0xc5d4:0x0].0x0 bits 0x40/0x0 rrc: 211 type: IBT flags: 0x60200400000020 nid: 172.17.1.221@o2ib remote: 0x85f23451b667ea0d expref: 138 pid: 162424 timeout: 1597223 lvb_type: 0
May  8 08:38:04 mds12 kernel: LustreError: 161992:0:(ldlm_lockd.c:256:expired_lock_main()) Skipped 8 previous similar messages
May  8 08:38:54 mds12 kernel: LustreError: 161992:0:(ldlm_lockd.c:256:expired_lock_main()) ### lock callback timer expired after 149s: evicting client at 172.17.2.5@o2ib  ns: mdt-carbofs2-MDT0001_UUID lock: ffff90d61bebe9c0/0xf489323b9f82b53e lrc: 3/0,0 mode: PR/PR res: [0x24000b480:0xc5ca:0x0].0x0 bits 0x40/0x0 rrc: 197 type: IBT flags: 0x60200400000020 nid: 172.17.2.5@o2ib remote: 0xe98766af8c44579e expref: 71 pid: 162423 timeout: 1597273 lvb_type: 0
May  8 08:38:54 mds12 kernel: LustreError: 162415:0:(service.c:2128:ptlrpc_server_handle_request()) @@@ Dropping timed-out request from 12345-172.17.1.221@o2ib: deadline 138:11s ago#012  req@ffff90ced5611f80 x1662838202338496/t0(0) o101-&amp;gt;454af10f-c871-de3c-e61f-fa5345d10e3a@172.17.1.221@o2ib:58/0 lens 480/0 e 0 to 0 dl 1588945123 ref 1 fl Interpret:/0/ffffffff rc 0/-1
May  8 08:38:54 mds12 kernel: LustreError: 180178:0:(ldlm_lockd.c:1348:ldlm_handle_enqueue0()) ### lock on destroyed export ffff90d819744000 ns: mdt-carbofs2-MDT0001_UUID lock: ffff90d276622ac0/0xf489323b9f82ba1c lrc: 3/0,0 mode: PR/PR res: [0x24000b480:0xc5ca:0x0].0x0 bits 0x40/0x0 rrc: 185 type: IBT flags: 0x50200400000020 nid: 172.17.2.5@o2ib remote: 0xe98766af8c4457ac expref: 15 pid: 180178 timeout: 0 lvb_type: 0
May  8 08:38:54 mds12 kernel: LustreError: 180178:0:(ldlm_lockd.c:1348:ldlm_handle_enqueue0()) Skipped 50 previous similar messages
May  8 08:38:54 mds12 kernel: Lustre: 162443:0:(service.c:2165:ptlrpc_server_handle_request()) @@@ Request took longer than estimated (138:11s); client may timeout.  req@ffff90ced5616300 x1662838202338560/t0(0) o101-&amp;gt;454af10f-c871-de3c-e61f-fa5345d10e3a@172.17.1.221@o2ib:58/0 lens 480/0 e 0 to 0 dl 1588945123 ref 1 fl Interpret:/0/ffffffff rc 0/-1
May  8 08:38:54 mds12 kernel: Lustre: 162443:0:(service.c:2165:ptlrpc_server_handle_request()) Skipped 1754 previous similar messages
May  8 08:38:54 mds12 kernel: Lustre: carbofs2-MDT0001: Connection restored to 454af10f-c871-de3c-e61f-fa5345d10e3a (at 172.17.1.221@o2ib)
May  8 08:38:54 mds12 kernel: LustreError: 162415:0:(service.c:2128:ptlrpc_server_handle_request()) Skipped 7 previous similar messages
May  8 08:39:34 mds12 kernel: Lustre: carbofs2-MDT0001: Client 454af10f-c871-de3c-e61f-fa5345d10e3a (at 172.17.1.221@o2ib) reconnecting
May  8 08:39:34 mds12 kernel: Lustre: Skipped 8 previous similar messages
May  8 08:39:34 mds12 kernel: Lustre: carbofs2-MDT0001: Connection restored to 454af10f-c871-de3c-e61f-fa5345d10e3a (at 172.17.1.221@o2ib)
May  8 08:39:34 mds12 kernel: Lustre: Skipped 4 previous similar messages
May  8 08:39:58 mds12 kernel: LNet: Service thread pid 180635 was inactive for 212.81s. The thread might be hung, or it might only be slow and will resume later. Dumping the stack trace for debugging purposes:
May  8 08:39:58 mds12 kernel: LNet: Skipped 2 previous similar messages
May  8 08:39:58 mds12 kernel: Pid: 180635, comm: mdt00_041 3.10.0-1062.9.1.el7_lustre.x86_64 #1 SMP Tue Feb 11 19:11:56 UTC 2020
May  8 08:39:58 mds12 kernel: Call Trace:
May  8 08:39:58 mds12 kernel: [&amp;lt;ffffffffc135ac90&amp;gt;] ldlm_completion_ast+0x430/0x860 [ptlrpc]
May  8 08:39:58 mds12 kernel: [&amp;lt;ffffffffc135b7b1&amp;gt;] ldlm_cli_enqueue_local+0x231/0x830 [ptlrpc]
May  8 08:39:58 mds12 kernel: [&amp;lt;ffffffffc17ad5cb&amp;gt;] mdt_object_local_lock+0x50b/0xb20 [mdt]
May  8 08:39:58 mds12 kernel: [&amp;lt;ffffffffc17adc50&amp;gt;] mdt_object_lock_internal+0x70/0x360 [mdt]
May  8 08:39:58 mds12 kernel: [&amp;lt;ffffffffc17adf60&amp;gt;] mdt_object_lock+0x20/0x30 [mdt]
May  8 08:39:58 mds12 kernel: [&amp;lt;ffffffffc17ed0eb&amp;gt;] mdt_brw_enqueue+0x44b/0x760 [mdt]
May  8 08:39:58 mds12 kernel: [&amp;lt;ffffffffc179b4bf&amp;gt;] mdt_intent_brw+0x1f/0x30 [mdt]
May  8 08:39:58 mds12 kernel: [&amp;lt;ffffffffc17b3d35&amp;gt;] mdt_intent_policy+0x435/0xd80 [mdt]
May  8 08:39:58 mds12 kernel: [&amp;lt;ffffffffc1341e06&amp;gt;] ldlm_lock_enqueue+0x356/0xa20 [ptlrpc]
May  8 08:39:58 mds12 kernel: [&amp;lt;ffffffffc136a516&amp;gt;] ldlm_handle_enqueue0+0xa56/0x15f0 [ptlrpc]
May  8 08:39:58 mds12 kernel: [&amp;lt;ffffffffc13f2bb2&amp;gt;] tgt_enqueue+0x62/0x210 [ptlrpc]
May  8 08:39:58 mds12 kernel: [&amp;lt;ffffffffc13f99ca&amp;gt;] tgt_request_handle+0xada/0x1570 [ptlrpc]
May  8 08:39:58 mds12 kernel: [&amp;lt;ffffffffc139e47b&amp;gt;] ptlrpc_server_handle_request+0x24b/0xab0 [ptlrpc]
May  8 08:39:58 mds12 kernel: [&amp;lt;ffffffffc13a1de4&amp;gt;] ptlrpc_main+0xb34/0x1470 [ptlrpc]
May  8 08:39:58 mds12 kernel: [&amp;lt;ffffffff928c61f1&amp;gt;] kthread+0xd1/0xe0
May  8 08:39:58 mds12 kernel: [&amp;lt;ffffffff92f8dd37&amp;gt;] ret_from_fork_nospec_end+0x0/0x39
May  8 08:39:58 mds12 kernel: [&amp;lt;ffffffffffffffff&amp;gt;] 0xffffffffffffffff
May  8 08:39:58 mds12 kernel: LustreError: dumping log to /tmp/lustre-log.1588945198.180635
May  8 08:39:59 mds12 kernel: Pid: 180647, comm: mdt00_053 3.10.0-1062.9.1.el7_lustre.x86_64 #1 SMP Tue Feb 11 19:11:56 UTC 2020
May  8 08:39:59 mds12 kernel: Call Trace:
May  8 08:39:59 mds12 kernel: [&amp;lt;ffffffffc135ac90&amp;gt;] ldlm_completion_ast+0x430/0x860 [ptlrpc]
May  8 08:39:59 mds12 kernel: [&amp;lt;ffffffffc135b7b1&amp;gt;] ldlm_cli_enqueue_local+0x231/0x830 [ptlrpc]
May  8 08:39:59 mds12 kernel: [&amp;lt;ffffffffc17ad5cb&amp;gt;] mdt_object_local_lock+0x50b/0xb20 [mdt]
May  8 08:39:59 mds12 kernel: [&amp;lt;ffffffffc17adc50&amp;gt;] mdt_object_lock_internal+0x70/0x360 [mdt]
May  8 08:39:59 mds12 kernel: [&amp;lt;ffffffffc17adf60&amp;gt;] mdt_object_lock+0x20/0x30 [mdt]
May  8 08:39:59 mds12 kernel: [&amp;lt;ffffffffc17ed0eb&amp;gt;] mdt_brw_enqueue+0x44b/0x760 [mdt]
May  8 08:39:59 mds12 kernel: [&amp;lt;ffffffffc179b4bf&amp;gt;] mdt_intent_brw+0x1f/0x30 [mdt]
May  8 08:39:59 mds12 kernel: [&amp;lt;ffffffffc17b3d35&amp;gt;] mdt_intent_policy+0x435/0xd80 [mdt]
May  8 08:39:59 mds12 kernel: [&amp;lt;ffffffffc1341e06&amp;gt;] ldlm_lock_enqueue+0x356/0xa20 [ptlrpc]
May  8 08:39:59 mds12 kernel: [&amp;lt;ffffffffc136a516&amp;gt;] ldlm_handle_enqueue0+0xa56/0x15f0 [ptlrpc]
May  8 08:39:59 mds12 kernel: [&amp;lt;ffffffffc13f2bb2&amp;gt;] tgt_enqueue+0x62/0x210 [ptlrpc]
May  8 08:39:59 mds12 kernel: [&amp;lt;ffffffffc13f99ca&amp;gt;] tgt_request_handle+0xada/0x1570 [ptlrpc]
May  8 08:39:59 mds12 kernel: [&amp;lt;ffffffffc139e47b&amp;gt;] ptlrpc_server_handle_request+0x24b/0xab0 [ptlrpc]
May  8 08:39:59 mds12 kernel: [&amp;lt;ffffffffc13a1de4&amp;gt;] ptlrpc_main+0xb34/0x1470 [ptlrpc]
May  8 08:39:59 mds12 kernel: [&amp;lt;ffffffff928c61f1&amp;gt;] kthread+0xd1/0xe0
May  8 08:39:59 mds12 kernel: [&amp;lt;ffffffff92f8dd37&amp;gt;] ret_from_fork_nospec_end+0x0/0x39
May  8 08:39:59 mds12 kernel: [&amp;lt;ffffffffffffffff&amp;gt;] 0xffffffffffffffff
May  8 08:39:59 mds12 kernel: Pid: 180247, comm: mdt00_035 3.10.0-1062.9.1.el7_lustre.x86_64 #1 SMP Tue Feb 11 19:11:56 UTC 2020
May  8 08:39:59 mds12 kernel: Call Trace:
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;It&apos;s been frustratingly difficult to pinpoint a single cause and reproduce those hangs. The stacktrace led me to the following possibly related issues:&lt;/p&gt;

&lt;ul&gt;
	&lt;li&gt;&lt;a href=&quot;https://jira.whamcloud.com/browse/LU-12354&quot; title=&quot;MDT threads stuck at ldlm_completion_ast&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-12354&quot;&gt;&lt;del&gt;LU-12354&lt;/del&gt;&lt;/a&gt;&lt;/li&gt;
	&lt;li&gt;&lt;a href=&quot;https://jira.whamcloud.com/browse/LU-11359&quot; title=&quot;racer test 1 times out with client hung in dir_create.sh, ls, &#8230; and MDS in ldlm_completion_ast()&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-11359&quot;&gt;&lt;del&gt;LU-11359&lt;/del&gt;&lt;/a&gt;&lt;/li&gt;
	&lt;li&gt;&lt;a href=&quot;https://jira.whamcloud.com/browse/LU-12935&quot; title=&quot;MDT deadlock on 2.12.3 with DoM; is it missing async_discard feature?&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-12935&quot;&gt;&lt;del&gt;LU-12935&lt;/del&gt;&lt;/a&gt;&lt;/li&gt;
	&lt;li&gt;&lt;a href=&quot;https://jira.whamcloud.com/browse/LU-12037&quot; title=&quot;Possible DNE issue leading to hung filesystem&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-12037&quot;&gt;&lt;del&gt;LU-12037&lt;/del&gt;&lt;/a&gt;&lt;/li&gt;
	&lt;li&gt;&lt;a href=&quot;https://jira.whamcloud.com/browse/LU-13331&quot; title=&quot;LNet: Service thread pid 23940 was inactive for 756.26s; slow/unusable filesystem&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-13331&quot;&gt;LU-13331&lt;/a&gt;&lt;/li&gt;
&lt;/ul&gt;


&lt;p&gt;Thus, I&apos;ve tried:&lt;/p&gt;
&lt;ul&gt;
	&lt;li&gt;Increasing or decreasing &lt;tt&gt;mds.MDS.mdt.threads_max&lt;/tt&gt;.
	&lt;ul&gt;
		&lt;li&gt;Default was 120. I (perhaps unwisely) increased to 240, then set back to 120. Current value is 80.&lt;/li&gt;
	&lt;/ul&gt;
	&lt;/li&gt;
	&lt;li&gt;Setting &lt;tt&gt;enable_remote_rename&lt;/tt&gt; to 0&lt;/li&gt;
	&lt;li&gt;Disabling DoM for new files.
	&lt;ul&gt;
		&lt;li&gt;DoM was originally active, at max 64KB on MDT.&lt;/li&gt;
		&lt;li&gt;I recently disabled it by &lt;tt&gt;lfs setstripe&lt;/tt&gt; on directories.&lt;/li&gt;
	&lt;/ul&gt;
	&lt;/li&gt;
&lt;/ul&gt;



&lt;p&gt;There are &lt;em&gt;no&lt;/em&gt; lustre-log dumps on the first MDS &lt;tt&gt;mds11&lt;/tt&gt;, and the following on the second MDS &lt;tt&gt;mds12&lt;/tt&gt;.&lt;/p&gt;

&lt;p&gt;Counts of entry time stamps near the crash time are:&lt;/p&gt;

&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;1588930000 = 2020-05-08 04:26:40 CDT -0500
1588940000 = 2020-05-08 07:13:20 CDT -0500
1588950000 = 2020-05-08 10:00:00 CDT -0500
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;# grep -E -c &apos;:15889[345]....\.&apos; /tmp/lustre-log.15889*[0-9].dump
/tmp/lustre-log.1588945198.180635.dump:168133
/tmp/lustre-log.1588945199.180195.dump:131
/tmp/lustre-log.1588945459.92545.dump:2283
/tmp/lustre-log.1588945461.162415.dump:31
/tmp/lustre-log.1588948374.168310.dump:293947
/tmp/lustre-log.1588948526.162419.dump:218
/tmp/lustre-log.1588949132.162453.dump:3906
/tmp/lustre-log.1588949280.92548.dump:7764
/tmp/lustre-log.1588949296.180233.dump:51
/tmp/lustre-log.1588949447.180235.dump:396
/tmp/lustre-log.1588949599.180642.dump:365
/tmp/lustre-log.1588949734.180631.dump:395
/tmp/lustre-log.1588949886.180651.dump:339
/tmp/lustre-log.1588950037.24710.dump:662
/tmp/lustre-log.1588950488.180208.dump:6239
/tmp/lustre-log.1588950643.162456.dump:4
/tmp/lustre-log.1588950795.180657.dump:243
/tmp/lustre-log.1588951549.180643.dump:1670
/tmp/lustre-log.1588952274.180185.dump:2231
/tmp/lustre-log.1588952278.168309.dump:12
/tmp/lustre-log.1588952282.180237.dump:26
/tmp/lustre-log.1588952425.162437.dump:788
/tmp/lustre-log.1588952433.24712.dump:20
/tmp/lustre-log.1588952581.180200.dump:384
/tmp/lustre-log.1588952585.162224.dump:14
/tmp/lustre-log.1588952737.180196.dump:478
/tmp/lustre-log.1588952909.180649.dump:1548
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;


&lt;p&gt;Excerpt from near the crash time, around first error:&lt;/p&gt;

&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;# grep -E &apos;:15889[345]....\.&apos; /tmp/lustre-log.15889*[0-9].dump | sort -t: -k5,7n
...
/tmp/lustre-log.1588945198.180635.dump:00000004:00080000:3.0:1588944970.333403:0:122652:0:(osp_object.c:1592:osp_create()) carbofs2-OST0005-osc-MDT0001: Wrote last used FID: [0x380000400:0xf75cd:0x0], index 5: 0
/tmp/lustre-log.1588945198.180635.dump:00000001:00080000:0.0:1588944970.334436:0:161976:0:(tgt_lastrcvd.c:902:tgt_cb_last_committed()) carbofs2-MDT0001: transno 193291912093 is committed
/tmp/lustre-log.1588945198.180635.dump:00000001:00080000:3.0:1588944970.334684:0:122652:0:(tgt_lastrcvd.c:1873:tgt_txn_stop_cb()) More than one transaction 193291912094
/tmp/lustre-log.1588945198.180635.dump:00000001:00080000:3.0:1588944970.335904:0:122652:0:(tgt_lastrcvd.c:1873:tgt_txn_stop_cb()) More than one transaction 193291912095
/tmp/lustre-log.1588945198.180635.dump:00000001:00080000:7.0:1588944970.375259:0:168303:0:(tgt_lastrcvd.c:1873:tgt_txn_stop_cb()) More than one transaction 197573715030
/tmp/lustre-log.1588945198.180635.dump:00000001:00080000:7.0:1588944971.150971:0:168303:0:(tgt_lastrcvd.c:1873:tgt_txn_stop_cb()) More than one transaction 197573715038
/tmp/lustre-log.1588945198.180635.dump:00000001:00080000:0.0:1588944971.151852:0:161974:0:(tgt_lastrcvd.c:902:tgt_cb_last_committed()) sandbox2-MDT0001: transno 197573715038 is committed
/tmp/lustre-log.1588945198.180635.dump:00000001:00080000:0.0:1588944971.151861:0:161974:0:(tgt_lastrcvd.c:902:tgt_cb_last_committed()) sandbox2-MDT0001: transno 197573715033 is committed
/tmp/lustre-log.1588945198.180635.dump:00000001:00080000:0.0:1588944971.151867:0:161974:0:(tgt_lastrcvd.c:902:tgt_cb_last_committed()) sandbox2-MDT0001: transno 197573715026 is committed
/tmp/lustre-log.1588945198.180635.dump:00000001:00080000:7.0:1588944971.966343:0:92555:0:(tgt_lastrcvd.c:1873:tgt_txn_stop_cb()) More than one transaction 193291912098
/tmp/lustre-log.1588945198.180635.dump:00000001:00080000:7.0:1588944971.968021:0:92555:0:(tgt_lastrcvd.c:1873:tgt_txn_stop_cb()) More than one transaction 193291912100
/tmp/lustre-log.1588945198.180635.dump:00000001:00080000:7.0:1588944971.969193:0:168303:0:(tgt_lastrcvd.c:1873:tgt_txn_stop_cb()) More than one transaction 193291912102
/tmp/lustre-log.1588945198.180635.dump:00000001:00080000:7.0:1588944972.404425:0:168303:0:(tgt_lastrcvd.c:1873:tgt_txn_stop_cb()) More than one transaction 197573715052
/tmp/lustre-log.1588945198.180635.dump:00000004:00080000:7.0:1588944972.955662:0:92555:0:(osp_object.c:1592:osp_create()) carbofs2-OST0008-osc-MDT0001: Wrote last used FID: [0x480000400:0xf780d:0x0], index 8: 0
/tmp/lustre-log.1588945198.180635.dump:00000001:00080000:0.0:1588944972.957507:0:92555:0:(tgt_lastrcvd.c:1873:tgt_txn_stop_cb()) More than one transaction 193291912104
/tmp/lustre-log.1588945198.180635.dump:00000001:00080000:0.0:1588944972.958735:0:92555:0:(tgt_lastrcvd.c:1873:tgt_txn_stop_cb()) More than one transaction 193291912105
/tmp/lustre-log.1588945198.180635.dump:00000001:00080000:7.0:1588944974.376516:0:168303:0:(tgt_lastrcvd.c:1873:tgt_txn_stop_cb()) More than one transaction 197573715066
/tmp/lustre-log.1588945198.180635.dump:00000001:00080000:0.0:1588944974.595200:0:92555:0:(tgt_lastrcvd.c:1873:tgt_txn_stop_cb()) More than one transaction 193291912108
/tmp/lustre-log.1588945198.180635.dump:00000001:00080000:0.0:1588944974.596092:0:92555:0:(tgt_lastrcvd.c:1873:tgt_txn_stop_cb()) More than one transaction 193291912110
/tmp/lustre-log.1588945198.180635.dump:00000001:00080000:7.0:1588944974.597258:0:168303:0:(tgt_lastrcvd.c:1873:tgt_txn_stop_cb()) More than one transaction 193291912112
/tmp/lustre-log.1588945198.180635.dump:00000001:00080000:7.0:1588944975.133486:0:168303:0:(tgt_lastrcvd.c:1873:tgt_txn_stop_cb()) More than one transaction 197573715072
/tmp/lustre-log.1588945198.180635.dump:00000001:00080000:0.0:1588944975.486761:0:161976:0:(tgt_lastrcvd.c:902:tgt_cb_last_committed()) carbofs2-MDT0001: transno 193291912113 is committed
/tmp/lustre-log.1588945198.180635.dump:00000004:00080000:0.0:1588944975.565523:0:92555:0:(osp_object.c:1592:osp_create()) carbofs2-OST0002-osc-MDT0001: Wrote last used FID: [0x340000400:0xf75cd:0x0], index 2: 0
/tmp/lustre-log.1588945198.180635.dump:00000001:00080000:1.0:1588944975.567362:0:92555:0:(tgt_lastrcvd.c:1873:tgt_txn_stop_cb()) More than one transaction 193291912115
/tmp/lustre-log.1588945198.180635.dump:00000001:00080000:1.0:1588944975.568690:0:92555:0:(tgt_lastrcvd.c:1873:tgt_txn_stop_cb()) More than one transaction 193291912116
/tmp/lustre-log.1588945198.180635.dump:00000001:00080000:0.0:1588944976.478896:0:161974:0:(tgt_lastrcvd.c:902:tgt_cb_last_committed()) sandbox2-MDT0001: transno 197573715077 is committed
/tmp/lustre-log.1588945198.180635.dump:00000001:00080000:0.0:1588944976.478911:0:161974:0:(tgt_lastrcvd.c:902:tgt_cb_last_committed()) sandbox2-MDT0001: transno 197573715064 is committed
/tmp/lustre-log.1588945198.180635.dump:00000001:00080000:0.0:1588944976.478915:0:161974:0:(tgt_lastrcvd.c:902:tgt_cb_last_committed()) sandbox2-MDT0001: transno 197573715062 is committed
/tmp/lustre-log.1588945198.180635.dump:00000001:00080000:0.0:1588944976.478920:0:161974:0:(tgt_lastrcvd.c:902:tgt_cb_last_committed()) sandbox2-MDT0001: transno 197573715057 is committed
/tmp/lustre-log.1588945198.180635.dump:00000001:00080000:0.0:1588944976.478922:0:161974:0:(tgt_lastrcvd.c:902:tgt_cb_last_committed()) sandbox2-MDT0001: transno 197573715055 is committed
/tmp/lustre-log.1588945198.180635.dump:00000001:00080000:0.0:1588944976.478933:0:161974:0:(tgt_lastrcvd.c:902:tgt_cb_last_committed()) sandbox2-MDT0001: transno 197573715042 is committed
/tmp/lustre-log.1588945198.180635.dump:00000001:00080000:7.0:1588944976.494952:0:168303:0:(tgt_lastrcvd.c:1873:tgt_txn_stop_cb()) More than one transaction 197573715078
/tmp/lustre-log.1588945198.180635.dump:00010000:00020000:8.0:1588944976.790931:0:161992:0:(ldlm_lockd.c:256:expired_lock_main()) ### lock callback timer expired after 100s: evicting client at 172.17.2.4@o2ib  ns: mdt-carbofs2-MDT0001_UUID lock: ffff90d827671b00/0xf489323b9f7cfa63 lr
c: 3/0,0 mode: PR/PR res: [0x24000b480:0xc5b6:0x0].0x0 bits 0x40/0x0 rrc: 229 type: IBT flags: 0x60200400000020 nid: 172.17.2.4@o2ib remote: 0x819f729a7001b5e5 expref: 74 pid: 162427 timeout: 1597115 lvb_type: 0
/tmp/lustre-log.1588945198.180635.dump:00000020:00080000:8.0:1588944976.795299:0:161992:0:(genops.c:1662:class_fail_export()) disconnecting export ffff90d80e74e400/89c3daaf-89ab-87e8-0414-0b4c5a62ec8e
/tmp/lustre-log.1588945198.180635.dump:00000020:00000080:8.0:1588944976.795310:0:161992:0:(genops.c:1501:class_disconnect()) disconnect: cookie 0xf489323b396e171f
/tmp/lustre-log.1588945198.180635.dump:00000020:00080000:8.0:1588944976.795456:0:161992:0:(genops.c:1679:class_fail_export()) disconnected export ffff90d80e74e400/89c3daaf-89ab-87e8-0414-0b4c5a62ec8e
/tmp/lustre-log.1588945198.180635.dump:00010000:00020000:8.0:1588944976.795466:0:161992:0:(ldlm_lockd.c:256:expired_lock_main()) ### lock callback timer expired after 100s: evicting client at 172.17.2.5@o2ib  ns: mdt-carbofs2-MDT0001_UUID lock: ffff90d5dfcf9d40/0xf489323b9f7cfb35 lrc: 3/0,0 mode: PR/PR res: [0x24000b480:0xc5b6:0x0].0x0 bits 0x40/0x0 rrc: 225 type: IBT flags: 0x60200400000020 nid: 172.17.2.5@o2ib remote: 0xe98766af8c442ce1 expref: 396 pid: 180230 timeout: 1597115 lvb_type: 0
/tmp/lustre-log.1588945198.180635.dump:00000020:00080000:8.0:1588944976.795489:0:161992:0:(genops.c:1662:class_fail_export()) disconnecting export ffff90d7f7330c00/7685d096-da00-7900-01cd-d51973277575
/tmp/lustre-log.1588945198.180635.dump:00000020:00000080:8.0:1588944976.795492:0:161992:0:(genops.c:1501:class_disconnect()) disconnect: cookie 0xf489323b396e1589
/tmp/lustre-log.1588945198.180635.dump:00000020:00080000:8.0:1588944976.795644:0:161992:0:(genops.c:1679:class_fail_export()) disconnected export ffff90d7f7330c00/7685d096-da00-7900-01cd-d51973277575
/tmp/lustre-log.1588945198.180635.dump:00010000:00020000:8.0:1588944976.795652:0:161992:0:(ldlm_lockd.c:256:expired_lock_main()) ### lock callback timer expired after 100s: evicting client at 172.17.1.227@o2ib  ns: mdt-carbofs2-MDT0001_UUID lock: ffff90cfe0a10fc0/0xf489323b9f7cfb58 lrc: 3/0,0 mode: PR/PR res: [0x24000b480:0xc5b6:0x0].0x0 bits 0x40/0x0 rrc: 220 type: IBT flags: 0x60200400000020 nid: 172.17.1.227@o2ib remote: 0xf0c3059052dfd0f8 expref: 423 pid: 162415 timeout: 1597115 lvb_type: 0
/tmp/lustre-log.1588945198.180635.dump:00000020:00080000:8.0:1588944976.795663:0:161992:0:(genops.c:1662:class_fail_export()) disconnecting export ffff90d3dd5f4400/e294594d-e357-7d83-07c6-94cee77191e2
/tmp/lustre-log.1588945198.180635.dump:00000020:00000080:8.0:1588944976.795666:0:161992:0:(genops.c:1501:class_disconnect()) disconnect: cookie 0xf489323b396e144e
/tmp/lustre-log.1588945198.180635.dump:00000020:00080000:8.0:1588944976.795715:0:161992:0:(genops.c:1679:class_fail_export()) disconnected export ffff90d3dd5f4400/e294594d-e357-7d83-07c6-94cee77191e2
/tmp/lustre-log.1588945198.180635.dump:00010000:00020000:8.0:1588944976.795722:0:161992:0:(ldlm_lockd.c:256:expired_lock_main()) ### lock callback timer expired after 100s: evicting client at 172.17.1.227@o2ib  ns: mdt-carbofs2-MDT0001_UUID lock: ffff90ce9bb90000/0xf489323b9f7cfb66 lrc: 3/0,0 mode: PR/PR res: [0x24000b480:0xc5b6:0x0].0x0 bits 0x40/0x0 rrc: 220 type: IBT flags: 0x60200400000020 nid: 172.17.1.227@o2ib remote: 0xf0c3059052dfd0f1 expref: 420 pid: 162417 timeout: 1597115 lvb_type: 0
/tmp/lustre-log.1588945198.180635.dump:00000020:00080000:8.0:1588944976.795732:0:161992:0:(genops.c:1657:class_fail_export()) disconnecting dead export ffff90d3dd5f4400/e294594d-e357-7d83-07c6-94cee77191e2; skipping
/tmp/lustre-log.1588945198.180635.dump:00010000:00020000:8.0:1588944976.795738:0:161992:0:(ldlm_lockd.c:256:expired_lock_main()) ### lock callback timer expired after 100s: evicting client at 172.17.1.227@o2ib  ns: mdt-carbofs2-MDT0001_UUID lock: ffff90cd4ae01b00/0xf489323b9f7cfba5 lrc: 3/0,0 mode: PR/PR res: [0x24000b480:0xc5b6:0x0].0x0 bits 0x40/0x0 rrc: 220 type: IBT flags: 0x60200400000020 nid: 172.17.1.227@o2ib remote: 0xf0c3059052dfd0ff expref: 420 pid: 162220 timeout: 1597115 lvb_type: 0
/tmp/lustre-log.1588945198.180635.dump:00000020:00080000:8.0:1588944976.795748:0:161992:0:(genops.c:1657:class_fail_export()) disconnecting dead export ffff90d3dd5f4400/e294594d-e357-7d83-07c6-94cee77191e2; skipping
/tmp/lustre-log.1588945198.180635.dump:00010000:00020000:8.0:1588944976.795754:0:161992:0:(ldlm_lockd.c:256:expired_lock_main()) ### lock callback timer expired after 100s: evicting client at 172.17.2.2@o2ib  ns: mdt-carbofs2-MDT0001_UUID lock: ffff90d5dfcfec00/0xf489323b9f7cfc70 lrc: 3/0,0 mode: PR/PR res: [0x24000b480:0xc5b6:0x0].0x0 bits 0x40/0x0 rrc: 220 type: IBT flags: 0x60200400000020 nid: 172.17.2.2@o2ib remote: 0x3006f447d61f79e4 expref: 78 pid: 180225 timeout: 1597115 lvb_type: 0
/tmp/lustre-log.1588945198.180635.dump:00000020:00080000:8.0:1588944976.795765:0:161992:0:(genops.c:1662:class_fail_export()) disconnecting export ffff90d7f7331c00/644262f6-0235-7572-f194-d4689a69670b
/tmp/lustre-log.1588945198.180635.dump:00000020:00000080:8.0:1588944976.795767:0:161992:0:(genops.c:1501:class_disconnect()) disconnect: cookie 0xf489323b396e15cf
/tmp/lustre-log.1588945198.180635.dump:00000020:00080000:8.0:1588944976.795814:0:161992:0:(genops.c:1679:class_fail_export()) disconnected export ffff90d7f7331c00/644262f6-0235-7572-f194-d4689a69670b
/tmp/lustre-log.1588945198.180635.dump:00010000:00020000:8.0:1588944976.795822:0:161992:0:(ldlm_lockd.c:256:expired_lock_main()) ### lock callback timer expired after 100s: evicting client at 172.17.2.10@o2ib  ns: mdt-carbofs2-MDT0001_UUID lock: ffff90d5cee0ba80/0xf489323b9f7cf77d lrc: 3/0,0 mode: PR/PR res: [0x24000b480:0xc5c0:0x0].0x0 bits 0x40/0x0 rrc: 204 type: IBT flags: 0x60200400000020 nid: 172.17.2.10@o2ib remote: 0x70316307a2048b75 expref: 66 pid: 180231 timeout: 1597115 lvb_type: 0
/tmp/lustre-log.1588945198.180635.dump:00000020:00080000:8.0:1588944976.795861:0:161992:0:(genops.c:1662:class_fail_export()) disconnecting export ffff90d80cbae400/629b4428-94ac-83ba-f61c-69aa684bee44
/tmp/lustre-log.1588945198.180635.dump:00000020:00000080:8.0:1588944976.795865:0:161992:0:(genops.c:1501:class_disconnect()) disconnect: cookie 0xf489323b396e1703
/tmp/lustre-log.1588945198.180635.dump:00000020:00080000:8.0:1588944976.795920:0:161992:0:(genops.c:1679:class_fail_export()) disconnected export ffff90d80cbae400/629b4428-94ac-83ba-f61c-69aa684bee44
/tmp/lustre-log.1588945198.180635.dump:00010000:00020000:8.0:1588944976.795926:0:161992:0:(ldlm_lockd.c:256:expired_lock_main()) ### lock callback timer expired after 100s: evicting client at 172.17.2.10@o2ib  ns: mdt-carbofs2-MDT0001_UUID lock: ffff90d51f3cb180/0xf489323b9f7cf78b lrc: 3/0,0 mode: PR/PR res: [0x24000b480:0xc5c0:0x0].0x0 bits 0x40/0x0 rrc: 204 type: IBT flags: 0x60200400000020 nid: 172.17.2.10@o2ib remote: 0x70316307a2048b7c expref: 63 pid: 162423 timeout: 1597115 lvb_type: 0
/tmp/lustre-log.1588945198.180635.dump:00000020:00080000:8.0:1588944976.795936:0:161992:0:(genops.c:1657:class_fail_export()) disconnecting dead export ffff90d80cbae400/629b4428-94ac-83ba-f61c-69aa684bee44; skipping
/tmp/lustre-log.1588945198.180635.dump:00010000:00020000:8.0:1588944976.795943:0:161992:0:(ldlm_lockd.c:256:expired_lock_main()) ### lock callback timer expired after 100s: evicting client at 172.17.1.224@o2ib  ns: mdt-carbofs2-MDT0001_UUID lock: ffff90d36f8bad00/0xf489323b9f7cf784 lrc: 3/0,0 mode: PR/PR res: [0x24000b480:0xc5c0:0x0].0x0 bits 0x40/0x0 rrc: 204 type: IBT flags: 0x60200400000020 nid: 172.17.1.224@o2ib remote: 0xed5de96130f26ae expref: 108 pid: 180199 timeout: 1597115 lvb_type: 0
/tmp/lustre-log.1588945198.180635.dump:00000020:00080000:8.0:1588944976.795952:0:161992:0:(genops.c:1662:class_fail_export()) disconnecting export ffff90d7f7334000/30180198-3c05-91ca-6f14-1fc4bc7a164e
...
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;</description>
                <environment>OS: CentOS 7.7.1908&lt;br/&gt;
Kernel: 3.10.0-1062.9.1.el7_lustre.x86_64&lt;br/&gt;
lustre version: lustre-2.12.4-1.el7.src.rpm -&amp;#xA0;server and clients&lt;br/&gt;
heterogeneous IB network: servers on IB-EDR, clients on IB-{DDR,QDR,FDR40,FDR56,EDR}&lt;br/&gt;
&lt;br/&gt;
*&amp;#xA0;2 MDS (as 1 HA pair), 6 OSS (as 3 HA pairs), serving 2 Lustre file systems: carbofs2, sandbox2.&lt;br/&gt;
* approx. 220 clients</environment>
        <key id="59122">LU-13540</key>
            <summary>MDS &quot;lock on destroyed export&quot;, &quot;lock timed out&quot;, client hangs</summary>
                <type id="1" iconUrl="https://jira.whamcloud.com/secure/viewavatar?size=xsmall&amp;avatarId=11303&amp;avatarType=issuetype">Bug</type>
                                            <priority id="3" iconUrl="https://jira.whamcloud.com/images/icons/priorities/major.svg">Major</priority>
                        <status id="5" iconUrl="https://jira.whamcloud.com/images/icons/statuses/resolved.png" description="A resolution has been taken, and it is awaiting verification by reporter. From here issues are either reopened, or are closed.">Resolved</status>
                    <statusCategory id="3" key="done" colorName="success"/>
                                    <resolution id="3">Duplicate</resolution>
                                        <assignee username="wc-triage">WC Triage</assignee>
                                    <reporter username="stern">Michael Sternberg</reporter>
                        <labels>
                    </labels>
                <created>Fri, 8 May 2020 21:40:19 +0000</created>
                <updated>Wed, 2 Feb 2022 07:28:42 +0000</updated>
                            <resolved>Wed, 2 Feb 2022 07:28:42 +0000</resolved>
                                    <version>Lustre 2.12.4</version>
                                                        <due></due>
                            <votes>1</votes>
                                    <watches>5</watches>
                                                                            <comments>
                            <comment id="269726" author="stern" created="Fri, 8 May 2020 21:46:52 +0000"  >&lt;p&gt;PS: Similar to several of the LU issues referenced, stopping and restarting the MDT mounts will usually &quot;unwedge&quot; the file system.&lt;/p&gt;</comment>
                            <comment id="324852" author="green" created="Wed, 2 Feb 2022 07:28:42 +0000"  >&lt;p&gt;this is a dup of &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-14522&quot; title=&quot;missing ldlm lock processing causes timeouts in racer&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-14522&quot;&gt;&lt;del&gt;LU-14522&lt;/del&gt;&lt;/a&gt;&lt;/p&gt;</comment>
                    </comments>
                    <attachments>
                            <attachment id="34876" name="lustre-log.1588945198.180635.dump" size="37467714" author="stern" created="Fri, 8 May 2020 21:52:01 +0000"/>
                    </attachments>
                <subtasks>
                    </subtasks>
                <customfields>
                                                                                                                                                                                            <customfield id="customfield_10890" key="com.atlassian.jira.plugins.jira-development-integration-plugin:devsummary">
                        <customfieldname>Development</customfieldname>
                        <customfieldvalues>
                            
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                    <customfield id="customfield_10030" key="com.atlassian.jira.plugin.system.customfieldtypes:labels">
                        <customfieldname>Epic/Theme</customfieldname>
                        <customfieldvalues>
                                        <label>Lustre-2.12</label>
    
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                        <customfield id="customfield_10390" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                        <customfieldname>Rank</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>1|i00zzb:</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10090" key="com.pyxis.greenhopper.jira:gh-global-rank">
                        <customfieldname>Rank (Obsolete)</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>9223372036854775807</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                                                                                            <customfield id="customfield_10060" key="com.atlassian.jira.plugin.system.customfieldtypes:select">
                        <customfieldname>Severity</customfieldname>
                        <customfieldvalues>
                                <customfieldvalue key="10022"><![CDATA[3]]></customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        </customfields>
    </item>
</channel>
</rss>