<?xml version="1.0" encoding="UTF-8"?>
<!-- 
RSS generated by JIRA (9.4.14#940014-sha1:734e6822bbf0d45eff9af51f82432957f73aa32c) at Sat Feb 10 01:56:28 UTC 2024

It is possible to restrict the fields that are returned in this document by specifying the 'field' parameter in your request.
For example, to request only the issue key and summary append 'field=key&field=summary' to the URL of your request.
-->
<rss version="0.92">
<channel>
    <title>Whamcloud Community JIRA</title>
    <link>https://jira.whamcloud.com</link>
    <description>This file is an XML representation of an issue</description>
    <language>en-us</language>
    <build-info>
        <version>9.4.14</version>
        <build-number>940014</build-number>
        <build-date>05-12-2023</build-date>
    </build-info>


<item>
            <title>[LU-6015] mds evicting clients - lock timed out?</title>
                <link>https://jira.whamcloud.com/browse/LU-6015</link>
                <project id="10000" key="LU">Lustre</project>
                    <description>&lt;p&gt;We have seen this issue twice in the last week as the load on one of our Lustre filesystems has increased. Long story short - we have 4 Lustre clients exporting the filesystem over NFS (round-robin). The MDS seems to get snagged on a lock and then the clients take it in turn to drop off which sends IO errors to the NFS clients.&lt;/p&gt;
&lt;div class=&quot;code panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;codeContent panelContent&quot;&gt;
&lt;pre class=&quot;code-java&quot;&gt;Dec 10 16:00:02 cmds1 kernel: LNet: Service thread pid 22181 was inactive &lt;span class=&quot;code-keyword&quot;&gt;for&lt;/span&gt; 200.00s. The thread might be hung, or it might only be slow and will resume later. Dumping the stack trace &lt;span class=&quot;code-keyword&quot;&gt;for&lt;/span&gt; debugging purposes:
Dec 10 16:00:02 cmds1 kernel: Pid: 22181, comm: mdt00_026
Dec 10 16:00:02 cmds1 kernel: 
Dec 10 16:00:02 cmds1 kernel: Call Trace:
Dec 10 16:00:02 cmds1 kernel: [&amp;lt;ffffffff8150f362&amp;gt;] schedule_timeout+0x192/0x2e0
Dec 10 16:00:02 cmds1 kernel: [&amp;lt;ffffffff810811e0&amp;gt;] ? process_timeout+0x0/0x10
Dec 10 16:00:02 cmds1 kernel: [&amp;lt;ffffffffa03fe6d1&amp;gt;] cfs_waitq_timedwait+0x11/0x20 [libcfs]
Dec 10 16:00:02 cmds1 kernel: Lustre: lock timed out (enqueued at 1418227002, 200s ago)
Dec 10 16:00:02 cmds1 kernel: [&amp;lt;ffffffffa06a601d&amp;gt;] ldlm_completion_ast+0x4ed/0x960 [ptlrpc]
Dec 10 16:00:02 cmds1 kernel: [&amp;lt;ffffffffa06a1790&amp;gt;] ? ldlm_expired_completion_wait+0x0/0x390 [ptlrpc]
Dec 10 16:00:02 cmds1 kernel: [&amp;lt;ffffffff81063410&amp;gt;] ? default_wake_function+0x0/0x20
Dec 10 16:00:02 cmds1 kernel: [&amp;lt;ffffffffa06a5758&amp;gt;] ldlm_cli_enqueue_local+0x1f8/0x5d0 [ptlrpc]
Dec 10 16:00:02 cmds1 kernel: LustreError: dumping log to /tmp/lustre-log.1418227202.22181
Dec 10 16:00:02 cmds1 kernel: [&amp;lt;ffffffffa06a5b30&amp;gt;] ? ldlm_completion_ast+0x0/0x960 [ptlrpc]
Dec 10 16:00:02 cmds1 kernel: [&amp;lt;ffffffffa0d93a90&amp;gt;] ? mdt_blocking_ast+0x0/0x2a0 [mdt]
Dec 10 16:00:02 cmds1 kernel: [&amp;lt;ffffffffa0d99c7b&amp;gt;] mdt_object_lock0+0x33b/0xaf0 [mdt]
Dec 10 16:00:02 cmds1 kernel: [&amp;lt;ffffffffa0d93a90&amp;gt;] ? mdt_blocking_ast+0x0/0x2a0 [mdt]
Dec 10 16:00:02 cmds1 kernel: [&amp;lt;ffffffffa06a5b30&amp;gt;] ? ldlm_completion_ast+0x0/0x960 [ptlrpc]
Dec 10 16:00:02 cmds1 kernel: [&amp;lt;ffffffffa0d9a4f4&amp;gt;] mdt_object_lock+0x14/0x20 [mdt]
Dec 10 16:00:02 cmds1 kernel: [&amp;lt;ffffffffa0dc24f8&amp;gt;] mdt_object_open_lock+0x1c8/0x510 [mdt]
Dec 10 16:00:02 cmds1 kernel: [&amp;lt;ffffffffa0da6bfa&amp;gt;] ? mdt_attr_get_complex+0x38a/0x770 [mdt]
Dec 10 16:00:02 cmds1 kernel: [&amp;lt;ffffffffa0dc76b3&amp;gt;] mdt_open_by_fid_lock+0x443/0x7d0 [mdt]
Dec 10 16:00:02 cmds1 kernel: [&amp;lt;ffffffffa0dc829b&amp;gt;] mdt_reint_open+0x56b/0x20c0 [mdt]
Dec 10 16:00:02 cmds1 kernel: [&amp;lt;ffffffffa041a82e&amp;gt;] ? upcall_cache_get_entry+0x28e/0x860 [libcfs]
Dec 10 16:00:02 cmds1 kernel: [&amp;lt;ffffffffa06cedcc&amp;gt;] ? lustre_msg_add_version+0x6c/0xc0 [ptlrpc]
Dec 10 16:00:02 cmds1 kernel: [&amp;lt;ffffffffa0565f50&amp;gt;] ? lu_ucred+0x20/0x30 [obdclass]
Dec 10 16:00:02 cmds1 kernel: [&amp;lt;ffffffffa0d93015&amp;gt;] ? mdt_ucred+0x15/0x20 [mdt]
Dec 10 16:00:02 cmds1 kernel: [&amp;lt;ffffffffa0daf15c&amp;gt;] ? mdt_root_squash+0x2c/0x410 [mdt]
Dec 10 16:00:02 cmds1 kernel: [&amp;lt;ffffffffa06f6646&amp;gt;] ? __req_capsule_get+0x166/0x700 [ptlrpc]
..
..
Dec 10 16:07:58 cmds1 kernel: 
Dec 10 16:07:58 cmds1 kernel: LustreError: dumping log to /tmp/lustre-log.1418227678.7488
Dec 10 16:11:13 cmds1 kernel: Lustre: 22178:0:(service.c:1339:ptlrpc_at_send_early_reply()) @@@ Couldn&apos;t add any time (5/-271), not sending early reply
Dec 10 16:11:13 cmds1 kernel:  req@ffff882fd6dfd000 x1486501717804232/t0(0) o101-&amp;gt;4540220b-aff6-ec4a-3862-59a2488b6775@10.21.22.27@tcp:0/0 lens 576/3448 e 5 to 0 dl 1418227878 ref 2 fl Interpret:/0/0 rc 0/0
Dec 10 16:11:44 cmds1 kernel: Lustre: 9532:0:(service.c:1339:ptlrpc_at_send_early_reply()) @@@ Couldn&apos;t add any time (5/-271), not sending early reply
Dec 10 16:11:44 cmds1 kernel:  req@ffff88427dd19400 x1486495779658676/t0(0) o101-&amp;gt;dce9c151-cc13-2ac2-7911-8ded672c09d6@10.21.22.29@tcp:0/0 lens 608/3448 e 5 to 0 dl 1418227909 ref 2 fl Interpret:/0/0 rc 0/0
Dec 10 16:12:02 cmds1 kernel: Lustre: 7487:0:(service.c:1339:ptlrpc_at_send_early_reply()) @@@ Couldn&apos;t add any time (5/-260), not sending early reply
Dec 10 16:12:02 cmds1 kernel:  req@ffff88026471d800 x1486501720068016/t0(0) o101-&amp;gt;4540220b-aff6-ec4a-3862-59a2488b6775@10.21.22.27@tcp:0/0 lens 576/3448 e 3 to 0 dl 1418227927 ref 2 fl Interpret:/0/0 rc 0/0
Dec 10 16:12:34 cmds1 kernel: Lustre: 6826:0:(service.c:1339:ptlrpc_at_send_early_reply()) @@@ Couldn&apos;t add any time (5/-134), not sending early reply
Dec 10 16:12:34 cmds1 kernel:  req@ffff8822e2c0d000 x1486501725457272/t0(0) o101-&amp;gt;4540220b-aff6-ec4a-3862-59a2488b6775@10.21.22.27@tcp:0/0 lens 608/3448 e 1 to 0 dl 1418227959 ref 2 fl Interpret:/0/0 rc 0/0
Dec 10 16:13:17 cmds1 kernel: Lustre: charlie-MDT0000: Client 4540220b-aff6-ec4a-3862-59a2488b6775 (at 10.21.22.27@tcp) reconnecting
Dec 10 16:13:17 cmds1 kernel: Lustre: charlie-MDT0000: Client 4540220b-aff6-ec4a-3862-59a2488b6775 (at 10.21.22.27@tcp) refused reconnection, still busy with 6 active RPCs
Dec 10 16:13:42 cmds1 kernel: Lustre: charlie-MDT0000: Client 4540220b-aff6-ec4a-3862-59a2488b6775 (at 10.21.22.27@tcp) reconnecting
Dec 10 16:13:42 cmds1 kernel: Lustre: charlie-MDT0000: Client 4540220b-aff6-ec4a-3862-59a2488b6775 (at 10.21.22.27@tcp) refused reconnection, still busy with 3 active RPCs
Dec 10 16:13:48 cmds1 kernel: Lustre: charlie-MDT0000: Client dce9c151-cc13-2ac2-7911-8ded672c09d6 (at 10.21.22.29@tcp) reconnecting
Dec 10 16:13:48 cmds1 kernel: Lustre: charlie-MDT0000: Client dce9c151-cc13-2ac2-7911-8ded672c09d6 (at 10.21.22.29@tcp) refused reconnection, still busy with 2 active RPCs
Dec 10 16:13:53 cmds1 kernel: Lustre: 22166:0:(service.c:1339:ptlrpc_at_send_early_reply()) @@@ Couldn&apos;t add any time (5/-431), not sending early reply
Dec 10 16:13:53 cmds1 kernel:  req@ffff882fe3e6a000 x1486492815968152/t0(0) o101-&amp;gt;b9fd8649-3e4d-fb11-ba42-c2526b8f651f@10.21.22.28@tcp:0/0 lens 576/1152 e 4 to 0 dl 1418228038 ref 2 fl Interpret:/0/0 rc 0/0
Dec 10 16:14:07 cmds1 kernel: Lustre: charlie-MDT0000: Client 4540220b-aff6-ec4a-3862-59a2488b6775 (at 10.21.22.27@tcp) reconnecting
Dec 10 16:14:07 cmds1 kernel: Lustre: charlie-MDT0000: Client 4540220b-aff6-ec4a-3862-59a2488b6775 (at 10.21.22.27@tcp) refused reconnection, still busy with 3 active RPCs
Dec 10 16:14:13 cmds1 kernel: Lustre: charlie-MDT0000: Client dce9c151-cc13-2ac2-7911-8ded672c09d6 (at 10.21.22.29@tcp) reconnecting
Dec 10 16:14:13 cmds1 kernel: Lustre: charlie-MDT0000: Client dce9c151-cc13-2ac2-7911-8ded672c09d6 (at 10.21.22.29@tcp) refused reconnection, still busy with 2 active RPCs
Dec 10 16:14:32 cmds1 kernel: Lustre: charlie-MDT0000: Client 4540220b-aff6-ec4a-3862-59a2488b6775 (at 10.21.22.27@tcp) reconnecting
Dec 10 16:14:32 cmds1 kernel: Lustre: charlie-MDT0000: Client 4540220b-aff6-ec4a-3862-59a2488b6775 (at 10.21.22.27@tcp) refused reconnection, still busy with 3 active RPCs
Dec 10 16:14:57 cmds1 kernel: Lustre: charlie-MDT0000: Client 4540220b-aff6-ec4a-3862-59a2488b6775 (at 10.21.22.27@tcp) reconnecting
Dec 10 16:14:57 cmds1 kernel: Lustre: Skipped 1 previous similar message
Dec 10 16:14:57 cmds1 kernel: Lustre: charlie-MDT0000: Client 4540220b-aff6-ec4a-3862-59a2488b6775 (at 10.21.22.27@tcp) refused reconnection, still busy with 3 active RPCs
Dec 10 16:14:57 cmds1 kernel: Lustre: Skipped 1 previous similar message
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;Both times, the only way to get back to a stable state was to reboot the mds and abort recovery. Logs attached.&lt;/p&gt;</description>
                <environment>EL6</environment>
        <key id="27867">LU-6015</key>
            <summary>mds evicting clients - lock timed out?</summary>
                <type id="1" iconUrl="https://jira.whamcloud.com/secure/viewavatar?size=xsmall&amp;avatarId=11303&amp;avatarType=issuetype">Bug</type>
                                            <priority id="2" iconUrl="https://jira.whamcloud.com/images/icons/priorities/critical.svg">Critical</priority>
                        <status id="5" iconUrl="https://jira.whamcloud.com/images/icons/statuses/resolved.png" description="A resolution has been taken, and it is awaiting verification by reporter. From here issues are either reopened, or are closed.">Resolved</status>
                    <statusCategory id="3" key="done" colorName="success"/>
                                    <resolution id="3">Duplicate</resolution>
                                        <assignee username="laisiyao">Lai Siyao</assignee>
                                    <reporter username="daire">Daire Byrne</reporter>
                        <labels>
                    </labels>
                <created>Wed, 10 Dec 2014 18:49:46 +0000</created>
                <updated>Wed, 7 Jan 2015 13:08:43 +0000</updated>
                            <resolved>Wed, 7 Jan 2015 13:08:43 +0000</resolved>
                                    <version>Lustre 2.4.1</version>
                                                        <due></due>
                            <votes>0</votes>
                                    <watches>4</watches>
                                                                            <comments>
                            <comment id="101248" author="pjones" created="Wed, 10 Dec 2014 21:56:01 +0000"  >&lt;p&gt;Lai&lt;/p&gt;

&lt;p&gt;Could you please advise on this issue?&lt;/p&gt;

&lt;p&gt;Thanks&lt;/p&gt;

&lt;p&gt;Peter&lt;/p&gt;</comment>
                            <comment id="101293" author="laisiyao" created="Thu, 11 Dec 2014 09:12:58 +0000"  >&lt;p&gt;This looks to be that MDS enqueued a local lock, but timed out, in this case, it will sleep wait for it to become available. Normally the conflicting lock against this local lock should be released later (if other clients don&apos;t cooperate, MDS will evict them to achieve this). So there should be something go wrong, and cause deadlock. To make out the cause of this deadlock, I need to know who holds it, could you `echo t &amp;gt; /proc/sysrq-trigger` on MDS to collect all process backtraces when you see this error next time?&lt;/p&gt;</comment>
                            <comment id="101424" author="daire" created="Fri, 12 Dec 2014 11:05:24 +0000"  >&lt;p&gt;Okay we will do that. I was hoping that the lustre dumps to /tmp may have contained enough debug info to figure out what was going on but apparently not.&lt;/p&gt;</comment>
                            <comment id="101500" author="daire" created="Fri, 12 Dec 2014 19:13:11 +0000"  >&lt;p&gt;This just happened again (so only two days between occurrences now). I&apos;m attaching the messages file after doing a sysrq. I then had to reboot the server to get things stable.&lt;/p&gt;</comment>
                            <comment id="101693" author="daire" created="Tue, 16 Dec 2014 12:58:28 +0000"  >&lt;p&gt;Are there any updates on this? Was the sysrq info provided useful? Can we provide any more information?&lt;/p&gt;

&lt;p&gt;It may or may not be useful but when we saw this issue for the first time 2 weeks ago, it was right after changing the network card hardware in 2/4 of the clients/NFS exporters. The process of bringing the clients back online caused the MDS to fall over. All 4 of the clients and the MDS have been rebooted a few times since then. Since then we seem to be able to hit it frequently under high load. And because the clients are not recovering, long running simulations are getting IO errors resulting in lots of lost render time.&lt;/p&gt;</comment>
                            <comment id="101700" author="niu" created="Tue, 16 Dec 2014 14:45:12 +0000"  >&lt;p&gt;From the full trace looks like that&apos;s a deadlock, should be dup of &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-4152&quot; title=&quot; layout locks can cause deadlock&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-4152&quot;&gt;&lt;del&gt;LU-4152&lt;/del&gt;&lt;/a&gt;:&lt;/p&gt;

&lt;div class=&quot;code panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;codeContent panelContent&quot;&gt;
&lt;pre class=&quot;code-java&quot;&gt;Dec 12 18:55:07 cmds1 kernel: mdt05_032     S 000000000000000b     0 15079      2 0x00000000
Dec 12 18:55:07 cmds1 kernel: ffff88500a907770 0000000000000046 0000000000000000 ffff88500a9077c0
Dec 12 18:55:07 cmds1 kernel: 0000000000000001 ffff885d74c78040 ffff88500a907740 ffffffffa03b92d1
Dec 12 18:55:07 cmds1 kernel: ffff885d74c785f8 ffff88500a907fd8 000000000000fb88 ffff885d74c785f8
Dec 12 18:55:07 cmds1 kernel: Call Trace:
Dec 12 18:55:07 cmds1 kernel: [&amp;lt;ffffffffa03b92d1&amp;gt;] ? libcfs_debug_msg+0x41/0x50 [libcfs]
Dec 12 18:55:07 cmds1 kernel: [&amp;lt;ffffffffa03a96fe&amp;gt;] cfs_waitq_wait+0xe/0x10 [libcfs]
Dec 12 18:55:07 cmds1 kernel: [&amp;lt;ffffffffa06810aa&amp;gt;] ldlm_completion_ast+0x57a/0x960 [ptlrpc]
Dec 12 18:55:07 cmds1 kernel: [&amp;lt;ffffffffa067c790&amp;gt;] ? ldlm_expired_completion_wait+0x0/0x390 [ptlrpc]
Dec 12 18:55:07 cmds1 kernel: [&amp;lt;ffffffff81063410&amp;gt;] ? default_wake_function+0x0/0x20
Dec 12 18:55:07 cmds1 kernel: [&amp;lt;ffffffffa0680758&amp;gt;] ldlm_cli_enqueue_local+0x1f8/0x5d0 [ptlrpc]
Dec 12 18:55:07 cmds1 kernel: [&amp;lt;ffffffffa0680b30&amp;gt;] ? ldlm_completion_ast+0x0/0x960 [ptlrpc]
Dec 12 18:55:07 cmds1 kernel: [&amp;lt;ffffffffa0d84a90&amp;gt;] ? mdt_blocking_ast+0x0/0x2a0 [mdt]
Dec 12 18:55:07 cmds1 kernel: [&amp;lt;ffffffffa0d8ac7b&amp;gt;] mdt_object_lock0+0x33b/0xaf0 [mdt]
Dec 12 18:55:07 cmds1 kernel: [&amp;lt;ffffffffa0d84a90&amp;gt;] ? mdt_blocking_ast+0x0/0x2a0 [mdt]
Dec 12 18:55:07 cmds1 kernel: [&amp;lt;ffffffffa0680b30&amp;gt;] ? ldlm_completion_ast+0x0/0x960 [ptlrpc]
Dec 12 18:55:07 cmds1 kernel: [&amp;lt;ffffffffa0d8b4f4&amp;gt;] mdt_object_lock+0x14/0x20 [mdt]
Dec 12 18:55:07 cmds1 kernel: [&amp;lt;ffffffffa0db34f8&amp;gt;] mdt_object_open_lock+0x1c8/0x510 [mdt]
Dec 12 18:55:07 cmds1 kernel: [&amp;lt;ffffffffa0d97bfa&amp;gt;] ? mdt_attr_get_complex+0x38a/0x770 [mdt]
Dec 12 18:55:07 cmds1 kernel: [&amp;lt;ffffffffa0db86b3&amp;gt;] mdt_open_by_fid_lock+0x443/0x7d0 [mdt]
Dec 12 18:55:07 cmds1 kernel: [&amp;lt;ffffffffa0db929b&amp;gt;] mdt_reint_open+0x56b/0x20c0 [mdt]
Dec 12 18:55:07 cmds1 kernel: [&amp;lt;ffffffffa03c582e&amp;gt;] ? upcall_cache_get_entry+0x28e/0x860 [libcfs]
Dec 12 18:55:07 cmds1 kernel: [&amp;lt;ffffffffa06a9dcc&amp;gt;] ? lustre_msg_add_version+0x6c/0xc0 [ptlrpc]
Dec 12 18:55:07 cmds1 kernel: [&amp;lt;ffffffffa0540f50&amp;gt;] ? lu_ucred+0x20/0x30 [obdclass]
Dec 12 18:55:07 cmds1 kernel: [&amp;lt;ffffffffa0d84015&amp;gt;] ? mdt_ucred+0x15/0x20 [mdt]
Dec 12 18:55:07 cmds1 kernel: [&amp;lt;ffffffffa0da015c&amp;gt;] ? mdt_root_squash+0x2c/0x410 [mdt]
Dec 12 18:55:07 cmds1 kernel: [&amp;lt;ffffffffa06d1646&amp;gt;] ? __req_capsule_get+0x166/0x700 [ptlrpc]
Dec 12 18:55:07 cmds1 kernel: [&amp;lt;ffffffffa0540f50&amp;gt;] ? lu_ucred+0x20/0x30 [obdclass]
Dec 12 18:55:07 cmds1 kernel: [&amp;lt;ffffffffa0da4911&amp;gt;] mdt_reint_rec+0x41/0xe0 [mdt]
Dec 12 18:55:07 cmds1 kernel: [&amp;lt;ffffffffa0d89ae3&amp;gt;] mdt_reint_internal+0x4c3/0x780 [mdt]
Dec 12 18:55:07 cmds1 kernel: [&amp;lt;ffffffffa0d8a06d&amp;gt;] mdt_intent_reint+0x1ed/0x520 [mdt]
Dec 12 18:55:07 cmds1 kernel: [&amp;lt;ffffffffa0d87f1e&amp;gt;] mdt_intent_policy+0x39e/0x720 [mdt]
Dec 12 18:55:07 cmds1 kernel: [&amp;lt;ffffffffa0661831&amp;gt;] ldlm_lock_enqueue+0x361/0x8d0 [ptlrpc]
Dec 12 18:55:07 cmds1 kernel: [&amp;lt;ffffffffa06881ef&amp;gt;] ldlm_handle_enqueue0+0x4ef/0x10b0 [ptlrpc]
Dec 12 18:55:07 cmds1 kernel: [&amp;lt;ffffffffa0d883a6&amp;gt;] mdt_enqueue+0x46/0xe0 [mdt]
Dec 12 18:55:07 cmds1 kernel: [&amp;lt;ffffffffa0d8ea97&amp;gt;] mdt_handle_common+0x647/0x16d0 [mdt]
Dec 12 18:55:07 cmds1 kernel: [&amp;lt;ffffffffa06aabac&amp;gt;] ? lustre_msg_get_transno+0x8c/0x100 [ptlrpc]
Dec 12 18:55:07 cmds1 kernel: [&amp;lt;ffffffffa0dc83f5&amp;gt;] mds_regular_handle+0x15/0x20 [mdt]
Dec 12 18:55:07 cmds1 kernel: [&amp;lt;ffffffffa06ba3c8&amp;gt;] ptlrpc_server_handle_request+0x398/0xc60 [ptlrpc]
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;div class=&quot;code panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;codeContent panelContent&quot;&gt;
&lt;pre class=&quot;code-java&quot;&gt;Dec 12 18:55:07 cmds1 kernel: mdt03_012     S 000000000000002c     0  7511      2 0x00000000
Dec 12 18:55:07 cmds1 kernel: ffff882f6da2f8f0 0000000000000046 00000000548b3476 ffff882f6da2f940
Dec 12 18:55:07 cmds1 kernel: 0000000000000001 ffff882f8e4f8aa0 ffff882f6da2f8c0 ffffffffa03b92d1
Dec 12 18:55:07 cmds1 kernel: ffff882f8e4f9058 ffff882f6da2ffd8 000000000000fb88 ffff882f8e4f9058
Dec 12 18:55:07 cmds1 kernel: Call Trace:
Dec 12 18:55:07 cmds1 kernel: [&amp;lt;ffffffffa03b92d1&amp;gt;] ? libcfs_debug_msg+0x41/0x50 [libcfs]
Dec 12 18:55:07 cmds1 kernel: [&amp;lt;ffffffffa03a96fe&amp;gt;] cfs_waitq_wait+0xe/0x10 [libcfs]
Dec 12 18:55:07 cmds1 kernel: [&amp;lt;ffffffffa06810aa&amp;gt;] ldlm_completion_ast+0x57a/0x960 [ptlrpc]
Dec 12 18:55:07 cmds1 kernel: [&amp;lt;ffffffffa067c790&amp;gt;] ? ldlm_expired_completion_wait+0x0/0x390 [ptlrpc]
Dec 12 18:55:07 cmds1 kernel: [&amp;lt;ffffffff81063410&amp;gt;] ? default_wake_function+0x0/0x20
Dec 12 18:55:07 cmds1 kernel: [&amp;lt;ffffffffa0680758&amp;gt;] ldlm_cli_enqueue_local+0x1f8/0x5d0 [ptlrpc]
Dec 12 18:55:07 cmds1 kernel: [&amp;lt;ffffffffa0680b30&amp;gt;] ? ldlm_completion_ast+0x0/0x960 [ptlrpc]
Dec 12 18:55:07 cmds1 kernel: [&amp;lt;ffffffffa0d84a90&amp;gt;] ? mdt_blocking_ast+0x0/0x2a0 [mdt]
Dec 12 18:55:07 cmds1 kernel: [&amp;lt;ffffffffa0d84a90&amp;gt;] ? mdt_blocking_ast+0x0/0x2a0 [mdt]
Dec 12 18:55:07 cmds1 kernel: [&amp;lt;ffffffffa0d8ac7b&amp;gt;] mdt_object_lock0+0x33b/0xaf0 [mdt]
Dec 12 18:55:07 cmds1 kernel: [&amp;lt;ffffffffa0d84a90&amp;gt;] ? mdt_blocking_ast+0x0/0x2a0 [mdt]
Dec 12 18:55:07 cmds1 kernel: [&amp;lt;ffffffffa0680b30&amp;gt;] ? ldlm_completion_ast+0x0/0x960 [ptlrpc]
Dec 12 18:55:07 cmds1 kernel: [&amp;lt;ffffffffa0d8b4f4&amp;gt;] mdt_object_lock+0x14/0x20 [mdt]
Dec 12 18:55:07 cmds1 kernel: [&amp;lt;ffffffffa0d9a5a9&amp;gt;] mdt_getattr_name_lock+0xe19/0x1980 [mdt]
Dec 12 18:55:07 cmds1 kernel: [&amp;lt;ffffffffa06a9135&amp;gt;] ? lustre_msg_buf+0x55/0x60 [ptlrpc]
Dec 12 18:55:07 cmds1 kernel: [&amp;lt;ffffffffa06d1646&amp;gt;] ? __req_capsule_get+0x166/0x700 [ptlrpc]
Dec 12 18:55:07 cmds1 kernel: [&amp;lt;ffffffffa06ab3c4&amp;gt;] ? lustre_msg_get_flags+0x34/0xb0 [ptlrpc]
Dec 12 18:55:07 cmds1 kernel: [&amp;lt;ffffffffa0d9b3ad&amp;gt;] mdt_intent_getattr+0x29d/0x490 [mdt]
Dec 12 18:55:07 cmds1 kernel: [&amp;lt;ffffffffa0d87f1e&amp;gt;] mdt_intent_policy+0x39e/0x720 [mdt]
Dec 12 18:55:07 cmds1 kernel: [&amp;lt;ffffffffa0661831&amp;gt;] ldlm_lock_enqueue+0x361/0x8d0 [ptlrpc]
Dec 12 18:55:07 cmds1 kernel: [&amp;lt;ffffffffa06881ef&amp;gt;] ldlm_handle_enqueue0+0x4ef/0x10b0 [ptlrpc]
Dec 12 18:55:07 cmds1 kernel: [&amp;lt;ffffffffa0d883a6&amp;gt;] mdt_enqueue+0x46/0xe0 [mdt]
Dec 12 18:55:07 cmds1 kernel: [&amp;lt;ffffffffa0d8ea97&amp;gt;] mdt_handle_common+0x647/0x16d0 [mdt]
Dec 12 18:55:07 cmds1 kernel: [&amp;lt;ffffffffa06aabac&amp;gt;] ? lustre_msg_get_transno+0x8c/0x100 [ptlrpc]
Dec 12 18:55:07 cmds1 kernel: [&amp;lt;ffffffffa0dc83f5&amp;gt;] mds_regular_handle+0x15/0x20 [mdt]
Dec 12 18:55:07 cmds1 kernel: [&amp;lt;ffffffffa06ba3c8&amp;gt;] ptlrpc_server_handle_request+0x398/0xc60 [ptlrpc]
Dec 12 18:55:07 cmds1 kernel: [&amp;lt;ffffffffa03a95de&amp;gt;] ? cfs_timer_arm+0xe/0x10 [libcfs]
Dec 12 18:55:07 cmds1 kernel: [&amp;lt;ffffffffa03bad9f&amp;gt;] ? lc_watchdog_touch+0x6f/0x170 [libcfs]
Dec 12 18:55:07 cmds1 kernel: [&amp;lt;ffffffffa06b1729&amp;gt;] ? ptlrpc_wait_event+0xa9/0x290 [ptlrpc]
Dec 12 18:55:07 cmds1 kernel: [&amp;lt;ffffffff81055ad3&amp;gt;] ? __wake_up+0x53/0x70
Dec 12 18:55:07 cmds1 kernel: [&amp;lt;ffffffffa06bb75e&amp;gt;] ptlrpc_main+0xace/0x1700 [ptlrpc]
Dec 12 18:55:07 cmds1 kernel: [&amp;lt;ffffffffa06bac90&amp;gt;] ? ptlrpc_main+0x0/0x1700 [ptlrpc]
Dec 12 18:55:07 cmds1 kernel: [&amp;lt;ffffffff8100c0ca&amp;gt;] child_rip+0xa/0x20
Dec 12 18:55:07 cmds1 kernel: [&amp;lt;ffffffffa06bac90&amp;gt;] ? ptlrpc_main+0x0/0x1700 [ptlrpc]
Dec 12 18:55:07 cmds1 kernel: [&amp;lt;ffffffffa06bac90&amp;gt;] ? ptlrpc_main+0x0/0x1700 [ptlrpc]
Dec 12 18:55:07 cmds1 kernel: [&amp;lt;ffffffff8100c0c0&amp;gt;] ? child_rip+0x0/0x20
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;</comment>
                            <comment id="101703" author="laisiyao" created="Tue, 16 Dec 2014 14:59:41 +0000"  >&lt;p&gt;This looks to be duplicate of &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-4152&quot; title=&quot; layout locks can cause deadlock&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-4152&quot;&gt;&lt;del&gt;LU-4152&lt;/del&gt;&lt;/a&gt;, and the patch for &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-4152&quot; title=&quot; layout locks can cause deadlock&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-4152&quot;&gt;&lt;del&gt;LU-4152&lt;/del&gt;&lt;/a&gt; &lt;a href=&quot;http://review.whamcloud.com/#/c/8083/&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/#/c/8083/&lt;/a&gt; can be applied to 2.4.1 code directly, will you apply it and test again?&lt;/p&gt;</comment>
                            <comment id="101711" author="daire" created="Tue, 16 Dec 2014 15:48:54 +0000"  >&lt;p&gt;Okay, great - we&apos;ll organise some downtime to upgrade the code. Just to be clear - this only needs to be applied to the MDS?&lt;/p&gt;</comment>
                            <comment id="101784" author="laisiyao" created="Wed, 17 Dec 2014 01:50:56 +0000"  >&lt;p&gt;yes, it&apos;s needed by MDS only.&lt;/p&gt;</comment>
                            <comment id="102735" author="daire" created="Wed, 7 Jan 2015 12:59:58 +0000"  >&lt;p&gt;Just to update - we have applied the patch and have not seen any further instances of this issue in 3 weeks now. You can close this if you wish and we&apos;ll re-open should we hit it again.&lt;/p&gt;

&lt;p&gt;Cheers.&lt;/p&gt;</comment>
                            <comment id="102736" author="pjones" created="Wed, 7 Jan 2015 13:08:43 +0000"  >&lt;p&gt;Good news - thanks Daire!&lt;/p&gt;</comment>
                    </comments>
                <issuelinks>
                            <issuelinktype id="10011">
                    <name>Related</name>
                                                                <inwardlinks description="is related to">
                                        <issuelink>
            <issuekey id="21658">LU-4152</issuekey>
        </issuelink>
                            </inwardlinks>
                                    </issuelinktype>
                    </issuelinks>
                <attachments>
                            <attachment id="16582" name="cmds1.messages" size="816364" author="daire" created="Fri, 12 Dec 2014 19:13:34 +0000"/>
                            <attachment id="16553" name="lustre-logs.tar.gz" size="3297263" author="daire" created="Wed, 10 Dec 2014 18:49:46 +0000"/>
                    </attachments>
                <subtasks>
                    </subtasks>
                <customfields>
                                                                                                                                                                                            <customfield id="customfield_10890" key="com.atlassian.jira.plugins.jira-development-integration-plugin:devsummary">
                        <customfieldname>Development</customfieldname>
                        <customfieldvalues>
                            
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        <customfield id="customfield_10390" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                        <customfieldname>Rank</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>1|hzx29r:</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10090" key="com.pyxis.greenhopper.jira:gh-global-rank">
                        <customfieldname>Rank (Obsolete)</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>16763</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                                                                                            <customfield id="customfield_10060" key="com.atlassian.jira.plugin.system.customfieldtypes:select">
                        <customfieldname>Severity</customfieldname>
                        <customfieldvalues>
                                <customfieldvalue key="10022"><![CDATA[3]]></customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        </customfields>
    </item>
</channel>
</rss>