<?xml version="1.0" encoding="UTF-8"?>
<!-- 
RSS generated by JIRA (9.4.14#940014-sha1:734e6822bbf0d45eff9af51f82432957f73aa32c) at Sat Feb 10 01:58:02 UTC 2024

It is possible to restrict the fields that are returned in this document by specifying the 'field' parameter in your request.
For example, to request only the issue key and summary append 'field=key&field=summary' to the URL of your request.
-->
<rss version="0.92" >
<channel>
    <title>Whamcloud Community JIRA</title>
    <link>https://jira.whamcloud.com</link>
    <description>This file is an XML representation of an issue</description>
    <language>en-us</language>    <build-info>
        <version>9.4.14</version>
        <build-number>940014</build-number>
        <build-date>05-12-2023</build-date>
    </build-info>


<item>
            <title>[LU-6189] LustreError: (mdt_handler.c:4078:mdt_intent_reint()) ASSERTION( rc == 0 ) failed: Error occurred but lock handle is still in use, rc = -116</title>
                <link>https://jira.whamcloud.com/browse/LU-6189</link>
                <project id="10000" key="LU">Lustre</project>
                    <description>&lt;p&gt;This morning within a few hours of each other, we hit this LBUG which caused the MDS to crash. The first time after reboot we had to abort recovery to get lustre back. We have a crashdump from the MDS.&lt;/p&gt;

&lt;p&gt;Feb  1 10:03:15 atlas-mds1.ccs.ornl.gov kernel: [ 7271.805235] LustreError: 0:0:(ldlm_lockd.c:344:waiting_locks_callback()) ### lock callback timer expired after 375s: evicting client at 4966@gni100  ns: mdt-&lt;br/&gt;
atlas1-MDT0000_UUID lock: ffff881ec6e16c80/0xfc6e8aed747d1af2 lrc: 4/0,0 mode: CR/CR res: &lt;span class=&quot;error&quot;&gt;&amp;#91;0x2001a597a:0x85:0x0&amp;#93;&lt;/span&gt;.0 bits 0x2 rrc: 4 type: IBT flags: 0x60200000000020 nid: 4966@gni100 remote: 0x20ee476ee499c158&lt;br/&gt;
 expref: 132 pid: 16827 timeout: 4301930544 lvb_type: 0&lt;br/&gt;
Feb  1 10:03:15 atlas-mds1.ccs.ornl.gov kernel: [ 7271.858358] LustreError: 16827:0:(mdt_handler.c:4078:mdt_intent_reint()) ASSERTION( rc == 0 ) failed: Error occurred but lock handle is still in use, rc = -1&lt;br/&gt;
16&lt;br/&gt;
Feb  1 10:03:15 atlas-mds1.ccs.ornl.gov kernel: [ 7271.874660] LustreError: 16827:0:(mdt_handler.c:4078:mdt_intent_reint()) LBUG&lt;br/&gt;
Feb  1 10:03:15 atlas-mds1.ccs.ornl.gov kernel: [ 7271.882757] Pid: 16827, comm: mdt00_224&lt;br/&gt;
Feb  1 10:03:15 atlas-mds1.ccs.ornl.gov kernel: [ 7271.887151] &lt;br/&gt;
Feb  1 10:03:15 atlas-mds1.ccs.ornl.gov kernel: [ 7271.887152] Call Trace:&lt;br/&gt;
Feb  1 10:03:15 atlas-mds1.ccs.ornl.gov kernel: [ 7271.891770]  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0407895&amp;gt;&amp;#93;&lt;/span&gt; libcfs_debug_dumpstack+0x55/0x80 &lt;span class=&quot;error&quot;&gt;&amp;#91;libcfs&amp;#93;&lt;/span&gt;&lt;br/&gt;
Feb  1 10:03:15 atlas-mds1.ccs.ornl.gov kernel: [ 7271.899670]  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0407e97&amp;gt;&amp;#93;&lt;/span&gt; lbug_with_loc+0x47/0xb0 &lt;span class=&quot;error&quot;&gt;&amp;#91;libcfs&amp;#93;&lt;/span&gt;&lt;br/&gt;
Feb  1 10:03:15 atlas-mds1.ccs.ornl.gov kernel: [ 7271.906710]  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0d4379a&amp;gt;&amp;#93;&lt;/span&gt; mdt_intent_reint+0x51a/0x520 &lt;span class=&quot;error&quot;&gt;&amp;#91;mdt&amp;#93;&lt;/span&gt;&lt;br/&gt;
Feb  1 10:03:15 atlas-mds1.ccs.ornl.gov kernel: [ 7271.913933]  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0d40c4e&amp;gt;&amp;#93;&lt;/span&gt; mdt_intent_policy+0x3ae/0x770 &lt;span class=&quot;error&quot;&gt;&amp;#91;mdt&amp;#93;&lt;/span&gt;&lt;br/&gt;
Feb  1 10:03:15 atlas-mds1.ccs.ornl.gov kernel: [ 7271.921281]  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa06de2e5&amp;gt;&amp;#93;&lt;/span&gt; ldlm_lock_enqueue+0x135/0x980 &lt;span class=&quot;error&quot;&gt;&amp;#91;ptlrpc&amp;#93;&lt;/span&gt;&lt;br/&gt;
Feb  1 10:03:15 atlas-mds1.ccs.ornl.gov kernel: [ 7271.928910]  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0707d0b&amp;gt;&amp;#93;&lt;/span&gt; ldlm_handle_enqueue0+0x51b/0x10c0 &lt;span class=&quot;error&quot;&gt;&amp;#91;ptlrpc&amp;#93;&lt;/span&gt;&lt;br/&gt;
Feb  1 10:03:15 atlas-mds1.ccs.ornl.gov kernel: [ 7271.936903]  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff81069f75&amp;gt;&amp;#93;&lt;/span&gt; ? enqueue_entity+0x125/0x450&lt;br/&gt;
Feb  1 10:03:15 atlas-mds1.ccs.ornl.gov kernel: [ 7271.943544]  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0d41116&amp;gt;&amp;#93;&lt;/span&gt; mdt_enqueue+0x46/0xe0 &lt;span class=&quot;error&quot;&gt;&amp;#91;mdt&amp;#93;&lt;/span&gt;&lt;br/&gt;
Feb  1 10:03:15 atlas-mds1.ccs.ornl.gov kernel: [ 7271.950094]  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0d4602a&amp;gt;&amp;#93;&lt;/span&gt; mdt_handle_common+0x52a/0x1470 &lt;span class=&quot;error&quot;&gt;&amp;#91;mdt&amp;#93;&lt;/span&gt;&lt;br/&gt;
Feb  1 10:03:15 atlas-mds1.ccs.ornl.gov kernel: [ 7271.957515]  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0d833e5&amp;gt;&amp;#93;&lt;/span&gt; mds_regular_handle+0x15/0x20 &lt;span class=&quot;error&quot;&gt;&amp;#91;mdt&amp;#93;&lt;/span&gt;&lt;br/&gt;
Feb  1 10:03:15 atlas-mds1.ccs.ornl.gov kernel: [ 7271.964770]  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0737fe5&amp;gt;&amp;#93;&lt;/span&gt; ptlrpc_server_handle_request+0x385/0xc00 &lt;span class=&quot;error&quot;&gt;&amp;#91;ptlrpc&amp;#93;&lt;/span&gt;&lt;br/&gt;
Feb  1 10:03:15 atlas-mds1.ccs.ornl.gov kernel: [ 7271.973547]  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa04084ce&amp;gt;&amp;#93;&lt;/span&gt; ? cfs_timer_arm+0xe/0x10 &lt;span class=&quot;error&quot;&gt;&amp;#91;libcfs&amp;#93;&lt;/span&gt;&lt;br/&gt;
Feb  1 10:03:15 atlas-mds1.ccs.ornl.gov kernel: [ 7271.980677]  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa04193cf&amp;gt;&amp;#93;&lt;/span&gt; ? lc_watchdog_touch+0x6f/0x170 &lt;span class=&quot;error&quot;&gt;&amp;#91;libcfs&amp;#93;&lt;/span&gt;&lt;br/&gt;
Feb  1 10:03:15 atlas-mds1.ccs.ornl.gov kernel: [ 7271.988407]  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa072f6c9&amp;gt;&amp;#93;&lt;/span&gt; ? ptlrpc_wait_event+0xa9/0x2d0 &lt;span class=&quot;error&quot;&gt;&amp;#91;ptlrpc&amp;#93;&lt;/span&gt;&lt;br/&gt;
Feb  1 10:03:15 atlas-mds1.ccs.ornl.gov kernel: [ 7271.996116]  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff810546b9&amp;gt;&amp;#93;&lt;/span&gt; ? __wake_up_common+0x59/0x90&lt;br/&gt;
Feb  1 10:03:15 atlas-mds1.ccs.ornl.gov kernel: [ 7272.002774]  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa073934d&amp;gt;&amp;#93;&lt;/span&gt; ptlrpc_main+0xaed/0x1760 &lt;span class=&quot;error&quot;&gt;&amp;#91;ptlrpc&amp;#93;&lt;/span&gt;&lt;br/&gt;
Feb  1 10:03:15 atlas-mds1.ccs.ornl.gov kernel: [ 7272.009920]  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0738860&amp;gt;&amp;#93;&lt;/span&gt; ? ptlrpc_main+0x0/0x1760 &lt;span class=&quot;error&quot;&gt;&amp;#91;ptlrpc&amp;#93;&lt;/span&gt;&lt;br/&gt;
Feb  1 10:03:15 atlas-mds1.ccs.ornl.gov kernel: [ 7272.017040]  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff8109ab56&amp;gt;&amp;#93;&lt;/span&gt; kthread+0x96/0xa0&lt;br/&gt;
Feb  1 10:03:15 atlas-mds1.ccs.ornl.gov kernel: [ 7272.022607]  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff8100c20a&amp;gt;&amp;#93;&lt;/span&gt; child_rip+0xa/0x20&lt;br/&gt;
Feb  1 10:03:15 atlas-mds1.ccs.ornl.gov kernel: [ 7272.028267]  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff8109aac0&amp;gt;&amp;#93;&lt;/span&gt; ? kthread+0x0/0xa0&lt;br/&gt;
Feb  1 10:03:15 atlas-mds1.ccs.ornl.gov kernel: [ 7272.033930]  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff8100c200&amp;gt;&amp;#93;&lt;/span&gt; ? child_rip+0x0/0x20&lt;br/&gt;
Feb  1 10:03:15 atlas-mds1.ccs.ornl.gov kernel: [ 7272.039782] &lt;/p&gt;</description>
                <environment></environment>
        <key id="28489">LU-6189</key>
            <summary>LustreError: (mdt_handler.c:4078:mdt_intent_reint()) ASSERTION( rc == 0 ) failed: Error occurred but lock handle is still in use, rc = -116</summary>
                <type id="1" iconUrl="https://jira.whamcloud.com/secure/viewavatar?size=xsmall&amp;avatarId=11303&amp;avatarType=issuetype">Bug</type>
                                            <priority id="2" iconUrl="https://jira.whamcloud.com/images/icons/priorities/critical.svg">Critical</priority>
                        <status id="5" iconUrl="https://jira.whamcloud.com/images/icons/statuses/resolved.png" description="A resolution has been taken, and it is awaiting verification by reporter. From here issues are either reopened, or are closed.">Resolved</status>
                    <statusCategory id="3" key="done" colorName="success"/>
                                    <resolution id="3">Duplicate</resolution>
                                        <assignee username="pjones">Peter Jones</assignee>
                                    <reporter username="curtispb">Philip B Curtis</reporter>
                        <labels>
                    </labels>
                <created>Sun, 1 Feb 2015 16:23:10 +0000</created>
                <updated>Mon, 4 Jan 2016 18:20:29 +0000</updated>
                            <resolved>Thu, 2 Apr 2015 17:23:55 +0000</resolved>
                                    <version>Lustre 2.5.3</version>
                                                        <due></due>
                            <votes>0</votes>
                                    <watches>7</watches>
                                                                            <comments>
                            <comment id="105310" author="pjones" created="Sun, 1 Feb 2015 17:08:16 +0000"  >&lt;p&gt;Philip&lt;/p&gt;

&lt;p&gt;You have entered this ticket as a Severity 1 which means that the filesystem is down. Is this the case? From the description it sounds like service has been restored but you want to treat this as a high priority to prevent further such crashes.&lt;/p&gt;

&lt;p&gt;Peter&lt;/p&gt;</comment>
                            <comment id="105311" author="curtispb" created="Sun, 1 Feb 2015 17:12:17 +0000"  >&lt;p&gt;Peter&lt;/p&gt;

&lt;p&gt;No, the first time this occurred lustre was restarted. I haven&apos;t brought lustre back up this time since this was following so closely to the first time. I wanted to get Intel involved before I attempted another start.&lt;/p&gt;

&lt;p&gt;Philip&lt;/p&gt;</comment>
                            <comment id="105313" author="pjones" created="Sun, 1 Feb 2015 17:27:38 +0000"  >&lt;p&gt;ok. I think that it is best to start uploading the crash dump to our ftp site in case that is useful. Do you have the instructions on how to do that? Also, is the code being run exactly in sync with the tip of your b2_5 branch on gut hub? &lt;a href=&quot;https://github.com/ORNL-TechInt/lustre/commits/b2_5&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://github.com/ORNL-TechInt/lustre/commits/b2_5&lt;/a&gt;&lt;/p&gt;</comment>
                            <comment id="105314" author="bzzz" created="Sun, 1 Feb 2015 17:32:24 +0000"  >&lt;p&gt;I&apos;m quite sure this is fixed with  &lt;a href=&quot;http://review.whamcloud.com/#/c/12828/&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/#/c/12828/&lt;/a&gt;&lt;/p&gt;</comment>
                            <comment id="105315" author="pjones" created="Sun, 1 Feb 2015 17:43:39 +0000"  >&lt;p&gt;Philip&lt;/p&gt;

&lt;p&gt;This is a patch that needs to be applied to the MDS only. Is there anything else that you need from us at this point before attempting to bring the filesystem back up?&lt;/p&gt;

&lt;p&gt;Peter&lt;/p&gt;</comment>
                            <comment id="105316" author="curtispb" created="Sun, 1 Feb 2015 17:43:54 +0000"  >&lt;p&gt;No, I do not have instructions for the ftp site. That is correct, we are at the tip of the code there.&lt;/p&gt;</comment>
                            <comment id="105317" author="simmonsja" created="Sun, 1 Feb 2015 17:45:02 +0000"  >&lt;p&gt;We are running what is in the ORNL git hub. We attempted a upgrade but it failed after a few days. I general don&apos;t upgrade the ORNL branch for a few weeks after a upgrade just in case something goes wrong.&lt;/p&gt;</comment>
                            <comment id="105318" author="curtispb" created="Sun, 1 Feb 2015 17:45:08 +0000"  >&lt;p&gt;Nope. I will get you those crashdumps once I have those instructions and I will see about getting this patched version in place and we will go from there.&lt;/p&gt;</comment>
                            <comment id="105320" author="curtispb" created="Sun, 1 Feb 2015 21:00:32 +0000"  >&lt;p&gt;We have rebooted into the new RPMs with the patch. Lustre has started and I will continue to monitor. Thank you for your help.&lt;/p&gt;

&lt;p&gt;Philip&lt;/p&gt;</comment>
                            <comment id="105321" author="pjones" created="Sun, 1 Feb 2015 21:04:30 +0000"  >&lt;p&gt;Good news. Thanks for the update. I will drop the severity to S2 and will continue to monitor in case there are any further complications.&lt;/p&gt;</comment>
                            <comment id="111404" author="pjones" created="Thu, 2 Apr 2015 17:23:55 +0000"  >&lt;p&gt;As per ORNL ok to close as &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-5934&quot; title=&quot;mdt_intent_reint()) ASSERTION( rc == 0 ) failed: Error occurred but lock handle is still in use, rc = -2&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-5934&quot;&gt;&lt;del&gt;LU-5934&lt;/del&gt;&lt;/a&gt; has landed&lt;/p&gt;</comment>
                    </comments>
                <issuelinks>
                            <issuelinktype id="10011">
                    <name>Related</name>
                                            <outwardlinks description="is related to ">
                                        <issuelink>
            <issuekey id="27645">LU-5934</issuekey>
        </issuelink>
                            </outwardlinks>
                                                        </issuelinktype>
                    </issuelinks>
                <attachments>
                    </attachments>
                <subtasks>
                    </subtasks>
                <customfields>
                                                                                                                                                                                            <customfield id="customfield_10890" key="com.atlassian.jira.plugins.jira-development-integration-plugin:devsummary">
                        <customfieldname>Development</customfieldname>
                        <customfieldvalues>
                            
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        <customfield id="customfield_10390" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                        <customfieldname>Rank</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>1|hzx5dj:</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10090" key="com.pyxis.greenhopper.jira:gh-global-rank">
                        <customfieldname>Rank (Obsolete)</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>17312</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                                                                                            <customfield id="customfield_10060" key="com.atlassian.jira.plugin.system.customfieldtypes:select">
                        <customfieldname>Severity</customfieldname>
                        <customfieldvalues>
                                <customfieldvalue key="10021"><![CDATA[2]]></customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        </customfields>
    </item>
</channel>
</rss>