<!-- 
RSS generated by JIRA (9.4.14#940014-sha1:734e6822bbf0d45eff9af51f82432957f73aa32c) at Sat Feb 10 02:18:54 UTC 2024

It is possible to restrict the fields that are returned in this document by specifying the 'field' parameter in your request.
For example, to request only the issue key and summary append 'field=key&field=summary' to the URL of your request.
-->
<rss version="0.92" >
<channel>
    <title>Whamcloud Community JIRA</title>
    <link>https://jira.whamcloud.com</link>
    <description>This file is an XML representation of an issue</description>
    <language>en-us</language>    <build-info>
        <version>9.4.14</version>
        <build-number>940014</build-number>
        <build-date>05-12-2023</build-date>
    </build-info>


<item>
            <title>[LU-8592] MDS crashed with ASSERTION( atomic_read(&amp;o-&gt;lo_header-&gt;loh_ref) &gt; 0 )</title>
                <link>https://jira.whamcloud.com/browse/LU-8592</link>
                <project id="10000" key="LU">Lustre</project>
                    <description>&lt;p&gt;Error happened during soak testing of build &apos;20160902&apos; (see &lt;a href=&quot;https://wiki.hpdd.intel.com/display/Releases/Soak+Testing+on+Lola#SoakTestingonLola-20160902&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://wiki.hpdd.intel.com/display/Releases/Soak+Testing+on+Lola#SoakTestingonLola-20160902&lt;/a&gt;) &lt;br/&gt;
Configuration reads as:&lt;br/&gt;
4 MDS with 1 MDT / MDS, formatted with &lt;em&gt;ldiskfs&lt;/em&gt; and configured pairwise in active-active HA configuration&lt;br/&gt;
6 OSS with 4 OST / OSS formatted with &lt;em&gt;ldiskfs&lt;/em&gt; and configured pairwise in active-active HA configuration&lt;br/&gt;
DNE is enabled&lt;/p&gt;

&lt;p&gt;Sequence of events&lt;/p&gt;
&lt;ul&gt;
	&lt;li&gt;2016-09-06 02:51:28,201:fsmgmt.fsmgmt:INFO     triggering fault mds_failover ( &lt;tt&gt;lola-8&lt;/tt&gt; (mdt-0) --&amp;gt; &lt;tt&gt;lola-9&lt;/tt&gt;)&lt;/li&gt;
	&lt;li&gt;2016-09-06 03:41:33,479:fsmgmt.fsmgmt:INFO     mds_failover just completed (mdt-0 failed back to &lt;tt&gt;lola-8&lt;/tt&gt;)&lt;/li&gt;
	&lt;li&gt;2016-09-06 03:41:17   MDS &lt;tt&gt;lola-11&lt;/tt&gt; crashed with error message:
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;&amp;lt;0&amp;gt;LustreError: 6666:0:(lu_object.h:716:lu_object_get()) ASSERTION( atomic_read(&amp;amp;o-&amp;gt;lo_header-&amp;gt;loh_ref) &amp;gt; 0 ) failed: 
&amp;lt;0&amp;gt;LustreError: 6666:0:(lu_object.h:716:lu_object_get()) LBUG
&amp;lt;4&amp;gt;Pid: 6666, comm: mdt03_002
&amp;lt;4&amp;gt;
&amp;lt;4&amp;gt;Call Trace:
&amp;lt;4&amp;gt; [&amp;lt;ffffffffa07f0875&amp;gt;] libcfs_debug_dumpstack+0x55/0x80 [libcfs]
&amp;lt;4&amp;gt; [&amp;lt;ffffffffa07f0e77&amp;gt;] lbug_with_loc+0x47/0xb0 [libcfs]
&amp;lt;4&amp;gt; [&amp;lt;ffffffffa1203071&amp;gt;] mdt_remote_object_lock+0x491/0x4a0 [mdt]
&amp;lt;4&amp;gt; [&amp;lt;ffffffffa12298a0&amp;gt;] mdt_reint_open+0x2b90/0x3180 [mdt]
&amp;lt;4&amp;gt; [&amp;lt;ffffffffa1211ead&amp;gt;] mdt_reint_rec+0x5d/0x200 [mdt]
&amp;lt;4&amp;gt; [&amp;lt;ffffffffa11fd5db&amp;gt;] mdt_reint_internal+0x62b/0xa50 [mdt]
&amp;lt;4&amp;gt; [&amp;lt;ffffffffa11fdbf6&amp;gt;] mdt_intent_reint+0x1f6/0x440 [mdt]
&amp;lt;4&amp;gt; [&amp;lt;ffffffffa11fb8be&amp;gt;] mdt_intent_policy+0x4be/0xc70 [mdt]
&amp;lt;4&amp;gt; [&amp;lt;ffffffffa0ab77c7&amp;gt;] ldlm_lock_enqueue+0x127/0x990 [ptlrpc]
&amp;lt;4&amp;gt; [&amp;lt;ffffffffa0ae2c27&amp;gt;] ldlm_handle_enqueue0+0x807/0x14d0 [ptlrpc]
&amp;lt;4&amp;gt; [&amp;lt;ffffffffa0b68b21&amp;gt;] tgt_enqueue+0x61/0x230 [ptlrpc]
&amp;lt;4&amp;gt; [&amp;lt;ffffffffa0b69ccc&amp;gt;] tgt_request_handle+0x8ec/0x1440 [ptlrpc]
&amp;lt;4&amp;gt; [&amp;lt;ffffffffa0b16501&amp;gt;] ptlrpc_main+0xd31/0x1800 [ptlrpc]
&amp;lt;4&amp;gt; [&amp;lt;ffffffffa0b157d0&amp;gt;] ? ptlrpc_main+0x0/0x1800 [ptlrpc]
&amp;lt;4&amp;gt; [&amp;lt;ffffffff810a138e&amp;gt;] kthread+0x9e/0xc0
&amp;lt;4&amp;gt; [&amp;lt;ffffffff8100c28a&amp;gt;] child_rip+0xa/0x20
&amp;lt;4&amp;gt; [&amp;lt;ffffffff810a12f0&amp;gt;] ? kthread+0x0/0xc0
&amp;lt;4&amp;gt; [&amp;lt;ffffffff8100c280&amp;gt;] ? child_rip+0x0/0x20
&amp;lt;4&amp;gt;
&amp;lt;0&amp;gt;Kernel panic - not syncing: LBUG
&amp;lt;4&amp;gt;Pid: 6666, comm: mdt03_002 Tainted: P           -- ------------    2.6.32-573.26.1.el6_lustre.x86_64 #1
&amp;lt;4&amp;gt;Call Trace:
&amp;lt;4&amp;gt; [&amp;lt;ffffffff81539407&amp;gt;] ? panic+0xa7/0x16f
&amp;lt;4&amp;gt; [&amp;lt;ffffffffa07f0ecb&amp;gt;] ? lbug_with_loc+0x9b/0xb0 [libcfs]
&amp;lt;4&amp;gt; [&amp;lt;ffffffffa1203071&amp;gt;] ? mdt_remote_object_lock+0x491/0x4a0 [mdt]
&amp;lt;4&amp;gt; [&amp;lt;ffffffffa12298a0&amp;gt;] ? mdt_reint_open+0x2b90/0x3180 [mdt]
&amp;lt;4&amp;gt; [&amp;lt;ffffffffa1211ead&amp;gt;] ? mdt_reint_rec+0x5d/0x200 [mdt]
&amp;lt;4&amp;gt; [&amp;lt;ffffffffa11fd5db&amp;gt;] ? mdt_reint_internal+0x62b/0xa50 [mdt]
&amp;lt;4&amp;gt; [&amp;lt;ffffffffa11fdbf6&amp;gt;] ? mdt_intent_reint+0x1f6/0x440 [mdt]
&amp;lt;4&amp;gt; [&amp;lt;ffffffffa11fb8be&amp;gt;] ? mdt_intent_policy+0x4be/0xc70 [mdt]
&amp;lt;4&amp;gt; [&amp;lt;ffffffffa0ab77c7&amp;gt;] ? ldlm_lock_enqueue+0x127/0x990 [ptlrpc]
&amp;lt;4&amp;gt; [&amp;lt;ffffffffa0ae2c27&amp;gt;] ? ldlm_handle_enqueue0+0x807/0x14d0 [ptlrpc]
&amp;lt;4&amp;gt; [&amp;lt;ffffffffa0b68b21&amp;gt;] ? tgt_enqueue+0x61/0x230 [ptlrpc]
&amp;lt;4&amp;gt; [&amp;lt;ffffffffa0b69ccc&amp;gt;] ? tgt_request_handle+0x8ec/0x1440 [ptlrpc]
&amp;lt;4&amp;gt; [&amp;lt;ffffffffa0b16501&amp;gt;] ? ptlrpc_main+0xd31/0x1800 [ptlrpc]
&amp;lt;4&amp;gt; [&amp;lt;ffffffffa0b157d0&amp;gt;] ? ptlrpc_main+0x0/0x1800 [ptlrpc]
&amp;lt;4&amp;gt; [&amp;lt;ffffffff810a138e&amp;gt;] ? kthread+0x9e/0xc0
&amp;lt;4&amp;gt; [&amp;lt;ffffffff8100c28a&amp;gt;] ? child_rip+0xa/0x20
&amp;lt;4&amp;gt; [&amp;lt;ffffffff810a12f0&amp;gt;] ? kthread+0x0/0xc0
&amp;lt;4&amp;gt; [&amp;lt;ffffffff8100c280&amp;gt;] ? child_rip+0x0/0x20
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;
&lt;p&gt;Attached files:&lt;br/&gt;
console, message logs of all MDS nodes; vmcore-dmesg.txt of &lt;tt&gt;lola-11&lt;/tt&gt;.&lt;br/&gt;
crash dump is available.&lt;/p&gt;&lt;/li&gt;
&lt;/ul&gt;
</description>
                <environment>lola&lt;br/&gt;
build: &lt;a href=&quot;https://build.hpdd.intel.com/job/lustre-master/3431/&quot;&gt;https://build.hpdd.intel.com/job/lustre-master/3431/&lt;/a&gt;   tag 2.8.57  for el6.7</environment>
        <key id="39683">LU-8592</key>
            <summary>MDS crashed with ASSERTION( atomic_read(&amp;o-&gt;lo_header-&gt;loh_ref) &gt; 0 )</summary>
                <type id="1" iconUrl="https://jira.whamcloud.com/secure/viewavatar?size=xsmall&amp;avatarId=11303&amp;avatarType=issuetype">Bug</type>
                                            <priority id="1" iconUrl="https://jira.whamcloud.com/images/icons/priorities/blocker.svg">Blocker</priority>
                        <status id="5" iconUrl="https://jira.whamcloud.com/images/icons/statuses/resolved.png" description="A resolution has been taken, and it is awaiting verification by reporter. From here issues are either reopened, or are closed.">Resolved</status>
                    <statusCategory id="3" key="done" colorName="success"/>
                                    <resolution id="1">Fixed</resolution>
                                        <assignee username="yong.fan">nasf</assignee>
                                    <reporter username="heckes">Frank Heckes</reporter>
                        <labels>
                            <label>soak</label>
                    </labels>
                <created>Thu, 8 Sep 2016 12:13:27 +0000</created>
                <updated>Fri, 14 Oct 2016 00:55:30 +0000</updated>
                            <resolved>Fri, 14 Oct 2016 00:55:30 +0000</resolved>
                                    <version>Lustre 2.9.0</version>
                                    <fixVersion>Lustre 2.9.0</fixVersion>
                                        <due></due>
                            <votes>0</votes>
                                    <watches>8</watches>
                                                                            <comments>
                            <comment id="165288" author="heckes" created="Thu, 8 Sep 2016 12:25:05 +0000"  >&lt;p&gt;crash dump had been saved to &lt;tt&gt;lhn.hpdd.intel.com:/scratch/crashdumps/lu-8592/lola-11/27.0.0.1-2016-09-06-03:41:17&lt;/tt&gt;&lt;/p&gt;</comment>
                            <comment id="165663" author="yong.fan" created="Mon, 12 Sep 2016 15:18:01 +0000"  >&lt;p&gt;The issue may be related to the patch &lt;a href=&quot;http://review.whamcloud.com/#/c/19041&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/#/c/19041&lt;/a&gt;. That patch caches the metadata attributes on the remote MDT. To invalidate the cached attributes, it makes the remote ibits lock callback data reference the cached object. That reference will be released when mdt_remote_blocking_ast() is triggered.&lt;/p&gt;

&lt;p&gt;According to the logs on lola-11, just before the ASSERTION, the OSP detected an exception:&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;&amp;lt;3&amp;gt;LustreError: 167-0: soaked-MDT0000-osp-MDT0003: This client was evicted by soaked-MDT0000; in progress operations using this service will fail.
&amp;lt;3&amp;gt;LustreError: 32471:0:(ldlm_resource.c:878:ldlm_resource_complain()) soaked-MDT0000-osp-MDT0003: namespace resource [0x2c00013a4:0x97e0:0x0].0x0 (ffff88078ba0f2c0) refcount nonzero (1) after lock cleanup; forcing cleanup.
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;That means the connection from MDT3 to MDT0 was evicted by MDT0, and the resulting IMP_EVENT_INVALIDATE event triggered ldlm_namespace_cleanup(). Unfortunately, at that time, some upper-layer user was still referencing the resource &lt;span class=&quot;error&quot;&gt;&amp;#91;0x2c00013a4:0x97e0:0x0&amp;#93;&lt;/span&gt;. It is suspected that this resource (object) was cleaned up by force, which left the related object&apos;s reference wrong, so that the subsequent mdt_remote_object_lock() hit the ASSERTION. Currently, I do not have a detailed scenario to reproduce the failure, but I will make a patch to enhance the related logic and check what happens.&lt;/p&gt;</comment>
                            <comment id="165664" author="gerrit" created="Mon, 12 Sep 2016 15:20:38 +0000"  >&lt;p&gt;Fan Yong (fan.yong@intel.com) uploaded a new patch: &lt;a href=&quot;http://review.whamcloud.com/22438&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/22438&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-8592&quot; title=&quot;MDS crashed with ASSERTION( atomic_read(&amp;amp;o-&amp;gt;lo_header-&amp;gt;loh_ref) &amp;gt; 0 )&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-8592&quot;&gt;&lt;del&gt;LU-8592&lt;/del&gt;&lt;/a&gt; dne: cache remote ibits lock properly&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: master&lt;br/&gt;
Current Patch Set: 1&lt;br/&gt;
Commit: 8bae476f5258e78be6cc7a58a19be8f95fc59209&lt;/p&gt;</comment>
                            <comment id="165666" author="yong.fan" created="Mon, 12 Sep 2016 15:23:24 +0000"  >&lt;p&gt;Frank,&lt;/p&gt;

&lt;p&gt;Would you please try this patch?&lt;br/&gt;
Thanks!&lt;/p&gt;</comment>
                            <comment id="165708" author="heckes" created="Mon, 12 Sep 2016 16:59:42 +0000"  >&lt;p&gt;Sure, I&apos;ll do that. It could take until Wednesday before I start, as I have to reproduce an error for EE-3.1 first.&lt;/p&gt;</comment>
                            <comment id="166509" author="heckes" created="Tue, 20 Sep 2016 09:38:47 +0000"  >&lt;p&gt;Installed build containing &lt;a href=&quot;http://review.whamcloud.com/22438&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/22438&lt;/a&gt; patchset #4 (see &lt;a href=&quot;https://wiki.hpdd.intel.com/display/Releases/Soak+Testing+on+Lola#SoakTestingonLola-20160916&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://wiki.hpdd.intel.com/display/Releases/Soak+Testing+on+Lola#SoakTestingonLola-20160916&lt;/a&gt;)&lt;br/&gt;
The test session has been running since 2016-09-17. No occurrence of this bug so far, but due to &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-8580&quot; title=&quot;general protection fault: osd_xattr_get+0x32c/0x5b0 [osd_ldiskfs]&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-8580&quot;&gt;&lt;del&gt;LU-8580&lt;/del&gt;&lt;/a&gt; (which happens roughly every 2 hours) it&apos;s difficult to say whether your fix resolves the problem.&lt;/p&gt;</comment>
                            <comment id="169565" author="gerrit" created="Thu, 13 Oct 2016 23:36:20 +0000"  >&lt;p&gt;Oleg Drokin (oleg.drokin@intel.com) merged in patch &lt;a href=&quot;http://review.whamcloud.com/22438/&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/22438/&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-8592&quot; title=&quot;MDS crashed with ASSERTION( atomic_read(&amp;amp;o-&amp;gt;lo_header-&amp;gt;loh_ref) &amp;gt; 0 )&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-8592&quot;&gt;&lt;del&gt;LU-8592&lt;/del&gt;&lt;/a&gt; mdt: hold mdt_device::mdt_md_root until service stop&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: master&lt;br/&gt;
Current Patch Set: &lt;br/&gt;
Commit: 1bf196aaf5adb9a1c67886c0cd6a780ec6838040&lt;/p&gt;</comment>
                            <comment id="169582" author="pjones" created="Fri, 14 Oct 2016 00:55:30 +0000"  >&lt;p&gt;Landed for 2.9&lt;/p&gt;</comment>
                    </comments>
                <issuelinks>
                            <issuelinktype id="10011">
                    <name>Related</name>
                                                                <inwardlinks description="is related to">
                                                        </inwardlinks>
                                    </issuelinktype>
                    </issuelinks>
                <attachments>
                            <attachment id="23018" name="console-lola-10.log.bz2" size="55465" author="heckes" created="Thu, 8 Sep 2016 12:23:23 +0000"/>
                            <attachment id="23020" name="console-lola-11.log.bz2" size="54840" author="heckes" created="Thu, 8 Sep 2016 12:23:23 +0000"/>
                            <attachment id="23013" name="console-lola-8.log.bz2" size="96144" author="heckes" created="Thu, 8 Sep 2016 12:23:23 +0000"/>
                            <attachment id="23015" name="console-lola-9.log.bz2" size="64112" author="heckes" created="Thu, 8 Sep 2016 12:23:23 +0000"/>
                            <attachment id="23021" name="lola-11-vmcore-dmesg.txt.bz2" size="29589" author="heckes" created="Thu, 8 Sep 2016 12:23:23 +0000"/>
                            <attachment id="23014" name="message-lola-10.log.bz2" size="192898" author="heckes" created="Thu, 8 Sep 2016 12:23:23 +0000"/>
                            <attachment id="23019" name="message-lola-11.log.bz2" size="187576" author="heckes" created="Thu, 8 Sep 2016 12:23:23 +0000"/>
                            <attachment id="23016" name="message-lola-8.log.bz2" size="407173" author="heckes" created="Thu, 8 Sep 2016 12:23:23 +0000"/>
                            <attachment id="23017" name="message-lola-9.log.bz2" size="414857" author="heckes" created="Thu, 8 Sep 2016 12:23:23 +0000"/>
                    </attachments>
                <subtasks>
                    </subtasks>
                <customfields>
                                                                                                                                                                                            <customfield id="customfield_10890" key="com.atlassian.jira.plugins.jira-development-integration-plugin:devsummary">
                        <customfieldname>Development</customfieldname>
                        <customfieldvalues>
                            
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        <customfield id="customfield_10390" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                        <customfieldname>Rank</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>1|hzynq7:</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10090" key="com.pyxis.greenhopper.jira:gh-global-rank">
                        <customfieldname>Rank (Obsolete)</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>9223372036854775807</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                                                                                            <customfield id="customfield_10060" key="com.atlassian.jira.plugin.system.customfieldtypes:select">
                        <customfieldname>Severity</customfieldname>
                        <customfieldvalues>
                                <customfieldvalue key="10022"><![CDATA[3]]></customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        </customfields>
    </item>
</channel>
</rss>