<!-- 
RSS generated by JIRA (9.4.14#940014-sha1:734e6822bbf0d45eff9af51f82432957f73aa32c) at Sat Feb 10 02:18:11 UTC 2024

It is possible to restrict the fields that are returned in this document by specifying the 'field' parameter in your request.
For example, to request only the issue key and summary append 'field=key&field=summary' to the URL of your request.
-->
<rss version="0.92" >
<channel>
    <title>Whamcloud Community JIRA</title>
    <link>https://jira.whamcloud.com</link>
    <description>This file is an XML representation of an issue</description>
    <language>en-us</language>    <build-info>
        <version>9.4.14</version>
        <build-number>940014</build-number>
        <build-date>05-12-2023</build-date>
    </build-info>


<item>
            <title>[LU-8510] ASSERTION( dt-&gt;do_ops-&gt;do_invalidate ) failed</title>
                <link>https://jira.whamcloud.com/browse/LU-8510</link>
                <project id="10000" key="LU">Lustre</project>
                    <description>&lt;p&gt;The following call stack was hit during autotesting on Maloo for &lt;a href=&quot;http://review.whamcloud.com/#/c/20546/&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/#/c/20546/&lt;/a&gt;. My new test is standing up 3 MDTs with non-consecutive indices and a couple of OSTs. The method I am using to start the &quot;custom&quot; filesystem seems to be consistent with how other tests start their &quot;custom&quot; filesystems.&lt;/p&gt;

&lt;p&gt;Link to the Maloo test session results is &lt;a href=&quot;https://testing.hpdd.intel.com/test_sessions/4599d8d8-6108-11e6-906c-5254006e85c2&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://testing.hpdd.intel.com/test_sessions/4599d8d8-6108-11e6-906c-5254006e85c2&lt;/a&gt;.&lt;/p&gt;

&lt;p&gt;The LBUG is preventing the filesystem from coming up. Any suggestions?&lt;/p&gt;

&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt; LustreError: 21374:0:(dt_object.h:2633:dt_invalidate()) ASSERTION( dt-&amp;gt;do_ops-&amp;gt;do_invalidate ) failed:
 LustreError: 21374:0:(dt_object.h:2633:dt_invalidate()) LBUG
 Pid: 21374, comm: mdt00_002 

 Call Trace:
  [&amp;lt;ffffffffa05e67d3&amp;gt;] libcfs_debug_dumpstack+0x53/0x80 [libcfs]
  [&amp;lt;ffffffffa05e6d75&amp;gt;] lbug_with_loc+0x45/0xc0 [libcfs]
  [&amp;lt;ffffffffa0ea8fcf&amp;gt;] lod_object_unlock+0x39f/0x440 [lod]
  [&amp;lt;ffffffffa0f11e1b&amp;gt;] mdd_object_unlock+0x3b/0xd0 [mdd]
  [&amp;lt;ffffffffa0ddbb62&amp;gt;] mdt_unlock_slaves+0x1a2/0x3c0 [mdt]
  [&amp;lt;ffffffffa0de3c72&amp;gt;] mdt_md_create+0xb52/0xba0 [mdt]
  [&amp;lt;ffffffffa0de3e2b&amp;gt;] mdt_reint_create+0x16b/0x350 [mdt]
  [&amp;lt;ffffffffa0de5330&amp;gt;] mdt_reint_rec+0x80/0x210 [mdt]
  [&amp;lt;ffffffffa0dc7d62&amp;gt;] mdt_reint_internal+0x5b2/0x9b0 [mdt]
  [&amp;lt;ffffffffa0dd3077&amp;gt;] mdt_reint+0x67/0x140 [mdt]
  [&amp;lt;ffffffffa0a69aa5&amp;gt;] tgt_request_handle+0x915/0x1320 [ptlrpc]
  [&amp;lt;ffffffffa0a15c5b&amp;gt;] ptlrpc_server_handle_request+0x21b/0xa90 [ptlrpc]
  [&amp;lt;ffffffffa0a13818&amp;gt;] ? ptlrpc_wait_event+0x98/0x340 [ptlrpc]
  [&amp;lt;ffffffffa05f1957&amp;gt;] ? libcfs_debug_msg+0x57/0x80 [libcfs]
  [&amp;lt;ffffffffa0a19d10&amp;gt;] ptlrpc_main+0xaa0/0x1de0 [ptlrpc]
  [&amp;lt;ffffffffa0a19270&amp;gt;] ? ptlrpc_main+0x0/0x1de0 [ptlrpc]
  [&amp;lt;ffffffff810a5aef&amp;gt;] kthread+0xcf/0xe0
  [&amp;lt;ffffffff810a5a20&amp;gt;] ? kthread+0x0/0xe0
  [&amp;lt;ffffffff816469d8&amp;gt;] ret_from_fork+0x58/0x90
  [&amp;lt;ffffffff810a5a20&amp;gt;] ? kthread+0x0/0xe0

 Kernel panic - not syncing: LBUG
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;</description>
                <environment>CentOS Linux 7/x86_64</environment>
        <key id="38921">LU-8510</key>
            <summary>ASSERTION( dt-&gt;do_ops-&gt;do_invalidate ) failed</summary>
                <type id="1" iconUrl="https://jira.whamcloud.com/secure/viewavatar?size=xsmall&amp;avatarId=11303&amp;avatarType=issuetype">Bug</type>
                                            <priority id="1" iconUrl="https://jira.whamcloud.com/images/icons/priorities/blocker.svg">Blocker</priority>
                        <status id="5" iconUrl="https://jira.whamcloud.com/images/icons/statuses/resolved.png" description="A resolution has been taken, and it is awaiting verification by reporter. From here issues are either reopened, or are closed.">Resolved</status>
                    <statusCategory id="3" key="done" colorName="success"/>
                                    <resolution id="1">Fixed</resolution>
                                        <assignee username="bobijam">Zhenyu Xu</assignee>
                                    <reporter username="dinatale2">Giuseppe Di Natale</reporter>
                        <labels>
                            <label>soak</label>
                    </labels>
                <created>Wed, 17 Aug 2016 18:11:32 +0000</created>
                <updated>Tue, 19 Mar 2019 15:43:00 +0000</updated>
                            <resolved>Thu, 8 Sep 2016 04:24:55 +0000</resolved>
                                    <version>Lustre 2.9.0</version>
                                    <fixVersion>Lustre 2.9.0</fixVersion>
                                        <due></due>
                            <votes>0</votes>
                                    <watches>5</watches>
                                                                            <comments>
                            <comment id="162440" author="pjones" created="Thu, 18 Aug 2016 20:43:28 +0000"  >&lt;p&gt;Bobijam&lt;/p&gt;

&lt;p&gt;Could you please assist with this issue?&lt;/p&gt;

&lt;p&gt;Thanks&lt;/p&gt;

&lt;p&gt;Peter&lt;/p&gt;</comment>
                            <comment id="162491" author="gerrit" created="Fri, 19 Aug 2016 03:09:40 +0000"  >&lt;p&gt;Bobi Jam (bobijam@hotmail.com) uploaded a new patch: &lt;a href=&quot;http://review.whamcloud.com/22017&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/22017&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-8510&quot; title=&quot;ASSERTION( dt-&amp;gt;do_ops-&amp;gt;do_invalidate ) failed&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-8510&quot;&gt;&lt;del&gt;LU-8510&lt;/del&gt;&lt;/a&gt; dne: set osd_obj_ea_ops::dt_invalidate&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: master&lt;br/&gt;
Current Patch Set: 1&lt;br/&gt;
Commit: a9fae446db68cb1c34f2db949c875f30d5e93980&lt;/p&gt;</comment>
                            <comment id="164938" author="heckes" created="Tue, 6 Sep 2016 09:47:59 +0000"  >&lt;p&gt;The same error also happened during soak testing of &apos;20160902&apos; (see &lt;a href=&quot;https://wiki.hpdd.intel.com/pages/viewpage.action?title=Soak+Testing+on+Lola&amp;amp;spaceKey=Releases#SoakTestingonLola-20160902&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://wiki.hpdd.intel.com/pages/viewpage.action?title=Soak+Testing+on+Lola&amp;amp;spaceKey=Releases#SoakTestingonLola-20160902&lt;/a&gt;)&lt;br/&gt;
Test cluster configuration:&lt;br/&gt;
4 MDS with 1 MDT / MDS, backend FS formatted with &lt;em&gt;ldiskfs&lt;/em&gt; , in active-active HA configuration (node pair affected &lt;tt&gt;lola-&lt;span class=&quot;error&quot;&gt;&amp;#91;8,9&amp;#93;&lt;/span&gt;&lt;/tt&gt;)&lt;br/&gt;
6 OSS with 4 OST / OSS, backend FS formatted with &lt;em&gt;zfs&lt;/em&gt; , in active-active HA configuration &lt;/p&gt;

&lt;p&gt;The error message is the same apart from the addresses (see attached file vmcore-dmesg.txt) &lt;/p&gt;

&lt;p&gt;Sequence of events&lt;/p&gt;
&lt;ul&gt;
	&lt;li&gt;2016-09-05 15:48:29                            Node &lt;tt&gt;lola-8&lt;/tt&gt; crashed before (injected fault) failover of lola-9&apos;s resources (mdt11) to lola-8.&lt;/li&gt;
	&lt;li&gt;2016-09-05 15:55:04                            lola-8 became available before failover took place&lt;/li&gt;
	&lt;li&gt;2016-09-05 16:25:38,987:fsmgmt.fsmgmt:INFO     triggering fault mds_failover (&lt;tt&gt;lola-9&lt;/tt&gt;)&lt;/li&gt;
	&lt;li&gt;2016-09-05 16:35:39,398:                       mdt-1 successfully mounted on &lt;tt&gt;lola-8&lt;/tt&gt;, but stalled in recovery due to missing MDT/MGT of lola-8&lt;/li&gt;
	&lt;li&gt;The MDT/MDT can&apos;t be mounted on the primary node (active-active HA configuration) anymore. The error message reads as:
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;[root@lola-8 ~]# date ; mount -t lustre -o rw,user_xattr /dev/disk/by-id/dm-name-360080e50002ff4f00000026952013088p1 /mnt/soaked-mdt0 ; date
Tue Sep  6 02:04:13 PDT 2016
mount.lustre: mount /dev/mapper/360080e50002ff4f00000026952013088p1 at /mnt/soaked-mdt0 failed: Input/output error
Is the MGS running?
Tue Sep  6 02:05:14 PDT 2016
[root@lola-8 ~]# lctl debug_kernel /tmp/lustre-log-20160906-020514-mgs-mount-fails
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;
&lt;p&gt;(Double checked HW; IB and disk resources are operational and sane)&lt;br/&gt;
After manual umount of mdt1 and reboot of node &lt;tt&gt;lola-8&lt;/tt&gt;, mdt-0, mdt-1 could be mounted and recovery completed within 2 mins and primary&lt;br/&gt;
resource could be switched back to (primary) node &lt;tt&gt;lola-9&lt;/tt&gt; again.&lt;br/&gt;
This symptom is eventually a different bug that happens only by chance due to the node crash.&lt;/p&gt;&lt;/li&gt;
&lt;/ul&gt;


&lt;p&gt;Attached files: messages, console and vmcore-dmesg.txt of affected node &lt;tt&gt;lola-8&lt;/tt&gt;, debug log (mask &lt;tt&gt;-1&lt;/tt&gt;) containing debug information of the time interval while executing the mount command specified above.&lt;br/&gt;
A crash dump file exists and has been stored at &lt;tt&gt;lhn.hpdd.intel.com:/scratch/crashdumps/lu-8510/lola-8/127.0.0.1-2016-09-05-15:48:29&lt;/tt&gt;.&lt;/p&gt;</comment>
                            <comment id="164939" author="heckes" created="Tue, 6 Sep 2016 09:50:45 +0000"  >&lt;p&gt;The soak test was executed with el6.7 build (&lt;a href=&quot;https://build.hpdd.intel.com/job/lustre-master/3431/&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://build.hpdd.intel.com/job/lustre-master/3431/&lt;/a&gt;   tag 2.8.57)&lt;/p&gt;</comment>
                            <comment id="165238" author="gerrit" created="Thu, 8 Sep 2016 02:08:10 +0000"  >&lt;p&gt;Oleg Drokin (oleg.drokin@intel.com) merged in patch &lt;a href=&quot;http://review.whamcloud.com/22017/&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/22017/&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-8510&quot; title=&quot;ASSERTION( dt-&amp;gt;do_ops-&amp;gt;do_invalidate ) failed&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-8510&quot;&gt;&lt;del&gt;LU-8510&lt;/del&gt;&lt;/a&gt; dne: set osd_obj_ea_ops::dt_invalidate&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: master&lt;br/&gt;
Current Patch Set: &lt;br/&gt;
Commit: 13364590a8c9ef64320f62b9937c01aaa6b6fa85&lt;/p&gt;</comment>
                            <comment id="165258" author="pjones" created="Thu, 8 Sep 2016 04:24:55 +0000"  >&lt;p&gt;Landed for 2.9&lt;/p&gt;</comment>
                    </comments>
                <issuelinks>
                            <issuelinktype id="10011">
                    <name>Related</name>
                                            <outwardlinks description="is related to ">
                                                        </outwardlinks>
                                                        </issuelinktype>
                    </issuelinks>
                <attachments>
                            <attachment id="22967" name="console-lola-8.log.bz2" size="80783" author="heckes" created="Tue, 6 Sep 2016 10:30:31 +0000"/>
                            <attachment id="22969" name="lustre-log-20160906-020514-mgs-mount-fails.bz2" size="1629834" author="heckes" created="Tue, 6 Sep 2016 10:30:31 +0000"/>
                            <attachment id="22966" name="messages-lola-8.log.bz2" size="296482" author="heckes" created="Tue, 6 Sep 2016 10:30:31 +0000"/>
                            <attachment id="22968" name="vmcore-dmesg.txt.bz2" size="21681" author="heckes" created="Tue, 6 Sep 2016 10:30:31 +0000"/>
                    </attachments>
                <subtasks>
                    </subtasks>
                <customfields>
                                                                                                                                                                                            <customfield id="customfield_10890" key="com.atlassian.jira.plugins.jira-development-integration-plugin:devsummary">
                        <customfieldname>Development</customfieldname>
                        <customfieldvalues>
                            
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        <customfield id="customfield_10390" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                        <customfieldname>Rank</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>1|hzyl33:</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10090" key="com.pyxis.greenhopper.jira:gh-global-rank">
                        <customfieldname>Rank (Obsolete)</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>9223372036854775807</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                                                                                            <customfield id="customfield_10060" key="com.atlassian.jira.plugin.system.customfieldtypes:select">
                        <customfieldname>Severity</customfieldname>
                        <customfieldvalues>
                                <customfieldvalue key="10022"><![CDATA[3]]></customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        </customfields>
    </item>
</channel>
</rss>