<?xml version="1.0" encoding="UTF-8"?>
<!-- 
RSS generated by JIRA (9.4.14#940014-sha1:734e6822bbf0d45eff9af51f82432957f73aa32c) at Sat Feb 10 02:12:00 UTC 2024

It is possible to restrict the fields that are returned in this document by specifying the 'field' parameter in your request.
For example, to request only the issue key and summary append 'field=key&field=summary' to the URL of your request.
-->
<rss version="0.92">
<channel>
    <title>Whamcloud Community JIRA</title>
    <link>https://jira.whamcloud.com</link>
    <description>This file is an XML representation of an issue</description>
    <language>en-us</language>    <build-info>
        <version>9.4.14</version>
        <build-number>940014</build-number>
        <build-date>05-12-2023</build-date>
    </build-info>


<item>
            <title>[LU-7798] ll_prep_inode()) ASSERTION( fid_is_sane(&amp;md.body-&gt;mbo_fid1) ) failed:</title>
                <link>https://jira.whamcloud.com/browse/LU-7798</link>
                <project id="10000" key="LU">Lustre</project>
                    <description>&lt;p&gt;Error happened during soak testing of build &apos;20160218&apos; (see: &lt;a href=&quot;https://wiki.hpdd.intel.com/display/Releases/Soak+Testing+on+Lola#SoakTestingonLola-20160218&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://wiki.hpdd.intel.com/display/Releases/Soak+Testing+on+Lola#SoakTestingonLola-20160218&lt;/a&gt;). DNE is enabled.&lt;br/&gt;
MDT&apos;s have been formatted using ldiskfs, OSTs using zfs. &lt;/p&gt;

&lt;p&gt;Event history:&lt;/p&gt;
&lt;ul&gt;
	&lt;li&gt;2016-02-18 16:24:27,115:fsmgmt.fsmgmt:INFO     reseting MDS node lola-10&lt;/li&gt;
	&lt;li&gt;2016-02-18-16:34:04] Lustre client &lt;tt&gt;lola-26&lt;/tt&gt; crash with LBUG&lt;/li&gt;
	&lt;li&gt;2016-02-18-16:34:06 Lustre client &lt;tt&gt;lola-29&lt;/tt&gt; crash with LBUG&lt;/li&gt;
&lt;/ul&gt;


&lt;p&gt;Error message and stack trace is same for both events:&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;&amp;lt;6&amp;gt;Lustre: soaked-MDT0004-mdc-ffff880852d1a000: Connection restored to 192.168.1.110@o2ib10 (at 192.168.1.110@o2ib10)
&amp;lt;0&amp;gt;LustreError: 23705:0:(llite_lib.c:2295:ll_prep_inode()) ASSERTION( fid_is_sane(&amp;amp;md.body-&amp;gt;mbo_fid1) ) failed:
&amp;lt;0&amp;gt;LustreError: 23705:0:(llite_lib.c:2295:ll_prep_inode()) LBUG
&amp;lt;4&amp;gt;Pid: 23705, comm: pct
&amp;lt;4&amp;gt;
&amp;lt;4&amp;gt;Call Trace:
&amp;lt;4&amp;gt; [&amp;lt;ffffffffa050b875&amp;gt;] libcfs_debug_dumpstack+0x55/0x80 [libcfs]
&amp;lt;4&amp;gt; [&amp;lt;ffffffffa050be77&amp;gt;] lbug_with_loc+0x47/0xb0 [libcfs]
&amp;lt;4&amp;gt; [&amp;lt;ffffffffa0aa0192&amp;gt;] ll_prep_inode+0x752/0xc40 [lustre]
&amp;lt;4&amp;gt; [&amp;lt;ffffffffa07f6d60&amp;gt;] ? lustre_swab_mdt_body+0x0/0x130 [ptlrpc]
&amp;lt;4&amp;gt; [&amp;lt;ffffffffa0ab42b2&amp;gt;] ll_new_node+0x682/0x7f0 [lustre]
&amp;lt;4&amp;gt; [&amp;lt;ffffffffa0ab6b04&amp;gt;] ll_mkdir+0x104/0x220 [lustre]
&amp;lt;4&amp;gt; [&amp;lt;ffffffff8122ec0f&amp;gt;] ? security_inode_permission+0x1f/0x30
&amp;lt;4&amp;gt; [&amp;lt;ffffffff8119d759&amp;gt;] vfs_mkdir+0xd9/0x140
&amp;lt;4&amp;gt; [&amp;lt;ffffffff811a04e7&amp;gt;] sys_mkdirat+0xc7/0x1b0
&amp;lt;4&amp;gt; [&amp;lt;ffffffff8100c6f5&amp;gt;] ? math_state_restore+0x45/0x60
&amp;lt;4&amp;gt; [&amp;lt;ffffffff811a05e8&amp;gt;] sys_mkdir+0x18/0x20
&amp;lt;4&amp;gt; [&amp;lt;ffffffff8100b0d2&amp;gt;] system_call_fastpath+0x16/0x1b
&amp;lt;4&amp;gt;
&amp;lt;0&amp;gt;Kernel panic - not syncing: LBUG
&amp;lt;4&amp;gt;Pid: 23705, comm: pct Not tainted 2.6.32-504.30.3.el6.x86_64 #1
&amp;lt;4&amp;gt;Call Trace:
&amp;lt;4&amp;gt; [&amp;lt;ffffffff815293fc&amp;gt;] ? panic+0xa7/0x16f
&amp;lt;4&amp;gt; [&amp;lt;ffffffffa050becb&amp;gt;] ? lbug_with_loc+0x9b/0xb0 [libcfs]
&amp;lt;4&amp;gt; [&amp;lt;ffffffffa0aa0192&amp;gt;] ? ll_prep_inode+0x752/0xc40 [lustre]
&amp;lt;4&amp;gt; [&amp;lt;ffffffffa07f6d60&amp;gt;] ? lustre_swab_mdt_body+0x0/0x130 [ptlrpc]
&amp;lt;4&amp;gt; [&amp;lt;ffffffffa0ab42b2&amp;gt;] ? ll_new_node+0x682/0x7f0 [lustre]
&amp;lt;4&amp;gt; [&amp;lt;ffffffffa0ab6b04&amp;gt;] ? ll_mkdir+0x104/0x220 [lustre]
&amp;lt;4&amp;gt; [&amp;lt;ffffffff8122ec0f&amp;gt;] ? security_inode_permission+0x1f/0x30
&amp;lt;4&amp;gt; [&amp;lt;ffffffff8119d759&amp;gt;] ? vfs_mkdir+0xd9/0x140
&amp;lt;4&amp;gt; [&amp;lt;ffffffff811a04e7&amp;gt;] ? sys_mkdirat+0xc7/0x1b0
&amp;lt;4&amp;gt; [&amp;lt;ffffffff8100c6f5&amp;gt;] ? math_state_restore+0x45/0x60
&amp;lt;4&amp;gt; [&amp;lt;ffffffff811a05e8&amp;gt;] ? sys_mkdir+0x18/0x20
&amp;lt;4&amp;gt; [&amp;lt;ffffffff8100b0d2&amp;gt;] ? system_call_fastpath+0x16/0x1b
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;
&lt;ul&gt;
	&lt;li&gt;No errors on Lustre server nodes can be correlated to the events.&lt;/li&gt;
&lt;/ul&gt;


&lt;p&gt;Attached console, messages and vmcore-dmesg.txt from both clients.&lt;br/&gt;
Crash files are available&lt;/p&gt;</description>
                <environment>lola&lt;br/&gt;
build: 2.8.50-6-gf9ca359 ;commit f9ca359284357d145819beb08b316e932f7a3060 </environment>
        <key id="34806">LU-7798</key>
            <summary>ll_prep_inode()) ASSERTION( fid_is_sane(&amp;md.body-&gt;mbo_fid1) ) failed:</summary>
                <type id="1" iconUrl="https://jira.whamcloud.com/secure/viewavatar?size=xsmall&amp;avatarId=11303&amp;avatarType=issuetype">Bug</type>
                                            <priority id="2" iconUrl="https://jira.whamcloud.com/images/icons/priorities/critical.svg">Critical</priority>
                        <status id="5" iconUrl="https://jira.whamcloud.com/images/icons/statuses/resolved.png" description="A resolution has been taken, and it is awaiting verification by reporter. From here issues are either reopened, or are closed.">Resolved</status>
                    <statusCategory id="3" key="done" colorName="success"/>
                                    <resolution id="3">Duplicate</resolution>
                                        <assignee username="wc-triage">WC Triage</assignee>
                                    <reporter username="heckes">Frank Heckes</reporter>
                        <labels>
                            <label>soak</label>
                    </labels>
                <created>Fri, 19 Feb 2016 16:26:36 +0000</created>
                <updated>Fri, 7 Oct 2016 17:14:09 +0000</updated>
                            <resolved>Fri, 7 Oct 2016 17:13:16 +0000</resolved>
                                                                        <due></due>
                            <votes>0</votes>
                                    <watches>6</watches>
                                                                            <comments>
                            <comment id="142983" author="heckes" created="Fri, 19 Feb 2016 16:44:50 +0000"  >&lt;p&gt;Crash files have been saved to &lt;tt&gt;lhn.lola.hpdd.intel.com:/scratch/crashdumps/lu-7798/&lt;/tt&gt;&lt;/p&gt;</comment>
                            <comment id="142992" author="heckes" created="Fri, 19 Feb 2016 17:32:47 +0000"  >&lt;p&gt;I forgot to mention that the OSS nodes have been extended to operate in active-active failover configuration&lt;br/&gt;
for disk resources by Feb, 17th 2016. So the failover partner node &lt;tt&gt;lola-4&lt;/tt&gt; can see of node &lt;tt&gt;lola-5&lt;/tt&gt; all disks and has its ZFS pools imported also.&lt;br/&gt;
There&apos;s no start-up (boot) wrapper script that prevents the (primary) zpools of the other node from being imported.&lt;/p&gt;</comment>
                            <comment id="143010" author="adilger" created="Fri, 19 Feb 2016 18:48:01 +0000"  >&lt;p&gt;The client should not crash when accessing bad data from the network.  It is very likely that this is a result of bad data being sent from the server, but that is a separate issue.  This bug should focus on adding proper error handling to ll_prep_inode() for this case instead of crashing.&lt;/p&gt;</comment>
                            <comment id="145109" author="cliffw" created="Thu, 10 Mar 2016 04:50:51 +0000"  >&lt;p&gt;This bug has re-occurred in 2.8.0-RC5 testing &lt;/p&gt;
&lt;div class=&quot;code panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;codeContent panelContent&quot;&gt;
&lt;pre class=&quot;code-java&quot;&gt;Lustre: 3790:0:(client.c:2063:ptlrpc_expire_one_request()) Skipped 6 previous similar messages^M
Lustre: 3779:0:(client.c:2063:ptlrpc_expire_one_request()) @@@ Request sent has timed out &lt;span class=&quot;code-keyword&quot;&gt;for&lt;/span&gt; slow reply: [sent 1457583412/real 1457583412]  req@ffff880275d579c0 x1528367925364328/t0(0) o38-&amp;gt;soaked-MDT0007-mdc-ffff88082f2f4800@192.168.1.111@o2ib10:12/10 lens 520/544 e 0 to 1 dl 1457583433 ref 1 fl Rpc:XN/0/ffffffff rc 0/-1^M
Lustre: 3779:0:(client.c:2063:ptlrpc_expire_one_request()) Skipped 6 previous similar messages^M
LustreError: 11-0: soaked-MDT0006-mdc-ffff88082f2f4800: operation mds_reint to node 192.168.1.110@o2ib10 failed: rc = -19^M
Lustre: soaked-MDT0007-mdc-ffff88082f2f4800: Connection restored to 192.168.1.111@o2ib10 (at 192.168.1.111@o2ib10)^M
LustreError: 3779:0:(client.c:2874:ptlrpc_replay_interpret()) @@@ request replay timed out.^M
  req@ffff88046cbfb6c0 x1528367924440432/t412317941034(412317941034) o36-&amp;gt;soaked-MDT0006-mdc-ffff88082f2f4800@192.168.1.111@o2ib10:12/10 lens 616/192 e 1 to 1 dl 1457583622 ref 2 fl Interpret:EX/4/ffffffff rc -110/-1^M
Lustre: soaked-MDT0006-mdc-ffff88082f2f4800: Connection restored to 192.168.1.111@o2ib10 (at 192.168.1.111@o2ib10)^M
LustreError: 85659:0:(llite_lib.c:2295:ll_prep_inode()) ASSERTION( fid_is_sane(&amp;amp;md.body-&amp;gt;mbo_fid1) ) failed: ^M
LustreError: 85659:0:(llite_lib.c:2295:ll_prep_inode()) LBUG^M
Pid: 85659, comm: pct^M
^M
Call Trace:^M
 [&amp;lt;ffffffffa045c875&amp;gt;] libcfs_debug_dumpstack+0x55/0x80 [libcfs]^M
 [&amp;lt;ffffffffa045ce77&amp;gt;] lbug_with_loc+0x47/0xb0 [libcfs]^M
 [&amp;lt;ffffffffa09ff192&amp;gt;] ll_prep_inode+0x752/0xc40 [lustre]^M
 [&amp;lt;ffffffffa0747d60&amp;gt;] ? lustre_swab_mdt_body+0x0/0x130 [ptlrpc]^M
 [&amp;lt;ffffffffa0a132b2&amp;gt;] ll_new_node+0x682/0x7f0 [lustre]^M
 [&amp;lt;ffffffffa0a15b04&amp;gt;] ll_mkdir+0x104/0x220 [lustre]^M
 [&amp;lt;ffffffff8122ec0f&amp;gt;] ? security_inode_permission+0x1f/0x30^M
 [&amp;lt;ffffffff8119d759&amp;gt;] vfs_mkdir+0xd9/0x140^M
 [&amp;lt;ffffffff811a04e7&amp;gt;] sys_mkdirat+0xc7/0x1b0^M
 [&amp;lt;ffffffff8100c6f5&amp;gt;] ? math_state_restore+0x45/0x60^M
 [&amp;lt;ffffffff811a05e8&amp;gt;] sys_mkdir+0x18/0x20^M
 [&amp;lt;ffffffff8100b0d2&amp;gt;] system_call_fastpath+0x16/0x1b^M
^M
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;</comment>
                            <comment id="145111" author="cliffw" created="Thu, 10 Mar 2016 04:59:41 +0000"  >&lt;p&gt;Rest of kernel panic&lt;/p&gt;
&lt;div class=&quot;code panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;codeContent panelContent&quot;&gt;
&lt;pre class=&quot;code-java&quot;&gt;Kernel panic - not syncing: LBUG^M
Pid: 85659, comm: pct Not tainted 2.6.32-504.30.3.el6.x86_64 #1^M
Call Trace:^M
 [&amp;lt;ffffffff815293fc&amp;gt;] ? panic+0xa7/0x16f^M
 [&amp;lt;ffffffffa045cecb&amp;gt;] ? lbug_with_loc+0x9b/0xb0 [libcfs]^M
 [&amp;lt;ffffffffa09ff192&amp;gt;] ? ll_prep_inode+0x752/0xc40 [lustre]^M
 [&amp;lt;ffffffffa0747d60&amp;gt;] ? lustre_swab_mdt_body+0x0/0x130 [ptlrpc]^M
 [&amp;lt;ffffffffa0a132b2&amp;gt;] ? ll_new_node+0x682/0x7f0 [lustre]^M
 [&amp;lt;ffffffffa0a15b04&amp;gt;] ? ll_mkdir+0x104/0x220 [lustre]^M
 [&amp;lt;ffffffff8122ec0f&amp;gt;] ? security_inode_permission+0x1f/0x30^M
 [&amp;lt;ffffffff8119d759&amp;gt;] ? vfs_mkdir+0xd9/0x140^M
 [&amp;lt;ffffffff811a04e7&amp;gt;] ? sys_mkdirat+0xc7/0x1b0^M
 [&amp;lt;ffffffff8100c6f5&amp;gt;] ? math_state_restore+0x45/0x60^M
 [&amp;lt;ffffffff811a05e8&amp;gt;] ? sys_mkdir+0x18/0x20^M
 [&amp;lt;ffffffff8100b0d2&amp;gt;] ? system_call_fastpath+0x16/0x1b^M
Initializing cgroup subsys cpuset^M
Initializing cgroup subsys cpu^M
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;
&lt;p&gt;crash dump has been copied to lola:/scratch/crashdumps/lu-7798/&lt;/p&gt;</comment>
                            <comment id="168589" author="cliffw" created="Thu, 6 Oct 2016 22:53:05 +0000"  >&lt;p&gt;We appear to have hit this again, while doing ZFS metadata testing:&lt;/p&gt;
&lt;div class=&quot;code panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;codeContent panelContent&quot;&gt;
&lt;pre class=&quot;code-java&quot;&gt;Oct  6 22:18:08 spirit-17 kernel: LustreError: 36451:0:(llite_lib.c:2465:ll_prep_inode()) ASSERTION( fid_is_sane(&amp;amp;md.body-&amp;gt;mbo_fid1) ) failed:
Oct  6 22:18:08 spirit-17 kernel: LustreError: 36451:0:(llite_lib.c:2465:ll_prep_inode()) LBUG
Oct  6 22:18:08 spirit-17 kernel: Pid: 36451, comm: mdtest
Oct  6 22:18:08 spirit-17 kernel: #012Call Trace:
Oct  6 22:18:08 spirit-17 kernel: [&amp;lt;ffffffffa08ab853&amp;gt;] libcfs_debug_dumpstack+0x53/0x80 [libcfs]
Oct  6 22:18:08 spirit-17 kernel: [&amp;lt;ffffffffa08abdf5&amp;gt;] lbug_with_loc+0x45/0xc0 [libcfs]
Oct  6 22:18:08 spirit-17 kernel: [&amp;lt;ffffffffa0de7002&amp;gt;] ll_prep_inode+0xb12/0xe60 [lustre]
Oct  6 22:18:08 spirit-17 kernel: [&amp;lt;ffffffffa0df6c3f&amp;gt;] ? ll_lookup_it+0x64f/0xe20 [lustre]
Oct  6 22:18:08 spirit-17 kernel: [&amp;lt;ffffffffa0df7c2c&amp;gt;] ll_atomic_open+0x81c/0x12c0 [lustre]
Oct  6 22:18:08 spirit-17 kernel: [&amp;lt;ffffffff811ec8a1&amp;gt;] do_last+0xa11/0x1270 
Oct  6 22:18:08 spirit-17 kernel: [&amp;lt;ffffffff811eede2&amp;gt;] path_openat+0xc2/0x490
Oct  6 22:18:08 spirit-17 kernel: [&amp;lt;ffffffff811f05ab&amp;gt;] do_filp_open+0x4b/0xb0
Oct  6 22:18:08 spirit-17 kernel: [&amp;lt;ffffffff811fd147&amp;gt;] ? __alloc_fd+0xa7/0x130
Oct  6 22:18:08 spirit-17 kernel: [&amp;lt;ffffffff811ddf53&amp;gt;] do_sys_open+0xf3/0x1f0
Oct  6 22:18:08 spirit-17 kernel: [&amp;lt;ffffffff811de06e&amp;gt;] SyS_open+0x1e/0x20
Oct  6 22:18:08 spirit-17 kernel: [&amp;lt;ffffffff811de0a6&amp;gt;] SyS_creat+0x16/0x20
Oct  6 22:18:08 spirit-17 kernel: [&amp;lt;ffffffff81646a09&amp;gt;] system_call_fastpath+0x16/0x1b
Oct  6 22:18:08 spirit-17 kernel:
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;</comment>
                            <comment id="168686" author="pjones" created="Fri, 7 Oct 2016 17:13:17 +0000"  >&lt;p&gt;seems to be a duplicate of &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-7422&quot; title=&quot;incorrect ENOENT handling in mdt_intent_reint causes panic on client side&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-7422&quot;&gt;&lt;del&gt;LU-7422&lt;/del&gt;&lt;/a&gt;&lt;/p&gt;</comment>
                    </comments>
                <issuelinks>
                            <issuelinktype id="10010">
                    <name>Duplicate</name>
                                            <outwardlinks description="duplicates">
                                        <issuelink>
            <issuekey id="33109">LU-7422</issuekey>
        </issuelink>
                            </outwardlinks>
                                                        </issuelinktype>
                            <issuelinktype id="10011">
                    <name>Related</name>
                                                                <inwardlinks description="is related to">
                                        <issuelink>
            <issuekey id="34805">LU-7797</issuekey>
        </issuelink>
                            </inwardlinks>
                                    </issuelinktype>
                    </issuelinks>
                <attachments>
                            <attachment id="20455" name="console-lola-26.log.bz2" size="64679" author="heckes" created="Fri, 19 Feb 2016 16:53:11 +0000"/>
                            <attachment id="20460" name="console-lola-29.log.bz2" size="73708" author="heckes" created="Fri, 19 Feb 2016 16:53:38 +0000"/>
                            <attachment id="20456" name="lola-26-vmcore-dmesg.txt.bz2" size="30327" author="heckes" created="Fri, 19 Feb 2016 16:53:12 +0000"/>
                            <attachment id="20457" name="lola-29-vmcore-dmesg.txt.bz2" size="27479" author="heckes" created="Fri, 19 Feb 2016 16:53:12 +0000"/>
                            <attachment id="20458" name="messages-lola-26.log.bz2" size="240742" author="heckes" created="Fri, 19 Feb 2016 16:53:12 +0000"/>
                            <attachment id="20459" name="messages-lola-29.log.bz2" size="253882" author="heckes" created="Fri, 19 Feb 2016 16:53:12 +0000"/>
                    </attachments>
                <subtasks>
                    </subtasks>
                <customfields>
                                                                                                                                                                                            <customfield id="customfield_10890" key="com.atlassian.jira.plugins.jira-development-integration-plugin:devsummary">
                        <customfieldname>Development</customfieldname>
                        <customfieldvalues>
                            
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        <customfield id="customfield_10390" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                        <customfieldname>Rank</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>1|hzy1wf:</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10090" key="com.pyxis.greenhopper.jira:gh-global-rank">
                        <customfieldname>Rank (Obsolete)</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>9223372036854775807</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                                                                                            <customfield id="customfield_10060" key="com.atlassian.jira.plugin.system.customfieldtypes:select">
                        <customfieldname>Severity</customfieldname>
                        <customfieldvalues>
                                <customfieldvalue key="10022"><![CDATA[3]]></customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        </customfields>
    </item>
</channel>
</rss>