<!-- 
RSS generated by JIRA (9.4.14#940014-sha1:734e6822bbf0d45eff9af51f82432957f73aa32c) at Sat Feb 10 02:05:06 UTC 2024

It is possible to restrict the fields that are returned in this document by specifying the 'field' parameter in your request.
For example, to request only the issue key and summary append 'field=key&field=summary' to the URL of your request.
-->
<rss version="0.92" >
<channel>
    <title>Whamcloud Community JIRA</title>
    <link>https://jira.whamcloud.com</link>
    <description>This file is an XML representation of an issue</description>
    <language>en-us</language>    <build-info>
        <version>9.4.14</version>
        <build-number>940014</build-number>
        <build-date>05-12-2023</build-date>
    </build-info>


<item>
            <title>[LU-6996] osd_ea_lookup_rec assertion</title>
                <link>https://jira.whamcloud.com/browse/LU-6996</link>
                <project id="10000" key="LU">Lustre</project>
                    <description>&lt;p&gt;This morning a production MDS hit an assertion:&lt;/p&gt;

&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;&amp;lt;0&amp;gt;[2551157.740086] LustreError: 14993:0:(osd_handler.c:4071:osd_ea_lookup_rec()) ASSERTION( dir-&amp;gt;i_op != ((void *)0) &amp;amp;&amp;amp; dir-&amp;gt;i_op-&amp;gt;lookup != ((void *)0) ) failed: 
&amp;lt;0&amp;gt;[2551157.756253] LustreError: 14993:0:(osd_handler.c:4071:osd_ea_lookup_rec()) LBUG
&amp;lt;4&amp;gt;[2551157.764766] Pid: 14993, comm: mdt01_094
&amp;lt;4&amp;gt;[2551157.769360] 
&amp;lt;4&amp;gt;[2551157.769361] Call Trace:
&amp;lt;4&amp;gt;[2551157.774374]  [&amp;lt;ffffffffa0409895&amp;gt;] libcfs_debug_dumpstack+0x55/0x80 [libcfs]
&amp;lt;4&amp;gt;[2551157.782474]  [&amp;lt;ffffffffa0409e97&amp;gt;] lbug_with_loc+0x47/0xb0 [libcfs]
&amp;lt;4&amp;gt;[2551157.789707]  [&amp;lt;ffffffffa0ca8fcf&amp;gt;] osd_index_ea_lookup+0x6ff/0x8a0 [osd_ldiskfs]
&amp;lt;4&amp;gt;[2551157.798308]  [&amp;lt;ffffffffa0d0dde0&amp;gt;] ? mdt_blocking_ast+0x0/0x2a0 [mdt]
&amp;lt;4&amp;gt;[2551157.805733]  [&amp;lt;ffffffffa088c7c0&amp;gt;] ? lod_index_lookup+0x0/0x30 [lod]
&amp;lt;4&amp;gt;[2551157.813056]  [&amp;lt;ffffffffa088c7e5&amp;gt;] lod_index_lookup+0x25/0x30 [lod]
&amp;lt;4&amp;gt;[2551157.820291]  [&amp;lt;ffffffffa0dd0daa&amp;gt;] __mdd_lookup+0x24a/0x440 [mdd]
&amp;lt;4&amp;gt;[2551157.827325]  [&amp;lt;ffffffffa0dd1599&amp;gt;] mdd_lookup+0x39/0xe0 [mdd]
&amp;lt;4&amp;gt;[2551157.833977]  [&amp;lt;ffffffffa0d3bee5&amp;gt;] ? mdt_name+0x35/0xc0 [mdt]
&amp;lt;4&amp;gt;[2551157.840629]  [&amp;lt;ffffffffa0d44b09&amp;gt;] mdt_reint_open+0xb69/0x21a0 [mdt]
&amp;lt;4&amp;gt;[2551157.847959]  [&amp;lt;ffffffffa0426376&amp;gt;] ? upcall_cache_get_entry+0x296/0x880 [libcfs]
&amp;lt;4&amp;gt;[2551157.856570]  [&amp;lt;ffffffffa05c7a80&amp;gt;] ? lu_ucred+0x20/0x30 [obdclass]
&amp;lt;4&amp;gt;[2551157.863705]  [&amp;lt;ffffffffa0d2d481&amp;gt;] mdt_reint_rec+0x41/0xe0 [mdt]
&amp;lt;4&amp;gt;[2551157.870643]  [&amp;lt;ffffffffa0d12ed3&amp;gt;] mdt_reint_internal+0x4c3/0x780 [mdt]
&amp;lt;4&amp;gt;[2551157.878254]  [&amp;lt;ffffffffa0d1345e&amp;gt;] mdt_intent_reint+0x1ee/0x410 [mdt]
&amp;lt;4&amp;gt;[2551157.885669]  [&amp;lt;ffffffffa0d10c3e&amp;gt;] mdt_intent_policy+0x3ae/0x770 [mdt]
&amp;lt;4&amp;gt;[2551157.893212]  [&amp;lt;ffffffffa06e42e5&amp;gt;] ldlm_lock_enqueue+0x135/0x980 [ptlrpc]
&amp;lt;4&amp;gt;[2551157.901044]  [&amp;lt;ffffffffa070de2b&amp;gt;] ldlm_handle_enqueue0+0x51b/0x10c0 [ptlrpc]
&amp;lt;4&amp;gt;[2551157.909336]  [&amp;lt;ffffffffa0d11106&amp;gt;] mdt_enqueue+0x46/0xe0 [mdt]
&amp;lt;4&amp;gt;[2551157.916083]  [&amp;lt;ffffffffa0d15ada&amp;gt;] mdt_handle_common+0x52a/0x1470 [mdt]
&amp;lt;4&amp;gt;[2551157.923701]  [&amp;lt;ffffffffa0d52595&amp;gt;] mds_regular_handle+0x15/0x20 [mdt]
&amp;lt;4&amp;gt;[2551157.931144]  [&amp;lt;ffffffffa073cf25&amp;gt;] ptlrpc_server_handle_request+0x385/0xc00 [ptlrpc]
&amp;lt;4&amp;gt;[2551157.940128]  [&amp;lt;ffffffffa040a4ce&amp;gt;] ? cfs_timer_arm+0xe/0x10 [libcfs]
&amp;lt;4&amp;gt;[2551157.947452]  [&amp;lt;ffffffffa041b7c5&amp;gt;] ? lc_watchdog_touch+0x65/0x170 [libcfs]
&amp;lt;4&amp;gt;[2551157.955380]  [&amp;lt;ffffffffa07358f9&amp;gt;] ? ptlrpc_wait_event+0xa9/0x2d0 [ptlrpc]
&amp;lt;4&amp;gt;[2551157.963287]  [&amp;lt;ffffffff810546b9&amp;gt;] ? __wake_up_common+0x59/0x90
&amp;lt;4&amp;gt;[2551157.970142]  [&amp;lt;ffffffffa073f6ed&amp;gt;] ptlrpc_main+0xaed/0x1930 [ptlrpc]
&amp;lt;4&amp;gt;[2551157.977487]  [&amp;lt;ffffffffa073ec00&amp;gt;] ? ptlrpc_main+0x0/0x1930 [ptlrpc]
&amp;lt;4&amp;gt;[2551157.984809]  [&amp;lt;ffffffff8109abf6&amp;gt;] kthread+0x96/0xa0
&amp;lt;4&amp;gt;[2551157.990580]  [&amp;lt;ffffffff8100c20a&amp;gt;] child_rip+0xa/0x20
&amp;lt;4&amp;gt;[2551157.998367]  [&amp;lt;ffffffff8109ab60&amp;gt;] ? kthread+0x0/0xa0
&amp;lt;4&amp;gt;[2551158.004229]  [&amp;lt;ffffffff8100c200&amp;gt;] ? child_rip+0x0/0x20
&amp;lt;4&amp;gt;[2551158.010272] 
&amp;lt;0&amp;gt;[2551158.012746] Kernel panic - not syncing: LBUG
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;</description>
                <environment>2.5.3-2.6.32_431.29.2.el6.atlas.x86_64_g57d5785</environment>
        <key id="31455">LU-6996</key>
            <summary>osd_ea_lookup_rec assertion</summary>
                <type id="1" iconUrl="https://jira.whamcloud.com/secure/viewavatar?size=xsmall&amp;avatarId=11303&amp;avatarType=issuetype">Bug</type>
                                            <priority id="3" iconUrl="https://jira.whamcloud.com/images/icons/priorities/major.svg">Major</priority>
                        <status id="5" iconUrl="https://jira.whamcloud.com/images/icons/statuses/resolved.png" description="A resolution has been taken, and it is awaiting verification by reporter. From here issues are either reopened, or are closed.">Resolved</status>
                    <statusCategory id="3" key="done" colorName="success"/>
                                    <resolution id="1">Fixed</resolution>
                                        <assignee username="bzzz">Alex Zhuravlev</assignee>
                                    <reporter username="ezell">Matt Ezell</reporter>
                        <labels>
                    </labels>
                <created>Wed, 12 Aug 2015 20:39:03 +0000</created>
                <updated>Sat, 16 Jan 2016 06:04:00 +0000</updated>
                            <resolved>Tue, 6 Oct 2015 14:22:50 +0000</resolved>
                                    <version>Lustre 2.5.3</version>
                                    <fixVersion>Lustre 2.8.0</fixVersion>
                                        <due></due>
                            <votes>0</votes>
                                    <watches>11</watches>
                                                                            <comments>
                            <comment id="123992" author="ezell" created="Wed, 12 Aug 2015 20:45:25 +0000"  >&lt;p&gt;We did get a crash dump.  The code surrounding the assertion is:&lt;/p&gt;
&lt;div class=&quot;code panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;codeHeader panelHeader&quot; style=&quot;border-bottom-width: 1px;&quot;&gt;&lt;b&gt;lustre/osd-ldiskfs/osd_handler.c&lt;/b&gt;&lt;/div&gt;&lt;div class=&quot;codeContent panelContent&quot;&gt;
&lt;pre class=&quot;code-java&quot;&gt;&lt;span class=&quot;code-keyword&quot;&gt;static&lt;/span&gt; &lt;span class=&quot;code-object&quot;&gt;int&lt;/span&gt; osd_ea_lookup_rec(&lt;span class=&quot;code-keyword&quot;&gt;const&lt;/span&gt; struct lu_env *env, struct osd_object *obj,
                             struct dt_rec *rec, &lt;span class=&quot;code-keyword&quot;&gt;const&lt;/span&gt; struct dt_key *key)
{
        struct inode               *dir    = obj-&amp;gt;oo_inode;
...
        LASSERT(dir-&amp;gt;i_op != NULL &amp;amp;&amp;amp; dir-&amp;gt;i_op-&amp;gt;lookup != NULL);
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;I loaded this in crash to try to see if it&apos;s i_op or lookup that&apos;s NULL.&lt;/p&gt;

&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;crash&amp;gt; xbt|grep obj
xbt: cannot find debuginfo for module &apos;libcfs&apos;
        obj = ffff8815aca40b40
        pobj = ffff881bd96f23c0
        object_locked = 0
crash&amp;gt; mod -s osd_ldiskfs
     MODULE       NAME                        SIZE  OBJECT FILE
ffffffffa0ceaee0  osd_ldiskfs               454984  /lib/modules/2.6.32-431.29.2.el6.atlas.x86_64/extra/kernel/fs/lustre/osd_ldiskfs.ko 
crash&amp;gt; osd_object.oo_inode ffff8815aca40b40
  oo_inode = 0xffff882b54a72a80
crash&amp;gt; inode.i_op 0xffff882b54a72a80
  i_op = 0xffffffffa0bf0c40
crash&amp;gt; inode_operations.lookup 0xffffffffa0bf0c40
  lookup = 0xffffffffa0bd31f0
crash&amp;gt; dis 0xffffffffa0bd31f0
0xffffffffa0bd31f0 &amp;lt;ldiskfs_lookup&amp;gt;:    push   %rbp
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;Both look non-NULL and valid.&lt;/p&gt;</comment>
                            <comment id="123998" author="yujian" created="Wed, 12 Aug 2015 22:26:49 +0000"  >&lt;p&gt;Hi Matt,&lt;/p&gt;

&lt;p&gt;There might be a race. Could you please dump the stack traces for all tasks? Thank you.&lt;/p&gt;</comment>
                            <comment id="124032" author="pjones" created="Thu, 13 Aug 2015 13:23:56 +0000"  >&lt;p&gt;Alex&lt;/p&gt;

&lt;p&gt;Could you please look into this issue?&lt;/p&gt;

&lt;p&gt;Thanks&lt;/p&gt;

&lt;p&gt;Peter&lt;/p&gt;</comment>
                            <comment id="124033" author="ezell" created="Thu, 13 Aug 2015 13:28:05 +0000"  >&lt;p&gt;The output of &apos;bt -a&apos; and &apos;ps&apos; from crash.  Let me know if you need the backtrace from any idle PIDs.&lt;/p&gt;</comment>
                            <comment id="124059" author="yujian" created="Thu, 13 Aug 2015 15:51:26 +0000"  >&lt;p&gt;Thank you, Matt. Alex will look into the stack traces and ask you for help getting more logs if needed.&lt;/p&gt;</comment>
                            <comment id="124623" author="gerrit" created="Wed, 19 Aug 2015 17:39:41 +0000"  >&lt;p&gt;Fan Yong (fan.yong@intel.com) uploaded a new patch: &lt;a href=&quot;http://review.whamcloud.com/16026&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/16026&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-6996&quot; title=&quot;osd_ea_lookup_rec assertion&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-6996&quot;&gt;&lt;del&gt;LU-6996&lt;/del&gt;&lt;/a&gt; osd-ldiskfs: handle stale OI mapping cache&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: b2_5&lt;br/&gt;
Current Patch Set: 1&lt;br/&gt;
Commit: 4bb351a4e4c76c8538532dcbfb7829dfea35aed0&lt;/p&gt;</comment>
                            <comment id="125335" author="yong.fan" created="Thu, 27 Aug 2015 06:27:39 +0000"  >&lt;p&gt;On the server side, an RPC service thread may cache an OI mapping on its stack; that OI mapping becomes invalid if another RPC service thread concurrently removes the object. If the RPC service thread then uses the cached OI mapping and finds an inode that has been unlinked and reused by another object with no LMA generated yet, the original osd_check_lma() would wrongly regard it as the expected local object. Such a case is one of the possible reasons for the failure in this ticket.&lt;/p&gt;

&lt;p&gt;Anyway, this is just one possible reason; with the given stack/logs, we cannot say that the patch will fix the failure completely. Please apply the patch and see what happens.&lt;/p&gt;</comment>
                            <comment id="125642" author="gerrit" created="Mon, 31 Aug 2015 04:27:11 +0000"  >&lt;p&gt;Fan Yong (fan.yong@intel.com) uploaded a new patch: &lt;a href=&quot;http://review.whamcloud.com/16137&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/16137&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-6996&quot; title=&quot;osd_ea_lookup_rec assertion&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-6996&quot;&gt;&lt;del&gt;LU-6996&lt;/del&gt;&lt;/a&gt; osd: test b2_5 base&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: b2_5&lt;br/&gt;
Current Patch Set: 1&lt;br/&gt;
Commit: f78965727e7a86786cdb9fe7a15389721be1b1bd&lt;/p&gt;</comment>
                            <comment id="125838" author="gerrit" created="Tue, 1 Sep 2015 05:11:41 +0000"  >&lt;p&gt;Fan Yong (fan.yong@intel.com) uploaded a new patch: &lt;a href=&quot;http://review.whamcloud.com/16157&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/16157&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-6996&quot; title=&quot;osd_ea_lookup_rec assertion&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-6996&quot;&gt;&lt;del&gt;LU-6996&lt;/del&gt;&lt;/a&gt; osd-ldiskfs: handle stale OI mapping cache&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: master&lt;br/&gt;
Current Patch Set: 1&lt;br/&gt;
Commit: ea2bb60f6b85e53ea43ba240ef1c7e3ef809595c&lt;/p&gt;</comment>
                            <comment id="126123" author="yujian" created="Thu, 3 Sep 2015 01:28:05 +0000"  >&lt;p&gt;Hi Alex,&lt;/p&gt;

&lt;p&gt;Nasf is working on the patches handling stale OI mapping cache but he was unsure of the root cause of the original issue in this ticket. Could you please give some more suggestions here?&lt;/p&gt;</comment>
                            <comment id="129405" author="gerrit" created="Tue, 6 Oct 2015 01:56:32 +0000"  >&lt;p&gt;Oleg Drokin (oleg.drokin@intel.com) merged in patch &lt;a href=&quot;http://review.whamcloud.com/16157/&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/16157/&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-6996&quot; title=&quot;osd_ea_lookup_rec assertion&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-6996&quot;&gt;&lt;del&gt;LU-6996&lt;/del&gt;&lt;/a&gt; osd-ldiskfs: handle stale OI mapping cache&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: master&lt;br/&gt;
Current Patch Set: &lt;br/&gt;
Commit: 7aaa680b7f22e7dfaac8af38b78d89164a94e842&lt;/p&gt;</comment>
                            <comment id="129458" author="pjones" created="Tue, 6 Oct 2015 14:22:50 +0000"  >&lt;p&gt;Landed for 2.8&lt;/p&gt;</comment>
                    </comments>
                <issuelinks>
                            <issuelinktype id="10011">
                    <name>Related</name>
                                            <outwardlinks description="is related to ">
                                                        </outwardlinks>
                                                        </issuelinktype>
                    </issuelinks>
                <attachments>
                            <attachment id="18610" name="LU-6996-bt-a.txt" size="13820" author="ezell" created="Thu, 13 Aug 2015 13:28:04 +0000"/>
                            <attachment id="18611" name="LU-6996-ps.txt" size="380390" author="ezell" created="Thu, 13 Aug 2015 13:28:05 +0000"/>
                    </attachments>
                <subtasks>
                    </subtasks>
                <customfields>
                                                                                                                                                                                            <customfield id="customfield_10890" key="com.atlassian.jira.plugins.jira-development-integration-plugin:devsummary">
                        <customfieldname>Development</customfieldname>
                        <customfieldvalues>
                            
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        <customfield id="customfield_10390" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                        <customfieldname>Rank</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>1|hzxkd3:</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10090" key="com.pyxis.greenhopper.jira:gh-global-rank">
                        <customfieldname>Rank (Obsolete)</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>9223372036854775807</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                                                                                            <customfield id="customfield_10060" key="com.atlassian.jira.plugin.system.customfieldtypes:select">
                        <customfieldname>Severity</customfieldname>
                        <customfieldvalues>
                                <customfieldvalue key="10022"><![CDATA[3]]></customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        </customfields>
    </item>
</channel>
</rss>