<!-- 
RSS generated by JIRA (9.4.14#940014-sha1:734e6822bbf0d45eff9af51f82432957f73aa32c) at Sat Feb 10 01:48:17 UTC 2024

It is possible to restrict the fields that are returned in this document by specifying the 'field' parameter in your request.
For example, to request only the issue key and summary append 'field=key&field=summary' to the URL of your request.
-->
<rss version="0.92" >
<channel>
    <title>Whamcloud Community JIRA</title>
    <link>https://jira.whamcloud.com</link>
    <description>This file is an XML representation of an issue</description>
    <language>en-us</language>    <build-info>
        <version>9.4.14</version>
        <build-number>940014</build-number>
        <build-date>05-12-2023</build-date>
    </build-info>


<item>
            <title>[LU-5071] statahead.c:1704:do_statahead_enter()) ASSERTION( lli-&gt;u.d.d_sai == ((void *)0) ) failed:</title>
                <link>https://jira.whamcloud.com/browse/LU-5071</link>
                <project id="10000" key="LU">Lustre</project>
                    <description>&lt;p&gt;Hello,  &lt;/p&gt;

&lt;p&gt;We are seeing the following error messages on Lustre 2.5.1 clients, and they make the system unresponsive. Multiple clients were affected by this issue.&lt;/p&gt;

&lt;p&gt;System Details: Lustre 2.5.1 / RHEL 6.5&lt;/p&gt;

&lt;p&gt;Here are the node names, timestamps, and the corresponding message for each:  &lt;br/&gt;
May  4 11:03:28 uc1n055 kernel: LustreError: 1979:0:(statahead.c:1704:do_statahead_enter()) ASSERTION( lli-&amp;gt;u.d.d_sai == ((void *)0) ) failed: &lt;br/&gt;
May  4 18:15:43 uc1n468 kernel: LustreError: 42888:0:(statahead.c:1704:do_statahead_enter()) ASSERTION( lli-&amp;gt;u.d.d_sai == ((void *)0) ) failed: &lt;br/&gt;
May  4 18:54:19 uc1n059 kernel: LustreError: 111650:0:(lovsub_lock.c:103:lovsub_lock_state()) ASSERTION( cl_lock_is_mutexed(slice-&amp;gt;cls_lock) ) failed: &lt;br/&gt;
May  9 09:21:08 uc1n129 kernel: LustreError: 93767:0:(statahead.c:1704:do_statahead_enter()) LBUG &lt;br/&gt;
May 10 09:28:14 uc1n996 kernel: LustreError: 7387:0:(osc_lock.c:1224:osc_lock_wait()) LBUG &lt;br/&gt;
May 15 07:50:57 uc1n198 kernel: LustreError: 25007:0:(statahead.c:1704:do_statahead_enter()) ASSERTION( lli-&amp;gt;u.d.d_sai == ((void *)0) ) failed:  &lt;/p&gt;</description>
                <environment></environment>
        <key id="24738">LU-5071</key>
            <summary>statahead.c:1704:do_statahead_enter()) ASSERTION( lli-&gt;u.d.d_sai == ((void *)0) ) failed:</summary>
                <type id="1" iconUrl="https://jira.whamcloud.com/secure/viewavatar?size=xsmall&amp;avatarId=11303&amp;avatarType=issuetype">Bug</type>
                                            <priority id="3" iconUrl="https://jira.whamcloud.com/images/icons/priorities/major.svg">Major</priority>
                        <status id="5" iconUrl="https://jira.whamcloud.com/images/icons/statuses/resolved.png" description="A resolution has been taken, and it is awaiting verification by reporter. From here issues are either reopened, or are closed.">Resolved</status>
                    <statusCategory id="3" key="done" colorName="success"/>
                                    <resolution id="1">Fixed</resolution>
                                        <assignee username="bobijam">Zhenyu Xu</assignee>
                                    <reporter username="rganesan@ddn.com">Rajeshwaran Ganesan</reporter>
                        <labels>
                    </labels>
                <created>Fri, 16 May 2014 13:24:48 +0000</created>
                <updated>Sat, 6 Sep 2014 13:16:47 +0000</updated>
                            <resolved>Tue, 2 Sep 2014 00:15:48 +0000</resolved>
                                    <version>Lustre 2.5.1</version>
                                                        <due></due>
                            <votes>0</votes>
                                    <watches>6</watches>
                                                                            <comments>
                            <comment id="84301" author="pjones" created="Fri, 16 May 2014 20:13:01 +0000"  >&lt;p&gt;Rajesh&lt;/p&gt;

&lt;p&gt;Could you please confirm that it is vanilla 2.5.1 on both servers and clients for this cluster? Are any other Lustre versions or patches involved?&lt;/p&gt;

&lt;p&gt;Bobijam&lt;/p&gt;

&lt;p&gt;Does this seem related to existing tickets &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-4797&quot; title=&quot;ASSERTION( cl_lock_is_mutexed(slice-&amp;gt;cls_lock) ) failed&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-4797&quot;&gt;&lt;del&gt;LU-4797&lt;/del&gt;&lt;/a&gt;/4693/4558?&lt;/p&gt;

&lt;p&gt;Thanks&lt;/p&gt;

&lt;p&gt;Peter&lt;/p&gt;</comment>
                            <comment id="84349" author="rganesan@ddn.com" created="Sun, 18 May 2014 21:14:03 +0000"  >&lt;p&gt;Servers are in 2.4.3&lt;br/&gt;
Clients are in 2.5.1&lt;/p&gt;</comment>
                            <comment id="84361" author="bobijam" created="Mon, 19 May 2014 07:51:04 +0000"  >&lt;p&gt;the do_statahead_enter() LBUG can be cured by this back port patch &lt;a href=&quot;http://review.whamcloud.com/10363&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/10363&lt;/a&gt;&lt;/p&gt;</comment>
                            <comment id="84362" author="bobijam" created="Mon, 19 May 2014 08:06:08 +0000"  >&lt;p&gt;the lovsub_lock_state() LBUG was fixed in b2_5 branch, the patch is at &lt;a href=&quot;http://review.whamcloud.com/9881&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/9881&lt;/a&gt;&lt;/p&gt;</comment>
                            <comment id="84436" author="rganesan@ddn.com" created="Tue, 20 May 2014 12:35:07 +0000"  >&lt;p&gt;Could you please provide a source RPM with the patches?&lt;/p&gt;</comment>
                            <comment id="84455" author="pjones" created="Tue, 20 May 2014 15:31:42 +0000"  >&lt;p&gt;Rajesh&lt;/p&gt;

&lt;p&gt;These are included by default. For example, &lt;a href=&quot;http://review.whamcloud.com/#/c/10363/&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/#/c/10363/&lt;/a&gt; has a link to the build on the Jenkins server &lt;a href=&quot;http://build.whamcloud.com/job/lustre-reviews/23961/&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://build.whamcloud.com/job/lustre-reviews/23961/&lt;/a&gt; Selecting the desired distro version allows you to drill into specific build artifacts - &lt;a href=&quot;http://build.whamcloud.com/job/lustre-reviews/23961/arch=i686,build_type=server,distro=el6,ib_stack=inkernel/artifact/artifacts/&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://build.whamcloud.com/job/lustre-reviews/23961/arch=i686,build_type=server,distro=el6,ib_stack=inkernel/artifact/artifacts/&lt;/a&gt; , say. &lt;/p&gt;

&lt;p&gt;Peter&lt;/p&gt;</comment>
                            <comment id="87181" author="haasken" created="Fri, 20 Jun 2014 16:52:16 +0000"  >&lt;p&gt;Zhenyu has identified two of the LBUGs as &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-3498&quot; title=&quot;most uses of IS_ERR_VALUE() are incorrect&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-3498&quot;&gt;&lt;del&gt;LU-3498&lt;/del&gt;&lt;/a&gt; and &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-4558&quot; title=&quot;Crash in cl_lock_put on racer&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-4558&quot;&gt;&lt;del&gt;LU-4558&lt;/del&gt;&lt;/a&gt;, and both of those bugs are fixed in b2_5 and master.  Since the LBUG which is in the summary of this ticket has been fixed, should this bug be resolved?&lt;/p&gt;

&lt;p&gt;I suppose there is still this LBUG:&lt;/p&gt;

&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;May 10 09:28:14 uc1n996 kernel: LustreError: 7387:0:(osc_lock.c:1224:osc_lock_wait()) LBUG 
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;But without any information other than the location of the LBUG, I think this bug isn&apos;t helpful.  There is no information about that LBUG in any of the attachments either, as far as I can tell.  If the bug will be kept open for the osc_lock_wait() LBUG, would it be possible to update the summary and description so that it doesn&apos;t look like &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-3498&quot; title=&quot;most uses of IS_ERR_VALUE() are incorrect&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-3498&quot;&gt;&lt;del&gt;LU-3498&lt;/del&gt;&lt;/a&gt;?&lt;/p&gt;</comment>
                            <comment id="87796" author="rganesan@ddn.com" created="Mon, 30 Jun 2014 16:47:10 +0000"  >&lt;p&gt;In regards to  the above comment...&lt;/p&gt;

&lt;p&gt;Is the above issue fixed in 2.5.2, or is it still an LBUG? Our customer saw the message once in the log.  &lt;/p&gt;


&lt;p&gt;May 10 09:28:14 uc1n996 kernel: LustreError:7387:0:(osc_lock.c:1224:osc_lock_wait()) LBUG&lt;br/&gt;
May 10 09:28:14 uc1n996 kernel: Pid: 7387, comm: less&lt;/p&gt;
</comment>
                            <comment id="87809" author="haasken" created="Mon, 30 Jun 2014 17:17:25 +0000"  >&lt;p&gt;Not a lot of information here to go on.  The assertion which was triggered looks like the same one as in &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-1356&quot; title=&quot;Assertion triggered in osc_lock_wait()&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-1356&quot;&gt;&lt;del&gt;LU-1356&lt;/del&gt;&lt;/a&gt;, but that bug was fixed way back in 2.3.0 and 2.1.4.&lt;/p&gt;</comment>
                            <comment id="87812" author="rganesan@ddn.com" created="Mon, 30 Jun 2014 17:32:15 +0000"  >&lt;p&gt;Does it reappear in 2.5.1 as well? &lt;/p&gt;</comment>
                            <comment id="87952" author="lixi" created="Wed, 2 Jul 2014 08:29:19 +0000"  >&lt;p&gt;ll_statahead_thread() calls ll_sai_get() at the very beginning, but does not call ll_sai_put() when ll_prep_md_op_data() fails. I think that might be the cause.&lt;/p&gt;

&lt;p&gt;Here is a patch which tries to fix this problem.&lt;br/&gt;
&lt;a href=&quot;http://review.whamcloud.com/#/c/10940/&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/#/c/10940/&lt;/a&gt;&lt;/p&gt;</comment>
                            <comment id="87991" author="haasken" created="Wed, 2 Jul 2014 17:57:37 +0000"  >&lt;p&gt;I think that the following assertion is already fixed by &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-3498&quot; title=&quot;most uses of IS_ERR_VALUE() are incorrect&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-3498&quot;&gt;&lt;del&gt;LU-3498&lt;/del&gt;&lt;/a&gt;:&lt;/p&gt;

&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;May  4 11:03:28 uc1n055 kernel: LustreError: 1979:0:(statahead.c:1698:do_statahead_enter()) can&apos;t start ll_sa thread, rc: -2816
May  4 11:03:28 uc1n055 kernel: LustreError: 1979:0:(statahead.c:1704:do_statahead_enter()) ASSERTION( lli-&amp;gt;u.d.d_sai == ((void *)0) ) failed:
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;That assertion is inside an if block which is only executed when do_statahead_enter() &lt;b&gt;thinks&lt;/b&gt; that the thread &lt;b&gt;creation&lt;/b&gt; failed.&lt;/p&gt;

&lt;p&gt;Here is the relevant portion of do_statahead_enter() in version 2.5.1 (which has the &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-3498&quot; title=&quot;most uses of IS_ERR_VALUE() are incorrect&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-3498&quot;&gt;&lt;del&gt;LU-3498&lt;/del&gt;&lt;/a&gt; bug):&lt;/p&gt;

&lt;div class=&quot;code panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;codeContent panelContent&quot;&gt;
&lt;pre class=&quot;code-java&quot;&gt;        rc = PTR_ERR(kthread_run(ll_statahead_thread, parent,
                                 &lt;span class=&quot;code-quote&quot;&gt;&quot;ll_sa_%u&quot;&lt;/span&gt;, plli-&amp;gt;lli_opendir_pid));
...
        &lt;span class=&quot;code-keyword&quot;&gt;if&lt;/span&gt; (IS_ERR_VALUE(rc)) {
...
                LASSERT(lli-&amp;gt;lli_sai == NULL);
                RETURN(-EAGAIN);
        }
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;So with the fix for &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-3498&quot; title=&quot;most uses of IS_ERR_VALUE() are incorrect&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-3498&quot;&gt;&lt;del&gt;LU-3498&lt;/del&gt;&lt;/a&gt;, this code will not be executed unless the thread creation actually fails.  If the thread creation fails, your patched code which does an extra ll_sai_put(sai) will not be executed anyway.&lt;/p&gt;

&lt;p&gt;Unless I&apos;m missing something, I don&apos;t think this patch belongs to this ticket.  It probably is more appropriate to link that patch against &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-5274&quot; title=&quot;ll_statahead_thread() may leak parent, uses parent after dput&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-5274&quot;&gt;&lt;del&gt;LU-5274&lt;/del&gt;&lt;/a&gt;.&lt;/p&gt;</comment>
                            <comment id="92941" author="bobijam" created="Tue, 2 Sep 2014 00:15:48 +0000"  >&lt;p&gt;The 2.5.1 code has a glitch: kthread_run() returns a thread id which could be a big value, and IS_ERR covers only the range (-1000, -1), which is too narrow. The 2.5.3 code does not have this issue.&lt;/p&gt;
&lt;div class=&quot;code panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;codeHeader panelHeader&quot; style=&quot;border-bottom-width: 1px;&quot;&gt;&lt;b&gt;2.5.1&lt;/b&gt;&lt;/div&gt;&lt;div class=&quot;codeContent panelContent&quot;&gt;
&lt;pre class=&quot;code-java&quot;&gt;#define IS_ERR(a) ((unsigned &lt;span class=&quot;code-object&quot;&gt;long&lt;/span&gt;)(a) &amp;gt; (unsigned &lt;span class=&quot;code-object&quot;&gt;long&lt;/span&gt;)-1000L)
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;div class=&quot;code panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;codeHeader panelHeader&quot; style=&quot;border-bottom-width: 1px;&quot;&gt;&lt;b&gt;2.5.3&lt;/b&gt;&lt;/div&gt;&lt;div class=&quot;codeContent panelContent&quot;&gt;
&lt;pre class=&quot;code-java&quot;&gt;# define IS_ERR_VALUE(x) ((x) &amp;gt;= (unsigned &lt;span class=&quot;code-object&quot;&gt;long&lt;/span&gt;)-4095)
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;</comment>
                    </comments>
                <issuelinks>
                            <issuelinktype id="10011">
                    <name>Related</name>
                                            <outwardlinks description="is related to ">
                                        <issuelink>
            <issuekey id="22914">LU-4558</issuekey>
        </issuelink>
                            </outwardlinks>
                                                                <inwardlinks description="is related to">
                                        <issuelink>
            <issuekey id="19526">LU-3498</issuekey>
        </issuelink>
            <issuelink>
            <issuekey id="25376">LU-5274</issuekey>
        </issuelink>
                            </inwardlinks>
                                    </issuelinktype>
                    </issuelinks>
                <attachments>
                            <attachment id="14955" name="ddn_lustre_showall-uc1n996_2014-05-15_192030.tar.bz2" size="210590" author="rganesan@ddn.com" created="Fri, 16 May 2014 13:25:52 +0000"/>
                            <attachment id="14954" name="messages_uc1n055" size="22398" author="rganesan@ddn.com" created="Fri, 16 May 2014 13:25:52 +0000"/>
                            <attachment id="14952" name="messages_uc1n059" size="39702" author="rganesan@ddn.com" created="Fri, 16 May 2014 13:25:52 +0000"/>
                            <attachment id="14951" name="messages_uc1n129" size="11106" author="rganesan@ddn.com" created="Fri, 16 May 2014 13:25:52 +0000"/>
                            <attachment id="14950" name="messages_uc1n198" size="177699" author="rganesan@ddn.com" created="Fri, 16 May 2014 13:25:52 +0000"/>
                            <attachment id="14953" name="messages_uc1n468" size="31243" author="rganesan@ddn.com" created="Fri, 16 May 2014 13:25:52 +0000"/>
                    </attachments>
                <subtasks>
                    </subtasks>
                <customfields>
                                                                                                                                                                                            <customfield id="customfield_10890" key="com.atlassian.jira.plugins.jira-development-integration-plugin:devsummary">
                        <customfieldname>Development</customfieldname>
                        <customfieldvalues>
                            
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        <customfield id="customfield_10390" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                        <customfieldname>Rank</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>1|hzwmmv:</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10090" key="com.pyxis.greenhopper.jira:gh-global-rank">
                        <customfieldname>Rank (Obsolete)</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>14002</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                                                                                            <customfield id="customfield_10060" key="com.atlassian.jira.plugin.system.customfieldtypes:select">
                        <customfieldname>Severity</customfieldname>
                        <customfieldvalues>
                                <customfieldvalue key="10021"><![CDATA[2]]></customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        </customfields>
    </item>
</channel>
</rss>