<!-- 
RSS generated by JIRA (9.4.14#940014-sha1:734e6822bbf0d45eff9af51f82432957f73aa32c) at Sat Feb 10 01:35:29 UTC 2024

It is possible to restrict the fields that are returned in this document by specifying the 'field' parameter in your request.
For example, to request only the issue key and summary append 'field=key&field=summary' to the URL of your request.
-->
<rss version="0.92" >
<channel>
    <title>Whamcloud Community JIRA</title>
    <link>https://jira.whamcloud.com</link>
    <description>This file is an XML representation of an issue</description>
    <language>en-us</language>    <build-info>
        <version>9.4.14</version>
        <build-number>940014</build-number>
        <build-date>05-12-2023</build-date>
    </build-info>


<item>
            <title>[LU-3621] during failover testing, statahead hangs</title>
                <link>https://jira.whamcloud.com/browse/LU-3621</link>
                <project id="10000" key="LU">Lustre</project>
                    <description>&lt;p&gt;KIT has recently been doing failover testing with 2.4.0 clients and 2.1.5 servers. During a tree copy and deletion test, the client hung in the deletion phase with a lot of statahead stack traces, like:&lt;br/&gt;
Jul 15 15:37:13 ucbn001 kernel: INFO: task ll_sa_71331:90417 blocked for more than 120 seconds.&lt;br/&gt;
Jul 15 15:37:13 ucbn001 kernel: &quot;echo 0 &amp;gt; /proc/sys/kernel/hung_task_timeout_secs&quot; disables this message.&lt;br/&gt;
Jul 15 15:37:13 ucbn001 kernel: ll_sa_71331   D 000000000000001e     0 90417      2 0x00000080&lt;br/&gt;
Jul 15 15:37:13 ucbn001 kernel: ffff881030ce7b10 0000000000000046 ffff881030ce7a80 ffffffff00000050&lt;br/&gt;
Jul 15 15:37:13 ucbn001 kernel: ffff881030ce7ab0 0000000000000246 0000000000000010 ffff88086aac23c0&lt;br/&gt;
Jul 15 15:37:13 ucbn001 kernel: ffff881042c59af8 ffff881030ce7fd8 000000000000fb88 ffff881042c59af8&lt;br/&gt;
Jul 15 15:37:13 ucbn001 kernel: Call Trace:&lt;br/&gt;
Jul 15 15:37:13 ucbn001 kernel: &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff81096f8e&amp;gt;&amp;#93;&lt;/span&gt; ? prepare_to_wait+0x4e/0x80&lt;br/&gt;
Jul 15 15:37:13 ucbn001 kernel: &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff8119c928&amp;gt;&amp;#93;&lt;/span&gt; __wait_on_freeing_inode+0x98/0xc0&lt;br/&gt;
Jul 15 15:37:13 ucbn001 kernel: &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff81096ce0&amp;gt;&amp;#93;&lt;/span&gt; ? wake_bit_function+0x0/0x50&lt;br/&gt;
Jul 15 15:37:13 ucbn001 kernel: &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0b1f630&amp;gt;&amp;#93;&lt;/span&gt; ? ll_test_inode+0x0/0x90 &lt;span class=&quot;error&quot;&gt;&amp;#91;lustre&amp;#93;&lt;/span&gt;&lt;br/&gt;
Jul 15 15:37:13 ucbn001 kernel: &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff8119cad4&amp;gt;&amp;#93;&lt;/span&gt; find_inode+0x64/0x90&lt;br/&gt;
Jul 15 15:37:13 ucbn001 kernel: &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0b1f630&amp;gt;&amp;#93;&lt;/span&gt; ? ll_test_inode+0x0/0x90 &lt;span class=&quot;error&quot;&gt;&amp;#91;lustre&amp;#93;&lt;/span&gt;&lt;br/&gt;
Jul 15 15:37:13 ucbn001 kernel: &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff8119dc1d&amp;gt;&amp;#93;&lt;/span&gt; ifind+0x4d/0xd0&lt;br/&gt;
Jul 15 15:37:13 ucbn001 kernel: &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0b1f630&amp;gt;&amp;#93;&lt;/span&gt; ? ll_test_inode+0x0/0x90 &lt;span class=&quot;error&quot;&gt;&amp;#91;lustre&amp;#93;&lt;/span&gt;&lt;br/&gt;
Jul 15 15:37:13 ucbn001 kernel: &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff8119e099&amp;gt;&amp;#93;&lt;/span&gt; iget5_locked+0x59/0x1b0&lt;br/&gt;
Jul 15 15:37:13 ucbn001 kernel: &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0b1fe00&amp;gt;&amp;#93;&lt;/span&gt; ? ll_set_inode+0x0/0x1a0 &lt;span class=&quot;error&quot;&gt;&amp;#91;lustre&amp;#93;&lt;/span&gt;&lt;br/&gt;
Jul 15 15:37:13 ucbn001 kernel: &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0b1fb05&amp;gt;&amp;#93;&lt;/span&gt; ll_iget+0x65/0x360 &lt;span class=&quot;error&quot;&gt;&amp;#91;lustre&amp;#93;&lt;/span&gt;&lt;br/&gt;
Jul 15 15:37:13 ucbn001 kernel: &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0bf88d3&amp;gt;&amp;#93;&lt;/span&gt; ? lmv_get_lustre_md+0x153/0x3d0 &lt;span class=&quot;error&quot;&gt;&amp;#91;lmv&amp;#93;&lt;/span&gt;&lt;br/&gt;
Jul 15 15:37:13 ucbn001 kernel: &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa076d9a1&amp;gt;&amp;#93;&lt;/span&gt; ? ldlm_revalidate_lock_handle+0x81/0x250 &lt;span class=&quot;error&quot;&gt;&amp;#91;ptlrpc&amp;#93;&lt;/span&gt;&lt;br/&gt;
Jul 15 15:37:13 ucbn001 kernel: &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0b13080&amp;gt;&amp;#93;&lt;/span&gt; ll_prep_inode+0x6e0/0xf60 &lt;span class=&quot;error&quot;&gt;&amp;#91;lustre&amp;#93;&lt;/span&gt;&lt;br/&gt;
Jul 15 15:37:13 ucbn001 kernel: &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0c0d8c4&amp;gt;&amp;#93;&lt;/span&gt; ? lmv_revalidate_lock+0x2b4/0x550 &lt;span class=&quot;error&quot;&gt;&amp;#91;lmv&amp;#93;&lt;/span&gt;&lt;br/&gt;
Jul 15 15:37:13 ucbn001 kernel: &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0b36a47&amp;gt;&amp;#93;&lt;/span&gt; ll_post_statahead+0x2f7/0xa80 &lt;span class=&quot;error&quot;&gt;&amp;#91;lustre&amp;#93;&lt;/span&gt;&lt;br/&gt;
Jul 15 15:37:13 ucbn001 kernel: &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0b3b4b8&amp;gt;&amp;#93;&lt;/span&gt; ll_statahead_thread+0xd38/0xfa0 &lt;span class=&quot;error&quot;&gt;&amp;#91;lustre&amp;#93;&lt;/span&gt;&lt;br/&gt;
Jul 15 15:37:13 ucbn001 kernel: &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff81063310&amp;gt;&amp;#93;&lt;/span&gt; ? default_wake_function+0x0/0x20&lt;br/&gt;
Jul 15 15:37:13 ucbn001 kernel: &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0b3a780&amp;gt;&amp;#93;&lt;/span&gt; ? ll_statahead_thread+0x0/0xfa0 &lt;span class=&quot;error&quot;&gt;&amp;#91;lustre&amp;#93;&lt;/span&gt;&lt;br/&gt;
Jul 15 15:37:13 ucbn001 kernel: &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff8100c0ca&amp;gt;&amp;#93;&lt;/span&gt; child_rip+0xa/0x20&lt;br/&gt;
Jul 15 15:37:13 ucbn001 kernel: &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0b3a780&amp;gt;&amp;#93;&lt;/span&gt; ? ll_statahead_thread+0x0/0xfa0 &lt;span class=&quot;error&quot;&gt;&amp;#91;lustre&amp;#93;&lt;/span&gt;&lt;br/&gt;
Jul 15 15:37:13 ucbn001 kernel: &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0b3a780&amp;gt;&amp;#93;&lt;/span&gt; ? ll_statahead_thread+0x0/0xfa0 &lt;span class=&quot;error&quot;&gt;&amp;#91;lustre&amp;#93;&lt;/span&gt;&lt;br/&gt;
Jul 15 15:37:13 ucbn001 kernel: &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff8100c0c0&amp;gt;&amp;#93;&lt;/span&gt; ? child_rip+0x0/0x20&lt;/p&gt;

&lt;p&gt;The client hung until the directory was rm&apos;d on a different client, at which point the rm finished on the original client. There don&apos;t appear to be any messages in the server logs, except failover messages. &lt;/p&gt;

&lt;p&gt;Here is a description of the test from KIT:&lt;/p&gt;
&lt;blockquote&gt;

&lt;p&gt;I did 2 failover tests.&lt;/p&gt;

&lt;p&gt;First I rebooted oss2 at Jul 15 14:30. During the reboot the file tree&lt;br/&gt;
was copied multiple times. The delete process started around&lt;br/&gt;
Jul 15 14:35 on ucbn001. The following message corresponds to the&lt;br/&gt;
hanging situation:&lt;br/&gt;
Jul 15 15:37:13 ucbn001 kernel: INFO: task ll_sa_71331:90417 blocked for &lt;br/&gt;
more than 120 seconds.&lt;br/&gt;
Ctrl+C did not stop the rm process. Around Jul 15 15:44 I deleted the&lt;br/&gt;
rest of the file tree on ucbn006 and the rm process completed.&lt;/p&gt;

&lt;p&gt;Second I started all tests again and rebooted mds2 near Jul 15 15:55.&lt;br/&gt;
This time the delete process started around Jul 15 16:35 on ucbn001.&lt;br/&gt;
The following message corresponds to the hanging situation:&lt;br/&gt;
Jul 15 16:37:13 ucbn001 kernel: INFO: task rm:91092 blocked for more &lt;br/&gt;
than 120 seconds.&lt;br/&gt;
This time I waited for half an hour but the rm did not complete.&lt;br/&gt;
Around Jul 15 17:00 I deleted the rest of the file tree on ucbn006&lt;br/&gt;
and the rm process completed.&lt;/p&gt;&lt;/blockquote&gt;

&lt;p&gt;Are there any debug logs we should try to get?&lt;/p&gt;

&lt;p&gt;Thanks.&lt;/p&gt;</description>
                <environment>RHEL 6.4 clients</environment>
        <key id="19951">LU-3621</key>
            <summary>during failover testing, statahead hangs</summary>
                <type id="1" iconUrl="https://jira.whamcloud.com/secure/viewavatar?size=xsmall&amp;avatarId=11303&amp;avatarType=issuetype">Bug</type>
                                            <priority id="3" iconUrl="https://jira.whamcloud.com/images/icons/priorities/major.svg">Major</priority>
                        <status id="5" iconUrl="https://jira.whamcloud.com/images/icons/statuses/resolved.png" description="A resolution has been taken, and it is awaiting verification by reporter. From here issues are either reopened, or are closed.">Resolved</status>
                    <statusCategory id="3" key="done" colorName="success"/>
                                    <resolution id="5">Cannot Reproduce</resolution>
                                        <assignee username="bobijam">Zhenyu Xu</assignee>
                                    <reporter username="rganesan@ddn.com">Rajeshwaran Ganesan</reporter>
                        <labels>
                    </labels>
                <created>Tue, 23 Jul 2013 15:19:39 +0000</created>
                <updated>Mon, 8 Aug 2016 17:01:38 +0000</updated>
                            <resolved>Mon, 8 Aug 2016 16:48:30 +0000</resolved>
                                    <version>Lustre 2.4.0</version>
                    <version>Lustre 2.1.5</version>
                                                        <due></due>
                            <votes>0</votes>
                                    <watches>7</watches>
                                                                            <comments>
                            <comment id="62871" author="pjones" created="Wed, 24 Jul 2013 01:51:58 +0000"  >&lt;p&gt;Bobijam&lt;/p&gt;

&lt;p&gt;Could you please help with this one?&lt;/p&gt;

&lt;p&gt;Thanks&lt;/p&gt;

&lt;p&gt;Peter&lt;/p&gt;</comment>
                            <comment id="63025" author="kitwestneat" created="Fri, 26 Jul 2013 13:28:39 +0000"  >&lt;p&gt;I was wondering what the status of this was. Is there anything else we should get?&lt;/p&gt;

&lt;p&gt;Thanks&lt;/p&gt;</comment>
                            <comment id="63032" author="bobijam" created="Fri, 26 Jul 2013 14:12:18 +0000"  >&lt;p&gt;Can you get all processes stack on ucbn001?&lt;/p&gt;</comment>
                            <comment id="63033" author="kitwestneat" created="Fri, 26 Jul 2013 14:18:48 +0000"  >&lt;p&gt;Hi Xu,&lt;/p&gt;

&lt;p&gt;I have asked the customer to reproduce the issue and capture sysrq-t.&lt;/p&gt;

&lt;p&gt;Thanks.&lt;/p&gt;</comment>
                            <comment id="161136" author="chunteraa" created="Mon, 8 Aug 2016 16:45:57 +0000"  >&lt;p&gt;Please close this case. Customer upgraded to b2.5; unable to reproduce after upgrade.&lt;/p&gt;
</comment>
                            <comment id="161137" author="pjones" created="Mon, 8 Aug 2016 16:48:30 +0000"  >&lt;p&gt;ok - thanks Chris&lt;/p&gt;</comment>
                    </comments>
                <issuelinks>
                            <issuelinktype id="10011">
                    <name>Related</name>
                                                                <inwardlinks description="is related to">
                                                        </inwardlinks>
                                    </issuelinktype>
                    </issuelinks>
                <attachments>
                            <attachment id="13227" name="mds1.kern" size="4126" author="kitwestneat" created="Tue, 23 Jul 2013 15:19:39 +0000"/>
                            <attachment id="13226" name="mds2.kern" size="4135" author="kitwestneat" created="Tue, 23 Jul 2013 15:19:39 +0000"/>
                            <attachment id="13225" name="oss1.kern" size="4513" author="kitwestneat" created="Tue, 23 Jul 2013 15:19:39 +0000"/>
                            <attachment id="13224" name="oss2.kern" size="5692" author="kitwestneat" created="Tue, 23 Jul 2013 15:19:39 +0000"/>
                            <attachment id="13223" name="ucbn001.log" size="30392" author="kitwestneat" created="Tue, 23 Jul 2013 15:19:39 +0000"/>
                    </attachments>
                <subtasks>
                    </subtasks>
                <customfields>
                                                                                                                                                                                            <customfield id="customfield_10890" key="com.atlassian.jira.plugins.jira-development-integration-plugin:devsummary">
                        <customfieldname>Development</customfieldname>
                        <customfieldvalues>
                            
                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10490" key="com.atlassian.jira.plugin.system.customfieldtypes:datepicker">
                        <customfieldname>End date</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>Fri, 9 May 2014 15:19:39 +0000</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                            <customfield id="customfield_10390" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                        <customfieldname>Rank</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>1|hzvvxz:</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10090" key="com.pyxis.greenhopper.jira:gh-global-rank">
                        <customfieldname>Rank (Obsolete)</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>9320</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                                                                                            <customfield id="customfield_10060" key="com.atlassian.jira.plugin.system.customfieldtypes:select">
                        <customfieldname>Severity</customfieldname>
                        <customfieldvalues>
                                <customfieldvalue key="10022"><![CDATA[3]]></customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                        <customfield id="customfield_10493" key="com.atlassian.jira.plugin.system.customfieldtypes:datepicker">
                        <customfieldname>Start date</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>Tue, 23 Jul 2013 15:19:39 +0000</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                    </customfields>
    </item>
</channel>
</rss>