<!-- 
RSS generated by JIRA (9.4.14#940014-sha1:734e6822bbf0d45eff9af51f82432957f73aa32c) at Sat Feb 10 01:17:22 UTC 2024

It is possible to restrict the fields that are returned in this document by specifying the 'field' parameter in your request.
For example, to request only the issue key and summary append 'field=key&field=summary' to the URL of your request.
-->
<rss version="0.92" >
<channel>
    <title>Whamcloud Community JIRA</title>
    <link>https://jira.whamcloud.com</link>
    <description>This file is an XML representation of an issue</description>
    <language>en-us</language>    <build-info>
        <version>9.4.14</version>
        <build-number>940014</build-number>
        <build-date>05-12-2023</build-date>
    </build-info>


<item>
            <title>[LU-1520] client fails MDS connection and stack threads on another client</title>
                <link>https://jira.whamcloud.com/browse/LU-1520</link>
                <project id="10000" key="LU">Lustre</project>
                    <description>&lt;p&gt;A client (cluster1) lost its connection to the MDS and recovered, but then lost the connection again for some reason.&lt;/p&gt;

&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;Jun 11 11:28:45 cluster1 kernel: Lustre: 30906:0:(client.c:1487:ptlrpc_expire_one_request()) @@@ Request x1402727385081248 sent from lustre-MDT0000-mdc-
ffff880c06249800 to NID 192.168.3.45@o2ib 995s ago has timed out (995s prior to deadline).
Jun 11 11:28:45 cluster1 kernel:  req@ffff880293aaf800 x1402727385081248/t0 o101-&amp;gt;lustre-MDT0000_UUID@192.168.3.45@o2ib:12/10 lens 560/1616 e 3 to 1 dl 
1339381725 ref 1 fl Rpc:/0/0 rc 0/0
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;A few hours later, call traces showed up on another client (cluster3).&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;Jun 11 15:03:10 cluster3 kernel: Call Trace:
Jun 11 15:03:10 cluster3 kernel: [&amp;lt;ffffffff814dbcd5&amp;gt;] schedule_timeout+0x215/0x2e0
Jun 11 15:03:10 cluster3 kernel: [&amp;lt;ffffffffa086808d&amp;gt;] ? lustre_msg_early_size+0x6d/0x70 [ptlrpc]
Jun 11 15:03:10 cluster3 kernel: [&amp;lt;ffffffffa0996244&amp;gt;] ? mdc_intent_open_pack+0x364/0x530 [mdc]
Jun 11 15:03:10 cluster3 kernel: [&amp;lt;ffffffff8115a1ae&amp;gt;] ? cache_alloc_refill+0x9e/0x240
Jun 11 15:03:10 cluster3 kernel: [&amp;lt;ffffffff814dcbf2&amp;gt;] __down+0x72/0xb0
Jun 11 15:03:10 cluster3 kernel: [&amp;lt;ffffffff81093f61&amp;gt;] down+0x41/0x50
Jun 11 15:03:10 cluster3 kernel: [&amp;lt;ffffffffa0997173&amp;gt;] mdc_enqueue+0x283/0xa20 [mdc]
Jun 11 15:03:10 cluster3 kernel: [&amp;lt;ffffffffa081fbef&amp;gt;] ? __ldlm_handle2lock+0x9f/0x3d0 [ptlrpc]
Jun 11 15:03:10 cluster3 kernel: [&amp;lt;ffffffffa081fbef&amp;gt;] ? __ldlm_handle2lock+0x9f/0x3d0 [ptlrpc]
Jun 11 15:03:10 cluster3 kernel: [&amp;lt;ffffffffa09987d2&amp;gt;] mdc_intent_lock+0x102/0x440 [mdc]
Jun 11 15:03:10 cluster3 kernel: [&amp;lt;ffffffffa0853e90&amp;gt;] ? ptlrpc_req_finished+0x10/0x20 [ptlrpc]
Jun 11 15:03:10 cluster3 kernel: [&amp;lt;ffffffffa0a431a5&amp;gt;] ? ll_lookup_it+0x405/0x870 [lustre]
Jun 11 15:03:10 cluster3 kernel: [&amp;lt;ffffffffa0a40490&amp;gt;] ? ll_mdc_blocking_ast+0x0/0x5f0 [lustre]
Jun 11 15:03:10 cluster3 kernel: [&amp;lt;ffffffffa0a402ee&amp;gt;] ? ll_prepare_mdc_op_data+0xbe/0x120 [lustre]
Jun 11 15:03:10 cluster3 kernel: [&amp;lt;ffffffffa0a40490&amp;gt;] ? ll_mdc_blocking_ast+0x0/0x5f0 [lustre]
Jun 11 15:03:10 cluster3 kernel: [&amp;lt;ffffffffa083f770&amp;gt;] ? ldlm_completion_ast+0x0/0x8a0 [ptlrpc]
Jun 11 15:03:10 cluster3 kernel: [&amp;lt;ffffffffa0a402ee&amp;gt;] ? ll_prepare_mdc_op_data+0xbe/0x120 [lustre]
Jun 11 15:03:10 cluster3 kernel: [&amp;lt;ffffffffa0a430b5&amp;gt;] ll_lookup_it+0x315/0x870 [lustre]
Jun 11 15:03:10 cluster3 kernel: [&amp;lt;ffffffffa0a40490&amp;gt;] ? ll_mdc_blocking_ast+0x0/0x5f0 [lustre]
Jun 11 15:03:10 cluster3 kernel: [&amp;lt;ffffffffa06f97c1&amp;gt;] ? cfs_alloc+0x91/0xf0 [libcfs]
Jun 11 15:03:10 cluster3 kernel: [&amp;lt;ffffffffa0a43ac8&amp;gt;] ll_lookup_nd+0x88/0x470 [lustre]
Jun 11 15:03:10 cluster3 kernel: [&amp;lt;ffffffff8118ad4e&amp;gt;] ? d_alloc+0x13e/0x1b0
Jun 11 15:03:10 cluster3 kernel: [&amp;lt;ffffffff81181c02&amp;gt;] __lookup_hash+0x102/0x160
Jun 11 15:03:10 cluster3 kernel: [&amp;lt;ffffffff81181d3a&amp;gt;] lookup_hash+0x3a/0x50
Jun 11 15:03:10 cluster3 kernel: [&amp;lt;ffffffff81182768&amp;gt;] do_filp_open+0x2c8/0xd90
Jun 11 15:03:10 cluster3 kernel: [&amp;lt;ffffffff8118f1e2&amp;gt;] ? alloc_fd+0x92/0x160
Jun 11 15:03:10 cluster3 kernel: [&amp;lt;ffffffff8116f989&amp;gt;] do_sys_open+0x69/0x140
Jun 11 15:03:10 cluster3 kernel: [&amp;lt;ffffffff8116faa0&amp;gt;] sys_open+0x20/0x30
Jun 11 15:03:10 cluster3 kernel: [&amp;lt;ffffffff8100b172&amp;gt;] system_call_fastpath+0x16/0x1b
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;I will upload all the log files soon.&lt;/p&gt;</description>
                <environment></environment>
        <key id="14918">LU-1520</key>
            <summary>client fails MDS connection and stack threads on another client</summary>
                <type id="1" iconUrl="https://jira.whamcloud.com/secure/viewavatar?size=xsmall&amp;avatarId=11303&amp;avatarType=issuetype">Bug</type>
                                            <priority id="4" iconUrl="https://jira.whamcloud.com/images/icons/priorities/minor.svg">Minor</priority>
                        <status id="5" iconUrl="https://jira.whamcloud.com/images/icons/statuses/resolved.png" description="A resolution has been taken, and it is awaiting verification by reporter. From here issues are either reopened, or are closed.">Resolved</status>
                    <statusCategory id="3" key="done" colorName="success"/>
                                    <resolution id="2">Won&apos;t Fix</resolution>
                                        <assignee username="hongchao.zhang">Hongchao Zhang</assignee>
                                    <reporter username="ihara">Shuichi Ihara</reporter>
                        <labels>
                    </labels>
                <created>Thu, 14 Jun 2012 03:11:18 +0000</created>
                <updated>Fri, 29 Apr 2016 00:24:59 +0000</updated>
                            <resolved>Fri, 29 Apr 2016 00:24:59 +0000</resolved>
                                    <version>Lustre 1.8.7</version>
                                    <fixVersion>Lustre 1.8.9</fixVersion>
                                        <due></due>
                            <votes>0</votes>
                                    <watches>7</watches>
                                                                            <comments>
                            <comment id="40547" author="ihara" created="Thu, 14 Jun 2012 03:12:47 +0000"  >&lt;p&gt;all log files on /uploads/&lt;a href=&quot;https://jira.whamcloud.com/browse/LU-1520&quot; title=&quot;client fails MDS connection and stack threads on another client&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-1520&quot;&gt;&lt;del&gt;LU-1520&lt;/del&gt;&lt;/a&gt;&lt;/p&gt;

&lt;p&gt;Thanks!&lt;/p&gt;</comment>
                            <comment id="40567" author="pjones" created="Thu, 14 Jun 2012 10:41:52 +0000"  >&lt;p&gt;Hi Hongchao&lt;/p&gt;

&lt;p&gt;Could you please look into this issue?&lt;/p&gt;

&lt;p&gt;Thanks&lt;/p&gt;

&lt;p&gt;Peter&lt;/p&gt;</comment>
                            <comment id="40639" author="hongchao.zhang" created="Fri, 15 Jun 2012 06:09:13 +0000"  >&lt;p&gt;The call traces seen on cluster3 are caused by waiting for the rpc_lock in mdc_enqueue, which is the result of the poor performance&lt;br/&gt;
of the MDS (mds01), which is running low on memory and could be hurt further by ldlm_pools_shrink (BZ24419 or &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-607&quot; title=&quot;port bz24419 (ldlm namespace lock contention during oom)&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-607&quot;&gt;&lt;del&gt;LU-607&lt;/del&gt;&lt;/a&gt;?).&lt;/p&gt;
</comment>
                            <comment id="40725" author="ihara" created="Mon, 18 Jun 2012 03:00:12 +0000"  >&lt;p&gt;Hongchao,&lt;/p&gt;

&lt;p&gt;It seems &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-607&quot; title=&quot;port bz24419 (ldlm namespace lock contention during oom)&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-607&quot;&gt;&lt;del&gt;LU-607&lt;/del&gt;&lt;/a&gt; was landed in b1_8 once, but the patches were reverted by Johann.&lt;br/&gt;
&lt;a href=&quot;http://git.whamcloud.com/?p=fs/lustre-release.git;a=commit;h=447794d5ebb71dbd39d7378944c3c9eeb230f8d0&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://git.whamcloud.com/?p=fs/lustre-release.git;a=commit;h=447794d5ebb71dbd39d7378944c3c9eeb230f8d0&lt;/a&gt;&lt;/p&gt;

&lt;p&gt;Is there any reason why this was reverted?&lt;br/&gt;
Also, the patches for BZ24419 were not landed in the -wc branch. &lt;/p&gt;

&lt;p&gt;Which patches are worth trying?&lt;/p&gt;

&lt;p&gt;Please advise.&lt;/p&gt;

&lt;p&gt;Ihara&lt;/p&gt;</comment>
                            <comment id="40825" author="ihara" created="Tue, 19 Jun 2012 01:21:26 +0000"  >&lt;p&gt;Just read &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-607&quot; title=&quot;port bz24419 (ldlm namespace lock contention during oom)&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-607&quot;&gt;&lt;del&gt;LU-607&lt;/del&gt;&lt;/a&gt; again and it seems the patch for &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-607&quot; title=&quot;port bz24419 (ldlm namespace lock contention during oom)&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-607&quot;&gt;&lt;del&gt;LU-607&lt;/del&gt;&lt;/a&gt; introduces the regressions and reverted.&lt;br/&gt;
So, this problem is still not fixed. &lt;br/&gt;
We are seeing a similar situation a couple of times a month. Please investigate how to avoid this problem. &lt;/p&gt;</comment>
                            <comment id="41133" author="hongchao.zhang" created="Tue, 26 Jun 2012 09:01:46 +0000"  >&lt;p&gt;Hi, Ihara&lt;/p&gt;

&lt;p&gt;Sorry for the delayed response! &lt;br/&gt;
The patch ported from BZ24419 was reverted because it triggers a LASSERT. The patch tracked at BZ24419 is intended to improve&lt;br/&gt;
the performance of LDLM shrinking, which could mitigate this issue. I will port and test the newest patch in BZ24419&lt;br/&gt;
to check whether it fixes the issue.&lt;/p&gt;</comment>
                            <comment id="41262" author="ihara" created="Thu, 28 Jun 2012 12:25:31 +0000"  >&lt;p&gt;Hi Hongchao,&lt;br/&gt;
could you please port the patch to b1_8?&lt;/p&gt;</comment>
                            <comment id="41292" author="hongchao.zhang" created="Thu, 28 Jun 2012 23:58:11 +0000"  >&lt;p&gt;Hi Ihara&lt;br/&gt;
Okay, will port it to b1_8&lt;/p&gt;</comment>
                            <comment id="41457" author="hongchao.zhang" created="Wed, 4 Jul 2012 09:56:22 +0000"  >&lt;p&gt;the patch is tracked at &lt;a href=&quot;http://review.whamcloud.com/#change,3270&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/#change,3270&lt;/a&gt;&lt;/p&gt;</comment>
                            <comment id="41731" author="ihara" created="Wed, 11 Jul 2012 23:51:25 +0000"  >&lt;p&gt;Thanks!&lt;br/&gt;
Does it need to be reviewed by someone before we try to test these patches?&lt;/p&gt;</comment>
                            <comment id="41993" author="ihara" created="Thu, 19 Jul 2012 03:37:39 +0000"  >&lt;p&gt;Hongchao, we will be applying backported patches at the customer site, but before apply them, I wonder if someone could review it.. Please.. &lt;/p&gt;

&lt;p&gt;Ihara&lt;/p&gt;</comment>
                            <comment id="41994" author="hongchao.zhang" created="Thu, 19 Jul 2012 03:50:27 +0000"  >&lt;p&gt;oh, yes, sorry!! I missed your previous comment, I&apos;ll do it right now.&lt;/p&gt;</comment>
                            <comment id="43926" author="kitwestneat" created="Wed, 29 Aug 2012 03:18:19 +0000"  >&lt;p&gt;Hello, I was wondering what the status was of this patch. It appears that there were some suggested changes, were those ever done? Can we get this landed?&lt;/p&gt;</comment>
                            <comment id="44094" author="hongchao.zhang" created="Mon, 3 Sep 2012 07:08:42 +0000"  >&lt;p&gt;Hi, the updated patch is under creation &amp;amp; test, and will upload it soon&lt;/p&gt;</comment>
                            <comment id="44134" author="hongchao.zhang" created="Tue, 4 Sep 2012 11:28:20 +0000"  >&lt;p&gt;the updated patch has been pushed, &lt;a href=&quot;http://review.whamcloud.com/#change,3270&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/#change,3270&lt;/a&gt;&lt;/p&gt;</comment>
                            <comment id="44781" author="kitwestneat" created="Thu, 13 Sep 2012 08:34:33 +0000"  >&lt;p&gt;Hello, can we get an update? It looks like it has two +1 reviews, can it be landed? Thanks, Kit&lt;/p&gt;</comment>
                            <comment id="44789" author="pjones" created="Thu, 13 Sep 2012 09:09:53 +0000"  >&lt;p&gt;Kit&lt;/p&gt;

&lt;p&gt;The b1_8 version is ready to land but the master version is still being worked &lt;a href=&quot;http://review.whamcloud.com/#change,3859&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/#change,3859&lt;/a&gt;. We try to land to master first so as to avoid deltas between 1.8.x and 2.x arising.&lt;/p&gt;

&lt;p&gt;Peter&lt;/p&gt;</comment>
                            <comment id="44793" author="kitwestneat" created="Thu, 13 Sep 2012 09:50:28 +0000"  >&lt;p&gt;Ah ok, thanks.&lt;/p&gt;</comment>
                            <comment id="44921" author="ihara" created="Fri, 14 Sep 2012 23:15:31 +0000"  >&lt;p&gt;Peter, &lt;br/&gt;
Thanks for clarifying on this, but nobody listed on &lt;a href=&quot;http://review.whamcloud.com/#change,3859&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/#change,3859&lt;/a&gt; as reviewer.&lt;br/&gt;
I really want these patches to be reviewed and landed, including into b1_8. Otherwise we will need to apply the patches on top of b1_8 to solve the current customer issue.&lt;/p&gt;</comment>
                            <comment id="45604" author="kitwestneat" created="Wed, 26 Sep 2012 17:16:46 +0000"  >&lt;p&gt;It looks like the Maloo testing hit &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-479&quot; title=&quot;Test failure on test suite sanity, subtest test_124a&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-479&quot;&gt;&lt;del&gt;LU-479&lt;/del&gt;&lt;/a&gt;?&lt;/p&gt;</comment>
                            <comment id="46557" author="hongchao.zhang" created="Mon, 15 Oct 2012 06:11:42 +0000"  >&lt;p&gt;There is a bug in the previous patch, which causes sanity subtest 124a to fail.&lt;br/&gt;
the updated patch has been pushed to Gerrit&lt;/p&gt;</comment>
                            <comment id="46566" author="kitwestneat" created="Mon, 15 Oct 2012 07:31:45 +0000"  >&lt;p&gt;Hi Hongchao, &lt;br/&gt;
Will the 1.8 version need to be updated too?&lt;/p&gt;</comment>
                            <comment id="46608" author="hongchao.zhang" created="Mon, 15 Oct 2012 23:28:45 +0000"  >&lt;p&gt;no, the 1.8.x version has no such problem.&lt;/p&gt;</comment>
                            <comment id="55755" author="kitwestneat" created="Mon, 8 Apr 2013 15:30:45 +0000"  >&lt;p&gt;any updates on the master port of this patch? We are carrying the 1.8.x version in our 1.8.9 build, but it would be nice to integrate it in the Intel version. The last update on the changeset was in October.&lt;/p&gt;</comment>
                            <comment id="127486" author="simmonsja" created="Wed, 16 Sep 2015 14:38:03 +0000"  >&lt;p&gt;Really old ticket. Peter we should close this as well.&lt;/p&gt;</comment>
                            <comment id="127499" author="pjones" created="Wed, 16 Sep 2015 15:20:38 +0000"  >&lt;p&gt;Probably we should defer to DDN on that?&lt;/p&gt;</comment>
                            <comment id="147524" author="jfc" created="Fri, 1 Apr 2016 00:40:57 +0000"  >&lt;p&gt;Hello Ihara,&lt;/p&gt;

&lt;p&gt;Do you want us to keep this open, or can we go ahead and mark it as resolved/won&apos;t fix?&lt;/p&gt;

&lt;p&gt;Thanks,&lt;br/&gt;
~ jfc.&lt;/p&gt;</comment>
                            <comment id="150521" author="jfc" created="Fri, 29 Apr 2016 00:24:59 +0000"  >&lt;p&gt;Hello Ihara,&lt;/p&gt;

&lt;p&gt;We have marked this as Resolved/Won&apos;t fix.&lt;/p&gt;

&lt;p&gt;Thanks,&lt;br/&gt;
~ jfc.&lt;/p&gt;
</comment>
                    </comments>
                <issuelinks>
                            <issuelinktype id="10011">
                    <name>Related</name>
                                            <outwardlinks description="is related to ">
                                        <issuelink>
            <issuekey id="29728">LU-6529</issuekey>
        </issuelink>
                            </outwardlinks>
                                                        </issuelinktype>
                    </issuelinks>
                <attachments>
                    </attachments>
                <subtasks>
                    </subtasks>
                <customfields>
                                                                                                                                                                                            <customfield id="customfield_10890" key="com.atlassian.jira.plugins.jira-development-integration-plugin:devsummary">
                        <customfieldname>Development</customfieldname>
                        <customfieldvalues>
                            
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        <customfield id="customfield_10390" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                        <customfieldname>Rank</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>1|hzvncf:</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10090" key="com.pyxis.greenhopper.jira:gh-global-rank">
                        <customfieldname>Rank (Obsolete)</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>7592</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                                                                                            <customfield id="customfield_10060" key="com.atlassian.jira.plugin.system.customfieldtypes:select">
                        <customfieldname>Severity</customfieldname>
                        <customfieldvalues>
                                <customfieldvalue key="10022"><![CDATA[3]]></customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        </customfields>
    </item>
</channel>
</rss>