<!-- 
RSS generated by JIRA (9.4.14#940014-sha1:734e6822bbf0d45eff9af51f82432957f73aa32c) at Sat Feb 10 01:43:15 UTC 2024

It is possible to restrict the fields that are returned in this document by specifying the 'field' parameter in your request.
For example, to request only the issue key and summary append 'field=key&field=summary' to the URL of your request.
-->
<rss version="0.92" >
<channel>
    <title>Whamcloud Community JIRA</title>
    <link>https://jira.whamcloud.com</link>
    <description>This file is an XML representation of an issue</description>
    <language>en-us</language>    <build-info>
        <version>9.4.14</version>
        <build-number>940014</build-number>
        <build-date>05-12-2023</build-date>
    </build-info>


<item>
            <title>[LU-4498] MDT thread hung, ls fails on directory</title>
                <link>https://jira.whamcloud.com/browse/LU-4498</link>
                <project id="10000" key="LU">Lustre</project>
                    <description>&lt;p&gt;IU is running into an issue where running ls on a certain file causes clients to get evicted. It appears as if there is a hung MDT thread holding a lock on the file. After the MDT is rebooted, listing the directory and file works fine.&lt;/p&gt;

&lt;p&gt;We were able to capture client debug logs and a backtrace of all the threads from the running system, but due to an issue with STONITH, we were unable to get a good vmcore from the system. Also when we tried to get debug logs from the MDT, the log overflowed, even with a 20GB buffer. &lt;/p&gt;

&lt;p&gt;We are currently waiting for the issue to reappear and will get debug logs on a quiesced system, as well as a good vmcore. &lt;/p&gt;

&lt;p&gt;I&apos;ll upload the logs we have. Is there anything else we should be looking to get?&lt;/p&gt;</description>
                <environment></environment>
        <key id="22763">LU-4498</key>
            <summary>MDT thread hung, ls fails on directory</summary>
                <type id="1" iconUrl="https://jira.whamcloud.com/secure/viewavatar?size=xsmall&amp;avatarId=11303&amp;avatarType=issuetype">Bug</type>
                                            <priority id="3" iconUrl="https://jira.whamcloud.com/images/icons/priorities/major.svg">Major</priority>
                        <status id="5" iconUrl="https://jira.whamcloud.com/images/icons/statuses/resolved.png" description="A resolution has been taken, and it is awaiting verification by reporter. From here issues are either reopened, or are closed.">Resolved</status>
                    <statusCategory id="3" key="done" colorName="success"/>
                                    <resolution id="3">Duplicate</resolution>
                                        <assignee username="bobijam">Zhenyu Xu</assignee>
                                    <reporter username="kitwestneat">Kit Westneat</reporter>
                        <labels>
                    </labels>
                <created>Thu, 16 Jan 2014 16:08:22 +0000</created>
                <updated>Fri, 21 Mar 2014 12:30:34 +0000</updated>
                            <resolved>Fri, 21 Mar 2014 12:30:34 +0000</resolved>
                                    <version>Lustre 2.1.6</version>
                                                        <due></due>
                            <votes>0</votes>
                                    <watches>8</watches>
                                                                            <comments>
                            <comment id="75090" author="kitwestneat" created="Thu, 16 Jan 2014 16:22:20 +0000"  >&lt;p&gt;Here&apos;s an example of the directory listing. I forgot to mention it takes hours for the MDT to actually evict the client. &lt;/p&gt;

&lt;p&gt;ls -al&lt;br/&gt;
/N/dc2/scratch/kmoriya/bggen_8.4_9.0-2013-12-11/0000/000100&lt;br/&gt;
&amp;#8212;&lt;br/&gt;
Begin 2013-12-20_13:37:32&lt;br/&gt;
total 164&lt;br/&gt;
drwxr-xr-x  34 1012412 401  4096 Dec 11 20:59 .&lt;br/&gt;
drwxr-xr-x 202 1012412 401 36864 Dec 16 09:32 ..&lt;br/&gt;
drwxr-xr-x   2 1012412 401  4096 Dec 12 13:23 00003200&lt;br/&gt;
drwxr-xr-x   2 1012412 401  4096 Dec 12 13:24 00003201&lt;br/&gt;
drwxr-xr-x   2 1012412 401  4096 Dec 12 13:24 00003202&lt;br/&gt;
drwxr-xr-x   2 1012412 401  4096 Dec 12 13:24 00003203&lt;br/&gt;
drwxr-xr-x   2 1012412 401  4096 Dec 12 13:24 00003204&lt;br/&gt;
drwxr-xr-x   2 1012412 401  4096 Dec 12 13:24 00003205&lt;br/&gt;
drwxr-xr-x   2 1012412 401  4096 Dec 12 13:24 00003206&lt;br/&gt;
drwxr-xr-x   2 1012412 401  4096 Dec 12 13:24 00003207&lt;br/&gt;
drwxr-xr-x   2 1012412 401  4096 Dec 12 13:24 00003208&lt;br/&gt;
?---------   ? ?       ?       ?            ? 00003209&lt;br/&gt;
drwxr-xr-x   2 1012412 401  4096 Dec 12 13:24 00003210&lt;br/&gt;
drwxr-xr-x   2 1012412 401  4096 Dec 12 13:24 00003211&lt;br/&gt;
drwxr-xr-x   2 1012412 401  4096 Dec 12 13:24 00003212&lt;br/&gt;
drwxr-xr-x   2 1012412 401  4096 Dec 12 13:24 00003213&lt;br/&gt;
drwxr-xr-x   2 1012412 401  4096 Dec 12 13:24 00003214&lt;br/&gt;
drwxr-xr-x   2 1012412 401  4096 Dec 12 13:24 00003215&lt;br/&gt;
drwxr-xr-x   2 1012412 401  4096 Dec 12 13:24 00003216&lt;br/&gt;
drwxr-xr-x   2 1012412 401  4096 Dec 12 13:24 00003217&lt;br/&gt;
drwxr-xr-x   2 1012412 401  4096 Dec 12 13:24 00003218&lt;br/&gt;
drwxr-xr-x   2 1012412 401  4096 Dec 12 13:24 00003219&lt;br/&gt;
drwxr-xr-x   2 1012412 401  4096 Dec 12 13:24 00003220&lt;br/&gt;
drwxr-xr-x   2 1012412 401  4096 Dec 12 13:24 00003221&lt;br/&gt;
drwxr-xr-x   2 1012412 401  4096 Dec 12 13:24 00003222&lt;br/&gt;
drwxr-xr-x   2 1012412 401  4096 Dec 12 13:24 00003223&lt;br/&gt;
drwxr-xr-x   2 1012412 401  4096 Dec 12 13:24 00003224&lt;br/&gt;
drwxr-xr-x   2 1012412 401  4096 Dec 12 13:24 00003225&lt;br/&gt;
drwxr-xr-x   2 1012412 401  4096 Dec 12 13:24 00003226&lt;br/&gt;
drwxr-xr-x   2 1012412 401  4096 Dec 12 13:24 00003227&lt;br/&gt;
drwxr-xr-x   2 1012412 401  4096 Dec 12 13:24 00003228&lt;br/&gt;
drwxr-xr-x   2 1012412 401  4096 Dec 12 13:24 00003229&lt;br/&gt;
drwxr-xr-x   2 1012412 401  4096 Dec 12 13:24 00003230&lt;br/&gt;
drwxr-xr-x   2 1012412 401  4096 Dec 12 07:42 00003231&lt;br/&gt;
End 2013-12-20_15:44:49&lt;/p&gt;</comment>
                            <comment id="75100" author="cliffw" created="Thu, 16 Jan 2014 17:20:48 +0000"  >&lt;p&gt;Is there anything else you can tell us about the &apos;certain file&apos; ? &lt;br/&gt;
Is it always the same file, or type of file?&lt;br/&gt;
Have you run fsck on the MDT lately? &lt;/p&gt;</comment>
                            <comment id="75188" author="kitwestneat" created="Fri, 17 Jan 2014 15:34:41 +0000"  >&lt;p&gt;It&apos;s happened three times on seemingly unrelated files. It looks like the last e2fsck was on June 12. FWIW there don&apos;t seem to be any ldisk errors in the logs anytime recently.&lt;/p&gt;</comment>
                            <comment id="75209" author="pjones" created="Fri, 17 Jan 2014 19:16:31 +0000"  >&lt;p&gt;Bobijam&lt;/p&gt;

&lt;p&gt;Could you please help with this one?&lt;/p&gt;

&lt;p&gt;Thanks&lt;/p&gt;

&lt;p&gt;Peter&lt;/p&gt;</comment>
                            <comment id="75257" author="bobijam" created="Mon, 20 Jan 2014 05:18:58 +0000"  >&lt;p&gt;Fanyong,&lt;/p&gt;

&lt;p&gt;does it look like a dir hash collision issue?&lt;/p&gt;</comment>
                            <comment id="75269" author="yong.fan" created="Mon, 20 Jan 2014 14:37:33 +0000"  >&lt;p&gt;According to the client side log, every getattr RPC is for a different file, so we cannot say that the &quot;ls&quot; fell into a hash collision. On the other hand, the MDT side shows that during the &quot;ls&quot; processing, there are some &quot;open-create&quot; operations. What are they for?&lt;/p&gt;</comment>
                            <comment id="75287" author="bobijam" created="Mon, 20 Jan 2014 17:31:18 +0000"  >&lt;p&gt;the client log shows that ls happened from 2014/01/09 23:46:11 to 2014/01/09 23:46:12, which took 1 second and does not match the 1st comment report (from 2013-12-20_13:37:32 to 2013-12-20_15:44:49). Do you have logs which cover the issue time span?&lt;/p&gt;</comment>
                            <comment id="75342" author="bobijam" created="Tue, 21 Jan 2014 09:52:11 +0000"  >&lt;p&gt;would you mind trying this patch &lt;a href=&quot;http://review.whamcloud.com/8936&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/8936&lt;/a&gt; , it is a backport of the dir hash collision fix patch.&lt;/p&gt;</comment>
                            <comment id="79935" author="pjones" created="Thu, 20 Mar 2014 21:39:12 +0000"  >&lt;p&gt;Bobijam&lt;/p&gt;

&lt;p&gt;Could this be related to &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-4616&quot; title=&quot;ls or client eviction issues (ldlm_lockd.c:642:ldlm_handle_ast_error()) ### client (nid 979@gni) returned -22 from blocking AST &quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-4616&quot;&gt;&lt;del&gt;LU-4616&lt;/del&gt;&lt;/a&gt;, the root cause of which has now been established?&lt;/p&gt;

&lt;p&gt;Peter&lt;/p&gt;</comment>
                            <comment id="79958" author="bobijam" created="Fri, 21 Mar 2014 01:14:11 +0000"  >&lt;p&gt;yes, I think it&apos;s related to &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-4616&quot; title=&quot;ls or client eviction issues (ldlm_lockd.c:642:ldlm_handle_ast_error()) ### client (nid 979@gni) returned -22 from blocking AST &quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-4616&quot;&gt;&lt;del&gt;LU-4616&lt;/del&gt;&lt;/a&gt;&lt;/p&gt;</comment>
                            <comment id="79984" author="pjones" created="Fri, 21 Mar 2014 12:30:34 +0000"  >&lt;p&gt;ok so then let&apos;s mark it as a duplicate of &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-4616&quot; title=&quot;ls or client eviction issues (ldlm_lockd.c:642:ldlm_handle_ast_error()) ### client (nid 979@gni) returned -22 from blocking AST &quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-4616&quot;&gt;&lt;del&gt;LU-4616&lt;/del&gt;&lt;/a&gt; and reopen or open a new ticket if it manifests itself again now that &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-4616&quot; title=&quot;ls or client eviction issues (ldlm_lockd.c:642:ldlm_handle_ast_error()) ### client (nid 979@gni) returned -22 from blocking AST &quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-4616&quot;&gt;&lt;del&gt;LU-4616&lt;/del&gt;&lt;/a&gt; seems to have been addressed.&lt;/p&gt;</comment>
                    </comments>
                    <attachments>
                            <attachment id="13992" name="2014-01-09-run4-client.out" size="2282676" author="kitwestneat" created="Thu, 16 Jan 2014 16:20:23 +0000"/>
                            <attachment id="13991" name="bt.tgz" size="318722" author="kitwestneat" created="Thu, 16 Jan 2014 16:20:23 +0000"/>
                    </attachments>
                <subtasks>
                    </subtasks>
                <customfields>
                                                                                                                                                                                            <customfield id="customfield_10890" key="com.atlassian.jira.plugins.jira-development-integration-plugin:devsummary">
                        <customfieldname>Development</customfieldname>
                        <customfieldvalues>
                            
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        <customfield id="customfield_10390" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                        <customfieldname>Rank</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>1|hzwcyn:</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10090" key="com.pyxis.greenhopper.jira:gh-global-rank">
                        <customfieldname>Rank (Obsolete)</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>12303</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                                                                                            <customfield id="customfield_10060" key="com.atlassian.jira.plugin.system.customfieldtypes:select">
                        <customfieldname>Severity</customfieldname>
                        <customfieldvalues>
                                <customfieldvalue key="10022"><![CDATA[3]]></customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        </customfields>
    </item>
</channel>
</rss>