<!-- 
RSS generated by JIRA (9.4.14#940014-sha1:734e6822bbf0d45eff9af51f82432957f73aa32c) at Sat Feb 10 01:53:03 UTC 2024

It is possible to restrict the fields that are returned in this document by specifying the 'field' parameter in your request.
For example, to request only the issue key and summary append 'field=key&field=summary' to the URL of your request.
-->
<rss version="0.92" >
<channel>
    <title>Whamcloud Community JIRA</title>
    <link>https://jira.whamcloud.com</link>
    <description>This file is an XML representation of an issue</description>
    <language>en-us</language>    <build-info>
        <version>9.4.14</version>
        <build-number>940014</build-number>
        <build-date>05-12-2023</build-date>
    </build-info>


<item>
            <title>[LU-5617] MDS hang and would like to know the cause</title>
                <link>https://jira.whamcloud.com/browse/LU-5617</link>
                <project id="10000" key="LU">Lustre</project>
                    <description>&lt;p&gt;Our MDS hung this morning, to the point where logging in to the server was denied. We had to power-cycle the server in order to regain access to it.&lt;/p&gt;

&lt;p&gt;I am uploading &quot;/var/log/messages&quot; and 2 kernel trace dumps.&lt;br/&gt;
I need your help in interpreting these logs; please let me know &lt;br/&gt;
where the problem is likely to be: Lustre, networking, client overloading, etc.&lt;/p&gt;

&lt;p&gt;thanks  &lt;/p&gt;</description>
                <environment>Linux monkey-mds-10-3.local 2.6.32-358.23.2.el6_lustre.x86_64 #1 SMP Thu Dec 19 19:57:45 PST 2013 x86_64 x86_64 x86_64 GNU/Linux&lt;br/&gt;
</environment>
        <key id="26518">LU-5617</key>
            <summary>MDS hang and would like to know the cause</summary>
                <type id="9" iconUrl="https://jira.whamcloud.com/images/icons/issuetypes/undefined.png">Question/Request</type>
                                            <priority id="4" iconUrl="https://jira.whamcloud.com/images/icons/priorities/minor.svg">Minor</priority>
                        <status id="5" iconUrl="https://jira.whamcloud.com/images/icons/statuses/resolved.png" description="A resolution has been taken, and it is awaiting verification by reporter. From here issues are either reopened, or are closed.">Resolved</status>
                    <statusCategory id="3" key="done" colorName="success"/>
                                    <resolution id="3">Duplicate</resolution>
                                        <assignee username="niu">Niu Yawei</assignee>
                                    <reporter username="haisong">Haisong Cai</reporter>
                        <labels>
                    </labels>
                <created>Fri, 12 Sep 2014 16:46:53 +0000</created>
                <updated>Mon, 21 Nov 2016 03:34:10 +0000</updated>
                            <resolved>Mon, 21 Nov 2016 03:34:10 +0000</resolved>
                                    <version>Lustre 2.4.2</version>
                                                        <due></due>
                            <votes>0</votes>
                                    <watches>5</watches>
                                                                            <comments>
                            <comment id="93902" author="cliffw" created="Fri, 12 Sep 2014 22:55:31 +0000"  >&lt;p&gt;It appears from the MDS log that something was happening prior to the start of the log you attached. Can we get the log from the MDS for the 24 hours prior to the first log? &lt;br/&gt;
Were there any indications of network errors? &lt;/p&gt;</comment>
                            <comment id="93903" author="haisong" created="Fri, 12 Sep 2014 23:05:16 +0000"  >&lt;p&gt;Hi Cliff,&lt;/p&gt;

&lt;p&gt;I am including /var/log/messages for MDS since last log rotation (Sept 7) here.&lt;br/&gt;
For both Sept 10 and Sept 11, there was nothing logged.&lt;/p&gt;</comment>
                            <comment id="93932" author="pjones" created="Sat, 13 Sep 2014 14:42:23 +0000"  >&lt;p&gt;Niu&lt;/p&gt;

&lt;p&gt;Is there anything that you can determine from the information provided?&lt;/p&gt;

&lt;p&gt;Peter&lt;/p&gt;</comment>
                            <comment id="93958" author="niu" created="Mon, 15 Sep 2014 04:06:04 +0000"  >&lt;p&gt;I saw lots of page allocation failures; it looks like your system is running short of memory. The reason is unclear to me, but it can be alleviated by tuning the vm parameters:&lt;/p&gt;

&lt;ul class=&quot;alternate&quot; type=&quot;square&quot;&gt;
	&lt;li&gt;Increasing the vm.min_free_kbytes;&lt;/li&gt;
	&lt;li&gt;Set the vm.zone_reclaim_mode to 1;&lt;/li&gt;
&lt;/ul&gt;
</comment>
                            <comment id="93959" author="haisong" created="Mon, 15 Sep 2014 04:35:38 +0000"  >
&lt;p&gt;Hi Niu,&lt;/p&gt;

&lt;p&gt;Can you recommend a value to set for vm.min_free_kbytes? &lt;br/&gt;
Our MDS has 24GB RAM.&lt;/p&gt;

&lt;p&gt;thanks,&lt;br/&gt;
Haisong&lt;/p&gt;</comment>
                            <comment id="93961" author="niu" created="Mon, 15 Sep 2014 07:33:53 +0000"  >&lt;blockquote&gt;
&lt;p&gt;Can you recommend a value to set for vm.min_free_kbytes? &lt;br/&gt;
Our MDS has 24GB RAM.&lt;/p&gt;&lt;/blockquote&gt;

&lt;p&gt;What&apos;s the current value? I don&apos;t have experience with tuning these values; I think you need to try a bigger value and see how it works (but don&apos;t set it too large, probably less than 5% of total memory?).&lt;/p&gt;</comment>
                            <comment id="174421" author="niu" created="Mon, 21 Nov 2016 03:34:10 +0000"  >&lt;p&gt;This happened because the system was running out of memory, which could be caused by &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-5726&quot; title=&quot;MDS buffer not freed when deleting files&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-5726&quot;&gt;&lt;del&gt;LU-5726&lt;/del&gt;&lt;/a&gt;. Dup of &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-5726&quot; title=&quot;MDS buffer not freed when deleting files&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-5726&quot;&gt;&lt;del&gt;LU-5726&lt;/del&gt;&lt;/a&gt;.&lt;/p&gt;</comment>
                    </comments>
                    <attachments>
                            <attachment id="15734" name="lustre-log.1410510357.3995.gz" size="253" author="haisong" created="Fri, 12 Sep 2014 16:46:53 +0000"/>
                            <attachment id="15735" name="lustre-log.1410524785.3872.gz" size="253" author="haisong" created="Fri, 12 Sep 2014 16:46:53 +0000"/>
                            <attachment id="15736" name="monkey-mds-10-3.messages.gz" size="1190244" author="haisong" created="Fri, 12 Sep 2014 16:46:53 +0000"/>
                            <attachment id="15737" name="monkey-mds-10-3_messages_all.gz" size="1192403" author="haisong" created="Fri, 12 Sep 2014 23:05:11 +0000"/>
                    </attachments>
                <subtasks>
                    </subtasks>
                <customfields>
                                                                                                                                                                <customfield id="customfield_10890" key="com.atlassian.jira.plugins.jira-development-integration-plugin:devsummary">
                        <customfieldname>Development</customfieldname>
                        <customfieldvalues>
                            
                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10490" key="com.atlassian.jira.plugin.system.customfieldtypes:datepicker">
                        <customfieldname>End date</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>Mon, 15 Sep 2014 16:46:53 +0000</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                            <customfield id="customfield_10390" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                        <customfieldname>Rank</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>1|hzww5b:</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10090" key="com.pyxis.greenhopper.jira:gh-global-rank">
                        <customfieldname>Rank (Obsolete)</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>15712</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                <customfield id="customfield_10493" key="com.atlassian.jira.plugin.system.customfieldtypes:datepicker">
                        <customfieldname>Start date</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>Fri, 12 Sep 2014 16:46:53 +0000</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                    </customfields>
    </item>
</channel>
</rss>