<!-- 
RSS generated by JIRA (9.4.14#940014-sha1:734e6822bbf0d45eff9af51f82432957f73aa32c) at Sat Feb 10 01:12:08 UTC 2024

It is possible to restrict the fields that are returned in this document by specifying the 'field' parameter in your request.
For example, to request only the issue key and summary append 'field=key&field=summary' to the URL of your request.
-->
<rss version="0.92" >
<channel>
    <title>Whamcloud Community JIRA</title>
    <link>https://jira.whamcloud.com</link>
    <description>This file is an XML representation of an issue</description>
    <language>en-us</language>    <build-info>
        <version>9.4.14</version>
        <build-number>940014</build-number>
        <build-date>05-12-2023</build-date>
    </build-info>


<item>
            <title>[LU-961] lfs df -h hangs</title>
                <link>https://jira.whamcloud.com/browse/LU-961</link>
                <project id="10000" key="LU">Lustre</project>
                    <description>&lt;p&gt;When I try to use &lt;tt&gt;lfs df -h&lt;/tt&gt; the command hangs:&lt;/p&gt;

&lt;div class=&quot;code panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;codeContent panelContent&quot;&gt;
&lt;pre class=&quot;code-java&quot;&gt;# lfs df -h
UUID                       bytes        Used   Available Use% Mounted on
lustre-MDT0000_UUID       767.8M       35.1M      681.5M   5% /mnt/lustre[MDT:0]
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;Any ideas why this is hanging?&lt;/p&gt;</description>
                <environment>TCP network</environment>
        <key id="12789">LU-961</key>
            <summary>lfs df -h hangs</summary>
                <type id="1" iconUrl="https://jira.whamcloud.com/secure/viewavatar?size=xsmall&amp;avatarId=11303&amp;avatarType=issuetype">Bug</type>
                                            <priority id="4" iconUrl="https://jira.whamcloud.com/images/icons/priorities/minor.svg">Minor</priority>
                        <status id="6" iconUrl="https://jira.whamcloud.com/images/icons/statuses/closed.png" description="The issue is considered finished, the resolution is correct. Issues which are closed can be reopened.">Closed</status>
                    <statusCategory id="3" key="done" colorName="success"/>
                                    <resolution id="1">Fixed</resolution>
                                        <assignee username="brian">Brian Murrell</assignee>
                                    <reporter username="brian">Brian Murrell</reporter>
                        <labels>
                    </labels>
                <created>Wed, 4 Jan 2012 12:06:39 +0000</created>
                <updated>Thu, 5 Jan 2012 13:14:07 +0000</updated>
                            <resolved>Thu, 5 Jan 2012 13:13:52 +0000</resolved>
                                    <version>Lustre 2.1.0</version>
                                                        <due></due>
                            <votes>0</votes>
                                    <watches>3</watches>
                                                                            <comments>
                            <comment id="25818" author="bpadfield" created="Wed, 4 Jan 2012 16:49:59 +0000"  >&lt;p&gt;Can we get message logs from the MDS and OSS(s)?&lt;/p&gt;</comment>
                            <comment id="25820" author="brian" created="Wed, 4 Jan 2012 16:56:26 +0000"  >&lt;p&gt;I have way too many OSSes to add the logs from all of them.  Can you be more specific about which OSSes you want logs from?&lt;/p&gt;</comment>
                            <comment id="25821" author="bpadfield" created="Wed, 4 Jan 2012 16:57:34 +0000"  >&lt;p&gt;How about the MDS and the OSS that has OST0000?&lt;/p&gt;</comment>
                            <comment id="25822" author="brian" created="Wed, 4 Jan 2012 17:04:55 +0000"  >&lt;p&gt;Find them attached.&lt;/p&gt;

&lt;p&gt;I am just in the process of rebooting my whole cluster to see if the problem is resolved by doing that.&lt;/p&gt;</comment>
                            <comment id="25823" author="brian" created="Wed, 4 Jan 2012 17:12:11 +0000"  >&lt;p&gt;After rebooting things still don&apos;t look good:&lt;/p&gt;

&lt;div class=&quot;code panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;codeContent panelContent&quot;&gt;
&lt;pre class=&quot;code-java&quot;&gt;# lfs df -h
UUID &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; bytes &#160; &#160; &#160; &#160;Used &#160; Available Use% Mounted on
lustre-MDT0000_UUID &#160; &#160; &#160; 767.8M &#160; &#160; &#160; 35.1M &#160; &#160; &#160;681.5M &#160; 5% /mnt/lustre[MDT:0]
OST0000 &#160; &#160; &#160; &#160; &#160; &#160; : inactive device
lustre-OST0001_UUID &#160; &#160; &#160;1007.9M &#160; &#160; &#160; 52.2M &#160; &#160; &#160;904.5M &#160; 5% /mnt/lustre[OST:1]

filesystem summary: &#160; &#160; &#160;1007.9M &#160; &#160; &#160; 52.2M &#160; &#160; &#160;904.5M &#160; 5% /mnt/lustre
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;</comment>
                            <comment id="25824" author="mhelmer" created="Wed, 4 Jan 2012 17:25:46 +0000"  >&lt;p&gt;I am seeing the following network-related errors in both the MDS and the OSS logs. Can you confirm that your LNET network is up and functioning as expected? In particular, I would like to see whether the OSS that you provided logs from can ping the MDS over LNET (i.e. lctl ping &amp;lt;ipofMDS&amp;gt;) and, conversely, whether the MDS can ping the OSS.&lt;/p&gt;

&lt;div class=&quot;code panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;codeContent panelContent&quot;&gt;
&lt;pre class=&quot;code-java&quot;&gt;Jan  4 10:18:35 mds1 kernel: LustreError: 4047:0:(socklnd.c:2420:ksocknal_base_startup()) Can&apos;t spawn socknal scheduler[0]: -513
Jan  4 10:18:35 mds1 kernel: LustreError: 105-4: Error -100 starting up LNI tcp
Jan  4 10:18:35 mds1 kernel: LustreError: 4047:0:(events.c:728:ptlrpc_init_portals()) network initialisation failed
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;</comment>
                            <comment id="25827" author="brian" created="Wed, 4 Jan 2012 17:45:54 +0000"  >&lt;p&gt;Hi,&lt;/p&gt;

&lt;p&gt;It seems that communication between oss1 and mds2 is working:&lt;/p&gt;

&lt;div class=&quot;code panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;codeContent panelContent&quot;&gt;
&lt;pre class=&quot;code-java&quot;&gt;[root@oss1 ~]# lctl ping mds2
12345-0@lo
12345-192.168.122.155@tcp
[root@mds2 ~]# lctl ping oss1
12345-0@lo
12345-192.168.122.147@tcp
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;I&apos;m noticing the time on the messages that you are referring to.  I must apologize for not mentioning, when I attached the logs, that the cluster was down for maintenance and testing at that point.  It wasn&apos;t up for production until a few hours later, so looking further into the log would probably be helpful.&lt;/p&gt;</comment>
                            <comment id="25828" author="mhelmer" created="Wed, 4 Jan 2012 17:52:53 +0000"  >&lt;p&gt;Can you please upload the full MDS and OSS logs following the reboot? It appears the MDS was still coming up from the reboot when the log was captured.&lt;/p&gt;

&lt;p&gt;Thanks!&lt;/p&gt;</comment>
                            <comment id="25879" author="brian" created="Thu, 5 Jan 2012 10:38:55 +0000"  >&lt;p&gt;I&apos;ve attached the full message log from mds2 which is actually the MDS on this filesystem.  I erroneously gave you mds1&apos;s log yesterday.  mds1 is the current MGS.&lt;/p&gt;

&lt;p&gt;I&apos;ve also attached the newer oss1 logs from the point where the previous oss1 log attachment ended.&lt;/p&gt;</comment>
                            <comment id="25890" author="vstephen" created="Thu, 5 Jan 2012 12:02:35 +0000"  >&lt;p&gt;Could we get logs from one of the clients experiencing the hang? Also, is the hang experienced from all clients, only one, or only a certain sub-set of clients within a particular portion of the network?&lt;/p&gt;</comment>
                            <comment id="25894" author="mhelmer" created="Thu, 5 Jan 2012 12:25:04 +0000"  >&lt;p&gt;Here are the client logs&lt;/p&gt;</comment>
                            <comment id="25908" author="brian" created="Thu, 5 Jan 2012 13:13:52 +0000"  >&lt;p&gt;Faulty network discovered.&lt;/p&gt;</comment>
                    </comments>
                    <attachments>
                            <attachment id="10716" name="client1.txt" size="4892" author="mhelmer" created="Thu, 5 Jan 2012 12:25:04 +0000"/>
                            <attachment id="10711" name="mds1_messages.txt" size="1291724" author="brian" created="Wed, 4 Jan 2012 17:04:55 +0000"/>
                            <attachment id="10714" name="mds2_messages.txt" size="694302" author="brian" created="Thu, 5 Jan 2012 10:38:55 +0000"/>
                            <attachment id="10715" name="oss1_messages.txt" size="228631" author="brian" created="Thu, 5 Jan 2012 10:38:55 +0000"/>
                            <attachment id="10712" name="oss1_messages.txt" size="217754" author="brian" created="Wed, 4 Jan 2012 17:04:55 +0000"/>
                    </attachments>
                <subtasks>
                    </subtasks>
                <customfields>
                                                                                                                                                                                            <customfield id="customfield_10890" key="com.atlassian.jira.plugins.jira-development-integration-plugin:devsummary">
                        <customfieldname>Development</customfieldname>
                        <customfieldvalues>
                            
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        <customfield id="customfield_10390" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                        <customfieldname>Rank</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>1|hzvhl3:</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10090" key="com.pyxis.greenhopper.jira:gh-global-rank">
                        <customfieldname>Rank (Obsolete)</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>6498</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                                                                                            <customfield id="customfield_10060" key="com.atlassian.jira.plugin.system.customfieldtypes:select">
                        <customfieldname>Severity</customfieldname>
                        <customfieldvalues>
                                <customfieldvalue key="10022"><![CDATA[3]]></customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        </customfields>
    </item>
</channel>
</rss>