<!-- 
RSS generated by JIRA (9.4.14#940014-sha1:734e6822bbf0d45eff9af51f82432957f73aa32c) at Sat Feb 10 02:42:58 UTC 2024

It is possible to restrict the fields that are returned in this document by specifying the 'field' parameter in your request.
For example, to request only the issue key and summary append 'field=key&field=summary' to the URL of your request.
-->
<rss version="0.92" >
<channel>
    <title>Whamcloud Community JIRA</title>
    <link>https://jira.whamcloud.com</link>
    <description>This file is an XML representation of an issue</description>
    <language>en-us</language>    <build-info>
        <version>9.4.14</version>
        <build-number>940014</build-number>
        <build-date>05-12-2023</build-date>
    </build-info>


<item>
            <title>[LU-11334] LNet health check failing with ksocknal_tx_done()) tx failure rc = -113, hstatus = 2</title>
                <link>https://jira.whamcloud.com/browse/LU-11334</link>
                <project id="10000" key="LU">Lustre</project>
                    <description>&lt;p&gt;While attempting to run the latest Lustre, I see the following errors:&lt;/p&gt;

&lt;p&gt;&lt;span class=&quot;error&quot;&gt;&amp;#91;66851.686185&amp;#93;&lt;/span&gt; LNet: Added LNI 172.30.224.9@tcp &lt;span class=&quot;error&quot;&gt;&amp;#91;8/256/0/180&amp;#93;&lt;/span&gt;&lt;/p&gt;

&lt;p&gt;&lt;span class=&quot;error&quot;&gt;&amp;#91;66851.686267&amp;#93;&lt;/span&gt; LNet: Accept secure, port 988&lt;/p&gt;

&lt;p&gt;&lt;span class=&quot;error&quot;&gt;&amp;#91;66851.760204&amp;#93;&lt;/span&gt; LNetError: 50758:0:(socklnd_cb.c:414:ksocknal_tx_done()) tx failure rc = -113, hstatus = 2&lt;/p&gt;

&lt;p&gt;&lt;span class=&quot;error&quot;&gt;&amp;#91;66851.760533&amp;#93;&lt;/span&gt; LNetError: 50789:0:(lib-msg.c:794:lnet_is_health_check()) Msg is in inconsistent state, don&apos;t perform health checking (-5, 0)&lt;/p&gt;

&lt;p&gt;&lt;span class=&quot;error&quot;&gt;&amp;#91;66912.155561&amp;#93;&lt;/span&gt; LustreError: 15f-b: lustre-MDT0000: cannot register this server with the MGS: rc = -110. Is the MGS running?&lt;/p&gt;

&lt;p&gt;&lt;span class=&quot;error&quot;&gt;&amp;#91;66912.190968&amp;#93;&lt;/span&gt; LustreError: 50624:0:(obd_mount_server.c:1939:server_fill_super()) Unable to start targets: -110&lt;/p&gt;

&lt;p&gt;&lt;span class=&quot;error&quot;&gt;&amp;#91;66912.191191&amp;#93;&lt;/span&gt; LustreError: 50624:0:(obd_mount_server.c:1589:server_put_super()) no obd lustre-MDT0000&lt;/p&gt;

&lt;p&gt;&lt;span class=&quot;error&quot;&gt;&amp;#91;66912.191277&amp;#93;&lt;/span&gt; LustreError: 50624:0:(obd_mount_server.c:132:server_deregister_mount()) lustre-MDT0000 not registered&lt;/p&gt;

&lt;p&gt;&lt;span class=&quot;error&quot;&gt;&amp;#91;66912.193716&amp;#93;&lt;/span&gt; Lustre: server umount lustre-MDT0000 complete&lt;/p&gt;</description>
                <environment>Latest tip of lustre-release using tcp for LNet</environment>
        <key id="53215">LU-11334</key>
            <summary>LNet health check failing with ksocknal_tx_done()) tx failure rc = -113, hstatus = 2</summary>
                <type id="1" iconUrl="https://jira.whamcloud.com/secure/viewavatar?size=xsmall&amp;avatarId=11303&amp;avatarType=issuetype">Bug</type>
                                            <priority id="2" iconUrl="https://jira.whamcloud.com/images/icons/priorities/critical.svg">Critical</priority>
                        <status id="5" iconUrl="https://jira.whamcloud.com/images/icons/statuses/resolved.png" description="A resolution has been taken, and it is awaiting verification by reporter. From here issues are either reopened, or are closed.">Resolved</status>
                    <statusCategory id="3" key="done" colorName="success"/>
                                    <resolution id="3">Duplicate</resolution>
                                        <assignee username="ashehata">Amir Shehata</assignee>
                                    <reporter username="simmonsja">James A Simmons</reporter>
                        <labels>
                    </labels>
                <created>Wed, 5 Sep 2018 14:31:38 +0000</created>
                <updated>Wed, 19 Dec 2018 21:13:47 +0000</updated>
                            <resolved>Thu, 6 Sep 2018 01:08:42 +0000</resolved>
                                    <version>Lustre 2.12.0</version>
                                                        <due></due>
                            <votes>0</votes>
                                    <watches>2</watches>
                                                                            <comments>
                            <comment id="233057" author="ashehata" created="Wed, 5 Sep 2018 16:20:28 +0000"  >&lt;p&gt;I believe this patch should resolve the logging you&apos;re seeing:&lt;/p&gt;

&lt;p&gt;&lt;a href=&quot;https://review.whamcloud.com/#/c/33096/&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/#/c/33096/&lt;/a&gt;&lt;/p&gt;

&lt;p&gt;However, there does seem to be a legitimate problem that&apos;s causing the connection to fail with -EHOSTUNREACH.&lt;/p&gt;</comment>
                            <comment id="233060" author="simmonsja" created="Wed, 5 Sep 2018 17:40:09 +0000"  >&lt;p&gt;I can do a normal ping and it works, so the network is reachable. You are right that lnetctl ping doesn&apos;t work, so LNet is unable to reach the MGS server.&lt;/p&gt;

&lt;p&gt;00000400:00000080:76.0F:1536169212.948392:1584:88313:0:(module.c:120:libcfs_ioctl()) libcfs ioctl cmd 3221775678&lt;/p&gt;

&lt;p&gt;00000400:00000100:55.0:1536169212.948714:1744:50759:0:(lib-socket.c:600:lnet_sock_connect()) Error -113 connecting 0.0.0.0/1023 -&amp;gt; 172.30.224.8/988&lt;/p&gt;

&lt;p&gt;00000400:00000100:55.0:1536169212.948724:1744:50759:0:(acceptor.c:112:lnet_connect_console_error()) Connection to 172.30.224.8@tcp at host 172.30.224.8 was unreachable: the network or that node may be down, or Lustre may be misconfigured.&lt;/p&gt;

&lt;p&gt;00000800:00000100:55.0:1536169212.948729:1680:50759:0:(socklnd_cb.c:435:ksocknal_txlist_done()) Deleting packet type 2 len 0 172.30.224.9@tcp-&amp;gt;172.30.224.8@tcp&lt;/p&gt;

&lt;p&gt;00000800:00020000:55.0:1536169212.948731:1872:50759:0:(socklnd_cb.c:414:ksocknal_tx_done()) tx failure rc = -113, hstatus = 2&lt;/p&gt;

&lt;p&gt;00000400:00000100:55.0:1536169212.948734:2352:50759:0:(lib-msg.c:719:lnet_health_check()) msg 0@&amp;lt;0:0&amp;gt;-&amp;gt;172.30.224.8@tcp exceeded retry count 0&lt;/p&gt;</comment>
                            <comment id="233063" author="ashehata" created="Wed, 5 Sep 2018 18:21:59 +0000"  >&lt;p&gt;Health is off by default, so it won&apos;t try to resend. That&apos;s what the &quot;exceeded retry count 0&quot; means.&lt;/p&gt;

&lt;p&gt;It looks like maybe the 988 port is blocked?&lt;/p&gt;</comment>
                            <comment id="233079" author="simmonsja" created="Thu, 6 Sep 2018 01:08:42 +0000"  >&lt;p&gt;Duplicate of &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-11309&quot; title=&quot;LNet Health: clean up debug messages&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-11309&quot;&gt;&lt;del&gt;LU-11309&lt;/del&gt;&lt;/a&gt;. &lt;/p&gt;

&lt;p&gt;The port was also blocked.&lt;/p&gt;</comment>
                    </comments>
                    <attachments>
                    </attachments>
                <subtasks>
                    </subtasks>
                <customfields>
                                                                                                                                                                                            <customfield id="customfield_10890" key="com.atlassian.jira.plugins.jira-development-integration-plugin:devsummary">
                        <customfieldname>Development</customfieldname>
                        <customfieldvalues>
                            
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        <customfield id="customfield_10390" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                        <customfieldname>Rank</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>1|i001tz:</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10090" key="com.pyxis.greenhopper.jira:gh-global-rank">
                        <customfieldname>Rank (Obsolete)</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>9223372036854775807</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                                                                                            <customfield id="customfield_10060" key="com.atlassian.jira.plugin.system.customfieldtypes:select">
                        <customfieldname>Severity</customfieldname>
                        <customfieldvalues>
                                <customfieldvalue key="10022"><![CDATA[3]]></customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        </customfields>
    </item>
</channel>
</rss>