<!-- 
RSS generated by JIRA (9.4.14#940014-sha1:734e6822bbf0d45eff9af51f82432957f73aa32c) at Sat Feb 10 03:07:46 UTC 2024

It is possible to restrict the fields that are returned in this document by specifying the 'field' parameter in your request.
For example, to request only the issue key and summary append 'field=key&field=summary' to the URL of your request.
-->
<rss version="0.92" >
<channel>
    <title>Whamcloud Community JIRA</title>
    <link>https://jira.whamcloud.com</link>
    <description>This file is an XML representation of an issue</description>
    <language>en-us</language>    <build-info>
        <version>9.4.14</version>
        <build-number>940014</build-number>
        <build-date>05-12-2023</build-date>
    </build-info>


<item>
            <title>[LU-14206] Router ping timeouts don&apos;t mark routes down if DD is disabled</title>
                <link>https://jira.whamcloud.com/browse/LU-14206</link>
                <project id="10000" key="LU">Lustre</project>
                    <description>&lt;p&gt;Discovery pings are used to determine the health of gateways and&lt;br/&gt;
associated routes. Ping replies from gateways with dynamic discovery&lt;br/&gt;
(DD) disabled (or if DD is disabled locally) are handled in&lt;br/&gt;
a special routine, lnet_router_discovery_ping_reply(), but this&lt;br/&gt;
function and related code doesn&apos;t handle the case where a discovery&lt;br/&gt;
ping hits the response tracker timeout and is unlinked by the&lt;br/&gt;
monitor thread. In this case, an UNLINK event is generated and we&lt;br/&gt;
do not call lnet_router_discovery_ping_reply(). For gateways&lt;br/&gt;
with DD enabled (and DD enabled locally), we handle this case&lt;br/&gt;
in lnet_router_discovery_complete(). If discovery failed then&lt;br/&gt;
lp_dc_error is set and we mark all routes down for the gateway. We&lt;br/&gt;
can simply extend this logic to the case of gateways w/DD disabled&lt;br/&gt;
(or DD disabled locally).&lt;/p&gt;</description>
                <environment></environment>
        <key id="61935">LU-14206</key>
            <summary>Router ping timeouts don&apos;t mark routes down if DD is disabled</summary>
                <type id="1" iconUrl="https://jira.whamcloud.com/secure/viewavatar?size=xsmall&amp;avatarId=11303&amp;avatarType=issuetype">Bug</type>
                                            <priority id="4" iconUrl="https://jira.whamcloud.com/images/icons/priorities/minor.svg">Minor</priority>
                        <status id="5" iconUrl="https://jira.whamcloud.com/images/icons/statuses/resolved.png" description="A resolution has been taken, and it is awaiting verification by reporter. From here issues are either reopened, or are closed.">Resolved</status>
                    <statusCategory id="3" key="done" colorName="success"/>
                                    <resolution id="1">Fixed</resolution>
                                        <assignee username="hornc">Chris Horn</assignee>
                                    <reporter username="hornc">Chris Horn</reporter>
                        <labels>
                    </labels>
                <created>Wed, 9 Dec 2020 21:07:48 +0000</created>
                <updated>Tue, 30 Aug 2022 01:01:47 +0000</updated>
                            <resolved>Wed, 28 Apr 2021 04:07:10 +0000</resolved>
                                                    <fixVersion>Lustre 2.15.0</fixVersion>
                                        <due></due>
                            <votes>0</votes>
                                    <watches>3</watches>
                                                                            <comments>
                            <comment id="287152" author="gerrit" created="Wed, 9 Dec 2020 21:09:05 +0000"  >&lt;p&gt;Chris Horn (chris.horn@hpe.com) uploaded a new patch: &lt;a href=&quot;https://review.whamcloud.com/40923&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/40923&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-14206&quot; title=&quot;Router ping timeouts don&amp;#39;t mark routes down if DD is disabled&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-14206&quot;&gt;&lt;del&gt;LU-14206&lt;/del&gt;&lt;/a&gt; lnet: Router ping timeout with discovery disabled&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: master&lt;br/&gt;
Current Patch Set: 1&lt;br/&gt;
Commit: 136ff5da3df3bdb908266101a09456e58e4c665d&lt;/p&gt;</comment>
                            <comment id="293636" author="hornc" created="Tue, 2 Mar 2021 16:50:52 +0000"  >&lt;p&gt;Test notes for the fix (LUS-9612 is HPE internal issue for &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-14206&quot; title=&quot;Router ping timeouts don&amp;#39;t mark routes down if DD is disabled&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-14206&quot;&gt;&lt;del&gt;LU-14206&lt;/del&gt;&lt;/a&gt;)&lt;/p&gt;

&lt;p&gt;Build cray-2.12-int to reproduce:&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;hornc@sles15build01 lustre-filesystem $ ./LUSTRE-VERSION-GEN
2.12.4.2_cray_253_gd8f8bfe
hornc@sles15build01 lustre-filesystem $ make -j 32
...
sles15build01:~ # for i in sles15s01 sles15s02 sles15c01; do rsync -avr /home/hornc/lustre-filesystem $i:/home/hornc ; done
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;Test node config:&lt;/p&gt;

&lt;p&gt;sles15s01:&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;sles15s01:~ # lctl list_nids
192.168.2.30@tcp1
sles15s01:~ # lctl show_route
net               tcp2 hops 4294967295 gw                192.168.2.32@tcp1 up pri 0
sles15s01:~ #
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;sles15c01:&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;sles15c01:~ # lctl list_nids
192.168.2.38@tcp2
sles15c01:~ # lctl show_route
net               tcp1 hops 4294967295 gw                192.168.2.33@tcp2 up pri 0
sles15c01:~ #
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;sles15s02 (router w/DD disabled):&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;sles15s02:~ # lctl list_nids
192.168.2.32@tcp1
192.168.2.33@tcp2
sles15s02:~ # lnetctl global show | grep disc
    discovery: 0
sles15s02:~ #
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;Stop LNet on the router.&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;sles15s02:~ # lctl net down
LNET ready to unload
sles15s02:~ # lustre_rmmod
sles15s02:~ #
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;Wait for route to be marked down on peer. Check dk log to show we do not trigger &quot;Router discovery failed&quot; code path.&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;sles15c01:~ # lctl show_route
net               tcp1 hops 4294967295 gw                192.168.2.33@tcp2 down pri 0
sles15c01:~ # lctl dk &amp;gt; /tmp/dk.log
sles15c01:~ # grep &apos;Router discovery failed&apos; /tmp/dk.log
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;Build/deploy fix:&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;hornc@sles15build01 lustre-filesystem $ git fetch https://es-gerrit.dev.cray.com/lustre-wc-rel refs/changes/88/158188/2 &amp;amp;&amp;amp; git cherry-pick FETCH_HEAD
remote: Counting objects: 9, done
remote: Finding sources: 100% (5/5)
remote: Total 5 (delta 4), reused 5 (delta 4)
Unpacking objects: 100% (5/5), done.
From https://es-gerrit.dev.cray.com/lustre-wc-rel
 * branch                  refs/changes/88/158188/2 -&amp;gt; FETCH_HEAD
[task/2.12-int/test-LUS-9612 9571e895bb] LUS-9612 lnet: Router ping timeout with discovery disabled
 Date: Wed Dec 9 14:38:57 2020 -0600
 1 file changed, 4 insertions(+), 4 deletions(-)
hornc@sles15build01 lustre-filesystem $ make -j 32
...
sles15build01:~ # for i in sles15s01 sles15s02 sles15c01; do rsync -avr /home/hornc/lustre-filesystem $i:/home/hornc ; done
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;We can see we take the correct &quot;Router discovery failed&quot; code path.&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;sles15s02:~ # lctl net down
LNET ready to unload
sles15s02:~ # lustre_rmmod
sles15s02:~ #

sles15c01:~ # lctl show_route
net               tcp1 hops 4294967295 gw                192.168.2.33@tcp2 down pri 0
sles15c01:~ # lctl dk &amp;gt; /tmp/dk.log
sles15c01:~ # grep &apos;Router discovery failed&apos; /tmp/dk.log
00000400:00000200:0.0:1611332945.989428:0:10139:0:(router.c:540:lnet_router_discovery_complete()) 192.168.2.33@tcp2: Router discovery failed -111
sles15c01:~ #
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;</comment>
                            <comment id="299890" author="gerrit" created="Wed, 28 Apr 2021 02:10:48 +0000"  >&lt;p&gt;Oleg Drokin (green@whamcloud.com) merged in patch &lt;a href=&quot;https://review.whamcloud.com/40923/&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/40923/&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-14206&quot; title=&quot;Router ping timeouts don&amp;#39;t mark routes down if DD is disabled&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-14206&quot;&gt;&lt;del&gt;LU-14206&lt;/del&gt;&lt;/a&gt; lnet: Router ping timeout with discovery disabled&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: master&lt;br/&gt;
Current Patch Set: &lt;br/&gt;
Commit: 173d86c6e9a704a84de36ae57a337a3fdae7b1ed&lt;/p&gt;</comment>
                            <comment id="299920" author="pjones" created="Wed, 28 Apr 2021 04:07:10 +0000"  >&lt;p&gt;Landed for 2.15&lt;/p&gt;</comment>
                    </comments>
                    <attachments>
                    </attachments>
                <subtasks>
                    </subtasks>
                <customfields>
                                                                                                                                                                                            <customfield id="customfield_10890" key="com.atlassian.jira.plugins.jira-development-integration-plugin:devsummary">
                        <customfieldname>Development</customfieldname>
                        <customfieldvalues>
                            
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        <customfield id="customfield_10390" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                        <customfieldname>Rank</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>1|i01gtb:</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10090" key="com.pyxis.greenhopper.jira:gh-global-rank">
                        <customfieldname>Rank (Obsolete)</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>9223372036854775807</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                                                                                            <customfield id="customfield_10060" key="com.atlassian.jira.plugin.system.customfieldtypes:select">
                        <customfieldname>Severity</customfieldname>
                        <customfieldvalues>
                                <customfieldvalue key="10022"><![CDATA[3]]></customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        </customfields>
    </item>
</channel>
</rss>