<!-- 
RSS generated by JIRA (9.4.14#940014-sha1:734e6822bbf0d45eff9af51f82432957f73aa32c) at Sat Feb 10 03:10:37 UTC 2024

It is possible to restrict the fields that are returned in this document by specifying the 'field' parameter in your request.
For example, to request only the issue key and summary append 'field=key&field=summary' to the URL of your request.
-->
<rss version="0.92" >
<channel>
    <title>Whamcloud Community JIRA</title>
    <link>https://jira.whamcloud.com</link>
    <description>This file is an XML representation of an issue</description>
    <language>en-us</language>    <build-info>
        <version>9.4.14</version>
        <build-number>940014</build-number>
        <build-date>05-12-2023</build-date>
    </build-info>


<item>
            <title>[LU-14536] kiblnd does resend for IB_CM_REJ_INVALID_SERVICE_ID</title>
                <link>https://jira.whamcloud.com/browse/LU-14536</link>
                <project id="10000" key="LU">Lustre</project>
                    <description>&lt;p&gt;When connecting to a host which is not up, each discovery is attempted retry_count (see kiblnd_check_reconnect) * lnet_retry_count (for resend) times.&lt;/p&gt;

&lt;p&gt;For each OST, mounting the MDT processes attach, add_conn (if the OST has a failover node) and add_osc, so discovery is triggered 3 times.&lt;/p&gt;

&lt;p&gt;Mounting the MDT while other nodes are not up can take a very long time, making customers think the mount is stuck.&lt;/p&gt;</description>
                <environment></environment>
        <key id="63423">LU-14536</key>
            <summary>kiblnd does resend for IB_CM_REJ_INVALID_SERVICE_ID</summary>
                <type id="1" iconUrl="https://jira.whamcloud.com/secure/viewavatar?size=xsmall&amp;avatarId=11303&amp;avatarType=issuetype">Bug</type>
                                            <priority id="4" iconUrl="https://jira.whamcloud.com/images/icons/priorities/minor.svg">Minor</priority>
                        <status id="5" iconUrl="https://jira.whamcloud.com/images/icons/statuses/resolved.png" description="A resolution has been taken, and it is awaiting verification by reporter. From here issues are either reopened, or are closed.">Resolved</status>
                    <statusCategory id="3" key="done" colorName="success"/>
                                    <resolution id="1">Fixed</resolution>
                                        <assignee username="dongyang">Dongyang Li</assignee>
                                    <reporter username="dongyang">Dongyang Li</reporter>
                        <labels>
                    </labels>
                <created>Fri, 19 Mar 2021 09:50:13 +0000</created>
                <updated>Tue, 5 Apr 2022 03:15:29 +0000</updated>
                            <resolved>Thu, 15 Apr 2021 17:31:45 +0000</resolved>
                                                    <fixVersion>Lustre 2.12.9</fixVersion>
                    <fixVersion>Lustre 2.15.0</fixVersion>
                                        <due></due>
                            <votes>0</votes>
                                    <watches>7</watches>
                                                                            <comments>
                            <comment id="295429" author="gerrit" created="Fri, 19 Mar 2021 09:53:24 +0000"  >&lt;p&gt;Li Dongyang (dongyangli@ddn.com) uploaded a new patch: &lt;a href=&quot;https://review.whamcloud.com/42109&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/42109&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-14536&quot; title=&quot;kiblnd does resend for IB_CM_REJ_INVALID_SERVICE_ID&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-14536&quot;&gt;&lt;del&gt;LU-14536&lt;/del&gt;&lt;/a&gt; o2iblnd: don&apos;t resend if there&apos;s no listener&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: master&lt;br/&gt;
Current Patch Set: 1&lt;br/&gt;
Commit: 29f79fe50652ac048949df4b8e4a2eafa235ebbf&lt;/p&gt;</comment>
                            <comment id="295433" author="dongyang" created="Fri, 19 Mar 2021 10:13:27 +0000"  >&lt;p&gt;even without resend we are still retrying 5 times for each discovery, and for each ost from the conf llog we will try discovery once.&lt;/p&gt;

&lt;p&gt;I&apos;m wondering should we even retry at all if there&apos;s no listener.&lt;/p&gt;</comment>
                            <comment id="295436" author="gerrit" created="Fri, 19 Mar 2021 10:28:07 +0000"  >&lt;p&gt;Li Dongyang (dongyangli@ddn.com) uploaded a new patch: &lt;a href=&quot;https://review.whamcloud.com/42111&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/42111&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-14536&quot; title=&quot;kiblnd does resend for IB_CM_REJ_INVALID_SERVICE_ID&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-14536&quot;&gt;&lt;del&gt;LU-14536&lt;/del&gt;&lt;/a&gt; obi2lnd: don&apos;t try to reconnect if there&apos;s no listener&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: master&lt;br/&gt;
Current Patch Set: 1&lt;br/&gt;
Commit: e164c7533c393be60ab35472743868eec6452129&lt;/p&gt;</comment>
                            <comment id="296823" author="dongyang" created="Fri, 26 Mar 2021 07:50:57 +0000"  >&lt;p&gt;I managed to get access to the site experiencing the issue and got some numbers:&lt;br/&gt;
 Note the site has 415 OSTs, tested with latest 2.12&lt;/p&gt;

&lt;p&gt;when all the servers are up, mounting the targets on mds1:&lt;/p&gt;
&lt;div class=&quot;code panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;codeContent panelContent&quot;&gt;
&lt;pre class=&quot;code-java&quot;&gt;
[root@lmds-vm1 o2iblnd]# cat mount.sh
modprobe lnet
modprobe lustre
modprobe libcfs
modprobe ksocklnd
modprobe obdclass
modprobe ptlrpc
modprobe ldiskfs
modprobe osd_ldiskfs
modprobe ko2iblnd
vgchange -ay vg_mdt0000_lustrefs --config &lt;span class=&quot;code-quote&quot;&gt;&apos;activation{volume_list=[&lt;span class=&quot;code-quote&quot;&gt;&quot;vg_mdt0000_lustrefs&quot;&lt;/span&gt;]}&apos;&lt;/span&gt;
vgchange -ay vg_mgs --config &lt;span class=&quot;code-quote&quot;&gt;&apos;activation{volume_list=[&lt;span class=&quot;code-quote&quot;&gt;&quot;vg_mgs&quot;&lt;/span&gt;]}&apos;&lt;/span&gt;
mount -t lustre -o max_sectors_kb=0 /dev/mapper/vg_mgs-mgs /lustre/mgs
mount -t lustre -o max_sectors_kb=0 /dev/mapper/vg_mdt0000_lustrefs-mdt0000 /lustre/lustrefs/mdt0000
mount -t lustre -o max_sectors_kb=0 /dev/ddn/lustrefs_ost0000 /lustre/lustrefs/ost0000
mount -t lustre -o max_sectors_kb=0 /dev/ddn/lustrefs_ost0001 /lustre/lustrefs/ost0001
mount -t lustre -o max_sectors_kb=0 /dev/ddn/lustrefs_ost0400 /lustre/lustrefs/ost0400
mount -t lustre -o max_sectors_kb=0 /dev/ddn/lustrefs_ost0401 /lustre/lustrefs/ost0401
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;
&lt;p&gt;mounting mdt0000 took about 30mins&lt;/p&gt;
&lt;div class=&quot;code panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;codeContent panelContent&quot;&gt;
&lt;pre class=&quot;code-java&quot;&gt;
[12863.422884] LNet: HW NUMA nodes: 1, HW CPU cores: 20, npartitions: 10
[12863.424484] alg: No test &lt;span class=&quot;code-keyword&quot;&gt;for&lt;/span&gt; adler32 (adler32-zlib)
[12864.250078] Lustre: Lustre: Build Version: 2.12.6
[12864.356817] LNet: Using FastReg &lt;span class=&quot;code-keyword&quot;&gt;for&lt;/span&gt; registration
[12864.396832] LNet: Added LNI 10.149.10.21@o2ib [8/640/0/180]
[12864.434556] LNet: Added LNI 10.149.11.21@o2ib [8/640/0/180]
[12864.895181] LDISKFS-fs (dm-7): mounted filesystem with ordered data mode. Opts: user_xattr,errors=remount-ro,no_mbcache,nodelalloc
[12866.435582] Lustre: MGS: Connection restored to 8529a39c-6bcb-c902-92ad-9af110ac39df (at 0@lo)
[12866.819906] LDISKFS-fs (dm-6): mounted filesystem with ordered data mode. Opts: acl,user_xattr,errors=remount-ro,no_mbcache,nodelalloc
[14652.389489] Lustre: lustrefs-MDT0000: Imperative Recovery enabled, recovery window shrunk from 300-900 down to 150-900
[14652.395640] Lustre: lustrefs-MDT0000: in recovery but waiting &lt;span class=&quot;code-keyword&quot;&gt;for&lt;/span&gt; the first client to connect
[14675.527074] Lustre: lustrefs-MDT0000: Connection restored to 10.149.10.21@o2ib (at 0@lo)
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;
&lt;p&gt;I don&apos;t know how much time it would take for the OSTs on the host; the mount script was terminated while it was still working on mdt0000.&lt;/p&gt;

&lt;p&gt;with patch 42109:&lt;/p&gt;
&lt;div class=&quot;code panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;codeContent panelContent&quot;&gt;
&lt;pre class=&quot;code-java&quot;&gt;
[root@lmds-vm1 o2iblnd]# time mount.sh
real	0m15.763s
user	0m0.483s
sys	0m6.796s
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;
&lt;p&gt;with patch 42109 + 42111:&lt;/p&gt;
&lt;div class=&quot;code panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;codeContent panelContent&quot;&gt;
&lt;pre class=&quot;code-java&quot;&gt;
[root@lmds-vm1 o2iblnd]# time mount.sh
real	0m8.166s
user	0m0.453s
sys	0m6.703s&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;</comment>
                            <comment id="298838" author="gerrit" created="Thu, 15 Apr 2021 06:30:34 +0000"  >&lt;p&gt;Oleg Drokin (green@whamcloud.com) merged in patch &lt;a href=&quot;https://review.whamcloud.com/42109/&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/42109/&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-14536&quot; title=&quot;kiblnd does resend for IB_CM_REJ_INVALID_SERVICE_ID&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-14536&quot;&gt;&lt;del&gt;LU-14536&lt;/del&gt;&lt;/a&gt; o2iblnd: don&apos;t resend if there&apos;s no listener&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: master&lt;br/&gt;
Current Patch Set: &lt;br/&gt;
Commit: 0ab06eb9d865a47ea3e09880a41a9e8f0a78b6a6&lt;/p&gt;</comment>
                            <comment id="298839" author="gerrit" created="Thu, 15 Apr 2021 06:30:40 +0000"  >&lt;p&gt;Oleg Drokin (green@whamcloud.com) merged in patch &lt;a href=&quot;https://review.whamcloud.com/42111/&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/42111/&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-14536&quot; title=&quot;kiblnd does resend for IB_CM_REJ_INVALID_SERVICE_ID&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-14536&quot;&gt;&lt;del&gt;LU-14536&lt;/del&gt;&lt;/a&gt; obi2lnd: don&apos;t try to reconnect if there&apos;s no listener&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: master&lt;br/&gt;
Current Patch Set: &lt;br/&gt;
Commit: 67ba3ce23d32266eabd5f8c56fa78d65920455e8&lt;/p&gt;</comment>
                            <comment id="298912" author="pjones" created="Thu, 15 Apr 2021 17:31:45 +0000"  >&lt;p&gt;Landed for 2.15&lt;/p&gt;</comment>
                            <comment id="317799" author="gerrit" created="Wed, 10 Nov 2021 01:02:57 +0000"  >&lt;p&gt;&quot;Andreas Dilger &amp;lt;adilger@whamcloud.com&amp;gt;&quot; uploaded a new patch: &lt;a href=&quot;https://review.whamcloud.com/45510&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/45510&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-14536&quot; title=&quot;kiblnd does resend for IB_CM_REJ_INVALID_SERVICE_ID&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-14536&quot;&gt;&lt;del&gt;LU-14536&lt;/del&gt;&lt;/a&gt; o2iblnd: don&apos;t resend if there&apos;s no listener&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: b2_14&lt;br/&gt;
Current Patch Set: 1&lt;br/&gt;
Commit: 92acd551b2c97e2800181541e501815219bfc753&lt;/p&gt;</comment>
                            <comment id="317800" author="gerrit" created="Wed, 10 Nov 2021 01:02:58 +0000"  >&lt;p&gt;&quot;Andreas Dilger &amp;lt;adilger@whamcloud.com&amp;gt;&quot; uploaded a new patch: &lt;a href=&quot;https://review.whamcloud.com/45511&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/45511&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-14536&quot; title=&quot;kiblnd does resend for IB_CM_REJ_INVALID_SERVICE_ID&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-14536&quot;&gt;&lt;del&gt;LU-14536&lt;/del&gt;&lt;/a&gt; obi2lnd: don&apos;t try to reconnect if there&apos;s no listener&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: b2_14&lt;br/&gt;
Current Patch Set: 1&lt;br/&gt;
Commit: 33fe975fa96d9dbacda1e14c2147dfb276da12dd&lt;/p&gt;</comment>
                            <comment id="321235" author="gerrit" created="Mon, 20 Dec 2021 20:17:02 +0000"  >&lt;p&gt;&quot;Andreas Dilger &amp;lt;adilger@whamcloud.com&amp;gt;&quot; uploaded a new patch: &lt;a href=&quot;https://review.whamcloud.com/45895&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/45895&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-14536&quot; title=&quot;kiblnd does resend for IB_CM_REJ_INVALID_SERVICE_ID&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-14536&quot;&gt;&lt;del&gt;LU-14536&lt;/del&gt;&lt;/a&gt; o2iblnd: don&apos;t resend if there&apos;s no listener&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: b2_12&lt;br/&gt;
Current Patch Set: 1&lt;br/&gt;
Commit: 9fe25415e52a73bde7e53871403266a5b5db859a&lt;/p&gt;</comment>
                            <comment id="321236" author="gerrit" created="Mon, 20 Dec 2021 20:17:03 +0000"  >&lt;p&gt;&quot;Andreas Dilger &amp;lt;adilger@whamcloud.com&amp;gt;&quot; uploaded a new patch: &lt;a href=&quot;https://review.whamcloud.com/45896&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/45896&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-14536&quot; title=&quot;kiblnd does resend for IB_CM_REJ_INVALID_SERVICE_ID&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-14536&quot;&gt;&lt;del&gt;LU-14536&lt;/del&gt;&lt;/a&gt; obi2lnd: don&apos;t try to reconnect if there&apos;s no listener&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: b2_12&lt;br/&gt;
Current Patch Set: 1&lt;br/&gt;
Commit: 6ec51a85d49231e50694f0503406e548efbd6f17&lt;/p&gt;</comment>
                            <comment id="324464" author="gerrit" created="Sun, 30 Jan 2022 03:42:16 +0000"  >&lt;p&gt;&quot;Oleg Drokin &amp;lt;green@whamcloud.com&amp;gt;&quot; merged in patch &lt;a href=&quot;https://review.whamcloud.com/45895/&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/45895/&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-14536&quot; title=&quot;kiblnd does resend for IB_CM_REJ_INVALID_SERVICE_ID&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-14536&quot;&gt;&lt;del&gt;LU-14536&lt;/del&gt;&lt;/a&gt; o2iblnd: don&apos;t resend if there&apos;s no listener&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: b2_12&lt;br/&gt;
Current Patch Set: &lt;br/&gt;
Commit: da6e6953305be165798772d820ec59a0a209b604&lt;/p&gt;</comment>
                            <comment id="324465" author="gerrit" created="Sun, 30 Jan 2022 03:42:22 +0000"  >&lt;p&gt;&quot;Oleg Drokin &amp;lt;green@whamcloud.com&amp;gt;&quot; merged in patch &lt;a href=&quot;https://review.whamcloud.com/45896/&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/45896/&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-14536&quot; title=&quot;kiblnd does resend for IB_CM_REJ_INVALID_SERVICE_ID&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-14536&quot;&gt;&lt;del&gt;LU-14536&lt;/del&gt;&lt;/a&gt; obi2lnd: don&apos;t try to reconnect if there&apos;s no listener&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: b2_12&lt;br/&gt;
Current Patch Set: &lt;br/&gt;
Commit: 3bd965a786afb04317acd1d8eb1708e594a1fc91&lt;/p&gt;</comment>
                            <comment id="331012" author="haaknci" created="Tue, 5 Apr 2022 03:10:11 +0000"  >&lt;p&gt;The b2_14 backport causes servers to panic with a null dereference error at MDT mount. Can we get this looked into, please?&lt;/p&gt;</comment>
                            <comment id="331013" author="kim.sebo" created="Tue, 5 Apr 2022 03:15:29 +0000"  >&lt;p&gt;&lt;span class=&quot;image-wrap&quot; style=&quot;&quot;&gt;&lt;img src=&quot;https://jira.whamcloud.com/secure/attachment/43069/43069_lustre.png&quot; style=&quot;border: 0px solid black&quot; /&gt;&lt;/span&gt;&lt;/p&gt;</comment>
                    </comments>
                <issuelinks>
                            <issuelinktype id="10011">
                    <name>Related</name>
                                                                <inwardlinks description="is related to">
                                                        </inwardlinks>
                                    </issuelinktype>
                    </issuelinks>
                <attachments>
                            <attachment id="43069" name="lustre.png" size="365791" author="kim.sebo" created="Tue, 5 Apr 2022 03:15:19 +0000"/>
                    </attachments>
                <subtasks>
                    </subtasks>
                <customfields>
                                                                                                                                                                                            <customfield id="customfield_10890" key="com.atlassian.jira.plugins.jira-development-integration-plugin:devsummary">
                        <customfieldname>Development</customfieldname>
                        <customfieldvalues>
                            
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        <customfield id="customfield_10390" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                        <customfieldname>Rank</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>1|i01py7:</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10090" key="com.pyxis.greenhopper.jira:gh-global-rank">
                        <customfieldname>Rank (Obsolete)</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>9223372036854775807</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                                                                                            <customfield id="customfield_10060" key="com.atlassian.jira.plugin.system.customfieldtypes:select">
                        <customfieldname>Severity</customfieldname>
                        <customfieldvalues>
                                <customfieldvalue key="10022"><![CDATA[3]]></customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        </customfields>
    </item>
</channel>
</rss>