<!-- 
RSS generated by JIRA (9.4.14#940014-sha1:734e6822bbf0d45eff9af51f82432957f73aa32c) at Sat Feb 10 03:10:20 UTC 2024

It is possible to restrict the fields that are returned in this document by specifying the 'field' parameter in your request.
For example, to request only the issue key and summary append 'field=key&field=summary' to the URL of your request.
-->
<rss version="0.92" >
<channel>
    <title>Whamcloud Community JIRA</title>
    <link>https://jira.whamcloud.com</link>
    <description>This file is an XML representation of an issue</description>
    <language>en-us</language>    <build-info>
        <version>9.4.14</version>
        <build-number>940014</build-number>
        <build-date>05-12-2023</build-date>
    </build-info>


<item>
            <title>[LU-14503] kiblnd: assertion that all net connections are closed may fail on shutdown</title>
                <link>https://jira.whamcloud.com/browse/LU-14503</link>
                <project id="10000" key="LU">Lustre</project>
                    <description>&lt;p&gt;It appears that there&apos;s a scenario in which the following assert from kiblnd_shutdown() may fail:&lt;/p&gt;
&lt;div class=&quot;code panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;codeContent panelContent&quot;&gt;
&lt;pre class=&quot;code-java&quot;&gt;
LASSERT (atomic_read(&amp;amp;net-&amp;gt;ibn_nconns) == 0); &lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;
&lt;p&gt;A connection may end up on the zombie list:&lt;/p&gt;
&lt;div class=&quot;code panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;codeContent panelContent&quot;&gt;
&lt;pre class=&quot;code-java&quot;&gt;
kiblnd_data.kib_connd_zombies &lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;
&lt;p&gt;Cleaning up the connections from this list is the job of&#160;kiblnd_connd instance:&lt;/p&gt;
&lt;div class=&quot;code panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;codeContent panelContent&quot;&gt;
&lt;pre class=&quot;code-java&quot;&gt;
 &#160; &#160; &#160; &#160; &lt;span class=&quot;code-keyword&quot;&gt;while&lt;/span&gt; (!kiblnd_data.kib_shutdown) {
 &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &lt;span class=&quot;code-object&quot;&gt;int&lt;/span&gt; reconn = 0;
&#160;
 &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; dropped_lock = 0;
&#160;
 &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &lt;span class=&quot;code-keyword&quot;&gt;if&lt;/span&gt; (!list_empty(&amp;amp;kiblnd_data.kib_connd_zombies)) {
 &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; struct kib_peer_ni *peer_ni = NULL;
&#160;
 &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; conn = list_entry(kiblnd_data.kib_connd_zombies.next,
 &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; struct kib_conn, ibc_list);
 &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; list_del(&amp;amp;conn-&amp;gt;ibc_list);
 &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &lt;span class=&quot;code-keyword&quot;&gt;if&lt;/span&gt; (conn-&amp;gt;ibc_reconnect) {
 &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; peer_ni = conn-&amp;gt;ibc_peer;
 &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; kiblnd_peer_addref(peer_ni);
 &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; }
&#160;
 &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; spin_unlock_irqrestore(lock, flags);
 &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; dropped_lock = 1;
&#160;
 &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; kiblnd_destroy_conn(conn);
&#160;
 &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; spin_lock_irqsave(lock, flags);
 &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &lt;span class=&quot;code-keyword&quot;&gt;if&lt;/span&gt; (!peer_ni) {
 &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; LIBCFS_FREE(conn, sizeof(*conn));
 &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &lt;span class=&quot;code-keyword&quot;&gt;continue&lt;/span&gt;;
 &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; }
&#160;
 &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; conn-&amp;gt;ibc_peer = peer_ni;
 &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &lt;span class=&quot;code-keyword&quot;&gt;if&lt;/span&gt; (peer_ni-&amp;gt;ibp_reconnected &amp;lt; KIB_RECONN_HIGH_RACE)
 &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; list_add_tail(&amp;amp;conn-&amp;gt;ibc_list, &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &amp;amp;kiblnd_data.kib_reconn_list);
 &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &lt;span class=&quot;code-keyword&quot;&gt;else&lt;/span&gt;
 &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; list_add_tail(&amp;amp;conn-&amp;gt;ibc_list,
 &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &amp;amp;kiblnd_data.kib_reconn_wait);
 &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; } 

................................
 &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &lt;span class=&quot;code-keyword&quot;&gt;if&lt;/span&gt; (dropped_lock)
 &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &lt;span class=&quot;code-keyword&quot;&gt;continue&lt;/span&gt;;
&#160;
 &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &lt;span class=&quot;code-comment&quot;&gt;/* Nothing to &lt;span class=&quot;code-keyword&quot;&gt;do&lt;/span&gt; &lt;span class=&quot;code-keyword&quot;&gt;for&lt;/span&gt; &lt;span class=&quot;code-quote&quot;&gt;&apos;timeout&apos;&lt;/span&gt;&#160; */&lt;/span&gt;
 &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; set_current_state(TASK_INTERRUPTIBLE);
 &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; add_wait_queue(&amp;amp;kiblnd_data.kib_connd_waitq, &amp;amp;wait);
 &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; spin_unlock_irqrestore(lock, flags);
&#160;
 &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; schedule_timeout(timeout);
&#160;
 &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; remove_wait_queue(&amp;amp;kiblnd_data.kib_connd_waitq, &amp;amp;wait);
 &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; spin_lock_irqsave(lock, flags);
 &#160; &#160; &#160; &#160; }&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;
&lt;p&gt;The loop exits when the&#160;kib_shutdown flag is set, which happens after the assertion in&#160;kiblnd_shutdown(). However,&#160;kiblnd_connd() may not be given the chance to clean up before the assertion, because the&#160;kiblnd_connd instances are not signalled to wake up until the&#160;kib_shutdown flag is set.&lt;/p&gt;

&lt;p&gt;The kiblnd shutdown procedure needs to be modified to ensure that connections on the zombie list are cleaned up before asserting on it.&lt;/p&gt;

&lt;p&gt;An example of the assertion going off is reported by &lt;a href=&quot;https://jira.whamcloud.com/secure/ViewProfile.jspa?name=sihara&quot; class=&quot;user-hover&quot; rel=&quot;sihara&quot;&gt;sihara&lt;/a&gt; for &lt;a href=&quot;https://review.whamcloud.com/#/c/41937/&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/#/c/41937/&lt;/a&gt;&lt;/p&gt;

&lt;p&gt;&#160;&lt;/p&gt;

&lt;p&gt;&#160;&lt;/p&gt;</description>
                <environment></environment>
        <key id="63254">LU-14503</key>
            <summary>kiblnd: assertion that all net connections are closed may fail on shutdown</summary>
                <type id="1" iconUrl="https://jira.whamcloud.com/secure/viewavatar?size=xsmall&amp;avatarId=11303&amp;avatarType=issuetype">Bug</type>
                                            <priority id="4" iconUrl="https://jira.whamcloud.com/images/icons/priorities/minor.svg">Minor</priority>
                        <status id="5" iconUrl="https://jira.whamcloud.com/images/icons/statuses/resolved.png" description="A resolution has been taken, and it is awaiting verification by reporter. From here issues are either reopened, or are closed.">Resolved</status>
                    <statusCategory id="3" key="done" colorName="success"/>
                                    <resolution id="1">Fixed</resolution>
                                        <assignee username="ssmirnov">Serguei Smirnov</assignee>
                                    <reporter username="ssmirnov">Serguei Smirnov</reporter>
                        <labels>
                            <label>lnet</label>
                            <label>o2iblnd</label>
                    </labels>
                <created>Tue, 9 Mar 2021 22:39:03 +0000</created>
                <updated>Sat, 11 Jun 2022 15:26:57 +0000</updated>
                            <resolved>Sat, 11 Jun 2022 15:26:57 +0000</resolved>
                                                    <fixVersion>Lustre 2.16.0</fixVersion>
                                        <due></due>
                            <votes>0</votes>
                                    <watches>4</watches>
                                                                            <comments>
                            <comment id="294553" author="gerrit" created="Wed, 10 Mar 2021 19:52:58 +0000"  >&lt;p&gt;Serguei Smirnov (ssmirnov@whamcloud.com) uploaded a new patch: &lt;a href=&quot;https://review.whamcloud.com/41988&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/41988&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-14503&quot; title=&quot;kiblnd: assertion that all net connections are closed may fail on shutdown&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-14503&quot;&gt;&lt;del&gt;LU-14503&lt;/del&gt;&lt;/a&gt; o2iblnd: clean up zombie connections on shutdown&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: master&lt;br/&gt;
Current Patch Set: 1&lt;br/&gt;
Commit: d29f5d5998b9082f370bb52b337930ec6f246530&lt;/p&gt;</comment>
                            <comment id="295273" author="sihara" created="Wed, 17 Mar 2021 22:48:07 +0000"  >&lt;p&gt;I think patch &lt;a href=&quot;https://review.whamcloud.com/41988&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/41988&lt;/a&gt; solved a crash problem which was reproduced by &lt;a href=&quot;https://review.whamcloud.com/41988&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/41988&lt;/a&gt; in &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-14499&quot; title=&quot;o2iblnd: LU-13368 changes cause shutdown procedure to not complete&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-14499&quot;&gt;LU-14499&lt;/a&gt;.&lt;br/&gt;
I&apos;ve continually run the same reproducer of &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-14499&quot; title=&quot;o2iblnd: LU-13368 changes cause shutdown procedure to not complete&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-14499&quot;&gt;LU-14499&lt;/a&gt; more than 100 times, but the problem never happened.&lt;/p&gt;</comment>
                            <comment id="295299" author="gerrit" created="Thu, 18 Mar 2021 03:53:58 +0000"  >&lt;p&gt;Serguei Smirnov (ssmirnov@whamcloud.com) uploaded a new patch: &lt;a href=&quot;https://review.whamcloud.com/42068&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/42068&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-14503&quot; title=&quot;kiblnd: assertion that all net connections are closed may fail on shutdown&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-14503&quot;&gt;&lt;del&gt;LU-14503&lt;/del&gt;&lt;/a&gt; o2iblnd: clean up zombie connections on shutdown&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: master&lt;br/&gt;
Current Patch Set: 1&lt;br/&gt;
Commit: 016029d97a8af446452b9934f4a01d4ea800ea7e&lt;/p&gt;</comment>
                            <comment id="337382" author="gerrit" created="Sat, 11 Jun 2022 05:30:39 +0000"  >&lt;p&gt;&quot;Oleg Drokin &amp;lt;green@whamcloud.com&amp;gt;&quot; merged in patch &lt;a href=&quot;https://review.whamcloud.com/42068/&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/42068/&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-14503&quot; title=&quot;kiblnd: assertion that all net connections are closed may fail on shutdown&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-14503&quot;&gt;&lt;del&gt;LU-14503&lt;/del&gt;&lt;/a&gt; o2iblnd: clean up zombie connections on shutdown&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: master&lt;br/&gt;
Current Patch Set: &lt;br/&gt;
Commit: 2a183829cdcc7008f2b9706cb212b22b877dfce0&lt;/p&gt;</comment>
                            <comment id="337485" author="pjones" created="Sat, 11 Jun 2022 15:26:57 +0000"  >&lt;p&gt;Landed for 2.16&lt;/p&gt;</comment>
                    </comments>
                    <attachments>
                    </attachments>
                <subtasks>
                    </subtasks>
                <customfields>
                                                                                                                                                                                            <customfield id="customfield_10890" key="com.atlassian.jira.plugins.jira-development-integration-plugin:devsummary">
                        <customfieldname>Development</customfieldname>
                        <customfieldvalues>
                            
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        <customfield id="customfield_10390" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                        <customfieldname>Rank</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>1|i01oxb:</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10090" key="com.pyxis.greenhopper.jira:gh-global-rank">
                        <customfieldname>Rank (Obsolete)</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>9223372036854775807</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                                                                                            <customfield id="customfield_10060" key="com.atlassian.jira.plugin.system.customfieldtypes:select">
                        <customfieldname>Severity</customfieldname>
                        <customfieldvalues>
                                <customfieldvalue key="10022"><![CDATA[3]]></customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        </customfields>
    </item>
</channel>
</rss>