<!-- 
RSS generated by JIRA (9.4.14#940014-sha1:734e6822bbf0d45eff9af51f82432957f73aa32c) at Sat Feb 10 02:59:24 UTC 2024

It is possible to restrict the fields that are returned in this document by specifying the 'field' parameter in your request.
For example, to request only the issue key and summary append 'field=key&field=summary' to the URL of your request.
-->
<rss version="0.92" >
<channel>
    <title>Whamcloud Community JIRA</title>
    <link>https://jira.whamcloud.com</link>
    <description>This file is an XML representation of an issue</description>
    <language>en-us</language>    <build-info>
        <version>9.4.14</version>
        <build-number>940014</build-number>
        <build-date>05-12-2023</build-date>
    </build-info>


<item>
            <title>[LU-13218] conf-sanity test 98 hangs in socknal_sd00_01: lnet_nid2peerni_locked</title>
                <link>https://jira.whamcloud.com/browse/LU-13218</link>
                <project id="10000" key="LU">Lustre</project>
                    <description>&lt;p&gt;conf-sanity test_98 hangs for review-dne-zfs-part-3 for the patch &lt;a href=&quot;https://review.whamcloud.com/37445/&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/37445/&lt;/a&gt; for &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-12593&quot; title=&quot;update_log corruption&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-12593&quot;&gt;&lt;del&gt;LU-12593&lt;/del&gt;&lt;/a&gt;. We&apos;ve seen this test hang with the errors below once only so far.&lt;/p&gt;

&lt;p&gt;Looking at the hang at &lt;a href=&quot;https://testing.whamcloud.com/test_sets/469de552-4869-11ea-b58e-52540065bddc&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://testing.whamcloud.com/test_sets/469de552-4869-11ea-b58e-52540065bddc&lt;/a&gt;, in the client1 console log we see LNET issues&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;[19629.856282] Lustre: Unmounted lustre-client
[19659.244701] Lustre: DEBUG MARKER: lsmod | grep lnet &amp;gt; /dev/null &amp;amp;&amp;amp; lctl dl | grep &apos; ST &apos; || true
[19659.384715] Key type lgssc unregistered
[19661.272912] LNet: 5951:0:(socklnd.c:2550:ksocknal_shutdown()) waiting for 1 peers to disconnect
[19665.275764] LNet: 5951:0:(socklnd.c:2550:ksocknal_shutdown()) waiting for 1 peers to disconnect
[19673.280446] LNet: 5951:0:(socklnd.c:2550:ksocknal_shutdown()) waiting for 1 peers to disconnect
[19689.288850] LNet: 5951:0:(socklnd.c:2550:ksocknal_shutdown()) waiting for 1 peers to disconnect
[19721.304636] LNet: 5951:0:(socklnd.c:2550:ksocknal_shutdown()) waiting for 1 peers to disconnect
[19785.335214] LNet: 5951:0:(socklnd.c:2550:ksocknal_shutdown()) waiting for 1 peers to disconnect
[19809.073214] INFO: task socknal_sd00_01:4386 blocked for more than 120 seconds.
[19809.074558] &quot;echo 0 &amp;gt; /proc/sys/kernel/hung_task_timeout_secs&quot; disables this message.
[19809.075866] socknal_sd00_01 D ffff8e17ffd1ac80     0  4386      2 0x00000080
[19809.077194] Call Trace:
[19809.077665]  [&amp;lt;ffffffffb0581929&amp;gt;] schedule_preempt_disabled+0x29/0x70
[19809.078735]  [&amp;lt;ffffffffb057f8b7&amp;gt;] __mutex_lock_slowpath+0xc7/0x1d0
[19809.079762]  [&amp;lt;ffffffffb057ec8f&amp;gt;] mutex_lock+0x1f/0x2f
[19809.080741]  [&amp;lt;ffffffffc0891e31&amp;gt;] lnet_nid2peerni_locked+0x71/0x150 [lnet]
[19809.082031]  [&amp;lt;ffffffffc087ed01&amp;gt;] lnet_parse+0x791/0x11f0 [lnet]
[19809.083044]  [&amp;lt;ffffffffc0916838&amp;gt;] ksocknal_process_receive+0x498/0xde0 [ksocklnd]
[19809.084277]  [&amp;lt;ffffffffc0917626&amp;gt;] ksocknal_scheduler+0x206/0xd50 [ksocklnd]
[19809.085499]  [&amp;lt;ffffffffafec72e0&amp;gt;] ? wake_up_atomic_t+0x30/0x30
[19809.086567]  [&amp;lt;ffffffffc0917420&amp;gt;] ? ksocknal_recv+0x2a0/0x2a0 [ksocklnd]
[19809.087734]  [&amp;lt;ffffffffafec61f1&amp;gt;] kthread+0xd1/0xe0
[19809.088612]  [&amp;lt;ffffffffafec6120&amp;gt;] ? insert_kthread_work+0x40/0x40
[19809.089668]  [&amp;lt;ffffffffb058dd37&amp;gt;] ret_from_fork_nospec_begin+0x21/0x21
[19809.090850]  [&amp;lt;ffffffffafec6120&amp;gt;] ? insert_kthread_work+0x40/0x40
[19913.395403] LNet: 5951:0:(socklnd.c:2550:ksocknal_shutdown()) waiting for 1 peers to disconnect
[19929.146713] INFO: task socknal_sd00_01:4386 blocked for more than 120 seconds.
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;</description>
                <environment></environment>
        <key id="58026">LU-13218</key>
            <summary>conf-sanity test 98 hangs in socknal_sd00_01: lnet_nid2peerni_locked</summary>
                <type id="1" iconUrl="https://jira.whamcloud.com/secure/viewavatar?size=xsmall&amp;avatarId=11303&amp;avatarType=issuetype">Bug</type>
                                            <priority id="4" iconUrl="https://jira.whamcloud.com/images/icons/priorities/minor.svg">Minor</priority>
                        <status id="6" iconUrl="https://jira.whamcloud.com/images/icons/statuses/closed.png" description="The issue is considered finished, the resolution is correct. Issues which are closed can be reopened.">Closed</status>
                    <statusCategory id="3" key="done" colorName="success"/>
                                    <resolution id="3">Duplicate</resolution>
                                        <assignee username="ashehata">Amir Shehata</assignee>
                                    <reporter username="jamesanunez">James Nunez</reporter>
                        <labels>
                    </labels>
                <created>Fri, 7 Feb 2020 18:58:01 +0000</created>
                <updated>Thu, 24 Mar 2022 16:22:41 +0000</updated>
                            <resolved>Thu, 24 Mar 2022 16:22:41 +0000</resolved>
                                    <version>Lustre 2.12.4</version>
                                                        <due></due>
                            <votes>0</votes>
                                    <watches>3</watches>
                                                                            <comments>
                            <comment id="263039" author="ashehata" created="Tue, 11 Feb 2020 00:08:20 +0000"  >&lt;p&gt;This looks like a shutdown race condition. We call shutdown while a message from a peer is arriving. We lock the api mutex during shutdown. Then the receiving thread tries to lock it to create a peer NI, leading to this deadlock. The shutdown is waiting for the lnd thread to finish.&lt;/p&gt;

&lt;p&gt;We shouldn&apos;t be trying to create any new NIs while we&apos;re shutting down.&lt;/p&gt;</comment>
                            <comment id="273637" author="jfilizetti" created="Wed, 24 Jun 2020 11:49:40 +0000"  >&lt;p&gt;I&apos;ve seen this same thing with ko2iblnd on 2.12.5 yesterday as well.&lt;/p&gt;</comment>
                            <comment id="330130" author="hornc" created="Thu, 24 Mar 2022 16:22:41 +0000"  >&lt;p&gt;Looks like &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-15616&quot; title=&quot;sanity-lnet test_226: Timeout occurred after 112 minutes, last suite running was sanity-lnet&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-15616&quot;&gt;&lt;del&gt;LU-15616&lt;/del&gt;&lt;/a&gt; or &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-15618&quot; title=&quot;ksock_conn ref leak on shutdown&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-15618&quot;&gt;&lt;del&gt;LU-15618&lt;/del&gt;&lt;/a&gt; (or both) were hit.&lt;/p&gt;</comment>
                    </comments>
                <issuelinks>
                            <issuelinktype id="10010">
                    <name>Duplicate</name>
                                            <outwardlinks description="duplicates">
                                        <issuelink>
            <issuekey id="68964">LU-15616</issuekey>
        </issuelink>
            <issuelink>
            <issuekey id="68971">LU-15618</issuekey>
        </issuelink>
                            </outwardlinks>
                                                        </issuelinktype>
                    </issuelinks>
                <attachments>
                    </attachments>
                <subtasks>
                    </subtasks>
                <customfields>
                                                                                                                                                                                            <customfield id="customfield_10890" key="com.atlassian.jira.plugins.jira-development-integration-plugin:devsummary">
                        <customfieldname>Development</customfieldname>
                        <customfieldvalues>
                            
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        <customfield id="customfield_10390" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                        <customfieldname>Rank</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>1|i00tef:</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10090" key="com.pyxis.greenhopper.jira:gh-global-rank">
                        <customfieldname>Rank (Obsolete)</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>9223372036854775807</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                                                                                            <customfield id="customfield_10060" key="com.atlassian.jira.plugin.system.customfieldtypes:select">
                        <customfieldname>Severity</customfieldname>
                        <customfieldvalues>
                                <customfieldvalue key="10022"><![CDATA[3]]></customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        </customfields>
    </item>
</channel>
</rss>