<!-- 
RSS generated by JIRA (9.4.14#940014-sha1:734e6822bbf0d45eff9af51f82432957f73aa32c) at Sat Feb 10 02:52:37 UTC 2024

It is possible to restrict the fields that are returned in this document by specifying the 'field' parameter in your request.
For example, to request only the issue key and summary append 'field=key&field=summary' to the URL of your request.
-->
<rss version="0.92" >
<channel>
    <title>Whamcloud Community JIRA</title>
    <link>https://jira.whamcloud.com</link>
    <description>This file is an XML representation of an issue</description>
    <language>en-us</language>
    <build-info>
        <version>9.4.14</version>
        <build-number>940014</build-number>
        <build-date>05-12-2023</build-date>
    </build-info>


<item>
            <title>[LU-12442] recovery-small test_136: mounts stuck in lnet_discover_peer_locked()</title>
                <link>https://jira.whamcloud.com/browse/LU-12442</link>
                <project id="10000" key="LU">Lustre</project>
                    <description>&lt;p&gt;This issue was created by maloo for wangshilong &amp;lt;wshilong@ddn.com&amp;gt;&lt;/p&gt;

&lt;p&gt;This issue relates to the following test suite run: &lt;a href=&quot;https://testing.whamcloud.com/test_sets/d8642aba-90b2-11e9-a77a-52540065bddc&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://testing.whamcloud.com/test_sets/d8642aba-90b2-11e9-a77a-52540065bddc&lt;/a&gt;&lt;/p&gt;

&lt;p&gt;test_136 failed with the following error:&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;Timeout occurred after 387 mins, last suite running was recovery-small, restarting cluster to continue tests
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;[  236.158316] Pid: 4733, comm: mdt_out00_001 3.10.0-957.12.2.el7_lustre.x86_64 #1 SMP Wed Jun 5 06:59:00 UTC 2019&lt;br/&gt;
[  236.159304] Call Trace:&lt;br/&gt;
[  236.159604]  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffc0c34894&amp;gt;&amp;#93;&lt;/span&gt; lnet_discover_peer_locked+0x124/0x3d0 &lt;span class=&quot;error&quot;&gt;&amp;#91;lnet&amp;#93;&lt;/span&gt;&lt;br/&gt;
[  236.160389]  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffc0c34bb0&amp;gt;&amp;#93;&lt;/span&gt; LNetPrimaryNID+0x70/0x1a0 &lt;span class=&quot;error&quot;&gt;&amp;#91;lnet&amp;#93;&lt;/span&gt;&lt;br/&gt;
[  236.161207]  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffc0fce5fe&amp;gt;&amp;#93;&lt;/span&gt; ptlrpc_connection_get+0x3e/0x450 &lt;span class=&quot;error&quot;&gt;&amp;#91;ptlrpc&amp;#93;&lt;/span&gt;&lt;br/&gt;
[  236.162038]  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffc0fd2664&amp;gt;&amp;#93;&lt;/span&gt; ptlrpc_send_reply+0x394/0x840 &lt;span class=&quot;error&quot;&gt;&amp;#91;ptlrpc&amp;#93;&lt;/span&gt;&lt;br/&gt;
[  236.162790]  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffc0fd2bdb&amp;gt;&amp;#93;&lt;/span&gt; ptlrpc_send_error+0x9b/0x1b0 &lt;span class=&quot;error&quot;&gt;&amp;#91;ptlrpc&amp;#93;&lt;/span&gt;&lt;br/&gt;
[  236.163596]  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffc0fd2d00&amp;gt;&amp;#93;&lt;/span&gt; ptlrpc_error+0x10/0x20 &lt;span class=&quot;error&quot;&gt;&amp;#91;ptlrpc&amp;#93;&lt;/span&gt;&lt;br/&gt;
[  236.164310]  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffc1041898&amp;gt;&amp;#93;&lt;/span&gt; tgt_request_handle+0xad8/0x15c0 &lt;span class=&quot;error&quot;&gt;&amp;#91;ptlrpc&amp;#93;&lt;/span&gt;&lt;br/&gt;
[  236.165230]  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffc0fe57ee&amp;gt;&amp;#93;&lt;/span&gt; ptlrpc_server_handle_request+0x24e/0xab0 &lt;span class=&quot;error&quot;&gt;&amp;#91;ptlrpc&amp;#93;&lt;/span&gt;&lt;br/&gt;
[  236.166085]  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffc0fe92dc&amp;gt;&amp;#93;&lt;/span&gt; ptlrpc_main+0xbac/0x1560 &lt;span class=&quot;error&quot;&gt;&amp;#91;ptlrpc&amp;#93;&lt;/span&gt;&lt;br/&gt;
[  236.166803]  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff8c0c1d21&amp;gt;&amp;#93;&lt;/span&gt; kthread+0xd1/0xe0&lt;br/&gt;
[  236.167414]  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff8c775c37&amp;gt;&amp;#93;&lt;/span&gt; ret_from_fork_nospec_end+0x0/0x39&lt;br/&gt;
[  236.168091]  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffffffffff&amp;gt;&amp;#93;&lt;/span&gt; 0xffffffffffffffff&lt;br/&gt;
[  240.225660] INFO: task mount.lustre:4609 blocked for more than 120 seconds.&lt;br/&gt;
[  240.226393] &quot;echo 0 &amp;gt; /proc/sys/kernel/hung_task_timeout_secs&quot; disables this message.&lt;br/&gt;
[  240.227196] mount.lustre    D ffff9be33654b0c0     0  4609   4608 0x00000080&lt;br/&gt;
[  240.228094] Call Trace:&lt;br/&gt;
[  240.228362]  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff8c768e19&amp;gt;&amp;#93;&lt;/span&gt; schedule+0x29/0x70&lt;br/&gt;
[  240.228886]  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff8c766921&amp;gt;&amp;#93;&lt;/span&gt; schedule_timeout+0x221/0x2d0&lt;br/&gt;
[  240.229600]  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff8c0d31e2&amp;gt;&amp;#93;&lt;/span&gt; ? check_preempt_curr+0x92/0xa0&lt;br/&gt;
[  240.230222]  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff8c0d3209&amp;gt;&amp;#93;&lt;/span&gt; ? ttwu_do_wakeup+0x19/0xe0&lt;br/&gt;
[  240.230868]  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff8c7691cd&amp;gt;&amp;#93;&lt;/span&gt; wait_for_completion+0xfd/0x140&lt;br/&gt;
[  240.231490]  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff8c0d6ae0&amp;gt;&amp;#93;&lt;/span&gt; ? wake_up_state+0x20/0x20&lt;br/&gt;
[  240.232086]  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffc0cb0ae4&amp;gt;&amp;#93;&lt;/span&gt; llog_process_or_fork+0x244/0x450 &lt;span class=&quot;error&quot;&gt;&amp;#91;obdclass&amp;#93;&lt;/span&gt;&lt;br/&gt;
[  240.232883]  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffc0cb0d04&amp;gt;&amp;#93;&lt;/span&gt; llog_process+0x14/0x20 &lt;span class=&quot;error&quot;&gt;&amp;#91;obdclass&amp;#93;&lt;/span&gt;&lt;br/&gt;
[  240.233558]  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffc0ce3ca5&amp;gt;&amp;#93;&lt;/span&gt; class_config_parse_llog+0x125/0x350 &lt;span class=&quot;error&quot;&gt;&amp;#91;obdclass&amp;#93;&lt;/span&gt;&lt;br/&gt;
[  240.234309]  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffc0f61fd0&amp;gt;&amp;#93;&lt;/span&gt; mgc_process_cfg_log+0x790/0xc40 &lt;span class=&quot;error&quot;&gt;&amp;#91;mgc&amp;#93;&lt;/span&gt;&lt;br/&gt;
[  240.235079]  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffc0f654b9&amp;gt;&amp;#93;&lt;/span&gt; mgc_process_log+0x3d9/0x8f0 &lt;span class=&quot;error&quot;&gt;&amp;#91;mgc&amp;#93;&lt;/span&gt;&lt;br/&gt;
[  240.235763]  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffc0f6614f&amp;gt;&amp;#93;&lt;/span&gt; ? config_recover_log_add+0x13f/0x280 &lt;span class=&quot;error&quot;&gt;&amp;#91;mgc&amp;#93;&lt;/span&gt;&lt;br/&gt;
[  240.236494]  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffc0cebf00&amp;gt;&amp;#93;&lt;/span&gt; ? class_config_dump_handler+0x7e0/0x7e0 &lt;span class=&quot;error&quot;&gt;&amp;#91;obdclass&amp;#93;&lt;/span&gt;&lt;br/&gt;
[  240.237269]  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffc0f66b1b&amp;gt;&amp;#93;&lt;/span&gt; mgc_process_config+0x88b/0x13f0 &lt;span class=&quot;error&quot;&gt;&amp;#91;mgc&amp;#93;&lt;/span&gt;&lt;br/&gt;
[  240.238017]  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffc0cefb18&amp;gt;&amp;#93;&lt;/span&gt; lustre_process_log+0x2d8/0xad0 &lt;span class=&quot;error&quot;&gt;&amp;#91;obdclass&amp;#93;&lt;/span&gt;&lt;br/&gt;
[  240.238783]  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffc0b891a7&amp;gt;&amp;#93;&lt;/span&gt; ? libcfs_debug_msg+0x57/0x80 &lt;span class=&quot;error&quot;&gt;&amp;#91;libcfs&amp;#93;&lt;/span&gt;&lt;br/&gt;
[  240.239494]  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffc0cda839&amp;gt;&amp;#93;&lt;/span&gt; ? lprocfs_counter_add+0xf9/0x160 &lt;span class=&quot;error&quot;&gt;&amp;#91;obdclass&amp;#93;&lt;/span&gt;&lt;br/&gt;
[  240.240253]  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffc0d1e924&amp;gt;&amp;#93;&lt;/span&gt; server_start_targets+0x13a4/0x2a20 &lt;span class=&quot;error&quot;&gt;&amp;#91;obdclass&amp;#93;&lt;/span&gt;&lt;br/&gt;
[  240.241058]  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffc0cda961&amp;gt;&amp;#93;&lt;/span&gt; ? lprocfs_counter_sub+0xc1/0x130 &lt;span class=&quot;error&quot;&gt;&amp;#91;obdclass&amp;#93;&lt;/span&gt;&lt;br/&gt;
[  240.241835]  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffc0cebf00&amp;gt;&amp;#93;&lt;/span&gt; ? class_config_dump_handler+0x7e0/0x7e0 &lt;span class=&quot;error&quot;&gt;&amp;#91;obdclass&amp;#93;&lt;/span&gt;&lt;br/&gt;
[  240.242689]  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffc0d2106c&amp;gt;&amp;#93;&lt;/span&gt; server_fill_super+0x10cc/0x1890 &lt;span class=&quot;error&quot;&gt;&amp;#91;obdclass&amp;#93;&lt;/span&gt;&lt;br/&gt;
[  240.243430]  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffc0b891a7&amp;gt;&amp;#93;&lt;/span&gt; ? libcfs_debug_msg+0x57/0x80 &lt;span class=&quot;error&quot;&gt;&amp;#91;libcfs&amp;#93;&lt;/span&gt;&lt;br/&gt;
[  240.244122]  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffc0cf5798&amp;gt;&amp;#93;&lt;/span&gt; lustre_fill_super+0x328/0x950 &lt;span class=&quot;error&quot;&gt;&amp;#91;obdclass&amp;#93;&lt;/span&gt;&lt;br/&gt;
[  240.244905]  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffc0cf5470&amp;gt;&amp;#93;&lt;/span&gt; ? lustre_common_put_super+0x270/0x270 &lt;span class=&quot;error&quot;&gt;&amp;#91;obdclass&amp;#93;&lt;/span&gt;&lt;br/&gt;
[  240.245701]  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff8c2457cf&amp;gt;&amp;#93;&lt;/span&gt; mount_nodev+0x4f/0xb0&lt;br/&gt;
[  240.246283]  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffc0ced968&amp;gt;&amp;#93;&lt;/span&gt; lustre_mount+0x38/0x60 &lt;span class=&quot;error&quot;&gt;&amp;#91;obdclass&amp;#93;&lt;/span&gt;&lt;br/&gt;
[  240.246925]  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff8c24634e&amp;gt;&amp;#93;&lt;/span&gt; mount_fs+0x3e/0x1b0&lt;br/&gt;
[  240.247505]  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff8c263ec7&amp;gt;&amp;#93;&lt;/span&gt; vfs_kern_mount+0x67/0x110&lt;br/&gt;
[  240.248084]  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff8c2664ef&amp;gt;&amp;#93;&lt;/span&gt; do_mount+0x1ef/0xce0&lt;br/&gt;
[  240.248670]  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff8c23e7aa&amp;gt;&amp;#93;&lt;/span&gt; ? __check_object_size+0x1ca/0x250&lt;br/&gt;
[  240.249377]  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff8c21caec&amp;gt;&amp;#93;&lt;/span&gt; ? kmem_cache_alloc_trace+0x3c/0x200&lt;br/&gt;
[  240.250037]  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff8c267323&amp;gt;&amp;#93;&lt;/span&gt; SyS_mount+0x83/0xd0&lt;br/&gt;
[  240.250599]  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff8c775ddb&amp;gt;&amp;#93;&lt;/span&gt; system_call_fastpath+0x22/0x27&lt;br/&gt;
[  240.251232]  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff8c775d21&amp;gt;&amp;#93;&lt;/span&gt; ? system_call_after_swapgs+0xae/0x146&lt;/p&gt;





&lt;p&gt;VVVVVVV DO NOT REMOVE LINES BELOW, Added by Maloo for auto-association VVVVVVV&lt;br/&gt;
recovery-small test_136 - Timeout occurred after 387 mins, last suite running was recovery-small, restarting cluster to continue tests&lt;/p&gt;</description>
                <environment></environment>
        <key id="55967">LU-12442</key>
            <summary>recovery-small test_136: mounts stuck in lnet_discover_peer_locked()</summary>
                <type id="1" iconUrl="https://jira.whamcloud.com/secure/viewavatar?size=xsmall&amp;avatarId=11303&amp;avatarType=issuetype">Bug</type>
                                            <priority id="4" iconUrl="https://jira.whamcloud.com/images/icons/priorities/minor.svg">Minor</priority>
                        <status id="5" iconUrl="https://jira.whamcloud.com/images/icons/statuses/resolved.png" description="A resolution has been taken, and it is awaiting verification by reporter. From here issues are either reopened, or are closed.">Resolved</status>
                    <statusCategory id="3" key="done" colorName="success"/>
                                    <resolution id="3">Duplicate</resolution>
                                        <assignee username="wc-triage">WC Triage</assignee>
                                    <reporter username="maloo">Maloo</reporter>
                        <labels>
                    </labels>
                <created>Mon, 17 Jun 2019 04:22:56 +0000</created>
                <updated>Fri, 28 Jun 2019 05:56:10 +0000</updated>
                            <resolved>Fri, 28 Jun 2019 05:56:09 +0000</resolved>
                                    <version>Lustre 2.13.0</version>
                                                        <due></due>
                            <votes>0</votes>
                                    <watches>2</watches>
                                                                            <comments>
                            <comment id="249380" author="bzzz" created="Mon, 17 Jun 2019 08:42:46 +0000"  >&lt;p&gt;I saw very similar symptoms on a single-core VM - LNetPrimaryNID() was spinning, waiting for the NIC to get ready (AFAICS), leaving no chance for others.&lt;br/&gt;
I changed the number of cores to 2 and that &quot;resolved&quot; the issue.&lt;/p&gt;</comment>
                            <comment id="250247" author="adilger" created="Fri, 28 Jun 2019 05:56:10 +0000"  >&lt;p&gt;Closing as a duplicate because patch &lt;a href=&quot;https://review.whamcloud.com/35191&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/35191&lt;/a&gt; &quot;&lt;tt&gt;&lt;a href=&quot;https://jira.whamcloud.com/browse/LU-12424&quot; title=&quot;LNet MR routing: possible loop when discovery is off&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-12424&quot;&gt;LU-12424&lt;/a&gt; lnet: prevent loop in LNetPrimaryNID()&lt;/tt&gt;&quot; has landed and should resolve the issue.&lt;/p&gt;</comment>
                    </comments>
                <issuelinks>
                            <issuelinktype id="10011">
                    <name>Related</name>
                                            <outwardlinks description="is related to ">
                                        <issuelink>
            <issuekey id="51912">LU-10931</issuekey>
        </issuelink>
                            </outwardlinks>
                                                                <inwardlinks description="is related to">
                                        <issuelink>
            <issuekey id="55924">LU-12424</issuekey>
        </issuelink>
                            </inwardlinks>
                                    </issuelinktype>
                    </issuelinks>
                <attachments>
                    </attachments>
                <subtasks>
                    </subtasks>
                <customfields>
                                                                                                                                                                                            <customfield id="customfield_10890" key="com.atlassian.jira.plugins.jira-development-integration-plugin:devsummary">
                        <customfieldname>Development</customfieldname>
                        <customfieldvalues>
                            
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        <customfield id="customfield_10390" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                        <customfieldname>Rank</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>1|i00ibb:</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10090" key="com.pyxis.greenhopper.jira:gh-global-rank">
                        <customfieldname>Rank (Obsolete)</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>9223372036854775807</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                                                                                            <customfield id="customfield_10060" key="com.atlassian.jira.plugin.system.customfieldtypes:select">
                        <customfieldname>Severity</customfieldname>
                        <customfieldvalues>
                                <customfieldvalue key="10022"><![CDATA[3]]></customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        </customfields>
    </item>
</channel>
</rss>