<!-- 
RSS generated by JIRA (9.4.14#940014-sha1:734e6822bbf0d45eff9af51f82432957f73aa32c) at Sat Feb 10 01:52:36 UTC 2024

It is possible to restrict the fields that are returned in this document by specifying the 'field' parameter in your request.
For example, to request only the issue key and summary append 'field=key&field=summary' to the URL of your request.
-->
<rss version="0.92" >
<channel>
    <title>Whamcloud Community JIRA</title>
    <link>https://jira.whamcloud.com</link>
    <description>This file is an XML representation of an issue</description>
    <language>en-us</language>    <build-info>
        <version>9.4.14</version>
        <build-number>940014</build-number>
        <build-date>05-12-2023</build-date>
    </build-info>


<item>
            <title>[LU-5568] kernel crash when network initialization failed</title>
                <link>https://jira.whamcloud.com/browse/LU-5568</link>
                <project id="10000" key="LU">Lustre</project>
                    <description>&lt;p&gt;[  105.976884] Lustre: Lustre: Build Version: v2_6_51_0-g16c7568-CHANGED-3.10.1.el7_lustre&lt;br/&gt;
[  105.990490] LNetError: 2145:0:(socklnd.c:2660:ksocknal_enumerate_interfaces()) Can&apos;t find any usable interfaces&lt;br/&gt;
[  106.990120] LNetError: 105-4: Error -100 starting up LNI tcp&lt;br/&gt;
[  106.992703] LNetError: 2145:0:(api-ni.c:823:lnet_unprepare()) ASSERTION( list_empty(&amp;amp;the_lnet.ln_nis) ) failed: &lt;br/&gt;
[  106.994560] LNetError: 2145:0:(api-ni.c:823:lnet_unprepare()) LBUG&lt;br/&gt;
[  106.994561] Pid: 2145, comm: modprobe&lt;br/&gt;
[  106.994561] \x0aCall Trace:&lt;br/&gt;
[  106.994574]  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa044f853&amp;gt;&amp;#93;&lt;/span&gt; libcfs_debug_dumpstack+0x53/0x80 &lt;span class=&quot;error&quot;&gt;&amp;#91;libcfs&amp;#93;&lt;/span&gt;&lt;br/&gt;
[  106.994578]  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa044fdf5&amp;gt;&amp;#93;&lt;/span&gt; lbug_with_loc+0x45/0xc0 &lt;span class=&quot;error&quot;&gt;&amp;#91;libcfs&amp;#93;&lt;/span&gt;&lt;br/&gt;
[  106.994585]  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa04f3267&amp;gt;&amp;#93;&lt;/span&gt; lnet_unprepare+0x297/0x340 &lt;span class=&quot;error&quot;&gt;&amp;#91;lnet&amp;#93;&lt;/span&gt;&lt;br/&gt;
[  106.994587]  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa04f3b5c&amp;gt;&amp;#93;&lt;/span&gt; LNetNIInit+0x25c/0x3e0 &lt;span class=&quot;error&quot;&gt;&amp;#91;lnet&amp;#93;&lt;/span&gt;&lt;br/&gt;
[  106.994592]  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff81061bc6&amp;gt;&amp;#93;&lt;/span&gt; ? put_online_cpus+0x56/0x80&lt;br/&gt;
[  106.994631]  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0983000&amp;gt;&amp;#93;&lt;/span&gt; ? init_module+0x0/0x1000 &lt;span class=&quot;error&quot;&gt;&amp;#91;ptlrpc&amp;#93;&lt;/span&gt;&lt;br/&gt;
[  106.994658]  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa081310c&amp;gt;&amp;#93;&lt;/span&gt; ptlrpc_ni_init+0x2c/0x1a0 &lt;span class=&quot;error&quot;&gt;&amp;#91;ptlrpc&amp;#93;&lt;/span&gt;&lt;br/&gt;
[  106.994679]  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0983000&amp;gt;&amp;#93;&lt;/span&gt; ? init_module+0x0/0x1000 &lt;span class=&quot;error&quot;&gt;&amp;#91;ptlrpc&amp;#93;&lt;/span&gt;&lt;br/&gt;
[  106.994703]  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0813291&amp;gt;&amp;#93;&lt;/span&gt; ptlrpc_init_portals+0x11/0xf0 &lt;span class=&quot;error&quot;&gt;&amp;#91;ptlrpc&amp;#93;&lt;/span&gt;&lt;br/&gt;
[  106.994722]  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0983000&amp;gt;&amp;#93;&lt;/span&gt; ? init_module+0x0/0x1000 &lt;span class=&quot;error&quot;&gt;&amp;#91;ptlrpc&amp;#93;&lt;/span&gt;&lt;br/&gt;
[  106.994739]  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa09831c4&amp;gt;&amp;#93;&lt;/span&gt; init_module+0x1c4/0x1000 &lt;span class=&quot;error&quot;&gt;&amp;#91;ptlrpc&amp;#93;&lt;/span&gt;&lt;br/&gt;
[  106.994742]  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff810020e2&amp;gt;&amp;#93;&lt;/span&gt; do_one_initcall+0xe2/0x190&lt;br/&gt;
[  106.994744]  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff810ca7fb&amp;gt;&amp;#93;&lt;/span&gt; load_module+0x129b/0x1a90&lt;br/&gt;
[  106.994745]  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff812da590&amp;gt;&amp;#93;&lt;/span&gt; ? ddebug_dyndbg_module_param_cb+0x0/0x60&lt;br/&gt;
[  106.994747]  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff810c7133&amp;gt;&amp;#93;&lt;/span&gt; ? copy_module_from_fd.isra.43+0x53/0x150&lt;br/&gt;
[  106.994748]  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff810cb1a6&amp;gt;&amp;#93;&lt;/span&gt; SyS_finit_module+0xa6/0xd0&lt;br/&gt;
[  106.994750]  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff815f2119&amp;gt;&amp;#93;&lt;/span&gt; system_call_fastpath+0x16/0x1b&lt;br/&gt;
[  106.994750] &lt;br/&gt;
[  106.995032] Kernel panic - not syncing: LBUG&lt;br/&gt;
[  106.995034] CPU: 3 PID: 2145 Comm: modprobe Tainted: GF          O--------------   3.10.1.el7_lustre #1&lt;br/&gt;
[  106.995034] Hardware name: VMware, Inc. VMware Virtual Platform/440BX Desktop Reference Platform, BIOS 6.00 07/31/2013&lt;br/&gt;
[  106.995036]  ffffffffa0474d0d 00000000d711e588 ffff880036601bf0 ffffffff815e19ba&lt;br/&gt;
[  106.995037]  ffff880036601c70 ffffffff815db549 ffffffff00000008 ffff880036601c80&lt;br/&gt;
[  106.995037]  ffff880036601c20 00000000d711e588 ffffffffa051574f 0000000000000000&lt;br/&gt;
[  106.995038] Call Trace:&lt;br/&gt;
[  106.995052]  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff815e19ba&amp;gt;&amp;#93;&lt;/span&gt; dump_stack+0x19/0x1b&lt;br/&gt;
[  106.995055]  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff815db549&amp;gt;&amp;#93;&lt;/span&gt; panic+0xd8/0x1e7&lt;br/&gt;
[  106.995062]  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa044fe5b&amp;gt;&amp;#93;&lt;/span&gt; lbug_with_loc+0xab/0xc0 &lt;span class=&quot;error&quot;&gt;&amp;#91;libcfs&amp;#93;&lt;/span&gt;&lt;br/&gt;
[  106.995067]  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa04f3267&amp;gt;&amp;#93;&lt;/span&gt; lnet_unprepare+0x297/0x340 &lt;span class=&quot;error&quot;&gt;&amp;#91;lnet&amp;#93;&lt;/span&gt;&lt;br/&gt;
[  106.995070]  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa04f3b5c&amp;gt;&amp;#93;&lt;/span&gt; LNetNIInit+0x25c/0x3e0 &lt;span class=&quot;error&quot;&gt;&amp;#91;lnet&amp;#93;&lt;/span&gt;&lt;br/&gt;
[  106.995072]  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff81061bc6&amp;gt;&amp;#93;&lt;/span&gt; ? put_online_cpus+0x56/0x80&lt;br/&gt;
[  106.995088]  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0983000&amp;gt;&amp;#93;&lt;/span&gt; ? 0xffffffffa0982fff&lt;br/&gt;
[  106.995113]  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa081310c&amp;gt;&amp;#93;&lt;/span&gt; ptlrpc_ni_init+0x2c/0x1a0 &lt;span class=&quot;error&quot;&gt;&amp;#91;ptlrpc&amp;#93;&lt;/span&gt;&lt;br/&gt;
[  106.995117]  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0983000&amp;gt;&amp;#93;&lt;/span&gt; ? 0xffffffffa0982fff&lt;br/&gt;
[  106.995138]  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0813291&amp;gt;&amp;#93;&lt;/span&gt; ptlrpc_init_portals+0x11/0xf0 &lt;span class=&quot;error&quot;&gt;&amp;#91;ptlrpc&amp;#93;&lt;/span&gt;&lt;br/&gt;
[  106.995141]  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0983000&amp;gt;&amp;#93;&lt;/span&gt; ? 0xffffffffa0982fff&lt;br/&gt;
[  106.995179]  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa09831c4&amp;gt;&amp;#93;&lt;/span&gt; init_module+0x1c4/0x1000 &lt;span class=&quot;error&quot;&gt;&amp;#91;ptlrpc&amp;#93;&lt;/span&gt;&lt;br/&gt;
[  106.995181]  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff810020e2&amp;gt;&amp;#93;&lt;/span&gt; do_one_initcall+0xe2/0x190&lt;br/&gt;
[  106.995182]  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff810ca7fb&amp;gt;&amp;#93;&lt;/span&gt; load_module+0x129b/0x1a90&lt;br/&gt;
[  106.995185]  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff812da590&amp;gt;&amp;#93;&lt;/span&gt; ? ddebug_proc_write+0xf0/0xf0&lt;br/&gt;
[  106.995186]  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff810c7133&amp;gt;&amp;#93;&lt;/span&gt; ? copy_module_from_fd.isra.43+0x53/0x150&lt;br/&gt;
[  106.995187]  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff810cb1a6&amp;gt;&amp;#93;&lt;/span&gt; SyS_finit_module+0xa6/0xd0&lt;br/&gt;
[  106.995189]  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff815f2119&amp;gt;&amp;#93;&lt;/span&gt; system_call_fastpath+0x16/0x1b&lt;/p&gt;</description>
                <environment></environment>
        <key id="26251">LU-5568</key>
            <summary>kernel crash when network initialization failed</summary>
                <type id="1" iconUrl="https://jira.whamcloud.com/secure/viewavatar?size=xsmall&amp;avatarId=11303&amp;avatarType=issuetype">Bug</type>
                                            <priority id="2" iconUrl="https://jira.whamcloud.com/images/icons/priorities/critical.svg">Critical</priority>
                        <status id="5" iconUrl="https://jira.whamcloud.com/images/icons/statuses/resolved.png" description="A resolution has been taken, and it is awaiting verification by reporter. From here issues are either reopened, or are closed.">Resolved</status>
                    <statusCategory id="3" key="done" colorName="success"/>
                                    <resolution id="1">Fixed</resolution>
                                        <assignee username="ashehata">Amir Shehata</assignee>
                                    <reporter username="wangshilong">Wang Shilong</reporter>
                        <labels>
                            <label>MB</label>
                            <label>patch</label>
                    </labels>
                <created>Mon, 1 Sep 2014 15:08:05 +0000</created>
                <updated>Wed, 17 Dec 2014 19:16:58 +0000</updated>
                            <resolved>Wed, 17 Dec 2014 19:16:58 +0000</resolved>
                                    <version>Lustre 2.7.0</version>
                                    <fixVersion>Lustre 2.7.0</fixVersion>
                                        <due></due>
                            <votes>0</votes>
                                    <watches>8</watches>
                                                                            <comments>
                            <comment id="92920" author="wangshilong" created="Mon, 1 Sep 2014 15:14:38 +0000"  >&lt;p&gt;&lt;a href=&quot;http://review.whamcloud.com/#/c/11718/&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/#/c/11718/&lt;/a&gt;&lt;/p&gt;</comment>
                            <comment id="92921" author="pjones" created="Mon, 1 Sep 2014 15:17:39 +0000"  >&lt;p&gt;Amir&lt;/p&gt;

&lt;p&gt;Could you please review this patch?&lt;/p&gt;

&lt;p&gt;Thanks&lt;/p&gt;

&lt;p&gt;Peter&lt;/p&gt;</comment>
                            <comment id="93084" author="isaac" created="Wed, 3 Sep 2014 04:22:53 +0000"  >&lt;p&gt;The bug seemed to be introduced by commit 92c51841c50cc4061c20b277d3f7c4468f2a80cc. While the proposed patch fixed the symptom, it left the underlying API inconsistency unfixed: the lnet internal initialization APIs are somewhat transaction-like in that if an init function fails it cleans up after itself, so the caller doesn&apos;t have to call the corresponding fini function, e.g. if lnet_prepare() fails there&apos;s no need to call lnet_unprepare().&lt;/p&gt;

&lt;p&gt;But with 92c51841c50cc4061c20b277d3f7c4468f2a80cc, lnet_shutdown_lndnis() was removed from lnet_startup_lndnis(), so now the callers of lnet_startup_lndnis() are responsible for cleaning up if lnet_startup_lndnis() fails, which breaks the convention that init() functions clean up after themselves on failure. This kind of inconsistency will cause us trouble in the future. I&apos;d suggest that we:&lt;br/&gt;
1. Move lnet_shutdown_lndnis() back to lnet_startup_lndnis() so that lnet_startup_lndnis() will cleanup itself.&lt;br/&gt;
2. Move the code in lnet_startup_lndnis() that starts a single NI into a new function e.g. startup_a_single_ni().&lt;br/&gt;
3. Make lnet_dyn_add_ni() call startup_a_single_ni() instead of lnet_startup_lndnis().&lt;/p&gt;</comment>
                            <comment id="95832" author="green" created="Tue, 7 Oct 2014 17:30:25 +0000"  >&lt;p&gt;Wang, do you have plans of addressing Isaac&apos;s points in your patch?&lt;/p&gt;</comment>
                            <comment id="95893" author="wangshilong" created="Wed, 8 Oct 2014 01:26:58 +0000"  >&lt;p&gt;Hi Oleg Drokin,&lt;/p&gt;

&lt;p&gt;Yeah, I did address Isaac&apos;s comment!&lt;/p&gt;</comment>
                            <comment id="96047" author="wangshilong" created="Thu, 9 Oct 2014 16:26:28 +0000"  >&lt;p&gt;Hi Isaac Huang,&lt;/p&gt;

&lt;p&gt;Thanks very much for your comments. Could you take a look and give me a response&lt;br/&gt;
regarding your last comment:&lt;/p&gt;

&lt;p&gt;&quot;When we goto failed here, the ni is no longer on the nilist, then where is the ni going to be freed?&quot;&lt;/p&gt;

&lt;p&gt;My reply: lnet_shutdown_lndnis() could do that?&lt;/p&gt;

&lt;p&gt;btw, i am not sure why i reply comments at redmine, it did not give email or something...&lt;/p&gt;</comment>
                            <comment id="96236" author="isaac" created="Mon, 13 Oct 2014 17:25:00 +0000"  >&lt;p&gt;lnet_shutdown_lndnis() will not free the ni if the failure happened early, i.e. the ni hasn&apos;t been added to any global lists. For example, if it failed in the first few conditional checks in lnet_startup_lndni(),  lnet_shutdown_lndnis() will not be called at all.&lt;/p&gt;</comment>
                            <comment id="96246" author="isaac" created="Mon, 13 Oct 2014 18:07:45 +0000"  >&lt;p&gt;I think the fix should be considered together with &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-5734&quot; title=&quot;LNet dynamic control: lnet_dyn_add_ni() can&amp;#39;t clean up failed NI in some cases&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-5734&quot;&gt;&lt;del&gt;LU-5734&lt;/del&gt;&lt;/a&gt; - the two are closely related.&lt;/p&gt;</comment>
                            <comment id="96701" author="wangshilong" created="Mon, 20 Oct 2014 15:30:26 +0000"  >&lt;p&gt;Hi Isaac Huang,&lt;/p&gt;

&lt;p&gt;I am sorry for bothering you. Could you please help review the new version&lt;br/&gt;
and give me some feedback? Thank you very much!&lt;/p&gt;


&lt;p&gt;Best regards,&lt;br/&gt;
Wang Shilong&lt;/p&gt;</comment>
                            <comment id="97925" author="pjones" created="Thu, 30 Oct 2014 12:13:12 +0000"  >&lt;p&gt;Landed for 2.7&lt;/p&gt;</comment>
                            <comment id="97955" author="pjones" created="Thu, 30 Oct 2014 17:03:19 +0000"  >&lt;p&gt;Patch reverted. Amir could you please look into this issue? Thanks&lt;/p&gt;</comment>
                            <comment id="99883" author="gerrit" created="Sun, 23 Nov 2014 07:01:38 +0000"  >&lt;p&gt;Oleg Drokin (oleg.drokin@intel.com) merged in patch &lt;a href=&quot;http://review.whamcloud.com/12512/&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/12512/&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-5568&quot; title=&quot;kernel crash when network initialization failed&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-5568&quot;&gt;&lt;del&gt;LU-5568&lt;/del&gt;&lt;/a&gt; lnet: fix kernel crash when network failed to start&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: master&lt;br/&gt;
Current Patch Set: &lt;br/&gt;
Commit: 66e9055b23433bd0aa8da5e49f3b665fb1b95532&lt;/p&gt;</comment>
                    </comments>
                <issuelinks>
                            <issuelinktype id="10010">
                    <name>Duplicate</name>
                                            <outwardlinks description="duplicates">
                                        <issuelink>
            <issuekey id="26727">LU-5664</issuekey>
        </issuelink>
                            </outwardlinks>
                                                                <inwardlinks description="is duplicated by">
                                        <issuelink>
            <issuekey id="27506">LU-5884</issuekey>
        </issuelink>
                            </inwardlinks>
                                    </issuelinktype>
                            <issuelinktype id="10011">
                    <name>Related</name>
                                            <outwardlinks description="is related to ">
                                        <issuelink>
            <issuekey id="15616">LU-2456</issuekey>
        </issuelink>
                            </outwardlinks>
                                                                <inwardlinks description="is related to">
                                        <issuelink>
            <issuekey id="26986">LU-5734</issuekey>
        </issuelink>
                            </inwardlinks>
                                    </issuelinktype>
                    </issuelinks>
                <attachments>
                    </attachments>
                <subtasks>
                    </subtasks>
                <customfields>
                                                                                                                                                                                            <customfield id="customfield_10890" key="com.atlassian.jira.plugins.jira-development-integration-plugin:devsummary">
                        <customfieldname>Development</customfieldname>
                        <customfieldvalues>
                            
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                    <customfield id="customfield_10030" key="com.atlassian.jira.plugin.system.customfieldtypes:labels">
                        <customfieldname>Epic/Theme</customfieldname>
                        <customfieldvalues>
                                        <label>lnet</label>
    
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                        <customfield id="customfield_10390" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                        <customfieldname>Rank</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>1|hzwv1j:</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10090" key="com.pyxis.greenhopper.jira:gh-global-rank">
                        <customfieldname>Rank (Obsolete)</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>15529</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                                                                                            <customfield id="customfield_10060" key="com.atlassian.jira.plugin.system.customfieldtypes:select">
                        <customfieldname>Severity</customfieldname>
                        <customfieldvalues>
                                <customfieldvalue key="10022"><![CDATA[3]]></customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        </customfields>
    </item>
</channel>
</rss>