<!-- 
RSS generated by JIRA (9.4.14#940014-sha1:734e6822bbf0d45eff9af51f82432957f73aa32c) at Sat Feb 10 03:11:38 UTC 2024

It is possible to restrict the fields that are returned in this document by specifying the 'field' parameter in your request.
For example, to request only the issue key and summary, append 'field=key&field=summary' to the URL of your request.
-->
<rss version="0.92" >
<channel>
    <title>Whamcloud Community JIRA</title>
    <link>https://jira.whamcloud.com</link>
    <description>This file is an XML representation of an issue</description>
    <language>en-us</language>    <build-info>
        <version>9.4.14</version>
        <build-number>940014</build-number>
        <build-date>05-12-2023</build-date>
    </build-info>


<item>
            <title>[LU-14655] BUG: unable to handle kernel NULL pointer dereference at 0000000000000058</title>
                <link>https://jira.whamcloud.com/browse/LU-14655</link>
                <project id="10000" key="LU">Lustre</project>
                    <description>&lt;p&gt;Oops in lnet_health_check():&lt;/p&gt;

&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;2021-04-21 21:34:39 [722035.308494] BUG: unable to handle kernel NULL pointer dereference at 0000000000000058
2021-04-21 21:34:39 [722035.317682] IP: [&amp;lt;ffffffffc0ac4f26&amp;gt;] lnet_finalize+0x1d6/0xf30 [lnet]
2021-04-21 21:34:39 [722035.325348] PGD 0 
2021-04-21 21:34:39 [722035.328535] Oops: 0000 [#1] SMP 
...
2021-04-21 21:34:40 [722035.467706] CPU: 12 PID: 88725 Comm: socknal_sd01_02 Kdump: loaded Tainted: P        W  OE  ------------   3.10.0-957.1.3957.1.3.x4.3.20.x86_64 #1
2021-04-21 21:34:40 [722035.483099] Hardware name: Viking Enterprise Solutions VSSEP1EA/VSSEP1EA, BIOS 10.06 05/26/2020
2021-04-21 21:34:40 [722035.492985] task: ffff89a20c35c100 ti: ffff89a21da40000 task.ti: ffff89a21da40000
2021-04-21 21:34:40 [722035.501649] RIP: 0010:[&amp;lt;ffffffffc0ac4f26&amp;gt;]  [&amp;lt;ffffffffc0ac4f26&amp;gt;] lnet_finalize+0x1d6/0xf30 [lnet]
2021-04-21 21:34:40 [722035.511741] RSP: 0018:ffff89a21da43d60  EFLAGS: 00010286
2021-04-21 21:34:40 [722035.518219] RAX: 0000000000000000 RBX: ffff89a8b4961658 RCX: 0000000000000001
2021-04-21 21:34:40 [722035.526516] RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000000
2021-04-21 21:34:40 [722035.534797] RBP: ffff89a21da43dc8 R08: 000000000001f120 R09: ffffffffc0b4f727
2021-04-21 21:34:40 [722035.543070] R10: 0000000000000000 R11: 0000000000000001 R12: 0000000000000000
2021-04-21 21:34:40 [722035.551335] R13: 0000000000000000 R14: ffff89b1c6be9a00 R15: ffff89d1fafbfe00
2021-04-21 21:34:40 [722035.559580] FS:  0000000000000000(0000) GS:ffff89b22ef00000(0000) knlGS:0000000000000000
2021-04-21 21:34:40 [722035.568779] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
2021-04-21 21:34:40 [722035.575620] CR2: 0000000000000058 CR3: 0000003016f88000 CR4: 0000000000340fe0
2021-04-21 21:34:40 [722035.583846] Call Trace:
2021-04-21 21:34:40 [722035.587383]  [&amp;lt;ffffffffc0b4f8fe&amp;gt;] ksocknal_tx_done+0x9e/0x1f0 [ksocklnd]
2021-04-21 21:34:40 [722035.595157]  [&amp;lt;ffffffffc0b54930&amp;gt;] ksocknal_scheduler+0x350/0xd50 [ksocklnd]
2021-04-21 21:34:40 [722035.603180]  [&amp;lt;ffffffffb1ac3050&amp;gt;] ? wake_up_atomic_t+0x30/0x30
2021-04-21 21:34:40 [722035.610054]  [&amp;lt;ffffffffc0b545e0&amp;gt;] ? ksocknal_recv+0x2a0/0x2a0 [ksocklnd]
2021-04-21 21:34:40 [722035.617784]  [&amp;lt;ffffffffb1ac1f81&amp;gt;] kthread+0xd1/0xe0
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;Analysis from &lt;a href=&quot;https://jira.whamcloud.com/secure/ViewProfile.jspa?name=c17819&quot; class=&quot;user-hover&quot; rel=&quot;c17819&quot;&gt;c17819&lt;/a&gt;&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;0xffffffffc0ac4eca &amp;lt;lnet_finalize+378&amp;gt;: mov &#160;&#160;&#160;0x4426f(%rip),%rcx &#160;&#160;&#160;&#160;&#160;&#160;&#160;# 0xffffffffc0b09140 &amp;lt;the_lnet+352&amp;gt; 
0xffffffffc0ac4ed1 &amp;lt;lnet_finalize+385&amp;gt;: mov &#160;&#160;&#160;0x80(%rbx),%r14 
0xffffffffc0ac4ed8 &amp;lt;lnet_finalize+392&amp;gt;: mov &#160;&#160;&#160;0xa0(%rbx),%r15 
0xffffffffc0ac4edf &amp;lt;lnet_finalize+399&amp;gt;: cmovne 0x78(%rbx),%r14 
0xffffffffc0ac4ee4 &amp;lt;lnet_finalize+404&amp;gt;: cmovne 0x98(%rbx),%r15 
0xffffffffc0ac4eec &amp;lt;lnet_finalize+412&amp;gt;: cmp &#160;&#160;&#160;$0x1,%al 
0xffffffffc0ac4eee &amp;lt;lnet_finalize+414&amp;gt;: sbb &#160;&#160;&#160;%eax,%eax 
0xffffffffc0ac4ef0 &amp;lt;lnet_finalize+416&amp;gt;: mov &#160;&#160;&#160;%eax,0x30(%rsp) 
0xffffffffc0ac4ef4 &amp;lt;lnet_finalize+420&amp;gt;: addb &#160;&#160;$0x1,0x30(%rsp) 
0xffffffffc0ac4ef9 &amp;lt;lnet_finalize+425&amp;gt;: xor &#160;&#160;&#160;%eax,%eax 
0xffffffffc0ac4efb &amp;lt;lnet_finalize+427&amp;gt;: xor &#160;&#160;&#160;$0x1,%r11d 
0xffffffffc0ac4eff &amp;lt;lnet_finalize+431&amp;gt;: cmpl &#160;&#160;$0x3,(%rcx) 
0xffffffffc0ac4f02 &amp;lt;lnet_finalize+434&amp;gt;: movzbl 0x30(%rsp),%ecx 
0xffffffffc0ac4f07 &amp;lt;lnet_finalize+439&amp;gt;: mov &#160;&#160;&#160;%eax,%edi 
0xffffffffc0ac4f09 &amp;lt;lnet_finalize+441&amp;gt;: cmovge %r11d,%edi 
0xffffffffc0ac4f0d &amp;lt;lnet_finalize+445&amp;gt;: cmovge %ecx,%eax 
0xffffffffc0ac4f10 &amp;lt;lnet_finalize+448&amp;gt;: test &#160;&#160;%r14,%r14 
0xffffffffc0ac4f13 &amp;lt;lnet_finalize+451&amp;gt;: mov &#160;&#160;&#160;%dil,0x2c(%rsp) 
0xffffffffc0ac4f18 &amp;lt;lnet_finalize+456&amp;gt;: mov &#160;&#160;&#160;%al,0x2b(%rsp) 
0xffffffffc0ac4f1c &amp;lt;lnet_finalize+460&amp;gt;: je &#160;&#160;&#160;&#160;0xffffffffc0ac4f30 &amp;lt;lnet_finalize+480&amp;gt; 
0xffffffffc0ac4f1e &amp;lt;lnet_finalize+462&amp;gt;: mov &#160;&#160;&#160;0x50(%r14),%rax 
0xffffffffc0ac4f22 &amp;lt;lnet_finalize+466&amp;gt;: mov &#160;&#160;&#160;0x20(%rax),%rax 
0xffffffffc0ac4f26 &amp;lt;lnet_finalize+470&amp;gt;: cmpl &#160;&#160;$0x1,0x58(%rax)                                  &amp;lt;==== crash
0xffffffffc0ac4f2a &amp;lt;lnet_finalize+474&amp;gt;: jle &#160;&#160;&#160;0xffffffffc0ac5879 &amp;lt;lnet_finalize+2857&amp;gt; 
0xffffffffc0ac4f30 &amp;lt;lnet_finalize+480&amp;gt;: test &#160;&#160;%dl,%dl 
0xffffffffc0ac4f32 &amp;lt;lnet_finalize+482&amp;gt;: jne &#160;&#160;&#160;0xffffffffc0ac5675 &amp;lt;lnet_finalize+2341&amp;gt; 
0xffffffffc0ac4f38 &amp;lt;lnet_finalize+488&amp;gt;: test &#160;&#160;%r14,%r14 
0xffffffffc0ac4f3b &amp;lt;lnet_finalize+491&amp;gt;: je &#160;&#160;&#160;&#160;0xffffffffc0ac5b4d &amp;lt;lnet_finalize+3581&amp;gt; 
0xffffffffc0ac4f41 &amp;lt;lnet_finalize+497&amp;gt;: test &#160;&#160;%r15,%r15 
0xffffffffc0ac4f44 &amp;lt;lnet_finalize+500&amp;gt;: je &#160;&#160;&#160;&#160;0xffffffffc0ac5b4d &amp;lt;lnet_finalize+3581&amp;gt; 
0xffffffffc0ac4f4a &amp;lt;lnet_finalize+506&amp;gt;: testb &#160;$0x2,-0x4f7f8(%rip) &#160;&#160;&#160;&#160;&#160;&#160;&#160;# 0xffffffffc0a75759 &amp;lt;libcfs_debug+1&amp;gt; 
0xffffffffc0ac4f51 &amp;lt;lnet_finalize+513&amp;gt;: je &#160;&#160;&#160;&#160;0xffffffffc0ac4f60 &amp;lt;lnet_finalize+528&amp;gt; 
0xffffffffc0ac4f53 &amp;lt;lnet_finalize+515&amp;gt;: testb &#160;$0x4,-0x4f7fd(%rip) &#160;&#160;&#160;&#160;&#160;&#160;&#160;# 0xffffffffc0a7575d &amp;lt;libcfs_subsystem_debug+1&amp;gt; 
0xffffffffc0ac4f5a &amp;lt;lnet_finalize+522&amp;gt;: jne &#160;&#160;&#160;0xffffffffc0ac5891 &amp;lt;lnet_finalize+2881&amp;gt; 
0xffffffffc0ac4f60 &amp;lt;lnet_finalize+528&amp;gt;: mov &#160;&#160;&#160;0x38(%rsp),%r8d 
0xffffffffc0ac4f65 &amp;lt;lnet_finalize+533&amp;gt;: test &#160;&#160;%r8d,%r8d 
0xffffffffc0ac4f68 &amp;lt;lnet_finalize+536&amp;gt;: jne &#160;&#160;&#160;0xffffffffc0ac56b0 &amp;lt;lnet_finalize+2400&amp;gt; 
0xffffffffc0ac4f6e &amp;lt;lnet_finalize+542&amp;gt;: mov &#160;&#160;&#160;0xf8(%r15),%ecx 
0xffffffffc0ac4f75 &amp;lt;lnet_finalize+549&amp;gt;: mov &#160;&#160;&#160;0x4402d(%rip),%edi &#160;&#160;&#160;&#160;&#160;&#160;&#160;# 0xffffffffc0b08fa8 &amp;lt;lnet_health_sensitivity&amp;gt; 
0xffffffffc0ac4f7b &amp;lt;lnet_finalize+555&amp;gt;: lea &#160;&#160;&#160;0xf8(%r15),%r10 
0xffffffffc0ac4f82 &amp;lt;lnet_finalize+562&amp;gt;: mov &#160;&#160;&#160;$0x3e8,%esi 
0xffffffffc0ac4f87 &amp;lt;lnet_finalize+567&amp;gt;: cmp &#160;&#160;&#160;$0x3e8,%ecx
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;crash&amp;gt; lnet_peer_ni.lpni_peer_net -x
struct lnet_peer_ni {
   [0x50] struct lnet_peer_net *lpni_peer_net;
}
crash&amp;gt; struct lnet_peer.lp_nnis -x
struct lnet_peer {
   [0x58] int lp_nnis;
}
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;static int
lnet_health_check(struct lnet_msg *msg)
{
...
	if (the_lnet.ln_ping_target-&amp;gt;pb_nnis &amp;lt;= 2) {
		handle_local_health = false;
		attempt_local_resend = false;
	}

	/* For remote failures, health/recovery/resends are not needed if the
	 * peer only has a single interface. Special case for routers where we
	 * rely on health feature to manage route aliveness. NB: unlike pb_nnis
	 * above, lp_nnis does _not_ include the lolnd, so a single-rail node
	 * would have lp_nnis == 1.
	 */
	if (lpni &amp;amp;&amp;amp; lpni-&amp;gt;lpni_peer_net-&amp;gt;lpn_peer-&amp;gt;lp_nnis &amp;lt;= 1) {                     &amp;lt;==== crash
		attempt_remote_resend = false;
		if (!lnet_isrouter(lpni))
			handle_remote_health = false;
	}

	if (!lo)
		LASSERT(ni &amp;amp;&amp;amp; lpni);
	else
		LASSERT(ni);

	CDEBUG(D_NET, &quot;health check: %s-&amp;gt;%s: %s: %s\n&quot;,
	       libcfs_nid2str(ni-&amp;gt;ni_nid),
	       (lo) ? &quot;self&quot; : libcfs_nid2str(lpni-&amp;gt;lpni_nid),
	       lnet_msgtyp2str(msg-&amp;gt;msg_type),
	       lnet_health_error2str(hstatus));
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;However, lpni-&gt;lpni_peer_net-&gt;lpn_peer is not NULL at the time of the crash dump, even though the faulting address (0x58, the offset of lp_nnis) indicates it was NULL when dereferenced:&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;crash&amp;gt; lnet_peer_ni.lpni_peer_net ffff89b1c6be9a00
  lpni_peer_net = 0xffff89a22d2f4180
crash&amp;gt; lnet_peer_net.lpn_peer 0xffff89a22d2f4180
  lpn_peer = 0xffff89cbbed23c00
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;The buggy code was introduced by &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-13501&quot; title=&quot;Disable health on single-rail deployments&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-13501&quot;&gt;&lt;del&gt;LU-13501&lt;/del&gt;&lt;/a&gt;.&lt;/p&gt;</description>
                <environment></environment>
        <key id="63987">LU-14655</key>
            <summary>BUG: unable to handle kernel NULL pointer dereference at 0000000000000058</summary>
                <type id="1" iconUrl="https://jira.whamcloud.com/secure/viewavatar?size=xsmall&amp;avatarId=11303&amp;avatarType=issuetype">Bug</type>
                                            <priority id="2" iconUrl="https://jira.whamcloud.com/images/icons/priorities/critical.svg">Critical</priority>
                        <status id="5" iconUrl="https://jira.whamcloud.com/images/icons/statuses/resolved.png" description="A resolution has been taken, and it is awaiting verification by reporter. From here issues are either reopened, or are closed.">Resolved</status>
                    <statusCategory id="3" key="done" colorName="success"/>
                                    <resolution id="1">Fixed</resolution>
                                        <assignee username="hornc">Chris Horn</assignee>
                                    <reporter username="hornc">Chris Horn</reporter>
                        <labels>
                    </labels>
                <created>Thu, 29 Apr 2021 19:47:06 +0000</created>
                <updated>Fri, 28 Jan 2022 23:56:26 +0000</updated>
                            <resolved>Thu, 29 Jul 2021 11:28:49 +0000</resolved>
                                                    <fixVersion>Lustre 2.15.0</fixVersion>
                                        <due></due>
                            <votes>0</votes>
                                    <watches>2</watches>
                                                                            <comments>
                            <comment id="300150" author="gerrit" created="Thu, 29 Apr 2021 19:50:28 +0000"  >&lt;p&gt;Chris Horn (chris.horn@hpe.com) uploaded a new patch: &lt;a href=&quot;https://review.whamcloud.com/43503&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/43503&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-14655&quot; title=&quot;BUG: unable to handle kernel NULL pointer dereference at 0000000000000058&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-14655&quot;&gt;&lt;del&gt;LU-14655&lt;/del&gt;&lt;/a&gt; lnet: Protect lpni deref in lnet_health_check&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: master&lt;br/&gt;
Current Patch Set: 1&lt;br/&gt;
Commit: 4a1e5007f89e6aaff4e87f0686a49d989d28969d&lt;/p&gt;</comment>
                            <comment id="308576" author="gerrit" created="Tue, 27 Jul 2021 21:36:00 +0000"  >&lt;p&gt;Oleg Drokin (green@whamcloud.com) merged in patch &lt;a href=&quot;https://review.whamcloud.com/43503/&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/43503/&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-14655&quot; title=&quot;BUG: unable to handle kernel NULL pointer dereference at 0000000000000058&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-14655&quot;&gt;&lt;del&gt;LU-14655&lt;/del&gt;&lt;/a&gt; lnet: Protect lpni deref in lnet_health_check&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: master&lt;br/&gt;
Current Patch Set: &lt;br/&gt;
Commit: d87af24452a2e883b0e7400661a5b768c35088b1&lt;/p&gt;</comment>
                            <comment id="308754" author="pjones" created="Thu, 29 Jul 2021 11:28:49 +0000"  >&lt;p&gt;Landed for 2.15&lt;/p&gt;</comment>
                    </comments>
                    <attachments>
                    </attachments>
                <subtasks>
                    </subtasks>
                <customfields>
                                                                                                                                                                                            <customfield id="customfield_10890" key="com.atlassian.jira.plugins.jira-development-integration-plugin:devsummary">
                        <customfieldname>Development</customfieldname>
                        <customfieldvalues>
                            
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        <customfield id="customfield_10390" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                        <customfieldname>Rank</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>1|i01tev:</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10090" key="com.pyxis.greenhopper.jira:gh-global-rank">
                        <customfieldname>Rank (Obsolete)</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>9223372036854775807</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                                                                                            <customfield id="customfield_10060" key="com.atlassian.jira.plugin.system.customfieldtypes:select">
                        <customfieldname>Severity</customfieldname>
                        <customfieldvalues>
                                <customfieldvalue key="10022"><![CDATA[3]]></customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        </customfields>
    </item>
</channel>
</rss>