<!-- 
RSS generated by JIRA (9.4.14#940014-sha1:734e6822bbf0d45eff9af51f82432957f73aa32c) at Sat Feb 10 01:56:01 UTC 2024

It is possible to restrict the fields that are returned in this document by specifying the 'field' parameter in your request.
For example, to request only the issue key and summary append 'field=key&field=summary' to the URL of your request.
-->
<rss version="0.92" >
<channel>
    <title>Whamcloud Community JIRA</title>
    <link>https://jira.whamcloud.com</link>
    <description>This file is an XML representation of an issue</description>
    <language>en-us</language>    <build-info>
        <version>9.4.14</version>
        <build-number>940014</build-number>
        <build-date>05-12-2023</build-date>
    </build-info>


<item>
            <title>[LU-5962] OSS crash on lnet_ptl_match_md() due to a null pointer because ptl-&gt;ptl_rotor is negative</title>
                <link>https://jira.whamcloud.com/browse/LU-5962</link>
                <project id="10000" key="LU">Lustre</project>
                    <description>&lt;p&gt;During a restart (umount/mount) of OSTs, we hit an OSS crash &lt;br/&gt;
due to a NULL pointer dereference:&lt;/p&gt;

&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;BUG: unable to handle kernel NULL pointer dereference at (null)
IP: [&amp;lt;ffffffffa06b37c0&amp;gt;] lnet_ptl_match_md+0x250/0x870 [lnet]
PGD 0 Oops: 0000 [#1] SMP 
last sysfs file: /sys/devices/pci0000:00/0000:00:05.0/0000:05:00.1/host7/rport-7:0-0/target7:0:0/7:0:0:3/state
CPU 12 
Modules linked in: osp(U) ofd(U) lfsck(U) ost(U) mgc(U) fsfilt_ldiskfs(U) osd_ldiskfs(U) ldiskfs(U) lustre(U) lov(U) osc(U) mdc(U) lqu
ota(U) fid(U) fld(U) ko2iblnd(U) ptlrpc(U) obdclass(U) lnet(U) lvfs(U) libcfs(U) sha512_generic sha256_generic crc32c_intel nfs lockd 
fscache auth_rpcgss nfs_acl sunrpc ipmi_devintf cpufreq_ondemand acpi_cpufreq freq_table mperf rdma_ucm(U) rdma_cm(U) iw_cm(U) ib_addr
(U) ib_ipoib(U) ib_cm(U) ipv6 ib_uverbs(U) ib_umad(U) mlx4_ib(U) ib_sa(U) ib_mad(U) ib_core(U) mlx4_core(U) dm_round_robin scsi_dh_rda
c dm_multipath uinput sg lpc_ich mfd_core ioatdma compat(U) igb dca i2c_algo_bit i2c_core ptp pps_core lpfc scsi_transport_fc scsi_tgt
 ext4 jbd2 mbcache sd_mod crc_t10dif ahci dm_mirror dm_region_hash dm_log dm_mod megaraid_sas [last unloaded: libcfs]

Pid: 25204, comm: kiblnd_sd_00_01 Tainted: G        W  ---------------    2.6.32-431.23.3.el6.Bull.56.x86_64 #1 BULL bullx super-node
RIP: 0010:[&amp;lt;ffffffffa06b37c0&amp;gt;]  [&amp;lt;ffffffffa06b37c0&amp;gt;] lnet_ptl_match_md+0x250/0x870 [lnet]
RSP: 0018:ffff880c70589bf0  EFLAGS: 00010287
RAX: ffffffffd4888cbe RBX: ffff880c70589cf0 RCX: 00000000d4888cbd
RDX: fffffffffffffffe RSI: ffff880c5787b7d0 RDI: 0000000000000003
RBP: ffff880c70589c70 R08: 8980000000000000 R09: 4c00000000000000
R10: 000000000000002c R11: 0000000000000012 R12: ffff880434b24000
R13: ffff880c40941f40 R14: ffff880c40941f40 R15: 0000000000000000
FS:  0000000000000000(0000) GS:ffff8800282c0000(0000) knlGS:0000000000000000
CS:  0010 DS: 0018 ES: 0018 CR0: 000000008005003b
CR2: 0000000000000000 CR3: 0000000001a85000 CR4: 00000000000007e0
DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400
Process kiblnd_sd_00_01 (pid: 25204, threadinfo ffff880c70588000, task ffff880c77ac9500)
Stack:
 ffff8803ab5eb278 ffff880c70589cb8 ffff8803ab5eb140 ffff8804d4888cbd
&amp;lt;d&amp;gt; ffff880c70589c70 ffffffffa06c5c36 ffff880c70589c70 0000000000000246
&amp;lt;d&amp;gt; ffff880c70589c70 ffff8803c94d1580 0000000000000000 ffff880434b24000
Call Trace:
 [&amp;lt;ffffffffa06bb05b&amp;gt;] lnet_parse+0xb9b/0x18c0 [lnet]
 [&amp;lt;ffffffffa08947fb&amp;gt;] kiblnd_handle_rx+0x2cb/0x640 [ko2iblnd]
 [&amp;lt;ffffffffa08954e3&amp;gt;] kiblnd_rx_complete+0x2d3/0x420 [ko2iblnd]
 [&amp;lt;ffffffffa0895692&amp;gt;] kiblnd_complete+0x62/0xe0 [ko2iblnd]
 [&amp;lt;ffffffffa0895a4a&amp;gt;] kiblnd_scheduler+0x33a/0x7b0 [ko2iblnd]
 [&amp;lt;ffffffff81099f56&amp;gt;] kthread+0x96/0xa0
 [&amp;lt;ffffffff8100c20a&amp;gt;] child_rip+0xa/0x20
Code: 00 00 00 48 8b 5d d8 4c 8b 65 e0 4c 8b 6d e8 4c 8b 75 f0 4c 8b 7d f8 c9 c3 66 90 49 8b 45 30 4c 8b 38 4d 85 ff 0f 84 39 fe ff ff
 &amp;lt;41&amp;gt; 8b 37 48 8b 3d c6 62 02 00 e8 01 6d f9 ff 8b 0d 77 64 02 00 

crash&amp;gt; sys
      KERNEL: /usr/lib/debug/lib/modules/2.6.32-431.23.3.el6.Bull.56.x86_64/vmlinux
    DUMPFILE: vmcore  [PARTIAL DUMP]
        CPUS: 32
        DATE: Mon Nov  3 17:30:13 2014
      UPTIME: 28 days, 01:29:06
LOAD AVERAGE: 75.19, 18.37, 8.71
       TASKS: 2258
    NODENAME: bigfoot27
     RELEASE: 2.6.32-431.23.3.el6.Bull.56.x86_64
     VERSION: #1 SMP Thu Jul 31 16:27:31 CEST 2014
     MACHINE: x86_64  (2266 Mhz)
      MEMORY: 64 GB
       PANIC: &quot;Oops: 0000 [#1] SMP &quot; (check log for details)
crash&amp;gt; 
crash&amp;gt; bt
PID: 25204  TASK: ffff880c77ac9500  CPU: 12  COMMAND: &quot;kiblnd_sd_00_01&quot;
 #0 [ffff880c705897e0] machine_kexec at ffffffff8103914b
 #1 [ffff880c70589840] crash_kexec at ffffffff810c6042
 #2 [ffff880c70589910] oops_end at ffffffff8152d9d0
 #3 [ffff880c70589940] no_context at ffffffff8104a19b
 #4 [ffff880c70589990] __bad_area_nosemaphore at ffffffff8104a425
 #5 [ffff880c705899e0] bad_area_nosemaphore at ffffffff8104a4f3
 #6 [ffff880c705899f0] __do_page_fault at ffffffff8104ac4f
 #7 [ffff880c70589b10] do_page_fault at ffffffff8152f91e
 #8 [ffff880c70589b40] page_fault at ffffffff8152ccd5
    [exception RIP: lnet_ptl_match_md+592]
    RIP: ffffffffa06b37c0  RSP: ffff880c70589bf0  RFLAGS: 00010287
    RAX: ffffffffd4888cbe  RBX: ffff880c70589cf0  RCX: 00000000d4888cbd
    RDX: fffffffffffffffe  RSI: ffff880c5787b7d0  RDI: 0000000000000003
    RBP: ffff880c70589c70   R8: 8980000000000000   R9: 4c00000000000000
    R10: 000000000000002c  R11: 0000000000000012  R12: ffff880434b24000
    R13: ffff880c40941f40  R14: ffff880c40941f40  R15: 0000000000000000
    ORIG_RAX: ffffffffffffffff  CS: 0010  SS: 0018
 #9 [ffff880c70589c78] lnet_parse at ffffffffa06bb05b [lnet]
#10 [ffff880c70589d58] kiblnd_handle_rx at ffffffffa08947fb [ko2iblnd]
#11 [ffff880c70589da8] kiblnd_rx_complete at ffffffffa08954e3 [ko2iblnd]
#12 [ffff880c70589df8] kiblnd_complete at ffffffffa0895692 [ko2iblnd]
#13 [ffff880c70589e08] kiblnd_scheduler at ffffffffa0895a4a [ko2iblnd]
#14 [ffff880c70589ee8] kthread at ffffffff81099f56
#15 [ffff880c70589f48] kernel_thread at ffffffff8100c20a
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;
&lt;p&gt;We can find the ptl variable used by the function lnet_ptl_match_md()&lt;br/&gt;
and the crash occurs because ptl_rotor is negative.&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;crash&amp;gt; struct lnet_portal 0xffff880c40941f40
struct lnet_portal {
  ptl_lock = {
    raw_lock = {
      slock = 409081954
    }
  }, 
  ptl_index = 28, 
  ptl_options = 5, 
  ptl_msg_stealing = {
    next = 0xffff880c40941f50, 
    prev = 0xffff880c40941f50
  }, 
  ptl_msg_delayed = {
    next = 0xffff880c40941f60, 
    prev = 0xffff880c40941f60
  }, 
  ptl_mtables = 0xffff880c5787b7d0, 
  ptl_rotor = -729246580, 
  ptl_mt_nmaps = 4, 
  ptl_mt_maps = 0xffff880c40941f80
}

&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;
&lt;p&gt;Proposed fix:&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;Nov-04 15:11:34 [root@lascaux0 lustre-2.5.3] # diff -up lnet/lnet/lib-ptl.c lnet/lnet/lib-ptl.c.apr
--- lnet/lnet/lib-ptl.c 2014-09-11 18:04:07.000000000 +0200
+++ lnet/lnet/lib-ptl.c.apr     2014-11-04 15:11:34.935503533 +0100
@@ -773,6 +773,7 @@ lnet_ptl_setup(struct lnet_portal *ptl,
        }

        ptl-&amp;gt;ptl_index = index;
+        ptl-&amp;gt;ptl_rotor = 0;
        CFS_INIT_LIST_HEAD(&amp;amp;ptl-&amp;gt;ptl_msg_delayed);
        CFS_INIT_LIST_HEAD(&amp;amp;ptl-&amp;gt;ptl_msg_stealing);
 #ifdef __KERNEL__
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;I have attached my full analysis trace.&lt;/p&gt;
</description>
                <environment>kernel 2.6.32-431.23.3 + bull fix&lt;br/&gt;
lustre 2.5.3 + bull fix </environment>
        <key id="27741">LU-5962</key>
            <summary>OSS crash on lnet_ptl_match_md() due to a null pointer because ptl-&gt;ptl_rotor is negative</summary>
                <type id="1" iconUrl="https://jira.whamcloud.com/secure/viewavatar?size=xsmall&amp;avatarId=11303&amp;avatarType=issuetype">Bug</type>
                                            <priority id="4" iconUrl="https://jira.whamcloud.com/images/icons/priorities/minor.svg">Minor</priority>
                        <status id="5" iconUrl="https://jira.whamcloud.com/images/icons/statuses/resolved.png" description="A resolution has been taken, and it is awaiting verification by reporter. From here issues are either reopened, or are closed.">Resolved</status>
                    <statusCategory id="3" key="done" colorName="success"/>
                                    <resolution id="3">Duplicate</resolution>
                                        <assignee username="bfaccini">Bruno Faccini</assignee>
                                    <reporter username="apercher">Antoine Percher</reporter>
                        <labels>
                    </labels>
                <created>Fri, 28 Nov 2014 13:53:20 +0000</created>
                <updated>Tue, 9 Oct 2018 17:14:28 +0000</updated>
                            <resolved>Tue, 9 Oct 2018 17:14:28 +0000</resolved>
                                    <version>Lustre 2.5.3</version>
                                                        <due></due>
                            <votes>0</votes>
                                    <watches>6</watches>
                                                                            <comments>
                            <comment id="100224" author="bfaccini" created="Fri, 28 Nov 2014 14:01:07 +0000"  >&lt;p&gt;Nice catch antoine !!&lt;br/&gt;
Can you push your patch to our git&apos;s master branch and add its reference/link to this ticket ??&lt;/p&gt;</comment>
                            <comment id="100225" author="bfaccini" created="Fri, 28 Nov 2014 14:10:58 +0000"  >&lt;p&gt;Hummm, in fact, having a look at the current master tree, this seems to have been fixed by changing ptl_rotor to an &quot;unsigned int&quot; in Gerrit change 11936, with Commit e50fc41ad6383e31b896a9306307f10ced1b3de2, for &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-5639&quot; title=&quot;Message is hashed to invalid match-table of LNet request portal&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-5639&quot;&gt;&lt;del&gt;LU-5639&lt;/del&gt;&lt;/a&gt;.&lt;/p&gt;

&lt;p&gt;So you may only need a back-port of this change for b2_5.&lt;/p&gt;</comment>
                            <comment id="100226" author="pjones" created="Fri, 28 Nov 2014 14:15:30 +0000"  >&lt;p&gt;If that is the case then we can probably just close this as a duplicate of &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-5639&quot; title=&quot;Message is hashed to invalid match-table of LNet request portal&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-5639&quot;&gt;&lt;del&gt;LU-5639&lt;/del&gt;&lt;/a&gt;. We have already back ported &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-5639&quot; title=&quot;Message is hashed to invalid match-table of LNet request portal&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-5639&quot;&gt;&lt;del&gt;LU-5639&lt;/del&gt;&lt;/a&gt; and intend to include it in 2.5.4&lt;/p&gt;</comment>
                            <comment id="100227" author="pjones" created="Fri, 28 Nov 2014 14:15:36 +0000"  >&lt;p&gt;&lt;a href=&quot;http://review.whamcloud.com/#/c/11999/&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/#/c/11999/&lt;/a&gt;&lt;/p&gt;</comment>
                    </comments>
                <issuelinks>
                            <issuelinktype id="10010">
                    <name>Duplicate</name>
                                            <outwardlinks description="duplicates">
                                        <issuelink>
            <issuekey id="26635">LU-5639</issuekey>
        </issuelink>
                            </outwardlinks>
                                                        </issuelinktype>
                            <issuelinktype id="10011">
                    <name>Related</name>
                                                                <inwardlinks description="is related to">
                                                        </inwardlinks>
                                    </issuelinktype>
                    </issuelinks>
                <attachments>
                            <attachment id="16516" name="trace_debug_bigfoot27_lnet_ptl_match_md.txt" size="1137745" author="apercher" created="Fri, 28 Nov 2014 13:53:20 +0000"/>
                    </attachments>
                <subtasks>
                    </subtasks>
                <customfields>
                                                                                                                                                                                            <customfield id="customfield_10890" key="com.atlassian.jira.plugins.jira-development-integration-plugin:devsummary">
                        <customfieldname>Development</customfieldname>
                        <customfieldvalues>
                            
                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10490" key="com.atlassian.jira.plugin.system.customfieldtypes:datepicker">
                        <customfieldname>End date</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>Fri, 28 Nov 2014 13:53:20 +0000</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                        <customfield id="customfield_10030" key="com.atlassian.jira.plugin.system.customfieldtypes:labels">
                        <customfieldname>Epic/Theme</customfieldname>
                        <customfieldvalues>
                                        <label>mount</label>
    
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                        <customfield id="customfield_10390" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                        <customfieldname>Rank</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>1|hzx1nb:</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10090" key="com.pyxis.greenhopper.jira:gh-global-rank">
                        <customfieldname>Rank (Obsolete)</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>16654</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                                                                                            <customfield id="customfield_10060" key="com.atlassian.jira.plugin.system.customfieldtypes:select">
                        <customfieldname>Severity</customfieldname>
                        <customfieldvalues>
                                <customfieldvalue key="10022"><![CDATA[3]]></customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                        <customfield id="customfield_10493" key="com.atlassian.jira.plugin.system.customfieldtypes:datepicker">
                        <customfieldname>Start date</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>Fri, 28 Nov 2014 13:53:20 +0000</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                    </customfields>
    </item>
</channel>
</rss>