<!-- 
RSS generated by JIRA (9.4.14#940014-sha1:734e6822bbf0d45eff9af51f82432957f73aa32c) at Sat Feb 10 02:53:28 UTC 2024

It is possible to restrict the fields that are returned in this document by specifying the 'field' parameter in your request.
For example, to request only the issue key and summary append 'field=key&field=summary' to the URL of your request.
-->
<rss version="0.92" >
<channel>
    <title>Whamcloud Community JIRA</title>
    <link>https://jira.whamcloud.com</link>
    <description>This file is an XML representation of an issue</description>
    <language>en-us</language>    <build-info>
        <version>9.4.14</version>
        <build-number>940014</build-number>
        <build-date>05-12-2023</build-date>
    </build-info>


<item>
            <title>[LU-12537] recovery-mds-scale failover_mds: BUG: unable to handle kernel NULL pointer dereference at (null) IP: lnet_attach_rsp_tracker.isra.32+0xad/0x200 [lnet]</title>
                <link>https://jira.whamcloud.com/browse/LU-12537</link>
                <project id="10000" key="LU">Lustre</project>
                    <description>&lt;p&gt;There is a race when starting LNet. The discovery thread is started before the monitor thread, so a PUT/GET may be issued before the monitor thread has initialized its data structures. This results in the following oops:&lt;/p&gt;

&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;[  297.984001] BUG: unable to handle kernel NULL pointer dereference at           (null)
[  297.987525] IP: [&amp;lt;ffffffffc07db70d&amp;gt;] lnet_attach_rsp_tracker.isra.32+0xad/0x200 [lnet]
[  297.990999] PGD 0 
[  297.991954] Oops: 0000 [#1] SMP 
[  297.993493] Modules linked in: ksocklnd(OE) ptlrpc(OE+) obdclass(OE) lnet(OE) libcfs(OE) crc_t10dif crct10dif_generic crct10dif_common dm_mod rpcsec_gss_krb5 nfsv4 dns_resolver nfs fscache rpcrdma(OE) xprtrdma(OE) ib_isert(OE) ib_iser(OE) ib_srpt(OE) ib_srp(OE) ib_ipoib(OE) rdma_ucm(OE) mlx5_ib(OE) ib_ucm(OE) ib_uverbs(OE) ib_umad(OE) mlx5_core(OE) rdma_cm(OE) ib_cm(OE) iw_cm(OE) mlx4_ib(OE) ib_core(OE) mlx4_en(OE) ptp pps_core mlx4_core(OE) mlx_compat(OE) devlink cirrus ttm drm_kms_helper syscopyarea ppdev sysfillrect sysimgblt fb_sys_fops drm i2c_piix4 pcspkr joydev i2c_core virtio_balloon parport_pc parport nfsd auth_rpcgss nfs_acl lockd grace sunrpc ip_tables ext4 mbcache jbd2 ata_generic pata_acpi virtio_net virtio_blk ata_piix libata serio_raw virtio_pci virtio_ring virtio floppy
[  298.031286] CPU: 1 PID: 11179 Comm: lnet_discovery Tainted: G           OE  ------------   3.10.0-693.21.1.x3.2.152.x86_64 #1
[  298.038606] Hardware name: Red Hat KVM, BIOS 0.5.1 01/01/2011
[  298.042341] task: ffff8800b897cf10 ti: ffff880134cf8000 task.ti: ffff880134cf8000
[  298.047182] RIP: 0010:[&amp;lt;ffffffffc07db70d&amp;gt;]  [&amp;lt;ffffffffc07db70d&amp;gt;] lnet_attach_rsp_tracker.isra.32+0xad/0x200 [lnet]
[  298.053925] RSP: 0018:ffff880134cfbcc0  EFLAGS: 00010296
[  298.057366] RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000000017
[  298.061963] RDX: 0000000000000000 RSI: 0000000000000000 RDI: ffff8800b8b6df80
[  298.066598] RBP: ffff880134cfbcf0 R08: 00000000c565cd78 R09: 0000000000000000
[  298.071218] R10: 0000000000000009 R11: fffffffffffffff0 R12: ffff8800b8b6df80
[  298.075816] R13: ffff8800b8b6df80 R14: 0000004787d10555 R15: ffff880136265060
[  298.080443] FS:  0000000000000000(0000) GS:ffff88013fd00000(0000) knlGS:0000000000000000
[  298.085693] CS:  0010 DS: 0000 ES: 0000 CR0: 000000008005003b
[  298.089432] CR2: 0000000000000000 CR3: 00000000359c2000 CR4: 00000000000006e0
[  298.094034] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
[  298.098707] DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400
[  298.103328] Call Trace:
[  298.104940]  [&amp;lt;ffffffffc07e2751&amp;gt;] LNetGet+0x5d1/0xa80 [lnet]
[  298.108750]  [&amp;lt;ffffffffc07e2e05&amp;gt;] lnet_send_ping+0x1a5/0x1d0 [lnet]
[  298.112811]  [&amp;lt;ffffffffc07f24e8&amp;gt;] lnet_peer_send_ping+0xa8/0x250 [lnet]
[  298.117082]  [&amp;lt;ffffffffc07f7a20&amp;gt;] lnet_peer_discovery+0x800/0x11a0 [lnet]
[  298.121481]  [&amp;lt;ffffffff810b4fc0&amp;gt;] ? wake_up_atomic_t+0x30/0x30
[  298.123330]  [&amp;lt;ffffffffc07f7220&amp;gt;] ? lnet_peer_merge_data+0xf90/0xf90 [lnet]
[  298.126429]  [&amp;lt;ffffffff810b4031&amp;gt;] kthread+0xd1/0xe0
[  298.129468]  [&amp;lt;ffffffff810c28a7&amp;gt;] ? finish_task_switch+0x57/0x170
[  298.135321]  [&amp;lt;ffffffff810b3f60&amp;gt;] ? insert_kthread_work+0x40/0x40
[  298.141100]  [&amp;lt;ffffffff816c4577&amp;gt;] ret_from_fork+0x77/0xb0
[  298.146445]  [&amp;lt;ffffffff810b3f60&amp;gt;] ? insert_kthread_work+0x40/0x40
[  298.152179] Code: 03 00 e8 17 b2 f7 ff 4d 3b 24 24 74 11 4c 89 e7 e8 79 89 b6 c0 4d 89 24 24 4d 89 64 24 08 48 8b 05 01 8b 03 00 48 63 d3 4c 89 e7 &amp;lt;48&amp;gt; 8b 14 d0 48 8b 72 08 e8 96 88 b6 c0 48 8b 3d 9f 88 03 00 89 
[  298.173797] RIP  [&amp;lt;ffffffffc07db70d&amp;gt;] lnet_attach_rsp_tracker.isra.32+0xad/0x200 [lnet]
[  298.182665]  RSP &amp;lt;ffff880134cfbcc0&amp;gt;
[  298.187675] CR2: 0000000000000000
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;The oops was here:&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;/usr/src/debug/lustre-2.12.0.1_cray_136_g0e9df14/lnet/lnet/lib-move.c: 4623
0xffffffffc07d670d &amp;lt;lnet_attach_rsp_tracker+173&amp;gt;:   mov    (%rax,%rdx,8),%rdx&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;Which corresponds to this source line:&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;list_add_tail(&amp;amp;local_rspt-&amp;gt;rspt_on_list, the_lnet.ln_mt_rstq[cpt]);&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;And the crash dump shows that ln_mt_rstq is NULL:&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;crash_x86_64&amp;gt; the_lnet | grep ln_mt_rstq
  ln_mt_rstq = 0x0,
crash_x86_64&amp;gt;&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;The startup code starts the discovery thread before the monitor thread:&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt; rc = lnet_peer_discovery_start();
        if (rc != 0)
                goto err_destroy_push_target;
 
        rc = lnet_monitor_thr_start();
        if (rc != 0)
                goto err_stop_discovery_thr;
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;crash_x86_64&amp;gt; ps | grep &apos;&amp;gt;&apos;
&amp;gt; 11163  11162   1  ffff880134cd8fd0  RU   0.0   18876   1032  modprobe
&amp;gt; 11176      2   0  ffff880135efcf10  RU   0.0       0      0  [lnet_discovery]
crash_x86_64&amp;gt;
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;</description>
                <environment></environment>
        <key id="56390">LU-12537</key>
            <summary>recovery-mds-scale failover_mds: BUG: unable to handle kernel NULL pointer dereference at (null) IP: lnet_attach_rsp_tracker.isra.32+0xad/0x200 [lnet]</summary>
                <type id="1" iconUrl="https://jira.whamcloud.com/secure/viewavatar?size=xsmall&amp;avatarId=11303&amp;avatarType=issuetype">Bug</type>
                                            <priority id="3" iconUrl="https://jira.whamcloud.com/images/icons/priorities/major.svg">Major</priority>
                        <status id="5" iconUrl="https://jira.whamcloud.com/images/icons/statuses/resolved.png" description="A resolution has been taken, and it is awaiting verification by reporter. From here issues are either reopened, or are closed.">Resolved</status>
                    <statusCategory id="3" key="done" colorName="success"/>
                                    <resolution id="1">Fixed</resolution>
                                        <assignee username="hornc">Chris Horn</assignee>
                                    <reporter username="hornc">Chris Horn</reporter>
                        <labels>
                    </labels>
                <created>Thu, 11 Jul 2019 21:46:49 +0000</created>
                <updated>Fri, 9 Aug 2019 15:07:14 +0000</updated>
                            <resolved>Fri, 9 Aug 2019 15:07:14 +0000</resolved>
                                    <version>Lustre 2.13.0</version>
                                    <fixVersion>Lustre 2.13.0</fixVersion>
                                        <due></due>
                            <votes>0</votes>
                                    <watches>4</watches>
                                                                            <comments>
                            <comment id="251145" author="pfarrell" created="Thu, 11 Jul 2019 21:58:03 +0000"  >&lt;p&gt;&lt;a href=&quot;https://jira.whamcloud.com/secure/ViewProfile.jspa?name=ashehata&quot; class=&quot;user-hover&quot; rel=&quot;ashehata&quot;&gt;ashehata&lt;/a&gt;, I thought I saw you talking about a similar race recently?&lt;/p&gt;</comment>
                            <comment id="251146" author="gerrit" created="Thu, 11 Jul 2019 21:58:47 +0000"  >&lt;p&gt;Chris Horn (hornc@cray.com) uploaded a new patch: &lt;a href=&quot;https://review.whamcloud.com/35478&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/35478&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-12537&quot; title=&quot;recovery-mds-scale failover_mds: BUG: unable to handle kernel NULL pointer dereference at (null) IP: lnet_attach_rsp_tracker.isra.32+0xad/0x200 [lnet]&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-12537&quot;&gt;&lt;del&gt;LU-12537&lt;/del&gt;&lt;/a&gt; lnet: Have discovery wait for monitor thread&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: master&lt;br/&gt;
Current Patch Set: 1&lt;br/&gt;
Commit: d74ab549081f634b671dc920194abe6886b89e11&lt;/p&gt;</comment>
                            <comment id="251151" author="ashehata" created="Thu, 11 Jul 2019 22:18:35 +0000"  >&lt;p&gt;&lt;a href=&quot;https://jira.whamcloud.com/secure/ViewProfile.jspa?name=pfarrell&quot; class=&quot;user-hover&quot; rel=&quot;pfarrell&quot;&gt;pfarrell&lt;/a&gt;, this is a bug that Chris ran into in his testing. Not sure which race you&apos;re referring to.&lt;/p&gt;</comment>
                            <comment id="251152" author="pfarrell" created="Thu, 11 Jul 2019 22:22:44 +0000"  >&lt;p&gt;I am probably just confused - Sounded to me like something I had seen.&lt;/p&gt;</comment>
                            <comment id="252826" author="gerrit" created="Fri, 9 Aug 2019 04:39:40 +0000"  >&lt;p&gt;Oleg Drokin (green@whamcloud.com) merged in patch &lt;a href=&quot;https://review.whamcloud.com/35478/&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/35478/&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-12537&quot; title=&quot;recovery-mds-scale failover_mds: BUG: unable to handle kernel NULL pointer dereference at (null) IP: lnet_attach_rsp_tracker.isra.32+0xad/0x200 [lnet]&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-12537&quot;&gt;&lt;del&gt;LU-12537&lt;/del&gt;&lt;/a&gt; lnet: Sync the start of discovery and monitor threads&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: master&lt;br/&gt;
Current Patch Set: &lt;br/&gt;
Commit: 9283e2ed6655e89fe693d35313c9dcf1d5a6703a&lt;/p&gt;</comment>
                            <comment id="252875" author="pjones" created="Fri, 9 Aug 2019 15:07:14 +0000"  >&lt;p&gt;Landed for 2.13&lt;/p&gt;</comment>
                    </comments>
                    <attachments>
                    </attachments>
                <subtasks>
                    </subtasks>
                <customfields>
                                                                                                                                                                                            <customfield id="customfield_10890" key="com.atlassian.jira.plugins.jira-development-integration-plugin:devsummary">
                        <customfieldname>Development</customfieldname>
                        <customfieldvalues>
                            
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        <customfield id="customfield_10390" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                        <customfieldname>Rank</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>1|i00jjz:</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10090" key="com.pyxis.greenhopper.jira:gh-global-rank">
                        <customfieldname>Rank (Obsolete)</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>9223372036854775807</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                                                                                            <customfield id="customfield_10060" key="com.atlassian.jira.plugin.system.customfieldtypes:select">
                        <customfieldname>Severity</customfieldname>
                        <customfieldvalues>
                                <customfieldvalue key="10022"><![CDATA[3]]></customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        </customfields>
    </item>
</channel>
</rss>