<!-- 
RSS generated by JIRA (9.4.14#940014-sha1:734e6822bbf0d45eff9af51f82432957f73aa32c) at Sat Feb 10 02:50:06 UTC 2024

It is possible to restrict the fields that are returned in this document by specifying the 'field' parameter in your request.
For example, to request only the issue key and summary, append 'field=key&field=summary' to the URL of your request.
-->
<rss version="0.92" >
<channel>
    <title>Whamcloud Community JIRA</title>
    <link>https://jira.whamcloud.com</link>
    <description>This file is an XML representation of an issue</description>
    <language>en-us</language>    <build-info>
        <version>9.4.14</version>
        <build-number>940014</build-number>
        <build-date>05-12-2023</build-date>
    </build-info>


<item>
            <title>[LU-12152] lnetctl export corrupts memory on routers</title>
                <link>https://jira.whamcloud.com/browse/LU-12152</link>
                <project id="10000" key="LU">Lustre</project>
                    <description>&lt;p&gt;Reproducer:&lt;/p&gt;

&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;[root@cent75build01 ~]# cat /etc/modprobe.d/lnet.conf
options lnet networks=tcp
options lnet forwarding=enabled
[root@cent75build01 ~]# modprobe lnet
[root@cent75build01 ~]# lctl net up
LNET configured
[root@cent75build01 ~]# while true; do lnetctl export &amp;gt; /dev/null; echo &quot;still alive&quot;; done
still alive
still alive
still alive
still alive
still alive
still alive
still alive
Write failed: Broken pipe
[root@control01 ~]# ssh cent75build01
Last login: Tue Apr  2 22:01:13 2019 from 192.168.1.10
[root@cent75build01 ~]# cd /var/crash/127.0.0.1-2019-04-02-22:02:31
[root@cent75build01 127.0.0.1-2019-04-02-22:02:31]# tail --lines 36 vmcore-dmesg.txt
[  156.209529] BUG: unable to handle kernel paging request at 0000007a00000002
[  156.209598] IP: [&amp;lt;ffffffffa53fae34&amp;gt;] kmem_cache_alloc+0x74/0x1f0
[  156.209648] PGD 800000081d0c4067 PUD 0
[  156.209672] Oops: 0000 [#1] SMP
[  156.209695] Modules linked in: ksocklnd(OE) lnet(OE) libcfs(OE) rdma_ucm(OE) ib_ucm(OE) rdma_cm(OE) iw_cm(OE) ib_ipoib(OE) ib_cm(OE) ib_uverbs(OE) ib_umad(OE) mlx5_fpga_tools(OE) mlx5_ib(OE) mlx5_core(OE) mlxfw(OE) mlx4_en(OE) ptp pps_core mlx4_ib(OE) ib_core(OE) mlx4_core(OE) mlx_compat(OE) devlink sb_edac coretemp iosf_mbi crc32_pclmul ghash_clmulni_intel aesni_intel lrw gf128mul glue_helper ablk_helper cryptd ppdev vmw_balloon pcspkr joydev sg vmw_vmci parport_pc parport shpchp i2c_piix4 binfmt_misc ip_tables ext4 mbcache jbd2 sr_mod sd_mod cdrom crc_t10dif crct10dif_generic ata_generic pata_acpi vmwgfx drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops ttm ata_piix mptspi drm scsi_transport_spi crct10dif_pclmul crct10dif_common mptscsih crc32c_intel libata serio_raw mptbase vmxnet3
[  156.210184]  i2c_core floppy dm_mirror dm_region_hash dm_log dm_mod
[  156.210239] CPU: 25 PID: 2169 Comm: lnetctl Kdump: loaded Tainted: G           OE  ------------   3.10.0-862.14.4.el7.x86_64 #1
[  156.210289] Hardware name: VMware, Inc. VMware Virtual Platform/440BX Desktop Reference Platform, BIOS 6.00 07/30/2013
[  156.210335] task: ffff97c376ba0fd0 ti: ffff97c377424000 task.ti: ffff97c377424000
[  156.210368] RIP: 0010:[&amp;lt;ffffffffa53fae34&amp;gt;]  [&amp;lt;ffffffffa53fae34&amp;gt;] kmem_cache_alloc+0x74/0x1f0
[  156.210409] RSP: 0018:ffff97c377427d10  EFLAGS: 00010282
[  156.210433] RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000004993
[  156.210465] RDX: 0000000000004992 RSI: 00000000000080d0 RDI: ffff97c0bfc03700
[  156.210495] RBP: ffff97c377427d40 R08: 000000000001bb00 R09: ffffffffa54217ec
[  156.210526] R10: 8080808080808080 R11: 0000000000000000 R12: 0000007a00000002
[  156.210557] R13: 00000000000080d0 R14: ffff97c0bfc03700 R15: ffff97c0bfc03700
[  156.210588] FS:  0000000000000000(0000) GS:ffff97c77fc40000(0000) knlGS:0000000000000000
[  156.210623] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[  156.210649] CR2: 0000007a00000002 CR3: 00000008322c6000 CR4: 00000000000407e0
[  156.210728] Call Trace:
[  156.210756]  [&amp;lt;ffffffffa54217ec&amp;gt;] ? get_empty_filp+0x5c/0x1a0
[  156.210786]  [&amp;lt;ffffffffa54217ec&amp;gt;] get_empty_filp+0x5c/0x1a0
[  156.210817]  [&amp;lt;ffffffffa543019d&amp;gt;] path_openat+0x4d/0x640
[  156.210846]  [&amp;lt;ffffffffa53c8544&amp;gt;] ? handle_pte_fault+0x2f4/0xd10
[  156.211731]  [&amp;lt;ffffffffa5431dbd&amp;gt;] do_filp_open+0x4d/0xb0
[  156.212562]  [&amp;lt;ffffffffa53caefd&amp;gt;] ? handle_mm_fault+0x39d/0x9b0
[  156.213420]  [&amp;lt;ffffffffa543f167&amp;gt;] ? __alloc_fd+0x47/0x170
[  156.214258]  [&amp;lt;ffffffffa541e0d7&amp;gt;] do_sys_open+0x137/0x240
[  156.215070]  [&amp;lt;ffffffffa59256d5&amp;gt;] ? system_call_after_swapgs+0xa2/0x146
[  156.215865]  [&amp;lt;ffffffffa541e1fe&amp;gt;] SyS_open+0x1e/0x20
[  156.216661]  [&amp;lt;ffffffffa592579b&amp;gt;] system_call_fastpath+0x22/0x27
[  156.217457]  [&amp;lt;ffffffffa59256e1&amp;gt;] ? system_call_after_swapgs+0xae/0x146
[  156.218236] Code: 63 c1 5a 49 8b 50 08 4d 8b 20 49 8b 40 10 4d 85 e4 0f 84 28 01 00 00 48 85 c0 0f 84 1f 01 00 00 49 63 46 20 48 8d 4a 01 4d 8b 06 &amp;lt;49&amp;gt; 8b 1c 04 4c 89 e0 65 49 0f c7 08 0f 94 c0 84 c0 74 ba 49 63
[  156.220699] RIP  [&amp;lt;ffffffffa53fae34&amp;gt;] kmem_cache_alloc+0x74/0x1f0
[  156.221468]  RSP &amp;lt;ffff97c377427d10&amp;gt;
[  156.222214] CR2: 0000007a00000002
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;The problem is in lustre_lnet_show_routing(). I verified this by applying the following patch:&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;diff --git a/lnet/utils/lnetctl.c b/lnet/utils/lnetctl.c
index c503223..62d34bb 100644
--- a/lnet/utils/lnetctl.c
+++ b/lnet/utils/lnetctl.c
@@ -1550,13 +1550,6 @@ static int jt_export(int argc, char **argv)
                err_rc = NULL;
        }

-       rc = lustre_lnet_show_routing(-1, &amp;amp;show_rc, &amp;amp;err_rc, backup);
-       if (rc != LUSTRE_CFG_RC_NO_ERR) {
-               cYAML_print_tree2file(stderr, err_rc);
-               cYAML_free_tree(err_rc);
-               err_rc = NULL;
-       }
-
        rc = lustre_lnet_show_peer(NULL, 2, -1, &amp;amp;show_rc, &amp;amp;err_rc, backup);
        if (rc != LUSTRE_CFG_RC_NO_ERR) {
                cYAML_print_tree2file(stderr, err_rc);
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;With that patch applied the node does not crash.&lt;/p&gt;

&lt;p&gt;I checked master, Lustre 2.12.0 and Lustre 2.11.0 and the problem exists in all those versions.&lt;/p&gt;</description>
                <environment></environment>
        <key id="55338">LU-12152</key>
            <summary>lnetctl export corrupts memory on routers</summary>
                <type id="1" iconUrl="https://jira.whamcloud.com/secure/viewavatar?size=xsmall&amp;avatarId=11303&amp;avatarType=issuetype">Bug</type>
                                            <priority id="3" iconUrl="https://jira.whamcloud.com/images/icons/priorities/major.svg">Major</priority>
                        <status id="5" iconUrl="https://jira.whamcloud.com/images/icons/statuses/resolved.png" description="A resolution has been taken, and it is awaiting verification by reporter. From here issues are either reopened, or are closed.">Resolved</status>
                    <statusCategory id="3" key="done" colorName="success"/>
                                    <resolution id="1">Fixed</resolution>
                                        <assignee username="ashehata">Amir Shehata</assignee>
                                    <reporter username="hornc">Chris Horn</reporter>
                        <labels>
                    </labels>
                <created>Wed, 3 Apr 2019 03:08:57 +0000</created>
                <updated>Sat, 8 Jun 2019 12:20:05 +0000</updated>
                            <resolved>Tue, 30 Apr 2019 12:54:54 +0000</resolved>
                                    <version>Lustre 2.11.0</version>
                    <version>Lustre 2.12.0</version>
                    <version>Lustre 2.13.0</version>
                                    <fixVersion>Lustre 2.13.0</fixVersion>
                    <fixVersion>Lustre 2.12.3</fixVersion>
                                        <due></due>
                            <votes>0</votes>
                                    <watches>2</watches>
                                                                            <comments>
                            <comment id="245145" author="hornc" created="Wed, 3 Apr 2019 03:54:37 +0000"  >&lt;p&gt;I&apos;m suspicious of this change from commit 40ae5dd2f2d0c6abe8e2f83ab9652873bee92485  &lt;a href=&quot;https://review.whamcloud.com/18469&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/18469&lt;/a&gt;&lt;/p&gt;

&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;diff --git a/lnet/lnet/router.c b/lnet/lnet/router.c
index 1f3119415f..3ae2ba3100 100644
--- a/lnet/lnet/router.c
+++ b/lnet/lnet/router.c
@@ -551,7 +551,7 @@ lnet_destroy_routes (void)

 int lnet_get_rtr_pool_cfg(int idx, struct lnet_ioctl_pool_cfg *pool_cfg)
 {
-	int i, rc = -ENOENT, lidx, j;
+	int i, rc = -ENOENT, j;

 	if (the_lnet.ln_rtrpools == NULL)
 		return rc;
@@ -560,20 +560,16 @@ int lnet_get_rtr_pool_cfg(int idx, struct lnet_ioctl_pool_cfg *pool_cfg)
 		lnet_rtrbufpool_t *rbp;

 		lnet_net_lock(LNET_LOCK_EX);
-		lidx = idx;
 		cfs_percpt_for_each(rbp, j, the_lnet.ln_rtrpools) {
-			if (lidx-- == 0) {
-				rc = 0;
-				pool_cfg-&amp;gt;pl_pools[i].pl_npages =
-					rbp[i].rbp_npages;
-				pool_cfg-&amp;gt;pl_pools[i].pl_nbuffers =
-					rbp[i].rbp_nbuffers;
-				pool_cfg-&amp;gt;pl_pools[i].pl_credits =
-					rbp[i].rbp_credits;
-				pool_cfg-&amp;gt;pl_pools[i].pl_mincredits =
-					rbp[i].rbp_mincredits;
-				break;
-			}
+			if (i++ != idx)
+				continue;
+
+			pool_cfg-&amp;gt;pl_pools[i].pl_npages = rbp[i].rbp_npages;
+			pool_cfg-&amp;gt;pl_pools[i].pl_nbuffers = rbp[i].rbp_nbuffers;
+			pool_cfg-&amp;gt;pl_pools[i].pl_credits = rbp[i].rbp_credits;
+			pool_cfg-&amp;gt;pl_pools[i].pl_mincredits = rbp[i].rbp_mincredits;
+			rc = 0;
+			break;
 		}
 		lnet_net_unlock(LNET_LOCK_EX);
 	}
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;</comment>
                            <comment id="245148" author="hornc" created="Wed, 3 Apr 2019 04:23:48 +0000"  >&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;+			if (i++ != idx)
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;Pretty sure that&apos;s causing an off-by-one error.&lt;/p&gt;</comment>
                            <comment id="245150" author="hornc" created="Wed, 3 Apr 2019 04:49:05 +0000"  >&lt;p&gt;So, some clearer naming would have made this bug a lot more obvious. Reading the code carefully, I believe that the &quot;idx&quot; parameter to lnet_get_rtr_pool_cfg() is actually a CPT (CPU partition) number. So what we actually want to do is make sure that &quot;j&quot; is equal to &quot;idx&quot; when we copy the buffer information.&lt;/p&gt;</comment>
                            <comment id="245224" author="gerrit" created="Thu, 4 Apr 2019 02:43:05 +0000"  >&lt;p&gt;Chris Horn (hornc@cray.com) uploaded a new patch: &lt;a href=&quot;https://review.whamcloud.com/34591&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/34591&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-12152&quot; title=&quot;lnetctl export corrupts memory on routers&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-12152&quot;&gt;&lt;del&gt;LU-12152&lt;/del&gt;&lt;/a&gt; lnet: Cleanup lnet_get_rtr_pool_cfg&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: master&lt;br/&gt;
Current Patch Set: 1&lt;br/&gt;
Commit: 51887ae69d8e59f2f4510b52c4e679b9b26e7165&lt;/p&gt;</comment>
                            <comment id="246481" author="gerrit" created="Tue, 30 Apr 2019 03:35:36 +0000"  >&lt;p&gt;Oleg Drokin (green@whamcloud.com) merged in patch &lt;a href=&quot;https://review.whamcloud.com/34591/&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/34591/&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-12152&quot; title=&quot;lnetctl export corrupts memory on routers&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-12152&quot;&gt;&lt;del&gt;LU-12152&lt;/del&gt;&lt;/a&gt; lnet: Cleanup lnet_get_rtr_pool_cfg&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: master&lt;br/&gt;
Current Patch Set: &lt;br/&gt;
Commit: 187117fd94e4904c168de02fc439b41a1fcc3e48&lt;/p&gt;</comment>
                            <comment id="246514" author="pjones" created="Tue, 30 Apr 2019 12:54:54 +0000"  >&lt;p&gt;Landed for 2.13&lt;/p&gt;</comment>
                            <comment id="247470" author="gerrit" created="Tue, 21 May 2019 19:00:42 +0000"  >&lt;p&gt;Minh Diep (mdiep@whamcloud.com) uploaded a new patch: &lt;a href=&quot;https://review.whamcloud.com/34922&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/34922&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-12152&quot; title=&quot;lnetctl export corrupts memory on routers&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-12152&quot;&gt;&lt;del&gt;LU-12152&lt;/del&gt;&lt;/a&gt; lnet: Cleanup lnet_get_rtr_pool_cfg&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: b2_12&lt;br/&gt;
Current Patch Set: 1&lt;br/&gt;
Commit: 544877453fdf588de6e7c80a894cd541ccffd478&lt;/p&gt;</comment>
                            <comment id="248793" author="gerrit" created="Sat, 8 Jun 2019 02:36:51 +0000"  >&lt;p&gt;Oleg Drokin (green@whamcloud.com) merged in patch &lt;a href=&quot;https://review.whamcloud.com/34922/&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/34922/&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-12152&quot; title=&quot;lnetctl export corrupts memory on routers&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-12152&quot;&gt;&lt;del&gt;LU-12152&lt;/del&gt;&lt;/a&gt; lnet: Cleanup lnet_get_rtr_pool_cfg&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: b2_12&lt;br/&gt;
Current Patch Set: &lt;br/&gt;
Commit: b5cbe49a16b68ad60a8e7293d1b5450e0f97a430&lt;/p&gt;</comment>
                    </comments>
                    <attachments>
                    </attachments>
                <subtasks>
                    </subtasks>
                <customfields>
                                                                                                                                                                                            <customfield id="customfield_10890" key="com.atlassian.jira.plugins.jira-development-integration-plugin:devsummary">
                        <customfieldname>Development</customfieldname>
                        <customfieldvalues>
                            
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        <customfield id="customfield_10390" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                        <customfieldname>Rank</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>1|i00efb:</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10090" key="com.pyxis.greenhopper.jira:gh-global-rank">
                        <customfieldname>Rank (Obsolete)</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>9223372036854775807</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                                                                                            <customfield id="customfield_10060" key="com.atlassian.jira.plugin.system.customfieldtypes:select">
                        <customfieldname>Severity</customfieldname>
                        <customfieldvalues>
                                <customfieldvalue key="10022"><![CDATA[3]]></customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        </customfields>
    </item>
</channel>
</rss>