<!-- 
RSS generated by JIRA (9.4.14#940014-sha1:734e6822bbf0d45eff9af51f82432957f73aa32c) at Sat Feb 10 02:32:05 UTC 2024

It is possible to restrict the fields that are returned in this document by specifying the 'field' parameter in your request.
For example, to request only the issue key and summary, append 'field=key&field=summary' to the URL of your request.
-->
<rss version="0.92" >
<channel>
    <title>Whamcloud Community JIRA</title>
    <link>https://jira.whamcloud.com</link>
    <description>This file is an XML representation of an issue</description>
    <language>en-us</language>    <build-info>
        <version>9.4.14</version>
        <build-number>940014</build-number>
        <build-date>05-12-2023</build-date>
    </build-info>


<item>
            <title>[LU-10103] LBUG: lib-move.c:2121:lnet_send()) ASSERTION( msg-&gt;msg_txpeer == ((void *)0) ) failed</title>
                <link>https://jira.whamcloud.com/browse/LU-10103</link>
                <project id="10000" key="LU">Lustre</project>
                    <description>&lt;p&gt;Testing  &lt;a href=&quot;https://review.whamcloud.com/29341&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/29341&lt;/a&gt;. (Revert patch for &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-9810&quot; title=&quot;Melanox OFED 4.1 support&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-9810&quot;&gt;&lt;del&gt;LU-9810&lt;/del&gt;&lt;/a&gt; to determine if preferring&lt;br/&gt;
Fast Reg breaks mounting targets.) &lt;br/&gt;
System mounts fine (&lt;a href=&quot;https://jira.whamcloud.com/browse/LU-10068&quot; title=&quot;OST fails to mount:LustreError: 14558:0:(pack_generic.c:588:__lustre_unpack_msg()) message length 0 too small for magic/version check&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-10068&quot;&gt;&lt;del&gt;LU-10068&lt;/del&gt;&lt;/a&gt;) - but after a few hours, routers have LBUG:&lt;/p&gt;
&lt;div class=&quot;code panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;codeContent panelContent&quot;&gt;
&lt;pre class=&quot;code-java&quot;&gt;Oct  5 16:25:31 soak-14 kernel: LNet: 2153:0:(o2iblnd_modparams.c:253:kiblnd_tunables_setup()) Invalid map_on_demand (0), expects 1 - 256. Using &lt;span class=&quot;code-keyword&quot;&gt;default&lt;/span&gt; of 256
Oct  5 16:25:31 soak-14 kernel: LNet: Using FMR &lt;span class=&quot;code-keyword&quot;&gt;for&lt;/span&gt; registration
Oct  5 16:25:31 soak-14 kernel: LNetError: 4:0:(o2iblnd_cb.c:2304:kiblnd_passive_connect()) Can&apos;t accept conn from 192.168.1.121@o2ib on NA (ib1:0:192.168.1.114): bad dst nid 192.168.1.114@o2ib
Oct  5 16:25:31 soak-14 kernel: LNet: Added LNI 192.168.1.114@o2ib [8/256/0/180]
Oct  5 16:25:31 soak-14 kernel: LNet: Added LNI 172.16.1.14@o2ib1 [128/2048/0/180]
Oct  5 16:25:31 soak-14 sshd[2130]: Received disconnect from 10.10.1.116 port 38944:11: disconnected by user
Oct  5 16:25:31 soak-14 sshd[2130]: Disconnected from 10.10.1.116 port 38944
Oct  5 16:25:31 soak-14 sshd[2130]: pam_unix(sshd:session): session closed &lt;span class=&quot;code-keyword&quot;&gt;for&lt;/span&gt; user root
Oct  5 16:25:31 soak-14 systemd-logind: Removed session 4.
Oct  5 16:25:31 soak-14 systemd: Removed slice User Slice of root.
Oct  5 16:25:31 soak-14 systemd: Stopping User Slice of root.
Oct  5 16:37:04 soak-14 kernel: LNetError: 1979:0:(lib-move.c:2121:lnet_send()) ASSERTION( msg-&amp;gt;msg_txpeer == ((void *)0) ) failed:
Oct  5 16:37:04 soak-14 kernel: LNetError: 1979:0:(lib-move.c:2121:lnet_send()) LBUG
Oct  5 16:37:04 soak-14 kernel: Pid: 1979, comm: lnet_discovery
Oct  5 16:37:05 soak-14 kernel: #012Call Trace:
Oct  5 16:37:05 soak-14 kernel: [&amp;lt;ffffffffc09ec7ae&amp;gt;] libcfs_call_trace+0x4e/0x60 [libcfs]
Oct  5 16:37:05 soak-14 kernel: [&amp;lt;ffffffffc09ec83c&amp;gt;] lbug_with_loc+0x4c/0xb0 [libcfs]
Oct  5 16:37:05 soak-14 kernel: [&amp;lt;ffffffffc0a7179e&amp;gt;] lnet_send+0x17e/0x180 [lnet]
Oct  5 16:37:05 soak-14 kernel: [&amp;lt;ffffffffc0a80ef8&amp;gt;] lnet_peer_discovery_complete+0x178/0x320 [lnet]
Oct  5 16:37:05 soak-14 kernel: [&amp;lt;ffffffffc0a868a8&amp;gt;] lnet_peer_discovery+0x588/0x1030 [lnet]
Oct  5 16:37:05 soak-14 kernel: [&amp;lt;ffffffff810b1910&amp;gt;] ? autoremove_wake_function+0x0/0x40
Oct  5 16:37:05 soak-14 kernel: [&amp;lt;ffffffffc0a86320&amp;gt;] ? lnet_peer_discovery+0x0/0x1030 [lnet]
Oct  5 16:37:05 soak-14 kernel: [&amp;lt;ffffffff810b098f&amp;gt;] kthread+0xcf/0xe0
Oct  5 16:37:05 soak-14 kernel: [&amp;lt;ffffffff810b08c0&amp;gt;] ? kthread+0x0/0xe0
Oct  5 16:37:05 soak-14 kernel: [&amp;lt;ffffffff816b4f58&amp;gt;] ret_from_fork+0x58/0x90
Oct  5 16:37:05 soak-14 kernel: [&amp;lt;ffffffff810b08c0&amp;gt;] ? kthread+0x0/0xe0
Oct  5 16:37:05 soak-14 kernel:
Oct  5 16:37:05 soak-14 kernel: Kernel panic - not syncing: LBUG
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;</description>
                <environment>Soak test cluster</environment>
        <key id="48647">LU-10103</key>
            <summary>LBUG: lib-move.c:2121:lnet_send()) ASSERTION( msg-&gt;msg_txpeer == ((void *)0) ) failed</summary>
                <type id="1" iconUrl="https://jira.whamcloud.com/secure/viewavatar?size=xsmall&amp;avatarId=11303&amp;avatarType=issuetype">Bug</type>
                                            <priority id="3" iconUrl="https://jira.whamcloud.com/images/icons/priorities/major.svg">Major</priority>
                        <status id="1" iconUrl="https://jira.whamcloud.com/images/icons/statuses/open.png" description="The issue is open and ready for the assignee to start work on it.">Open</status>
                    <statusCategory id="2" key="new" colorName="default"/>
                                    <resolution id="-1">Unresolved</resolution>
                                        <assignee username="ashehata">Amir Shehata</assignee>
                                    <reporter username="cliffw">Cliff White</reporter>
                        <labels>
                            <label>soak</label>
                    </labels>
                <created>Fri, 6 Oct 2017 23:41:30 +0000</created>
                <updated>Sat, 23 Mar 2019 12:53:05 +0000</updated>
                                            <version>Lustre 2.10.2</version>
                                                        <due></due>
                            <votes>0</votes>
                                    <watches>4</watches>
                                                                            <comments>
                            <comment id="236262" author="gerrit" created="Fri, 2 Nov 2018 21:15:33 +0000"  >&lt;p&gt;Amir Shehata (ashehata@whamcloud.com) uploaded a new patch: &lt;a href=&quot;https://review.whamcloud.com/33561&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/33561&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-10103&quot; title=&quot;LBUG: lib-move.c:2121:lnet_send()) ASSERTION( msg-&amp;gt;msg_txpeer == ((void *)0) ) failed&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-10103&quot;&gt;LU-10103&lt;/a&gt; lnet: ensure txpeer = NULL when sending&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: master&lt;br/&gt;
Current Patch Set: 1&lt;br/&gt;
Commit: 746d4c0c11a831acf7f32f7b445ac44b44237597&lt;/p&gt;</comment>
                            <comment id="244573" author="sthiell" created="Sat, 23 Mar 2019 01:28:48 +0000"  >&lt;p&gt;Same issue with 2.12.0 +&#160;patch &quot;&lt;a href=&quot;https://jira.whamcloud.com/browse/LU-12065&quot; title=&quot;Client got evicted when  lock callback timer expired  on OSS &quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-12065&quot;&gt;&lt;del&gt;LU-12065&lt;/del&gt;&lt;/a&gt; lnd: increase CQ entries&quot;&lt;/p&gt;

&lt;p&gt;This happened only on one of our 12 LNet routers that we upgraded in a rolling update fashion today to include patch from &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-12065&quot; title=&quot;Client got evicted when  lock callback timer expired  on OSS &quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-12065&quot;&gt;&lt;del&gt;LU-12065&lt;/del&gt;&lt;/a&gt;. No big deal I guess. And looks like a patch is ready but hasn&apos;t landed yet.&lt;/p&gt;

&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;Mar 22 17:29:15 sh-rtr-oak-1-1 kernel: ------------[ cut here ]------------     
Mar 22 17:29:15 sh-rtr-oak-1-1 kernel: WARNING: CPU: 4 PID: 87771 at lib/list_debug.c:62 __list_del_entry+0x82/0xd0
Mar 22 17:29:15 sh-rtr-oak-1-1 kernel: list_del corruption. next-&amp;gt;prev should be ffff8fa9f6c59c10, but was ffff8fa9f5a1a1a0
Mar 22 17:29:15 sh-rtr-oak-1-1 kernel: Modules linked in: ko2iblnd(OE) lnet(OE) libcfs(OE) rdma_ucm(OE) ib_ucm(OE) rdma_cm(OE) iw_cm(OE) ib_ipoib(OE) ib_cm(OE) ib_umad(OE) mlx5_fpga_tools(OE) mlx5_ib(OE) ib_uverbs(OE) mlx5_core(OE) mlxfw(OE) mlx4_en(OE) dell_rbu sb_edac intel_powerclamp coretemp intel_rapl iosf_mbi kvm_intel kvm irqbypass crc32_pclmul ghash_clmulni_intel aesni_intel lrw gf128mul glue_helper ablk_helper mxm_wmi iTCO_wdt iTCO_vendor_support cryptd dcdbas cdc_ether usbnet mii mgag200 i2c_algo_bit ttm drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops drm drm_panel_orientation_quirks pcspkr sg ipmi_si ipmi_devintf ipmi_msghandler wmi acpi_power_meter mei_me mei lpc_ich sunrpc ip_tables xfs libcrc32c mlx4_ib(OE) ib_core(OE) sd_mod crc_t10dif crct10dif_generic crct10dif_pclmul crct10dif_common crc32c_intel
Mar 22 17:29:15 sh-rtr-oak-1-1 kernel: ahci mlx4_core(OE) libahci tg3 mlx_compat(OE) megaraid_sas ptp libata devlink pps_core
Mar 22 17:29:15 sh-rtr-oak-1-1 kernel: CPU: 4 PID: 87771 Comm: lnet_discovery Kdump: loaded Tainted: G           OE  ------------   3.10.0-957.10.1.el7.x86_64 #1
Mar 22 17:29:15 sh-rtr-oak-1-1 kernel: Hardware name: Dell Inc. PowerEdge R630/0CNCJW, BIOS 2.8.0 005/17/2018
Mar 22 17:29:15 sh-rtr-oak-1-1 kernel: Call Trace:                              
Mar 22 17:29:15 sh-rtr-oak-1-1 kernel: [&amp;lt;ffffffff84b62e41&amp;gt;] dump_stack+0x19/0x1b
Mar 22 17:29:15 sh-rtr-oak-1-1 kernel: [&amp;lt;ffffffff84497688&amp;gt;] __warn+0xd8/0x100   
Mar 22 17:29:15 sh-rtr-oak-1-1 kernel: [&amp;lt;ffffffff8449770f&amp;gt;] warn_slowpath_fmt+0x5f/0x80
Mar 22 17:29:15 sh-rtr-oak-1-1 kernel: [&amp;lt;ffffffffc08bc0e4&amp;gt;] ? lnet_ni_send+0x44/0xd0 [lnet]
Mar 22 17:29:15 sh-rtr-oak-1-1 kernel: [&amp;lt;ffffffff84795112&amp;gt;] __list_del_entry+0x82/0xd0
Mar 22 17:29:15 sh-rtr-oak-1-1 kernel: [&amp;lt;ffffffffc08d5352&amp;gt;] lnet_peer_discovery_complete+0x1a2/0x340 [lnet]
Mar 22 17:29:15 sh-rtr-oak-1-1 kernel: [&amp;lt;ffffffffc08da0a0&amp;gt;] lnet_peer_discovery+0x6c0/0x1150 [lnet]
Mar 22 17:29:15 sh-rtr-oak-1-1 kernel: [&amp;lt;ffffffff844c2d40&amp;gt;] ? wake_up_atomic_t+0x30/0x30
Mar 22 17:29:15 sh-rtr-oak-1-1 kernel: [&amp;lt;ffffffffc08d99e0&amp;gt;] ? lnet_peer_merge_data+0xde0/0xde0 [lnet]
Mar 22 17:29:15 sh-rtr-oak-1-1 kernel: [&amp;lt;ffffffff844c1c71&amp;gt;] kthread+0xd1/0xe0   
Mar 22 17:29:15 sh-rtr-oak-1-1 kernel: [&amp;lt;ffffffff844c1ba0&amp;gt;] ? insert_kthread_work+0x40/0x40
Mar 22 17:29:15 sh-rtr-oak-1-1 kernel: [&amp;lt;ffffffff84b75c37&amp;gt;] ret_from_fork_nospec_begin+0x21/0x21
Mar 22 17:29:15 sh-rtr-oak-1-1 kernel: [&amp;lt;ffffffff844c1ba0&amp;gt;] ? insert_kthread_work+0x40/0x40
Mar 22 17:29:15 sh-rtr-oak-1-1 kernel: ---[ end trace d6bf07925ff146d5 ]---     
Mar 22 17:29:15 sh-rtr-oak-1-1 kernel: LNetError: 87771:0:(lib-move.c:2645:lnet_send()) ASSERTION( msg-&amp;gt;msg_txpeer == ((void *)0) ) failed: 
Mar 22 17:29:15 sh-rtr-oak-1-1 kernel: LNetError: 87771:0:(lib-move.c:2645:lnet_send()) LBUG
Mar 22 17:29:15 sh-rtr-oak-1-1 kernel: Pid: 87771, comm: lnet_discovery 3.10.0-957.10.1.el7.x86_64 #1 SMP Mon Mar 18 15:06:45 UTC 2019
Mar 22 17:29:15 sh-rtr-oak-1-1 kernel: Call Trace:                              
Mar 22 17:29:15 sh-rtr-oak-1-1 kernel: [&amp;lt;ffffffffc08217cc&amp;gt;] libcfs_call_trace+0x8c/0xc0 [libcfs]
Mar 22 17:29:15 sh-rtr-oak-1-1 kernel: [&amp;lt;ffffffffc082187c&amp;gt;] lbug_with_loc+0x4c/0xa0 [libcfs]
Mar 22 17:29:15 sh-rtr-oak-1-1 kernel: [&amp;lt;ffffffffc08c3ec8&amp;gt;] lnet_send+0x1b8/0x1c0 [lnet]
Mar 22 17:29:15 sh-rtr-oak-1-1 kernel: [&amp;lt;ffffffffc08d5328&amp;gt;] lnet_peer_discovery_complete+0x178/0x340 [lnet]
Mar 22 17:29:15 sh-rtr-oak-1-1 kernel: [&amp;lt;ffffffffc08da0a0&amp;gt;] lnet_peer_discovery+0x6c0/0x1150 [lnet]
Mar 22 17:29:15 sh-rtr-oak-1-1 kernel: [&amp;lt;ffffffff844c1c71&amp;gt;] kthread+0xd1/0xe0   
Mar 22 17:29:15 sh-rtr-oak-1-1 kernel: [&amp;lt;ffffffff84b75c37&amp;gt;] ret_from_fork_nospec_end+0x0/0x39
Mar 22 17:29:15 sh-rtr-oak-1-1 kernel: [&amp;lt;ffffffffffffffff&amp;gt;] 0xffffffffffffffff  
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;</comment>
                    </comments>
                    <attachments>
                    </attachments>
                <subtasks>
                    </subtasks>
                <customfields>
                                                                                                                                                                                            <customfield id="customfield_10890" key="com.atlassian.jira.plugins.jira-development-integration-plugin:devsummary">
                        <customfieldname>Development</customfieldname>
                        <customfieldvalues>
                            
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        <customfield id="customfield_10390" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                        <customfieldname>Rank</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>1|hzzljj:</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10090" key="com.pyxis.greenhopper.jira:gh-global-rank">
                        <customfieldname>Rank (Obsolete)</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>9223372036854775807</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                                                                                            <customfield id="customfield_10060" key="com.atlassian.jira.plugin.system.customfieldtypes:select">
                        <customfieldname>Severity</customfieldname>
                        <customfieldvalues>
                                <customfieldvalue key="10022"><![CDATA[3]]></customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        </customfields>
    </item>
</channel>
</rss>