<!-- 
RSS generated by JIRA (9.4.14#940014-sha1:734e6822bbf0d45eff9af51f82432957f73aa32c) at Sat Feb 10 03:34:24 UTC 2024

It is possible to restrict the fields that are returned in this document by specifying the 'field' parameter in your request.
For example, to request only the issue key and summary append 'field=key&field=summary' to the URL of your request.
-->
<rss version="0.92" >
<channel>
    <title>Whamcloud Community JIRA</title>
    <link>https://jira.whamcloud.com</link>
    <description>This file is an XML representation of an issue</description>
    <language>en-us</language>    <build-info>
        <version>9.4.14</version>
        <build-number>940014</build-number>
        <build-date>05-12-2023</build-date>
    </build-info>


<item>
            <title>[LU-17313] sanity-lnet test_205/test_220: LNetError: (lib-md.c:281:lnet_assert_handler_unused()) ASSERTION( md-&gt;md_handler != handler )</title>
                <link>https://jira.whamcloud.com/browse/LU-17313</link>
                <project id="10000" key="LU">Lustre</project>
                    <description>&lt;p&gt;This issue was created by maloo for Arshad &amp;lt;arshad.hussain@aeoncomputing.com&amp;gt;&lt;/p&gt;

&lt;p&gt;This issue relates to the following test suite run: &lt;a href=&quot;https://testing.whamcloud.com/test_sets/1432600f-ba6e-45a0-a998-c2b871760b9b&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://testing.whamcloud.com/test_sets/1432600f-ba6e-45a0-a998-c2b871760b9b&lt;/a&gt;&lt;/p&gt;

&lt;p&gt;test_205 failed with the following error:&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;trevis-33vm2 crashed during sanity-lnet test_205
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;
&lt;p&gt;Test session details:&lt;br/&gt;
clients: &lt;a href=&quot;https://build.whamcloud.com/job/lustre-reviews/100460&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://build.whamcloud.com/job/lustre-reviews/100460&lt;/a&gt; - 4.18.0-477.27.1.el8_8.x86_64&lt;br/&gt;
servers: &lt;a href=&quot;https://build.whamcloud.com/job/lustre-reviews/100460&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://build.whamcloud.com/job/lustre-reviews/100460&lt;/a&gt; - 4.18.0-477.27.1.el8_lustre.x86_64&lt;/p&gt;

&lt;p&gt;&lt;b&gt;Client Output:&lt;/b&gt;&lt;/p&gt;

&lt;p&gt;&#160;&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;=================================== 16:17:04 \(1700756224\)^M
[21692.122539] Lustre: DEBUG MARKER: == sanity-lnet test 204: Check no health or resends for single-rail local failures ========================================================== 16:17:04 (1700756224)^M
[21692.403638] LNet: 1467339:0:(lib-ptl.c:956:lnet_clear_lazy_portal()) Active lazy portal 0 on exit^M
[21692.405821] LNet: Removed LNI 10.1.2.3@tcp^M
[21692.413980] Lustre: DEBUG MARKER: lsmod | grep lnet &amp;gt; /dev/null &amp;amp;&amp;amp;^M
[21692.413980] lctl dl | grep &apos; ST &apos; || true^M
[21692.511140] Key type .llcrypt unregistered^M
[21692.512020] Key type ._llcrypt unregistered^M
[21692.800163] Key type ._llcrypt registered^M
[21692.801024] Key type .llcrypt registered^M
[21692.832028] libcfs: HW NUMA nodes: 1, HW CPU cores: 2, npartitions: 2^M
[21692.836702] alg: No test for adler32 (adler32-zlib)^M
[21693.611267] Lustre: DEBUG MARKER: /usr/sbin/lnetctl lnet configure --all^M
[21693.617623] LNet: Added LNI 10.240.39.5@tcp [8/256/0/180]^M
[21693.618922] LNet: Accept all, port 7988^M
[21694.553419] Lustre: DEBUG MARKER: /usr/sbin/lnetctl discover 10.240.38.123@tcp^M
[21694.846378] Lustre: DEBUG MARKER: /usr/sbin/lnetctl ping 10.240.38.123@tcp^M
[21694.850290] LNet: There was an unexpected network error while writing to 10.240.38.123: rc = -22^M
[21694.896561] LNet: 1467564:0:(api-ni.c:358:recovery_interval_set()) &apos;lnet_recovery_interval&apos; has been deprecated^M
[21694.911244] Lustre: DEBUG MARKER: /usr/sbin/lnetctl peer set --health 1000 --all^M
[21694.917894] Lustre: DEBUG MARKER: /usr/sbin/lnetctl net set --health 1000 --all^M
[21694.971823] Lustre: DEBUG MARKER: /usr/sbin/lnetctl ping 10.240.38.123@tcp^M
[21695.052546] Lustre: DEBUG MARKER: /usr/sbin/lnetctl peer set --health 1000 --all^M
[21695.059301] Lustre: DEBUG MARKER: /usr/sbin/lnetctl net set --health 1000 --all^M
[21695.111848] Lustre: DEBUG MARKER: /usr/sbin/lnetctl ping 10.240.38.123@tcp^M
[21695.199325] Lustre: DEBUG MARKER: /usr/sbin/lnetctl peer set --health 1000 --all^M
[21695.206117] Lustre: DEBUG MARKER: /usr/sbin/lnetctl net set --health 1000 --all^M
[21695.258286] Lustre: DEBUG MARKER: /usr/sbin/lnetctl ping 10.240.38.123@tcp^M
[21695.382743] LNet: There was an unexpected network error while writing to 10.240.38.123: rc = -22^M
[21695.384355] LNet: Skipped 2 previous similar messages^M
[21695.428743] LNet: 1467750:0:(api-ni.c:358:recovery_interval_set()) &apos;lnet_recovery_interval&apos; has been deprecated^M
[21695.430652] LNet: 1467750:0:(api-ni.c:358:recovery_interval_set()) Skipped 2 previous similar messages^M
[21695.445043] Lustre: DEBUG MARKER: /usr/sbin/lnetctl peer set --health 1000 --all^M
[21695.451861] Lustre: DEBUG MARKER: /usr/sbin/lnetctl net set --health 1000 --all^M
[21695.506046] Lustre: DEBUG MARKER: /usr/sbin/lnetctl ping 10.240.38.123@tcp^M
[21695.607926] Lustre: DEBUG MARKER: /usr/sbin/lnetctl peer set --health 1000 --all^M
[21695.614630] Lustre: DEBUG MARKER: /usr/sbin/lnetctl net set --health 1000 --all^M
[21695.667799] Lustre: DEBUG MARKER: /usr/sbin/lnetctl ping 10.240.38.123@tcp^M
[21695.808409] Lustre: DEBUG MARKER: /usr/sbin/lnetctl peer set --health 1000 --all^M
[21695.815303] Lustre: DEBUG MARKER: /usr/sbin/lnetctl net set --health 1000 --all^M
[21695.828815] Lustre: DEBUG MARKER: lsmod | grep lnet &amp;gt; /dev/null &amp;amp;&amp;amp;^M
[21695.828815] lctl dl | grep &apos; ST &apos; || true^M
[21695.860860] LNet: 1467904:0:(lib-ptl.c:956:lnet_clear_lazy_portal()) Active lazy portal 0 on exit^M
[21695.924073] LNetError: 1467470:0:(socklnd.c:1261:ksocknal_create_conn()) Not creating conn 12345-10.240.38.123@tcp type 2: peer_ni/conn_cb removed^M
[21695.926432] LNetError: 11e-e: Unexpected error -116 connecting to 10.240.38.123@tcp at host 10.240.38.123:7988^M
[21696.874054] LNet: Removed LNI 10.240.39.5@tcp^M
[21696.938956] Key type .llcrypt unregistered^M
[21696.939795] Key type ._llcrypt unregistered^M
[21705.904334] Key type ._llcrypt registered^M
[21705.905266] Key type .llcrypt registered^M
[21706.693491] Key type .llcrypt unregistered^M
[21706.694372] Key type ._llcrypt unregistered^M
[21706.974304] Key type ._llcrypt registered^M
[21706.978361] Key type .llcrypt registered^M
[21707.011233] libcfs: HW NUMA nodes: 1, HW CPU cores: 2, npartitions: 2^M
[21707.015533] alg: No test for adler32 (adler32-zlib)^M
[21707.790659] Lustre: DEBUG MARKER: /usr/sbin/lnetctl lnet configure --all^M
[21707.797247] LNet: Added LNI 10.240.39.5@tcp [8/256/0/180]^M
[21707.798822] LNet: Accept all, port 7988^M
[21708.368775] Lustre: DEBUG MARKER: /usr/sbin/lnetctl discover 10.240.38.123@tcp^M
[21715.241659] Lustre: DEBUG MARKER: /usr/sbin/lnetctl lnet configure^M
[21715.248115] Lustre: DEBUG MARKER: /usr/sbin/lnetctl net add --net tcp1 --if eth0^M
[21715.252231] LNet: Added LNI 10.240.39.5@tcp1 [8/256/0/180]^M
[21715.441585] Lustre: DEBUG MARKER: /usr/sbin/lnetctl ping 10.240.38.123@tcp^M
[21715.445581] LNet: There was an unexpected network error while writing to 10.240.38.123: rc = -22^M&#160;
[21715.447305] LNet: 1 local NIs in recovery (showing 1): 10.240.39.5@tcp^M
[21715.637913] LNet: 1469390:0:(api-ni.c:358:recovery_interval_set()) &apos;lnet_recovery_interval&apos; has been deprecated^M
[21715.652679] Lustre: DEBUG MARKER: /usr/sbin/lnetctl peer set --health 1000 --all^M
[21715.659414] Lustre: DEBUG MARKER: /usr/sbin/lnetctl net set --health 1000 --all^M
[21715.729247] Lustre: DEBUG MARKER: /usr/sbin/lnetctl ping 10.240.38.123@tcp^M
[21715.949323] Lustre: DEBUG MARKER: /usr/sbin/lnetctl peer set --health 1000 --all^M
[21715.956120] Lustre: DEBUG MARKER: /usr/sbin/lnetctl net set --health 1000 --all^M
[21716.026111] Lustre: DEBUG MARKER: /usr/sbin/lnetctl ping 10.240.38.123@tcp^M
[21716.029989] LNet: There was an unexpected network error while writing to 10.240.38.123: rc = -22^M&#160;
[21716.031631] LNet: Skipped 5 previous similar messages^M
[21716.231601] LNet: 1469526:0:(api-ni.c:358:recovery_interval_set()) &apos;lnet_recovery_interval&apos; has been deprecated^M
[21716.233446] LNet: 1469526:0:(api-ni.c:358:recovery_interval_set()) Skipped 1 previous similar message^M
[21716.248327] Lustre: DEBUG MARKER: /usr/sbin/lnetctl peer set --health 1000 --all^M
[21716.255340] Lustre: DEBUG MARKER: /usr/sbin/lnetctl net set --health 1000 --all^M
[21716.326242] Lustre: DEBUG MARKER: /usr/sbin/lnetctl ping 10.240.38.123@tcp^M
[21716.628909] Lustre: DEBUG MARKER: /usr/sbin/lnetctl peer set --health 1000 --all^M
[21716.636214] Lustre: DEBUG MARKER: /usr/sbin/lnetctl net set --health 1000 --all^M
[21716.707624] Lustre: DEBUG MARKER: /usr/sbin/lnetctl ping 10.240.38.123@tcp^M
[21717.005150] Lustre: DEBUG MARKER: /usr/sbin/lnetctl peer set --health 1000 --all^M
[21717.011880] Lustre: DEBUG MARKER: /usr/sbin/lnetctl net set --health 1000 --all^M
[21717.079281] Lustre: DEBUG MARKER: /usr/sbin/lnetctl ping 10.240.38.123@tcp^M
[21717.083320] LNet: There was an unexpected network error while writing to 10.240.38.123: rc = -22^M&#160;
[21717.084987] LNet: Skipped 9 previous similar messages^M
[21717.142283] Lustre: DEBUG MARKER: /usr/sbin/lnetctl peer set --health 1000 --all^M
[21717.149327] Lustre: DEBUG MARKER: /usr/sbin/lnetctl net set --health 1000 --all^M
[21720.087147] Lustre: DEBUG MARKER: lsmod | grep lnet &amp;gt; /dev/null &amp;amp;&amp;amp;^M
[21720.087147] lctl dl | grep &apos; ST &apos; || true^M
[21720.119727] LNetError: 1469074:0:(lib-md.c:281:lnet_assert_handler_unused()) ASSERTION( md-&amp;gt;md_handler != handler ) failed: ^M
[21720.121839] LNetError: 1469074:0:(lib-md.c:281:lnet_assert_handler_unused()) LBUG^M
[21720.123252] Pid: 1469074, comm: lnet_discovery 4.18.0-477.27.1.el8_8.x86_64 #1 SMP Thu Aug 31 10:29:22 EDT 2023^M
[21720.125123] Call Trace TBD:^M
[21720.125763] [&amp;lt;0&amp;gt;] libcfs_call_trace+0x63/0x90 [libcfs]^M
[21720.126772] [&amp;lt;0&amp;gt;] lbug_with_loc+0x3f/0x70 [libcfs]^M
[21720.127754] [&amp;lt;0&amp;gt;] lnet_assert_handler_unused+0xa0/0xd0 [lnet]^M
[21720.128903] [&amp;lt;0&amp;gt;] lnet_peer_discovery+0x14fb/0x1730 [lnet]^M
[21720.129985] [&amp;lt;0&amp;gt;] kthread+0x134/0x150^M
[21720.130761] [&amp;lt;0&amp;gt;] ret_from_fork+0x35/0x40^M
[21720.131608] Kernel panic - not syncing: LBUG^M
[21720.132444] CPU: 0 PID: 1469074 Comm: lnet_discovery Kdump: loaded Tainted: G &#160; &#160; &#160; &#160;W &#160;OE &#160; &#160;--------- - &#160;- 4.18.0-477.27.1.el8_8.x86_64 #1^M
[21720.134714] Hardware name: Red Hat KVM, BIOS 0.5.1 01/01/2011^M
[21720.134720] LNetError: 1469071:0:(lib-move.c:4933:lnet_parse()) 10.240.38.123@tcp, src 10.240.38.123@tcp: Dropping ACK (error -108 looking up sender)^M
[21720.135832] Call Trace:^M
[21720.135865] &#160;dump_stack+0x41/0x60^M
[21720.139358] &#160;panic+0xe7/0x2ac^M
[21720.139973] &#160;? ret_from_fork+0x35/0x40^M
[21720.140690] &#160;? lnet_discovery_event_reply+0xb00/0xb00 [lnet]^M
[21720.141763] &#160;lbug_with_loc.cold.8+0x18/0x18 [libcfs]^M
[21720.142713] &#160;lnet_assert_handler_unused+0xa0/0xd0 [lnet]^M
[21720.143717] &#160;lnet_peer_discovery+0x14fb/0x1730 [lnet]^M
[21720.144692] &#160;? finish_wait+0x80/0x80^M
[21720.145393] &#160;? lnet_peer_merge_data+0x1110/0x1110 [lnet]^M
[21720.146400] &#160;kthread+0x134/0x150^M
[21720.147027] &#160;? set_kthread_struct+0x50/0x50^M
[21720.147820] &#160;ret_from_fork+0x35/0x40^M
[21720.087147] lctl dl | grep &apos; ST &apos; || true^M
[21720.119727] LNetError: 1469074:0:(lib-md.c:281:lnet_assert_handler_unused()) ASSERTION( md-&amp;gt;md_handler != handler ) failed: ^M
[21720.121839] LNetError: 1469074:0:(lib-md.c:281:lnet_assert_handler_unused()) LBUG^M
[21720.123252] Pid: 1469074, comm: lnet_discovery 4.18.0-477.27.1.el8_8.x86_64 #1 SMP Thu Aug 31 10:29:22 EDT 2023^M
[21720.125123] Call Trace TBD:^M
[21720.125763] [&amp;lt;0&amp;gt;] libcfs_call_trace+0x63/0x90 [libcfs]^M
[21720.126772] [&amp;lt;0&amp;gt;] lbug_with_loc+0x3f/0x70 [libcfs]^M
[21720.127754] [&amp;lt;0&amp;gt;] lnet_assert_handler_unused+0xa0/0xd0 [lnet]^M
[21720.128903] [&amp;lt;0&amp;gt;] lnet_peer_discovery+0x14fb/0x1730 [lnet]^M
[21720.129985] [&amp;lt;0&amp;gt;] kthread+0x134/0x150^M
[21720.130761] [&amp;lt;0&amp;gt;] ret_from_fork+0x35/0x40^M
[21720.131608] Kernel panic - not syncing: LBUG^M
[21720.132444] CPU: 0 PID: 1469074 Comm: lnet_discovery Kdump: loaded Tainted: G &#160; &#160; &#160; &#160;W &#160;OE &#160; &#160;--------- - &#160;- 4.18.0-477.27.1.el8_8.x86_64 #1^M
[21720.134714] Hardware name: Red Hat KVM, BIOS 0.5.1 01/01/2011^M
[21720.134720] LNetError: 1469071:0:(lib-move.c:4933:lnet_parse()) 10.240.38.123@tcp, src 10.240.38.123@tcp: Dropping ACK (error -108 looking up sender)^M
[21720.135832] Call Trace:^M
[21720.135865] &#160;dump_stack+0x41/0x60^M
[21720.139358] &#160;panic+0xe7/0x2ac^M
[21720.139973] &#160;? ret_from_fork+0x35/0x40^M
[21720.140690] &#160;? lnet_discovery_event_reply+0xb00/0xb00 [lnet]^M
[21720.141763] &#160;lbug_with_loc.cold.8+0x18/0x18 [libcfs]^M
[21720.142713] &#160;lnet_assert_handler_unused+0xa0/0xd0 [lnet]^M
[21720.143717] &#160;lnet_peer_discovery+0x14fb/0x1730 [lnet]^M
[21720.144692] &#160;? finish_wait+0x80/0x80^M
[21720.145393] &#160;? lnet_peer_merge_data+0x1110/0x1110 [lnet]^M
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;
&lt;p&gt;&#160;&lt;/p&gt;

&lt;p&gt;&lt;b&gt;Sanity-lnet 204/205 run output&lt;/b&gt;&lt;/p&gt;

&lt;p&gt;&#160;&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;== sanity-lnet test 204: Check no health or resends for single-rail local failures ========================================================== 16:17:04 (1700756224)
Cleaning up LNet
CMD: trevis-33vm2.trevis.whamcloud.com lsmod | grep lnet &amp;gt; /dev/null &amp;amp;&amp;amp;
lctl dl | grep &apos; ST &apos; || true
Writer error: failed to resolve Netlink family id
modules unloaded.
Loading modules from /usr/lib64/lustre
detected 2 online CPUs by sysfs
Force libcfs to create 2 CPU partitions
../libcfs/libcfs/libcfs options: &apos;cpu_npartitions=2&apos;
../lnet/lnet/lnet options: &apos;accept=all&apos;
/usr/sbin/lnetctl lnet configure --all
CMD: trevis-26vm4 /usr/sbin/lctl list_nids
/usr/sbin/lnetctl discover 10.240.38.123@tcp
discover:
- primary nid: 10.240.38.123@tcp
&#160; Multi-Rail: true
&#160; peer_ni:
&#160; - nid: 10.240.38.123@tcp
net:
- &#160; &#160; net type: lo
&#160; &#160; &#160; local NI(s):
&#160; &#160; &#160; - &#160; &#160; nid: 0@lo
&#160; &#160; &#160; &#160; &#160; &#160; status: up
- &#160; &#160; net type: tcp
&#160; &#160; &#160; local NI(s):
&#160; &#160; &#160; - &#160; &#160; nid: 10.240.39.5@tcp
&#160; &#160; &#160; &#160; &#160; &#160; status: up
&#160; &#160; &#160; &#160; &#160; &#160; interfaces:
&#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; 0: eth0
- &#160; &#160; primary nid: 10.240.38.123@tcp
&#160; &#160; &#160; - &#160; &#160; nid: 10.240.38.123@tcp
&#160; &#160; &#160; &#160; &#160; &#160; health stats:
&#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; health value: 1000
debug=+net
Simulate local_interrupt
Added drop rule 10.240.39.5@tcp-&amp;gt;10.240.38.123@tcp (1/1)
Added drop rule 10.240.39.5@tcp-&amp;gt;10.240.39.5@tcp (1/1)
/usr/sbin/lnetctl ping 10.240.38.123@tcp
manage:
&#160; &#160; - ping:
&#160; &#160; &#160; &#160; &#160; errno: -1
&#160; &#160; &#160; &#160; &#160; descr: failed to ping 10.240.38.123@tcp: Input/output error
&#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160;
Pre resends: 0
Post resends: 0
Resends delta: 0
Pre local health: 1000
Post local health: 1000
Pre remote health: 1000
Post remote health: 1000
/usr/sbin/lnetctl peer set --health 1000 --all
/usr/sbin/lnetctl net set --health 1000 --all
Removed 2 drop rules
Check that no resends took place
Check that local NI health is unchanged
Simulate local_dropped
Added drop rule 10.240.39.5@tcp-&amp;gt;10.240.38.123@tcp (1/1)
Added drop rule 10.240.39.5@tcp-&amp;gt;10.240.39.5@tcp (1/1)
/usr/sbin/lnetctl ping 10.240.38.123@tcp
manage:
&#160; &#160; - ping:
&#160; &#160; &#160; &#160; &#160; errno: -1
&#160; &#160; &#160; &#160; &#160; descr: failed to ping 10.240.38.123@tcp: Input/output error
&#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160;
Pre resends: 0
Post resends: 0
Resends delta: 0
Pre local health: 2000
Post local health: 2000
Pre remote health: 1000
Post remote health: 1000
/usr/sbin/lnetctl peer set --health 1000 --all
/usr/sbin/lnetctl net set --health 1000 --all
Removed 2 drop rules
Check that no resends took place
Check that local NI health is unchanged
Simulate local_aborted
Added drop rule 10.240.39.5@tcp-&amp;gt;10.240.38.123@tcp (1/1)
Added drop rule 10.240.39.5@tcp-&amp;gt;10.240.39.5@tcp (1/1)
/usr/sbin/lnetctl ping 10.240.38.123@tcp
manage:
&#160; &#160; - ping:
&#160; &#160; &#160; &#160; &#160; errno: -1
&#160; &#160; &#160; &#160; &#160; descr: failed to ping 10.240.38.123@tcp: Input/output error
&#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160;
Pre resends: 0
Post resends: 0
Resends delta: 0
Pre local health: 2000
Post local health: 2000
Pre remote health: 1000
Post remote health: 1000
/usr/sbin/lnetctl peer set --health 1000 --all
/usr/sbin/lnetctl net set --health 1000 --all
Removed 2 drop rules
Check that no resends took place
Check that local NI health is unchanged
Simulate local_no_route
Added drop rule 10.240.39.5@tcp-&amp;gt;10.240.38.123@tcp (1/1)
Added drop rule 10.240.39.5@tcp-&amp;gt;10.240.39.5@tcp (1/1)
/usr/sbin/lnetctl ping 10.240.38.123@tcp
manage:
&#160; &#160; - ping:
&#160; &#160; &#160; &#160; &#160; errno: -1
&#160; &#160; &#160; &#160; &#160; descr: failed to ping 10.240.38.123@tcp: Input/output error
&#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160;
Pre resends: 0
Post resends: 0
Resends delta: 0
Pre local health: 2000
Post local health: 2000
Pre remote health: 1000
Post remote health: 1000
/usr/sbin/lnetctl peer set --health 1000 --all
/usr/sbin/lnetctl net set --health 1000 --all
Removed 2 drop rules
Check that no resends took place
Check that local NI health is unchanged
Simulate local_timeout
Added drop rule 10.240.39.5@tcp-&amp;gt;10.240.38.123@tcp (1/1)
Added drop rule 10.240.39.5@tcp-&amp;gt;10.240.39.5@tcp (1/1)
/usr/sbin/lnetctl ping 10.240.38.123@tcp
manage:
&#160; &#160; - ping:
&#160; &#160; &#160; &#160; &#160; errno: -1
&#160; &#160; &#160; &#160; &#160; descr: failed to ping 10.240.38.123@tcp: Input/output error
&#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160;
Pre resends: 0
Post resends: 0
Resends delta: 0
Pre local health: 2000
Post local health: 2000
Pre remote health: 1000
Post remote health: 1000
/usr/sbin/lnetctl peer set --health 1000 --all
/usr/sbin/lnetctl net set --health 1000 --all
Removed 2 drop rules
Check that no resends took place
Check that local NI health is unchanged
Simulate local_error
Added drop rule 10.240.39.5@tcp-&amp;gt;10.240.38.123@tcp (1/1)
Added drop rule 10.240.39.5@tcp-&amp;gt;10.240.39.5@tcp (1/1)
/usr/sbin/lnetctl ping 10.240.38.123@tcp
manage:
&#160; &#160; - ping:
&#160; &#160; &#160; &#160; &#160; errno: -1
&#160; &#160; &#160; &#160; &#160; descr: failed to ping 10.240.38.123@tcp: Input/output error
&#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160;
Pre resends: 0
Post resends: 0
Resends delta: 0
Pre local health: 2000
Post local health: 2000
Pre remote health: 1000
Post remote health: 1000
/usr/sbin/lnetctl peer set --health 1000 --all
/usr/sbin/lnetctl net set --health 1000 --all
Removed 2 drop rules
Check that no resends took place
Check that local NI health is unchanged
CMD: trevis-33vm2.trevis.whamcloud.com lsmod | grep lnet &amp;gt; /dev/null &amp;amp;&amp;amp;
lctl dl | grep &apos; ST &apos; || true
Writer error: failed to resolve Netlink family id
modules unloaded.
pdsh@trevis-33vm2: trevis-33vm2: ssh exited with exit code 2
pdsh@trevis-33vm2: trevis-33vm2: ssh exited with exit code 2
PASS 204 (10s)
CMD: trevis-26vm4 sysctl -wq kernel/kptr_restrict=1
CMD: trevis-33vm2.trevis.whamcloud.com sysctl -wq kernel/kptr_restrict=1
CMD: trevis-33vm3 sysctl -wq kernel/kptr_restrict=1
CMD: trevis-67vm7 sysctl -wq kernel/kptr_restrict=1
CMD: trevis-79vm7 sysctl -wq kernel/kptr_restrict=1
CMD: trevis-26vm4 sysctl --values kernel/kptr_restrict
CMD: trevis-26vm4 sysctl -wq kernel/kptr_restrict=1
CMD: trevis-33vm2.trevis.whamcloud.com sysctl --values kernel/kptr_restrict
CMD: trevis-33vm2.trevis.whamcloud.com sysctl -wq kernel/kptr_restrict=1
CMD: trevis-33vm3 sysctl --values kernel/kptr_restrict
CMD: trevis-33vm3 sysctl -wq kernel/kptr_restrict=1
CMD: trevis-67vm7 sysctl --values kernel/kptr_restrict
CMD: trevis-67vm7 sysctl -wq kernel/kptr_restrict=1
CMD: trevis-79vm7 sysctl --values kernel/kptr_restrict
CMD: trevis-79vm7 sysctl -wq kernel/kptr_restrict=1
== sanity-lnet test 205: Check health and resends for multi-rail local failures ========================================================== 16:17:18 (1700756238)
Cleaning up LNet
CMD: trevis-33vm2.trevis.whamcloud.com lsmod | grep lnet &amp;gt; /dev/null &amp;amp;&amp;amp;
lctl dl | grep &apos; ST &apos; || true
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;
&lt;p&gt;&#160;&lt;/p&gt;

&lt;p&gt;Nothing alarming/useful under MDS/OST output&lt;/p&gt;

&lt;p&gt;VVVVVVV DO NOT REMOVE LINES BELOW, Added by Maloo for auto-association VVVVVVV&lt;br/&gt;
sanity-lnet test_205 - trevis-33vm2 crashed during sanity-lnet test_205&lt;/p&gt;</description>
                <environment></environment>
        <key id="79154">LU-17313</key>
            <summary>sanity-lnet test_205/test_220: LNetError: (lib-md.c:281:lnet_assert_handler_unused()) ASSERTION( md-&gt;md_handler != handler )</summary>
                <type id="1" iconUrl="https://jira.whamcloud.com/secure/viewavatar?size=xsmall&amp;avatarId=11303&amp;avatarType=issuetype">Bug</type>
                                            <priority id="4" iconUrl="https://jira.whamcloud.com/images/icons/priorities/minor.svg">Minor</priority>
                        <status id="1" iconUrl="https://jira.whamcloud.com/images/icons/statuses/open.png" description="The issue is open and ready for the assignee to start work on it.">Open</status>
                    <statusCategory id="2" key="new" colorName="default"/>
                                    <resolution id="-1">Unresolved</resolution>
                                        <assignee username="wc-triage">WC Triage</assignee>
                                    <reporter username="maloo">Maloo</reporter>
                        <labels>
                    </labels>
                <created>Fri, 24 Nov 2023 09:09:20 +0000</created>
                <updated>Tue, 6 Feb 2024 06:50:04 +0000</updated>
                                                                                <due></due>
                            <votes>0</votes>
                                    <watches>4</watches>
                                                                            <comments>
                            <comment id="398691" author="adilger" created="Fri, 5 Jan 2024 17:48:17 +0000"  >&lt;p&gt;+1 on master: &lt;a href=&quot;https://testing.whamcloud.com/sub_tests/121f699a-8b7c-40dc-be4e-99f534b874b4&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://testing.whamcloud.com/sub_tests/121f699a-8b7c-40dc-be4e-99f534b874b4&lt;/a&gt;&lt;/p&gt;</comment>
                            <comment id="399794" author="arshad512" created="Tue, 16 Jan 2024 05:08:55 +0000"  >&lt;p&gt;+1 on (master) &lt;a href=&quot;https://testing.whamcloud.com/test_sets/3f6810cc-7c8e-4ee4-9397-e09bad232d54&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://testing.whamcloud.com/test_sets/3f6810cc-7c8e-4ee4-9397-e09bad232d54&lt;/a&gt;&lt;/p&gt;

&lt;p&gt;&lt;b&gt;Client:&lt;/b&gt;&lt;br/&gt;
Lustre Version:2.15.60.13&lt;br/&gt;
Kernel version:5.14.0-284.30.1.el9_2.x86_64&lt;/p&gt;

&lt;p&gt;&lt;b&gt;Server:&lt;/b&gt;&lt;br/&gt;
Lustre Version:2.15.60.13&lt;br/&gt;
Kernel version:4.18.0-477.27.1.el8_lustre.x86_64&lt;/p&gt;

&lt;p&gt;This time it is seen under sanity-lnet/test_220 (panic under client)&lt;/p&gt;
&lt;div class=&quot;code panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;codeContent panelContent&quot;&gt;
&lt;pre class=&quot;code-java&quot;&gt;
[21725.920801] Lustre: DEBUG MARKER: == sanity-lnet test 220: Add routes w/&lt;span class=&quot;code-keyword&quot;&gt;default&lt;/span&gt; options - check aliveness ========================================================== 16:47:56 (1705337276)
[21726.373169] Lustre: DEBUG MARKER: /usr/sbin/lustre_rmmod
[21726.999708] Key type lgssc unregistered
[21727.051580] LNetError: 6094:0:(lib-md.c:281:lnet_assert_handler_unused()) ASSERTION( md-&amp;gt;md_handler != handler ) failed:&#160;
[21727.053408] LNetError: 6094:0:(lib-md.c:281:lnet_assert_handler_unused()) LBUG
[21727.054422] Pid: 6094, comm: lnet_discovery 5.14.0-284.30.1.el9_2.x86_64 #1 SMP PREEMPT_DYNAMIC Fri Aug 25 09:13:12 EDT 2023
[21727.055793] Call Trace TBD:
[21727.056218] Kernel panic - not syncing: LBUG
[21727.056813] CPU: 1 PID: 6094 Comm: lnet_discovery Kdump: loaded Tainted: G &#160; &#160; &#160; &#160; &#160; OE &#160; &#160;-------- &#160;--- &#160;5.14.0-284.30.1.el9_2.x86_64 #1
[21727.058258] Hardware name: Red Hat KVM, BIOS 0.5.1 01/01/2011
[21727.059002] Call Trace:
[21727.059418] &#160;&amp;lt;TASK&amp;gt;
[21727.059756] &#160;dump_stack_lvl+0x34/0x48
[21727.060365] &#160;panic+0xf4/0x2c6
[21727.060810] &#160;? lnet_discovery_event_reply+0xbc0/0xbc0 [lnet]
[21727.062285] &#160;lbug_with_loc.cold+0x18/0x18 [libcfs]
[21727.063335] &#160;lnet_assert_handler_unused+0x9c/0xd0 [lnet]
[21727.064169] &#160;lnet_peer_discovery+0x997/0xaf0 [lnet]
[21727.064850] &#160;? cpuacct_percpu_seq_show+0x10/0x10
[21727.065492] &#160;? lnet_peer_data_present+0x580/0x580 [lnet]
[21727.066187] &#160;kthread+0xd9/0x100
[21727.066651] &#160;? kthread_complete_and_exit+0x20/0x20
[21727.067276] &#160;ret_from_fork+0x22/0x30
[21727.067792] &#160;&amp;lt;/TASK&amp;gt;&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;</comment>
                            <comment id="402277" author="adilger" created="Thu, 1 Feb 2024 18:45:22 +0000"  >&lt;p&gt;+9 crashes of sanity-lnet test_220 this week:&lt;br/&gt;
&lt;a href=&quot;https://testing.whamcloud.com/search?status%5B%5D=CRASH&amp;amp;test_set_script_id=a2b1c4b2-b449-11e9-b88c-52540065bddc&amp;amp;sub_test_script_id=03c13a24-99ad-4246-9749-8279e6e898a7&amp;amp;start_date=2024-01-26&amp;amp;end_date=2024-02-01&amp;amp;source=sub_tests#redirect&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://testing.whamcloud.com/search?status%5B%5D=CRASH&amp;amp;test_set_script_id=a2b1c4b2-b449-11e9-b88c-52540065bddc&amp;amp;sub_test_script_id=03c13a24-99ad-4246-9749-8279e6e898a7&amp;amp;start_date=2024-01-26&amp;amp;end_date=2024-02-01&amp;amp;source=sub_tests#redirect&lt;/a&gt;&lt;/p&gt;</comment>
                            <comment id="402796" author="arshad512" created="Tue, 6 Feb 2024 06:32:53 +0000"  >&lt;p&gt;+1 on Master(for aarch64)&#160; &lt;a href=&quot;https://testing.whamcloud.com/test_sessions/ab07dba8-8b72-4468-9c0b-65182aafc48a&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://testing.whamcloud.com/test_sessions/ab07dba8-8b72-4468-9c0b-65182aafc48a&lt;/a&gt;&lt;/p&gt;

&lt;p&gt;On MDS&lt;/p&gt;
&lt;div class=&quot;code panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;codeContent panelContent&quot;&gt;
&lt;pre class=&quot;code-java&quot;&gt;
[25868.167715] Lustre: DEBUG MARKER: == sanity-lnet test 220: Add routes w/&lt;span class=&quot;code-keyword&quot;&gt;default&lt;/span&gt; options - check aliveness ========================================================== 21:21:22 (1707168082)
[25868.709079] Lustre: DEBUG MARKER: /usr/sbin/lustre_rmmod
[25869.289481] Key type lgssc unregistered
[25869.351068] LNetError: 11254:0:(lib-md.c:281:lnet_assert_handler_unused()) ASSERTION( md-&amp;gt;md_handler != handler ) failed:&#160;
[25869.352915] LNetError: 11254:0:(lib-md.c:281:lnet_assert_handler_unused()) LBUG&#160;
[25869.354122] CPU: 0 PID: 11254 Comm: lnet_discovery Kdump: loaded Tainted: G &#160; &#160; &#160; &#160;W &#160;OE &#160; &#160;--------- - &#160;- 4.18.0-477.27.1.el8_lustre.x86_64 #1
[25869.356169] Hardware name: Red Hat KVM, BIOS 0.5.1 01/01/2011
[25869.357115] Call Trace:
[25869.357602] &#160;? lnet_discovery_event_reply+0xb00/0xb00 [lnet]
[25869.358667] &#160;dump_stack+0x41/0x60
[25869.359294] &#160;lbug_with_loc.cold.8+0x5/0x43 [libcfs]
[25869.360176] &#160;lnet_assert_handler_unused+0xa0/0xd0 [lnet]
[25869.361101] &#160;lnet_peer_discovery+0x13e6/0x1620 [lnet]
[25869.361978] &#160;? finish_wait+0x80/0x80
[25869.362617] &#160;? lnet_peer_merge_data+0x1110/0x1110 [lnet]
[25869.363522] &#160;kthread+0x134/0x150
[25869.364110] &#160;? set_kthread_struct+0x50/0x50
[25869.364827] &#160;ret_from_fork+0x35/0x40
[25869.365473] Kernel panic - not syncing: LBUG&#160;
[25869.366204] CPU: 0 PID: 11254 Comm: lnet_discovery Kdump: loaded Tainted: G &#160; &#160; &#160; &#160;W &#160;OE &#160; &#160;--------- - &#160;- 4.18.0-477.27.1.el8_lustre.x86_64 #1
[25869.368229] Hardware name: Red Hat KVM, BIOS 0.5.1 01/01/2011
[25869.369173] Call Trace:
[25869.369619] &#160;dump_stack+0x41/0x60
[25869.370194] &#160;panic+0xe7/0x2ac
[25869.370741] &#160;? __switch_to_asm+0x51/0x80
[25869.371412] &#160;? __switch_to_asm+0x51/0x80
[25869.372087] &#160;? lnet_discovery_event_reply+0xb00/0xb00 [lnet]
[25869.373044] &#160;lbug_with_loc.cold.8+0x1a/0x43 [libcfs]
[25869.373882] &#160;lnet_assert_handler_unused+0xa0/0xd0 [lnet]
[25869.374787] &#160;lnet_peer_discovery+0x13e6/0x1620 [lnet]
[25869.375658] &#160;? finish_wait+0x80/0x80
[25869.376281] &#160;? lnet_peer_merge_data+0x1110/0x1110 [lnet]
[25869.377185] &#160;kthread+0x134/0x150
[25869.377750] &#160;? set_kthread_struct+0x50/0x50
[25869.378457] &#160;ret_from_fork+0x35/0x40&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;</comment>
                    </comments>
                <issuelinks>
                            <issuelinktype id="10011">
                    <name>Related</name>
                                            <outwardlinks description="is related to ">
                                        <issuelink>
            <issuekey id="64974">LU-14810</issuekey>
        </issuelink>
                            </outwardlinks>
                                                        </issuelinktype>
                    </issuelinks>
                <attachments>
                    </attachments>
                <subtasks>
                    </subtasks>
                <customfields>
                                                                                                                                                                                            <customfield id="customfield_10890" key="com.atlassian.jira.plugins.jira-development-integration-plugin:devsummary">
                        <customfieldname>Development</customfieldname>
                        <customfieldvalues>
                            
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        <customfield id="customfield_10390" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                        <customfieldname>Rank</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>1|i042ov:</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10090" key="com.pyxis.greenhopper.jira:gh-global-rank">
                        <customfieldname>Rank (Obsolete)</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>9223372036854775807</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                                                                                            <customfield id="customfield_10060" key="com.atlassian.jira.plugin.system.customfieldtypes:select">
                        <customfieldname>Severity</customfieldname>
                        <customfieldvalues>
                                <customfieldvalue key="10022"><![CDATA[3]]></customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        </customfields>
    </item>
</channel>
</rss>