<?xml version="1.0" encoding="UTF-8"?>
<!-- 
RSS generated by JIRA (9.4.14#940014-sha1:734e6822bbf0d45eff9af51f82432957f73aa32c) at Sat Feb 10 02:20:15 UTC 2024

It is possible to restrict the fields that are returned in this document by specifying the 'field' parameter in your request.
For example, to request only the issue key and summary append 'field=key&field=summary' to the URL of your request.
-->
<rss version="0.92">
<channel>
    <title>Whamcloud Community JIRA</title>
    <link>https://jira.whamcloud.com</link>
    <description>This file is an XML representation of an issue</description>
    <language>en-us</language>
    <build-info>
        <version>9.4.14</version>
        <build-number>940014</build-number>
        <build-date>05-12-2023</build-date>
    </build-info>


<item>
            <title>[LU-8752] mlx5_warn:mlx5_0:dump_cqe:257:</title>
                <link>https://jira.whamcloud.com/browse/LU-8752</link>
                <project id="10000" key="LU">Lustre</project>
                    <description>&lt;p&gt;Running lnet selftest on a mlx5 card we get these errors.&lt;/p&gt;
&lt;div class=&quot;code panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;codeContent panelContent&quot;&gt;
&lt;pre class=&quot;code-java&quot;&gt;[1477328975.069684] mlx5_warn:mlx5_0:dump_cqe:257:(pid 10912): dump error cqe
[1477328975.085684] mlx5_warn:mlx5_0:dump_cqe:257:(pid 10906): dump error cqe
[1477328975.085684] 00000000 00000000 00000000 00000000
[1477328975.085684] 00000000 00000000 00000000 00000000
[1477328975.085684] 00000000 00000000 00000000 00000000
[1477328975.085684] 00000000 08007806 2500002f 00085dd0
[1477328975.085684] LustreError: 11028:0:(brw_test.c:388:brw_bulk_ready()) BRW bulk READ failed &lt;span class=&quot;code-keyword&quot;&gt;for&lt;/span&gt; RPC from 12345-10.151.27.25@o2ib: -5
[1477328975.085684] LustreError: 11028:0:(brw_test.c:362:brw_server_rpc_done()) Bulk transfer to 12345-10.151.27.25@o2ib has failed: -5
[1477328975.093683] mlx5_warn:mlx5_0:dump_cqe:257:(pid 10922): dump error cqe
[1477328975.093683] 00000000 00000000 00000000 00000000
[1477328975.093683] 00000000 00000000 00000000 00000000
[1477328975.093683] 00000000 00000000 00000000 00000000
[1477328975.093683] 00000000 08007806 25000030 000842d0
[1477328975.105683] mlx5_warn:mlx5_0:dump_cqe:257:(pid 10915): dump error cqe
[1477328975.105683] 00000000 00000000 00000000 00000000
[1477328975.105683] 00000000 00000000 00000000 00000000
[1477328975.105683] 00000000 00000000 00000000 00000000
[1477328975.105683] 00000000 08007806 25000031 000843d0
[1477328975.113683] mlx5_warn:mlx5_0:dump_cqe:257:(pid 10900): dump error cqe
[1477328975.113683] 00000000 00000000 00000000 00000000
[1477328975.113683] 00000000 00000000 00000000 00000000
[1477328975.113683] 00000000 00000000 00000000 00000000
[1477328975.113683] 00000000 08007806 25000032 000840d0
[1477328975.121683] mlx5_warn:mlx5_0:dump_cqe:257:(pid 10900): dump error cqe
[1477328975.121683] 00000000 00000000 00000000 00000000
[1477328975.121683] 00000000 00000000 00000000 00000000
[1477328975.121683] 00000000 00000000 00000000 00000000
[1477328975.121683] 00000000 08007806 25000033 000841d0
[1477328975.129683] mlx5_warn:mlx5_0:dump_cqe:257:(pid 10915): dump error cqe
[1477328975.129683] 00000000 00000000 00000000 00000000
[1477328975.129683] 00000000 00000000 00000000 00000000
[1477328975.129683] 00000000 00000000 00000000 00000000
[1477328975.129683] 00000000 08007806 2500002e 00085cd0
[1477328975.133683] mlx5_warn:mlx5_0:dump_cqe:257:(pid 10907): dump error cqe
[1477328975.133683] 00000000 00000000 00000000 00000000
[1477328975.133683] 00000000 00000000 00000000 00000000
[1477328975.133683] 00000000 00000000 00000000 00000000
[1477328975.133683] 00000000 08007806 25000034 000846d0
[1477328975.205682] 00000000 00000000 00000000 00000000
[1477328975.281682] 00000000 00000000 00000000 00000000
[1477328975.305681] 00000000 00000000 00000000 00000000
[1477328975.305681] 00000000 08007806 2500002d 000b57d0
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;
</description>
                <environment>mlnx ofed3.2&lt;br/&gt;
lustre-2.7.2-2nas-fe&lt;br/&gt;
Linux elrtr1 3.0.101-77.1.20160630-nasa #1 SMP Thu Jun 30 00:56:32 UTC 2016 (a082ea6) x86_64 x86_64 x86_64 GNU/Linux&lt;br/&gt;
</environment>
        <key id="40985">LU-8752</key>
            <summary>mlx5_warn:mlx5_0:dump_cqe:257:</summary>
                <type id="1" iconUrl="https://jira.whamcloud.com/secure/viewavatar?size=xsmall&amp;avatarId=11303&amp;avatarType=issuetype">Bug</type>
                                            <priority id="3" iconUrl="https://jira.whamcloud.com/images/icons/priorities/major.svg">Major</priority>
                        <status id="5" iconUrl="https://jira.whamcloud.com/images/icons/statuses/resolved.png" description="A resolution has been taken, and it is awaiting verification by reporter. From here issues are either reopened, or are closed.">Resolved</status>
                    <statusCategory id="3" key="done" colorName="success"/>
                                    <resolution id="1">Fixed</resolution>
                                        <assignee username="doug">Doug Oucharek</assignee>
                                    <reporter username="mhanafi">Mahmoud Hanafi</reporter>
                        <labels>
                    </labels>
                <created>Mon, 24 Oct 2016 17:17:52 +0000</created>
                <updated>Thu, 18 Jan 2018 17:13:46 +0000</updated>
                            <resolved>Wed, 18 Jan 2017 19:12:31 +0000</resolved>
                                    <version>Lustre 2.7.0</version>
                                    <fixVersion>Lustre 2.10.0</fixVersion>
                                        <due></due>
                            <votes>0</votes>
                                    <watches>15</watches>
                                                                            <comments>
                            <comment id="170783" author="simmonsja" created="Mon, 24 Oct 2016 17:39:29 +0000"  >&lt;p&gt;Do you have map_on_demand enabled?  We saw this with one of our systems as well and when we disabled map_on_demand it went away. This issue only shows up on specific systems. If you look at your logs you will see IB_BIND_ERRORS (something along that line). See ticket &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-8693&quot; title=&quot;ko2iblnd recieving IB_WC_MW_BIND_ERR errors.&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-8693&quot;&gt;&lt;del&gt;LU-8693&lt;/del&gt;&lt;/a&gt;&lt;/p&gt;</comment>
                            <comment id="170796" author="pjones" created="Mon, 24 Oct 2016 18:12:15 +0000"  >&lt;p&gt;Doug&lt;/p&gt;

&lt;p&gt;Anything to add here?&lt;/p&gt;

&lt;p&gt;Peter&lt;/p&gt;</comment>
                            <comment id="170799" author="doug" created="Mon, 24 Oct 2016 18:29:00 +0000"  >&lt;p&gt;Mahmoud: can you confirm if IB_BIND_ERRORS are present in your logs?&lt;/p&gt;</comment>
                            <comment id="170820" author="mhanafi" created="Mon, 24 Oct 2016 21:05:41 +0000"  >&lt;p&gt;I don&apos;t see IB_BIND_ERRORS. But we are using map_on_demand. Our cluster wide setting is &lt;/p&gt;

&lt;div class=&quot;code panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;codeContent panelContent&quot;&gt;
&lt;pre class=&quot;code-java&quot;&gt;options ko2iblnd timeout=150 retry_count=7 map_on_demand=32 peer_credits=63 concurrent_sends=63
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;Can we keep the peer_credits and concurrent_sends at 63 with map_on_demand disabled?&lt;/p&gt;</comment>
                            <comment id="170825" author="simmonsja" created="Mon, 24 Oct 2016 21:34:39 +0000"  >&lt;p&gt;Do you see the following in your logs:&lt;/p&gt;

&lt;p&gt;[ 170.597651] LNet: 8714:0:(o2iblnd_cb.c:3433:kiblnd_complete()) FastReg failed: 6&lt;br/&gt;
[ 170.597728] LNet: 8713:0:(o2iblnd_cb.c:3444:kiblnd_complete()) RDMA (tx: c000003c6a78c5a8) failed: 5&lt;/p&gt;

&lt;p&gt;That error code 6 you see is the ib_wc_status which is IB_WC_MW_BIND_ERR.&lt;/p&gt;

&lt;p&gt;I found in my testing you only need map_on_demand if you have hardware using the mlx4 driver talking to&lt;br/&gt;
hardware using the mlx5 driver. If you mlx5 to mlx5 communications then map_on_demand is not needed.&lt;br/&gt;
As for keeping peer_credits and concurrent_sends settings that depends on the setup.I found in my testing &lt;br/&gt;
with one Power8 client cluster I had to reduce the concurrent_sends to 31 to reduce the queue pair depth&lt;br/&gt;
usage. At concurrent_send set to 64 the pair depth was 64K in size with that max in hardware 32K. You have&lt;br/&gt;
to see for your hardware if this is the case. Its all tinkering.&lt;/p&gt;</comment>
                            <comment id="170832" author="mhanafi" created="Mon, 24 Oct 2016 22:01:07 +0000"  >&lt;p&gt;If I set map_on_demand=0 then I get this error&lt;/p&gt;
&lt;div class=&quot;code panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;codeContent panelContent&quot;&gt;
&lt;pre class=&quot;code-java&quot;&gt;elrtr1 login: [1477346024.576963] LNet: 81:0:(o2iblnd_cb.c:2367:kiblnd_passive_connect()) Can&apos;t accept conn from 10.151.27.22@o2ib (version 12): max_frags 32 incompatible without FMR pool (256 wanted)
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;if I set map_on_demand=256 on the mlnx5 host I get &lt;/p&gt;
&lt;div class=&quot;code panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;codeContent panelContent&quot;&gt;
&lt;pre class=&quot;code-java&quot;&gt;#mlx4 config
options ko2iblnd timeout=150 retry_count=7 peer_timeout=0 map_on_demand=32 peer_credits=63 concurrent_sends=63
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;div class=&quot;code panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;codeContent panelContent&quot;&gt;
&lt;pre class=&quot;code-java&quot;&gt;#mlx5 config
options ko2iblnd timeout=150 retry_count=7 map_on_demand=256  peer_credits=63 concurrent_sends=63
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;div class=&quot;code panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;codeContent panelContent&quot;&gt;
&lt;pre class=&quot;code-java&quot;&gt;[1477346171.287627] mlx5_warn:mlx5_0:calc_sq_size:516:(pid 81): wqe count(65536) exceeds limits(32768)
[1477346171.315626] mlx5_warn:mlx5_0:create_kernel_qp:1078:(pid 81): err -12
[1477346171.335626] mlx5_warn:mlx5_0:create_qp_common:1823:(pid 81): err -12
[1477346171.359626] mlx5_warn:mlx5_0:__create_qp:2267:(pid 81): create_qp_common failed
[1477346171.379626] mlx5_warn:mlx5_0:calc_sq_size:516:(pid 81): wqe count(65536) exceeds limits(32768)
[1477346171.407626] mlx5_warn:mlx5_0:create_kernel_qp:1078:(pid 81): err -12
[1477346171.423625] mlx5_warn:mlx5_0:create_qp_common:1823:(pid 81): err -12
[1477346171.447625] mlx5_warn:mlx5_0:__create_qp:2267:(pid 81): create_qp_common failed
[1477346171.479625] mlx5_warn:mlx5_0:dump_cqe:257:(pid 13624): dump error cqe
[1477346171.495625] LustreError: 13730:0:(brw_test.c:362:brw_server_rpc_done()) Bulk transfer from 12345-10.151.27.56@o2ib has failed: -5
[1477346171.495625] LustreError: 13735:0:(brw_test.c:388:brw_bulk_ready()) BRW bulk READ failed &lt;span class=&quot;code-keyword&quot;&gt;for&lt;/span&gt; RPC from 12345-10.151.27.56@o2ib: -5
[1477346171.567624] LustreError: 13730:0:(brw_test.c:362:brw_server_rpc_done()) Skipped 1 previous similar message
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;</comment>
                            <comment id="170839" author="mhanafi" created="Mon, 24 Oct 2016 22:23:23 +0000"  >&lt;p&gt;So this configuration eliminated the LNET errors but still getting &apos;dump error cqe&apos;&lt;/p&gt;

&lt;div class=&quot;code panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;codeContent panelContent&quot;&gt;
&lt;pre class=&quot;code-java&quot;&gt;#mlx4 config
options ko2iblnd timeout=150 retry_count=7 peer_timeout=0 map_on_demand=32 peer_credits=16 concurrent_sends=16

#mlx5 config
options ko2iblnd timeout=150 retry_count=7 map_on_demand=256  peer_credits=16 concurrent_sends=16
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;ERRORS &lt;/p&gt;
&lt;div class=&quot;code panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;codeContent panelContent&quot;&gt;
&lt;pre class=&quot;code-java&quot;&gt;[1477347536.651112] mlx5_warn:mlx5_0:dump_cqe:257:(pid 14273): dump error cqe
[1477347536.671112] 00000000 00000000 00000000 00000000
[1477347536.671112] 00000000 00000000 00000000 00000000
[1477347536.671112] 00000000 00000000 00000000 00000000
[1477347536.671112] 00000000 08007806 2500005c 000e3cd0
[1477347536.671112] LustreError: 14383:0:(brw_test.c:362:brw_server_rpc_done()) Bulk transfer from 12345-10.151.27.56@o2ib has failed: -5
[1477347536.671112] LustreError: 14381:0:(brw_test.c:388:brw_bulk_ready()) BRW bulk READ failed &lt;span class=&quot;code-keyword&quot;&gt;for&lt;/span&gt; RPC from 12345-10.151.27.56@o2ib: -5
[1477347536.743111] LustreError: 14383:0:(brw_test.c:362:brw_server_rpc_done()) Skipped 1 previous similar message

&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;</comment>
                            <comment id="170849" author="simmonsja" created="Mon, 24 Oct 2016 23:14:53 +0000"  >&lt;p&gt;Why map_on_demand=32 ?  You are sending 32 * 4K in size on x86 platforms. We use map_on_demand=16 on Power8 since the page is 64K  in size. So 1MB divided by 64K pages equals 16 fragments for map_on_demand. For x86 platforms the optimal is map_on_demand=256 since 1 MB divided by 4K pages equals 256.&lt;/p&gt;</comment>
                            <comment id="170881" author="mhanafi" created="Tue, 25 Oct 2016 02:27:23 +0000"  >&lt;p&gt;The 32 was a recommendation from a previous LU. &lt;/p&gt;</comment>
                            <comment id="171022" author="mhanafi" created="Tue, 25 Oct 2016 19:43:53 +0000"  >&lt;p&gt;I tried turning off map_on_demand on both mlx4 and mlx5 and that worked. So we need to understand why map_on_demand causes the errors.&lt;/p&gt;</comment>
                            <comment id="171027" author="jaylan" created="Tue, 25 Oct 2016 19:58:22 +0000"  >&lt;p&gt;James A. Simmons wrote:&lt;br/&gt;
&quot;I found in my testing you only need map_on_demand if you have hardware using the mlx4 driver talking to hardware using the mlx5 driver.&quot;&lt;/p&gt;

&lt;p&gt;We do have a mix of mlx4 and mlx5 hardwares in our fabrics, don&apos;t we, Mahmoud?&lt;/p&gt;</comment>
                            <comment id="171035" author="simmonsja" created="Tue, 25 Oct 2016 20:26:32 +0000"  >&lt;p&gt;Also if you disable map_on_demand you lose a good chunk of performance &lt;img class=&quot;emoticon&quot; src=&quot;https://jira.whamcloud.com/images/icons/emoticons/sad.png&quot; height=&quot;16&quot; width=&quot;16&quot; align=&quot;absmiddle&quot; alt=&quot;&quot; border=&quot;0&quot;/&gt;&lt;/p&gt;</comment>
                            <comment id="171043" author="doug" created="Tue, 25 Oct 2016 21:59:17 +0000"  >&lt;p&gt;I thought that map_on_demand with Mellanox hardware only really helps in cases where you have a high latency network.  Under normal conditions, the difference should be minimal.  Are you having a difference experience, James?&lt;/p&gt;</comment>
                            <comment id="171602" author="mhanafi" created="Fri, 28 Oct 2016 16:48:27 +0000"  >&lt;p&gt;So when map_on_demand is set to any thing, even 256, causes this error this must point to creating pools with fmr/fastReq.&lt;/p&gt;

&lt;div class=&quot;code panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;codeContent panelContent&quot;&gt;
&lt;pre class=&quot;code-java&quot;&gt;[1477674572.063221] Lustre: Lustre: Build Version: 2.7.2-3.2nasC_mofed32v3f
[1477674572.123220] LNet: Using FastReg &lt;span class=&quot;code-keyword&quot;&gt;for&lt;/span&gt; registration
[1477674572.659216] LNet: Using FastReg &lt;span class=&quot;code-keyword&quot;&gt;for&lt;/span&gt; registration
[1477674572.659216] LNet: Skipped 5 previous similar messages
[1477674572.915214] LNet: Added LNI 10.151.25.168@o2ib [16/1024/0/180]
[1477674573.807206] LNet: Added LNI 10.150.25.168@o2ib233 [16/1024/0/180]
[1477674632.270692] mlx5_warn:mlx5_0:dump_cqe:257:(pid 9951): dump error cqe
[1477674632.286692] 00000000 00000000 00000000 00000000
[1477674632.286692] 00000000 00000000 00000000 00000000
[1477674632.286692] 00000000 00000000 00000000 00000000
[1477674632.286692] 00000000 08007806 2500002d 000d4fd2
[1477674632.290692] LustreError: 10088:0:(brw_test.c:362:brw_server_rpc_done()) Bulk transfer from 12345-10.151.27.56@o2ib has failed: -5
[1477674632.290692] LustreError: 10083:0:(brw_test.c:388:brw_bulk_ready()) BRW bulk READ failed &lt;span class=&quot;code-keyword&quot;&gt;for&lt;/span&gt; RPC from 12345-10.151.27.56@o2ib: -5
[1477674632.362691] LustreError: 10088:0:(brw_test.c:362:brw_server_rpc_done()) Skipped 1 previous similar message
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;</comment>
                            <comment id="171639" author="doug" created="Fri, 28 Oct 2016 20:20:31 +0000"  >&lt;p&gt;Mahmoud: do you have neterrors turn on (going to /var/log/messages)?  I&apos;m wondering if there is some other feedback we are getting from the lower drivers.&lt;/p&gt;</comment>
                            <comment id="171663" author="mhanafi" created="Fri, 28 Oct 2016 23:17:35 +0000"  >&lt;p&gt;Attaching +net debug output for mlx5 and mlx4 hosts.&lt;/p&gt;

&lt;p&gt;There are no other errors in /var/log/messages or console.&lt;/p&gt;</comment>
                            <comment id="171683" author="doug" created="Fri, 28 Oct 2016 23:53:55 +0000"  >&lt;p&gt;Did this also include +neterror (or do you have those on by default)?  Neterror is not included by default yet.  I have a patch to change this but it is not getting much traction.  &lt;/p&gt;</comment>
                            <comment id="171766" author="mhanafi" created="Mon, 31 Oct 2016 16:08:23 +0000"  >&lt;p&gt;yes the debug output included +neterror&lt;/p&gt;</comment>
                            <comment id="171803" author="mhanafi" created="Mon, 31 Oct 2016 19:06:39 +0000"  >&lt;p&gt;The above tests where all done with connectIB card. We tried a EDR card and got once piece of additional info. Just before the cqe error we get this &lt;/p&gt;
&lt;div class=&quot;code panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;codeContent panelContent&quot;&gt;
&lt;pre class=&quot;code-java&quot;&gt;[1477940064.283784] LNetError: 11647:0:(o2iblnd_cb.c:1082:kiblnd_init_rdma()) RDMA is too large &lt;span class=&quot;code-keyword&quot;&gt;for&lt;/span&gt; peer 10.151.27.56@o2ib (131072), src size: 1048576 dst size: 1048576
[1477940064.327784] LNetError: 11647:0:(o2iblnd_cb.c:434:kiblnd_handle_rx()) Can&apos;t setup rdma &lt;span class=&quot;code-keyword&quot;&gt;for&lt;/span&gt; PUT to 10.151.27.56@o2ib: -90
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;div class=&quot;code panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;codeContent panelContent&quot;&gt;
&lt;pre class=&quot;code-java&quot;&gt;[1477940064.363784] mlx5_warn:mlx5_1:dump_cqe:257:(pid 11647): dump error cqe
[1477940064.383784] 00000000 00000000 00000000 00000000
[1477940064.383784] 00000000 00000000 00000000 00000000
[1477940064.383784] 00000000 00000000 00000000 00000000
[1477940064.383784] 00000000 08007806 2500002e 026fc4d2
[1477940064.383784] LustreError: 11597:0:(brw_test.c:362:brw_server_rpc_done()) Bulk transfer from 12345-10.151.27.56@o2ib has failed: -5
[1477940064.383784] LustreError: 11598:0:(brw_test.c:388:brw_bulk_ready()) BRW bulk READ failed &lt;span class=&quot;code-keyword&quot;&gt;for&lt;/span&gt; RPC from 12345-10.151.27.56@o2ib: -5
[1477940064.455783] LustreError: 11597:0:(brw_test.c:362:brw_server_rpc_done()) Skipped 1 previous similar message
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;</comment>
                            <comment id="171841" author="doug" created="Mon, 31 Oct 2016 22:48:31 +0000"  >&lt;p&gt;I can&apos;t comment on dump_cqe message and what it means, but I do understand what is causing the &quot;RDMA is too large for peer&quot; message and its breakage.&lt;/p&gt;

&lt;p&gt;This was caused by the patch associated with &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-7650&quot; title=&quot;ko2iblnd map_on_demand can&amp;#39;t negotitate when page sizes are different between nodes.&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-7650&quot;&gt;&lt;del&gt;LU-7650&lt;/del&gt;&lt;/a&gt;.  The patch was trying to make o2iblnd fragments work with PPC&apos;s different page sizes.  Even after inspections (I was one of those), we did not see the breakage.  To be honest, I still don&apos;t know why that patch is causing this issue.  As a result, a revert patch was created for master and has landed: &lt;a href=&quot;http://review.whamcloud.com/#/c/23439/&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/#/c/23439/&lt;/a&gt;.&lt;/p&gt;

&lt;p&gt;I suspect that NASA has picked up the patch from &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-7650&quot; title=&quot;ko2iblnd map_on_demand can&amp;#39;t negotitate when page sizes are different between nodes.&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-7650&quot;&gt;&lt;del&gt;LU-7650&lt;/del&gt;&lt;/a&gt;, but not the revert.  Either remove the patch from &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-7650&quot; title=&quot;ko2iblnd map_on_demand can&amp;#39;t negotitate when page sizes are different between nodes.&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-7650&quot;&gt;&lt;del&gt;LU-7650&lt;/del&gt;&lt;/a&gt; or apply the revert patch.&lt;/p&gt;

&lt;p&gt;Hopefully, the dump_cqe message is associated with this and will go away one this is fixed.&lt;/p&gt;</comment>
                            <comment id="171846" author="simmonsja" created="Mon, 31 Oct 2016 23:16:51 +0000"  >&lt;p&gt;Only if they applied the patch. The patch for &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-7650&quot; title=&quot;ko2iblnd map_on_demand can&amp;#39;t negotitate when page sizes are different between nodes.&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-7650&quot;&gt;&lt;del&gt;LU-7650&lt;/del&gt;&lt;/a&gt; never landed to lustre 2.7 or lustre 2.8.0. In fact it doesn&apos;t exist anywhere now but the upstream client. This problem they are seeing was first seen by Jeremy in one of the &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-3322&quot; title=&quot;ko2iblnd support for different map_on_demand and peer_credits between systems&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-3322&quot;&gt;&lt;del&gt;LU-3322&lt;/del&gt;&lt;/a&gt; patches. I have to find it in the comments. The patch was still landed due to no else being able to reproduce this problem. I think he couldn&apos;t reproduce it after awhile. It is hit or miss with this.&lt;/p&gt;</comment>
                            <comment id="171847" author="jaylan" created="Mon, 31 Oct 2016 23:32:11 +0000"  >&lt;p&gt;I did picked up &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-7650&quot; title=&quot;ko2iblnd map_on_demand can&amp;#39;t negotitate when page sizes are different between nodes.&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-7650&quot;&gt;&lt;del&gt;LU-7650&lt;/del&gt;&lt;/a&gt;. Dont remember why I picked that one up.&lt;/p&gt;

&lt;p&gt;However, I picked up &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-7650&quot; title=&quot;ko2iblnd map_on_demand can&amp;#39;t negotitate when page sizes are different between nodes.&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-7650&quot;&gt;&lt;del&gt;LU-7650&lt;/del&gt;&lt;/a&gt; in 2.7.2-3.1nas build, but Mahmoud reported this problem against 2.7.2-2nas. The &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-7650&quot; title=&quot;ko2iblnd map_on_demand can&amp;#39;t negotitate when page sizes are different between nodes.&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-7650&quot;&gt;&lt;del&gt;LU-7650&lt;/del&gt;&lt;/a&gt; patch is not in 2.7.2-2nas.&lt;/p&gt;</comment>
                            <comment id="171849" author="doug" created="Mon, 31 Oct 2016 23:39:39 +0000"  >&lt;p&gt;So, I assume that means the dump_cqe message happens without the &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-7650&quot; title=&quot;ko2iblnd map_on_demand can&amp;#39;t negotitate when page sizes are different between nodes.&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-7650&quot;&gt;&lt;del&gt;LU-7650&lt;/del&gt;&lt;/a&gt; patch but the &quot;RDMA is too large for peer&quot; message does not.  They must then be unrelated.&lt;/p&gt;

&lt;p&gt;I&apos;m going to have to go on the Mellanox community board to ask about this mlx5-specific failure.  We do not have a support contract with Mellanox and I doubt they will help fix this failure without it.  To my knowledge, there is nothing we are doing in ko2iblnd which would break the MLX code &quot;assuming&quot; they did proper backwards compatibility between mlx4 and mlx5.  We have never seen such an error or problem with mlx4 so I have to assume they did something to mlx5 to change things.&lt;/p&gt;

&lt;p&gt;If NASA has a support contract with Mellanox, perhaps you can raise a ticket with them to get their input on the potential causes of these error messages.  That would help us to determine if there is something we need to change in ko2iblnd.&lt;/p&gt;</comment>
                            <comment id="171942" author="mhanafi" created="Tue, 1 Nov 2016 19:12:01 +0000"  >&lt;p&gt;We have opened a case with Mellanox and sent them some debugging. Waiting to hear back, but they did say the message is related to Fast Memory Registration Mode.&lt;/p&gt;
</comment>
                            <comment id="171946" author="doug" created="Tue, 1 Nov 2016 19:15:06 +0000"  >&lt;p&gt;Thank you Mahmoud.  I have suspected the Fast Memory Registration code could be the culprit here.  We had to add support for that over the older FMR code when using newer MLX cards which only support Fast Memory.  As such, that code is fairly new and can have an issue.  It will be great if Mellanox can point us specifically at where we are making a mistake so we can address it.&lt;/p&gt;</comment>
                            <comment id="174360" author="mhanafi" created="Sat, 19 Nov 2016 00:21:39 +0000"  >&lt;p&gt;After the host is reboot, the error can be reproduced when read from mlx4 to mlx5. &lt;br/&gt;
This will produce the dump_cqe error.&lt;/p&gt;
&lt;div class=&quot;code panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;codeContent panelContent&quot;&gt;
&lt;pre class=&quot;code-java&quot;&gt;#test1
lst add_test --batch bulk_rw --from mlx4_host --to mlx5_host brw read size=1M check=full
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;But this will work!&lt;/p&gt;
&lt;div class=&quot;code panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;codeContent panelContent&quot;&gt;
&lt;pre class=&quot;code-java&quot;&gt;#test2
lst add_test --batch bulk_rw --from mlx4_host --to mlx5_host brw write size=1M check=full 
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;What&apos;s interesting once the write test is ran the read will start to work.&lt;/p&gt;

</comment>
                            <comment id="174794" author="doug" created="Wed, 23 Nov 2016 07:54:26 +0000"  >&lt;p&gt;I&apos;ve been able to get mixed mlx4 and mlx5 cards in the same cluster.  Using your test examples, I have been able to reproduce this issue getting the same results as you.&lt;/p&gt;

&lt;p&gt;I verified that the issue only occurs when map_on_demand is non zero.  I have reproduced the issue with both upstream OFED and MOFED (latest version for both).  I have also reproduced it with RHEL 6.8 and 7.3.&lt;/p&gt;

&lt;p&gt;No solution yet, but at least I have a way to investigate this now.&lt;/p&gt;</comment>
                            <comment id="174836" author="mhanafi" created="Wed, 23 Nov 2016 15:05:19 +0000"  >&lt;p&gt;Mellanox has also been able to reproduce this issue in their lab and are looking at it.&lt;/p&gt;
</comment>
                            <comment id="174848" author="doug" created="Wed, 23 Nov 2016 16:27:34 +0000"  >&lt;p&gt;Another interesting data point:&lt;/p&gt;

&lt;p&gt;I flipped the from and to parameters in your test making it &quot;--from mlx5_host --to mlx4_host&quot;.  In theory, the issue should happen when I then do a write as this is almost the same thing as what you have.  However, in this case, I am not able to reproduce the problem with either read or write.&lt;/p&gt;

&lt;p&gt;I believe the difference when you flip the from and to parameters is who initiates the test.&lt;/p&gt;</comment>
                            <comment id="176091" author="mhanafi" created="Thu, 1 Dec 2016 19:18:26 +0000"  >&lt;p&gt;Here is update from mellanox engineering&lt;/p&gt;

&lt;p&gt;&quot;The failure is happening because of fast reg mr called twice, second time the mkey is not free - but it set for a check if free and its meaning the operation will succeed only if mkey is free.&lt;/p&gt;

&lt;p&gt;In order to call for second fast reg mr customer needed to do local invalidate before that call.&lt;/p&gt;

&lt;p&gt;(These operation are explained in IB spec).&quot;&lt;/p&gt;</comment>
                            <comment id="176092" author="doug" created="Thu, 1 Dec 2016 19:20:47 +0000"  >&lt;p&gt;Thank you for that feedback. &#160;I will dig into the code for the path where we are making this mistake.&lt;/p&gt;</comment>
                            <comment id="176630" author="gerrit" created="Tue, 6 Dec 2016 03:13:00 +0000"  >&lt;p&gt;Doug Oucharek (doug.s.oucharek@intel.com) uploaded a new patch: &lt;a href=&quot;https://review.whamcloud.com/24162&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/24162&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-8752&quot; title=&quot;mlx5_warn:mlx5_0:dump_cqe:257:&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-8752&quot;&gt;&lt;del&gt;LU-8752&lt;/del&gt;&lt;/a&gt; lnet: Debugging Patch&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: master&lt;br/&gt;
Current Patch Set: 1&lt;br/&gt;
Commit: 235b2ce7d07ae80e31a170cb7e424f859dce97b1&lt;/p&gt;</comment>
                            <comment id="177461" author="gerrit" created="Mon, 12 Dec 2016 17:36:23 +0000"  >&lt;p&gt;Doug Oucharek (doug.s.oucharek@intel.com) uploaded a new patch: &lt;a href=&quot;https://review.whamcloud.com/24306&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/24306&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-8752&quot; title=&quot;mlx5_warn:mlx5_0:dump_cqe:257:&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-8752&quot;&gt;&lt;del&gt;LU-8752&lt;/del&gt;&lt;/a&gt; lnet: Stop MLX5 triggering a dump_cqe&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: master&lt;br/&gt;
Current Patch Set: 1&lt;br/&gt;
Commit: 3dcf679c7f1e2aa99d99a10deb65ac2faa30a629&lt;/p&gt;</comment>
                            <comment id="177533" author="doug" created="Tue, 13 Dec 2016 05:09:33 +0000"  >&lt;p&gt;I have submitted a fix to this issue above (ignore the earlier Gerrit patch as it was just for my debugging). &#160;I have validated it on our mlx4 / mlx5 mixed cluster. &#160;I still need to validate it on OmniPath to ensure it does not cause a problem there.&lt;/p&gt;</comment>
                            <comment id="177582" author="mhanafi" created="Tue, 13 Dec 2016 15:35:23 +0000"  >&lt;p&gt;Thanks we will build and test it.&lt;/p&gt;

&lt;p&gt;&#160;&lt;/p&gt;</comment>
                            <comment id="177600" author="doug" created="Tue, 13 Dec 2016 16:45:54 +0000"  >&lt;p&gt;OmniPath is not affected by this patch as it uses FMR and not FastReg. &#160;So this change should only affect MLX5 based cards.&lt;/p&gt;

&lt;p&gt;Did you need me to push a 2.7FE patch?&lt;/p&gt;</comment>
                            <comment id="177638" author="ndauchy" created="Tue, 13 Dec 2016 21:00:58 +0000"  >&lt;p&gt;Yes, hopefully it is trivial, but we will want to use the 2.7fe patch.  Thanks!&lt;/p&gt;</comment>
                            <comment id="177683" author="doug" created="Wed, 14 Dec 2016 08:19:00 +0000"  >&lt;p&gt;The 2.7FE patch is:&#160;&lt;a href=&quot;https://review.whamcloud.com/24336&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/24336&lt;/a&gt;&lt;/p&gt;</comment>
                            <comment id="178387" author="mhanafi" created="Mon, 19 Dec 2016 17:06:16 +0000"  >&lt;p&gt;Here is some feed back from MLX.&lt;/p&gt;

&lt;p&gt;&quot;&lt;/p&gt;

&lt;p&gt;According to IB spec state should be set to Free.&lt;/p&gt;

&lt;p&gt;Under &quot;MEMORY REGION TYPES&quot; section there is an explanation of states, specifically:&lt;br/&gt;
Table: &quot;Memory Region States Summary&quot;&lt;br/&gt;
&quot;The following table summarizes the states of Memory Regions L_Keys and R_Keys and the operations allowed on each state:&quot;&lt;br/&gt;
Looking at Property / Operation Allowed: for &quot;Fast Register&quot;, there are 3 possible states: Invalid, Free, Valid.&lt;br/&gt;
The only allowed state is - Free.&lt;/p&gt;

&lt;p&gt;&quot;&lt;/p&gt;</comment>
                            <comment id="179079" author="jaylan" created="Tue, 27 Dec 2016 21:27:15 +0000"  >&lt;p&gt;Do we need this patch for Lustre 2.9 release?&lt;/p&gt;</comment>
                            <comment id="179096" author="pjones" created="Wed, 28 Dec 2016 14:17:23 +0000"  >&lt;p&gt;Jay&lt;/p&gt;

&lt;p&gt;Are you running with 2.9.x? I had thought that you were using 2.7.x...&lt;/p&gt;

&lt;p&gt;Peter&lt;/p&gt;</comment>
                            <comment id="179097" author="simmonsja" created="Wed, 28 Dec 2016 15:58:46 +0000"  >&lt;p&gt;ORNL needs it for the 2.8 release.&lt;/p&gt;</comment>
                            <comment id="179122" author="yujian" created="Wed, 28 Dec 2016 18:40:37 +0000"  >&lt;p&gt;Hi James,&lt;br/&gt;
Here is the patch for FE 2.8.x release: &lt;a href=&quot;https://review.whamcloud.com/24365&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/24365&lt;/a&gt;&lt;/p&gt;</comment>
                            <comment id="179130" author="jaylan" created="Wed, 28 Dec 2016 19:02:15 +0000"  >&lt;p&gt;Hi Peter,&lt;br/&gt;
NASA Ames is running 2.7.2+ in production, but we started testing 2.9.0 client on SLES12 SP2.&lt;/p&gt;
&lt;ul class=&quot;alternate&quot; type=&quot;square&quot;&gt;
	&lt;li&gt;jay&lt;/li&gt;
&lt;/ul&gt;
</comment>
                            <comment id="179429" author="doug" created="Tue, 3 Jan 2017 20:17:33 +0000"  >&lt;p&gt;Mahmoud:&lt;/p&gt;

&lt;p&gt;I suspect there is a bug in the Mellanox MLX5 driver somewhere. &#160;My debugging showed that the first time we use the L-key/R-key for an RDMA operation (reading from MLX5), it is being rejected as invalid. &#160;I spent a lot of time tracing the code to see if we have invoked anything &#160;which could have changed the state of the R-key/L-key and found nothing.&lt;/p&gt;

&lt;p&gt;That is why I came up with the solution of just invalidating the first key we set and advancing to the next one. &#160;I have not found an issue with invalidating a free key so am hoping this fix will not cause any problems down the road (Dmitry and Amir&apos;s concerns in the code review).&lt;/p&gt;

&lt;p&gt;Doug&lt;/p&gt;</comment>
                            <comment id="181202" author="gerrit" created="Wed, 18 Jan 2017 18:59:17 +0000"  >&lt;p&gt;Oleg Drokin (oleg.drokin@intel.com) merged in patch &lt;a href=&quot;https://review.whamcloud.com/24306/&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/24306/&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-8752&quot; title=&quot;mlx5_warn:mlx5_0:dump_cqe:257:&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-8752&quot;&gt;&lt;del&gt;LU-8752&lt;/del&gt;&lt;/a&gt; lnet: Stop MLX5 triggering a dump_cqe&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: master&lt;br/&gt;
Current Patch Set: &lt;br/&gt;
Commit: 783428b60a98874b4783f8da48c66019d68d84d6&lt;/p&gt;</comment>
                            <comment id="181212" author="pjones" created="Wed, 18 Jan 2017 19:12:31 +0000"  >&lt;p&gt;Landed for 2.10&lt;/p&gt;</comment>
                    </comments>
                <issuelinks>
                            <issuelinktype id="10120">
                    <name>Blocker</name>
                                                                <inwardlinks description="is blocked by">
                                                        </inwardlinks>
                                    </issuelinktype>
                            <issuelinktype id="10010">
                    <name>Duplicate</name>
                                                                <inwardlinks description="is duplicated by">
                                        <issuelink>
            <issuekey id="40465">LU-8693</issuekey>
        </issuelink>
                            </inwardlinks>
                                    </issuelinktype>
                            <issuelinktype id="10011">
                    <name>Related</name>
                                            <outwardlinks description="is related to">
                                                        </outwardlinks>
                                                                <inwardlinks description="is related to">
                                                        </inwardlinks>
                                    </issuelinktype>
                    </issuelinks>
                <attachments>
                            <attachment id="23868" name="netdebug.mlx4host.gz" size="18362" author="mhanafi" created="Fri, 28 Oct 2016 23:17:35 +0000"/>
                            <attachment id="23869" name="netdebug.mlx5host.gz" size="28610" author="mhanafi" created="Fri, 28 Oct 2016 23:17:35 +0000"/>
                    </attachments>
                <subtasks>
                    </subtasks>
                <customfields>
                                                                                                                                                                                            <customfield id="customfield_10890" key="com.atlassian.jira.plugins.jira-development-integration-plugin:devsummary">
                        <customfieldname>Development</customfieldname>
                        <customfieldvalues>
                            
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        <customfield id="customfield_10390" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                        <customfieldname>Rank</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>1|hzyt6f:</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10090" key="com.pyxis.greenhopper.jira:gh-global-rank">
                        <customfieldname>Rank (Obsolete)</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>9223372036854775807</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                                                                                            <customfield id="customfield_10060" key="com.atlassian.jira.plugin.system.customfieldtypes:select">
                        <customfieldname>Severity</customfieldname>
                        <customfieldvalues>
                                <customfieldvalue key="10021"><![CDATA[2]]></customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        </customfields>
    </item>
</channel>
</rss>