<!-- 
RSS generated by JIRA (9.4.14#940014-sha1:734e6822bbf0d45eff9af51f82432957f73aa32c) at Sat Feb 10 02:19:45 UTC 2024

It is possible to restrict the fields that are returned in this document by specifying the 'field' parameter in your request.
For example, to request only the issue key and summary append 'field=key&field=summary' to the URL of your request.
-->
<rss version="0.92" >
<channel>
    <title>Whamcloud Community JIRA</title>
    <link>https://jira.whamcloud.com</link>
    <description>This file is an XML representation of an issue</description>
    <language>en-us</language>    <build-info>
        <version>9.4.14</version>
        <build-number>940014</build-number>
        <build-date>05-12-2023</build-date>
    </build-info>


<item>
            <title>[LU-8693] ko2iblnd receiving IB_WC_MW_BIND_ERR errors.</title>
                <link>https://jira.whamcloud.com/browse/LU-8693</link>
                <project id="10000" key="LU">Lustre</project>
                    <description>&lt;p&gt;Moving to our production Power8 system running an MOFED stack we are seeing a new IB error in the ko2iblnd that wasn&apos;t encountered before.&lt;/p&gt;

&lt;p&gt;[  170.597561] mlx5_warn:mlx5_0:dump_cqe:257:(pid 8738): dump error cqe&lt;br/&gt;
[  170.597620] mlx5_warn:mlx5_0:dump_cqe:257:(pid 8714): dump error cqe&lt;br/&gt;
[  170.597622] 00000000 00000000 00000000 00000000&lt;br/&gt;
[  170.597623] 00000000 00000000 00000000 00000000&lt;br/&gt;
[  170.597625] 00000000 00000000 00000000 00000000&lt;br/&gt;
[  170.597626] 00000000 08007806 25000039 0642b3d2&lt;br/&gt;
[  170.597651] LNet: 8714:0:(o2iblnd_cb.c:3433:kiblnd_complete()) FastReg failed: 6&lt;br/&gt;
[  170.597728] LNet: 8713:0:(o2iblnd_cb.c:3444:kiblnd_complete()) RDMA (tx: c000003c6a78c5a8) failed: 5&lt;br/&gt;
[  170.598355] 00000000 00000000 00000000 00000000&lt;br/&gt;
[  170.598403] 00000000 00000000 00000000 00000000&lt;br/&gt;
[  170.599245] powernv-cpufreq: CPU 104 on Chip 1 has Pmax restored to 0&lt;br/&gt;
[  170.599647] LNet: 8714:0:(o2iblnd_cb.c:990:kiblnd_tx_complete()) Tx -&amp;gt; 10.39.232.11@o2ib6 cookie 0x63e sending 1 waiting 0: failed 5&lt;br/&gt;
[  170.599651] LNet: 8714:0:(o2iblnd_cb.c:990:kiblnd_tx_complete()) Skipped 2 previous similar messages&lt;br/&gt;
[  170.599654] LNet: 8713:0:(o2iblnd_cb.c:1934:kiblnd_close_conn_locked()) Closing conn to 10.39.232.11@o2ib6: error -5(waiting)&lt;br/&gt;
[  170.599669] LustreError: 8714:0:(events.c:201:client_bulk_callback()) event type 1, status -5, desc c000003c62cf5c00&lt;br/&gt;
[  170.599675] Lustre: 8896:0:(client.c:2063:ptlrpc_expire_one_request()) @@@ Request sent has failed due to network error: &lt;span class=&quot;error&quot;&gt;&amp;#91;sent 1476124274/real 1476124274&amp;#93;&lt;/span&gt;  req@c000003c4e340000 x1547828424878916/t0(0) o4-&amp;gt;atlastds-OST0035-osc-c000001fc5b75000@10.36.226.69@o2ib:6/4 lens 608/448 e 0 to 1 dl 1476124841 ref 2 fl Rpc:X/0/ffffffff rc 0/-1&lt;br/&gt;
[  170.599681] Lustre: atlastds-OST0035-osc-c000001fc5b75000: Connection to atlastds-OST0035 (at 10.36.226.69@o2ib) was lost; in progress operations using this service will wait for recovery to complete&lt;br/&gt;
[  170.611219] 00000000 00000000 00000000 00000000&lt;br/&gt;
[  170.612270] 00000000 08007806 2500003a 06789cd2&lt;br/&gt;
[  170.613866] LustreError: 8737:0:(events.c:201:client_bulk_callback()) event type 1, status -5, desc c000001fb98c0400&lt;/p&gt;</description>
                <environment>Power8 running RHEL with a MOFED 3.3 stack.</environment>
        <key id="40465">LU-8693</key>
            <summary>ko2iblnd receiving IB_WC_MW_BIND_ERR errors.</summary>
                <type id="1" iconUrl="https://jira.whamcloud.com/secure/viewavatar?size=xsmall&amp;avatarId=11303&amp;avatarType=issuetype">Bug</type>
                                            <priority id="2" iconUrl="https://jira.whamcloud.com/images/icons/priorities/critical.svg">Critical</priority>
                        <status id="5" iconUrl="https://jira.whamcloud.com/images/icons/statuses/resolved.png" description="A resolution has been taken, and it is awaiting verification by reporter. From here issues are either reopened, or are closed.">Resolved</status>
                    <statusCategory id="3" key="done" colorName="success"/>
                                    <resolution id="3">Duplicate</resolution>
                                        <assignee username="ashehata">Amir Shehata</assignee>
                                    <reporter username="simmonsja">James A Simmons</reporter>
                        <labels>
                    </labels>
                <created>Tue, 11 Oct 2016 15:28:03 +0000</created>
                <updated>Thu, 12 Oct 2017 01:20:08 +0000</updated>
                            <resolved>Tue, 31 Jan 2017 18:00:02 +0000</resolved>
                                    <version>Lustre 2.8.0</version>
                    <version>Lustre 2.9.0</version>
                                                        <due></due>
                            <votes>0</votes>
                                    <watches>9</watches>
                                                                            <comments>
                            <comment id="169175" author="pjones" created="Tue, 11 Oct 2016 17:16:30 +0000"  >&lt;p&gt;Doug&lt;/p&gt;

&lt;p&gt;Could you please advise on this one?&lt;/p&gt;

&lt;p&gt;Peter&lt;/p&gt;</comment>
                            <comment id="169188" author="doug" created="Tue, 11 Oct 2016 17:59:05 +0000"  >&lt;p&gt;James, do you know if this is using FastReg or the older FMR?&lt;/p&gt;</comment>
                            <comment id="169192" author="simmonsja" created="Tue, 11 Oct 2016 18:12:01 +0000"  >&lt;p&gt;FastReg&lt;/p&gt;</comment>
                            <comment id="169223" author="doug" created="Tue, 11 Oct 2016 22:36:22 +0000"  >&lt;p&gt;Is this only happening with Power8 to/from x86?  &lt;/p&gt;</comment>
                            <comment id="169484" author="simmonsja" created="Thu, 13 Oct 2016 17:23:38 +0000"  >&lt;p&gt;That&apos;s all we have.&lt;/p&gt;</comment>
                            <comment id="169504" author="doug" created="Thu, 13 Oct 2016 18:13:10 +0000"  >&lt;p&gt;It would be very useful to know under what conditions MOFED returns this error.  Without access to the MOFED source or the firmware source (if the error is generated by firmware), I cannot determine that.&lt;/p&gt;

&lt;p&gt;Do you have a support ticket opened with Mellanox for this?  If they can provide us with a list of conditions which generate this error, we would have something to work with to debug what we are doing wrong in o2iblnd.&lt;/p&gt;</comment>
                            <comment id="169537" author="doug" created="Thu, 13 Oct 2016 21:19:33 +0000"  >&lt;p&gt;The only reference I can find to IB_WC_MW_BIND_ERR in the upstream OFED code is in Linux/drivers/infiniband/hw/mlx5/cq.c, routine: mlx5_handle_error_cqe():&lt;/p&gt;

&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;...
switch (cqe-&amp;gt;syndrome) {
...
        case MLX5_CQE_SYNDROME_MW_BIND_ERR:
                wc-&amp;gt;status = IB_WC_MW_BIND_ERR;
                break;
...
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;I cannot find any other reference to MLX5_CQE_SYNDROME_MW_BIND_ERR so I am assuming this comes from the MLX5 driver or firmware.&lt;/p&gt;</comment>
                            <comment id="171916" author="mhanafi" created="Tue, 1 Nov 2016 16:51:02 +0000"  >&lt;p&gt;Does OFED reproduce this error?&lt;/p&gt;</comment>
                            <comment id="171921" author="doug" created="Tue, 1 Nov 2016 17:11:55 +0000"  >&lt;p&gt;That&apos;s a good question.  James?  Have you tried the upstream OFED for this?&lt;/p&gt;</comment>
                            <comment id="173363" author="bhoagland" created="Fri, 11 Nov 2016 19:34:30 +0000"  >&lt;p&gt;Hi &lt;a href=&quot;https://jira.whamcloud.com/secure/ViewProfile.jspa?name=simmonsja&quot; class=&quot;user-hover&quot; rel=&quot;simmonsja&quot;&gt;simmonsja&lt;/a&gt;,&lt;br/&gt;
Any thoughts on Doug and Mahmoud&apos;s OFED query?&lt;/p&gt;</comment>
                            <comment id="173364" author="simmonsja" created="Fri, 11 Nov 2016 19:52:02 +0000"  >&lt;p&gt;We only use OFED 3.12 in our production systems. Also for our Cray systems we don&apos;t enable map_on_demand so we don&apos;t see any problems.&lt;/p&gt;</comment>
                            <comment id="178164" author="doug" created="Fri, 16 Dec 2016 23:04:52 +0000"  >&lt;p&gt;I believe this bug is addressed by the fix to &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-8752&quot; title=&quot;mlx5_warn:mlx5_0:dump_cqe:257:&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-8752&quot;&gt;&lt;del&gt;LU-8752&lt;/del&gt;&lt;/a&gt;.&lt;/p&gt;</comment>
                    </comments>
                <issuelinks>
                            <issuelinktype id="10010">
                    <name>Duplicate</name>
                                            <outwardlinks description="duplicates">
                                        <issuelink>
            <issuekey id="40985">LU-8752</issuekey>
        </issuelink>
                            </outwardlinks>
                                                        </issuelinktype>
                            <issuelinktype id="10011">
                    <name>Related</name>
                                            <outwardlinks description="is related to ">
                                        <issuelink>
            <issuekey id="29170">LU-6387</issuekey>
        </issuelink>
                            </outwardlinks>
                                                        </issuelinktype>
                    </issuelinks>
                <attachments>
                    </attachments>
                <subtasks>
                    </subtasks>
                <customfields>
                                                                                                                                                                                            <customfield id="customfield_10890" key="com.atlassian.jira.plugins.jira-development-integration-plugin:devsummary">
                        <customfieldname>Development</customfieldname>
                        <customfieldvalues>
                            
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        <customfield id="customfield_10390" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                        <customfieldname>Rank</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>1|hzyraf:</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10090" key="com.pyxis.greenhopper.jira:gh-global-rank">
                        <customfieldname>Rank (Obsolete)</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>9223372036854775807</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                                                                                            <customfield id="customfield_10060" key="com.atlassian.jira.plugin.system.customfieldtypes:select">
                        <customfieldname>Severity</customfieldname>
                        <customfieldvalues>
                                <customfieldvalue key="10022"><![CDATA[3]]></customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        </customfields>
    </item>
</channel>
</rss>