<?xml version="1.0" encoding="UTF-8"?>
<!-- 
RSS generated by JIRA (9.4.14#940014-sha1:734e6822bbf0d45eff9af51f82432957f73aa32c) at Sat Feb 10 02:06:11 UTC 2024

It is possible to restrict the fields that are returned in this document by specifying the 'field' parameter in your request.
For example, to request only the issue key and summary append 'field=key&field=summary' to the URL of your request.
-->
<rss version="0.92" >
<channel>
    <title>Whamcloud Community JIRA</title>
    <link>https://jira.whamcloud.com</link>
    <description>This file is an XML representation of an issue</description>
<language>en-us</language>
    <build-info>
        <version>9.4.14</version>
        <build-number>940014</build-number>
        <build-date>05-12-2023</build-date>
    </build-info>


<item>
            <title>[LU-7124] MLX5: Limit hit in cap.max_send_wr</title>
                <link>https://jira.whamcloud.com/browse/LU-7124</link>
                <project id="10000" key="LU">Lustre</project>
                    <description>&lt;p&gt;Running Lustre with MLX5&lt;/p&gt;

&lt;p&gt;We were trying to increase O2IBLND&apos;s peer_credits to 32 on MLX5.  Here is the problematic code:&lt;/p&gt;
&lt;div class=&quot;code panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;codeContent panelContent&quot;&gt;
&lt;pre class=&quot;code-java&quot;&gt;        init_qp_attr-&amp;gt;event_handler = kiblnd_qp_event;
        init_qp_attr-&amp;gt;qp_context = conn;
        init_qp_attr-&amp;gt;cap.max_send_wr = IBLND_SEND_WRS(version);
        init_qp_attr-&amp;gt;cap.max_recv_wr = IBLND_RECV_WRS(version);
        init_qp_attr-&amp;gt;cap.max_send_sge = 1;
        init_qp_attr-&amp;gt;cap.max_recv_sge = 1;
        init_qp_attr-&amp;gt;sq_sig_type = IB_SIGNAL_REQ_WR;
        init_qp_attr-&amp;gt;qp_type = IB_QPT_RC;
        init_qp_attr-&amp;gt;send_cq = cq;
        init_qp_attr-&amp;gt;recv_cq = cq;

        rc = rdma_create_qp(cmid, conn-&amp;gt;ibc_hdev-&amp;gt;ibh_pd, init_qp_attr);

#define IBLND_SEND_WRS(v)          ((IBLND_RDMA_FRAGS(v) + 1) * IBLND_CONCURRENT_SENDS(v))

#define IBLND_RDMA_FRAGS(v)        ((v) == IBLND_MSG_VERSION_1 ? \
                                     IBLND_MAX_RDMA_FRAGS : IBLND_CFG_RDMA_FRAGS)

#define IBLND_CFG_RDMA_FRAGS       (*kiblnd_tunables.kib_map_on_demand != 0 ? \
                                    *kiblnd_tunables.kib_map_on_demand :      \
                                     IBLND_MAX_RDMA_FRAGS)  &lt;span class=&quot;code-comment&quot;&gt;/* max # of fragments configured by user */&lt;/span&gt;

#define IBLND_MAX_RDMA_FRAGS         LNET_MAX_IOV           &lt;span class=&quot;code-comment&quot;&gt;/* max # of fragments supported */&lt;/span&gt;

&lt;span class=&quot;code-comment&quot;&gt;/** limit on the number of fragments in discontiguous MDs */&lt;/span&gt;
#define LNET_MAX_IOV    256
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;Basically, when setting peer_credits to 32 then&lt;/p&gt;

&lt;div class=&quot;code panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;codeContent panelContent&quot;&gt;
&lt;pre class=&quot;code-java&quot;&gt;init_qp_attr-&amp;gt;cap.max_send_wr = 8224

[root@wt-2-00 ~]# ibv_devinfo -v | grep max_qp_wr
 max_qp_wr:   16384
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;API returns -12 (out of memory)&lt;/p&gt;

&lt;p&gt;peer_credits 16 == 4112 seems to work.&lt;/p&gt;

&lt;p&gt;We&apos;re running on MOFED 3.0&lt;/p&gt;

&lt;p&gt;Is there any limitation that we&apos;re hitting on the MLX side? As far as I know MLX4 works with peer_credits set to 32.&lt;/p&gt;

&lt;p&gt;Full device info:&lt;/p&gt;
&lt;div class=&quot;code panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;codeContent panelContent&quot;&gt;
&lt;pre class=&quot;code-java&quot;&gt;[wt2user1@wildcat2 ~]$ ibv_devinfo -v
hca_id: mlx5_0
        transport:                      InfiniBand (0)
        fw_ver:                         12.100.6440
        node_guid:                      e41d:2d03:0060:7652
        sys_image_guid:                 e41d:2d03:0060:7652
        vendor_id:                      0x02c9
        vendor_part_id:                 4115
        hw_ver:                         0x0
        board_id:                       MT_2180110032
        phys_port_cnt:                  1
        max_mr_size:                    0xffffffffffffffff
        page_size_cap:                  0xfffff000
        max_qp:                         262144
        max_qp_wr:                      16384
        device_cap_flags:               0x40509c36
                                        BAD_PKEY_CNTR
                                        BAD_QKEY_CNTR
                                        AUTO_PATH_MIG
                                        CHANGE_PHY_PORT
                                        PORT_ACTIVE_EVENT
                                        SYS_IMAGE_GUID
                                        RC_RNR_NAK_GEN
                                        XRC
                                        Unknown flags: 0x40408000
        device_cap_exp_flags:           0x5020007100000000
                                        EXP_DC_TRANSPORT
                                        EXP_MEM_MGT_EXTENSIONS
                                        EXP_CROSS_CHANNEL
                                        EXP_MR_ALLOCATE
                                        EXT_ATOMICS
                                        EXT_SEND NOP
                                        EXP_UMR
        max_sge:                        30
        max_sge_rd:                     0
        max_cq:                         16777216
        max_cqe:                        4194303
        max_mr:                         16777216
        max_pd:                         16777216
        max_qp_rd_atom:                 16
        max_ee_rd_atom:                 0
        max_res_rd_atom:                4194304
        max_qp_init_rd_atom:            16
        max_ee_init_rd_atom:            0
        atomic_cap:                     ATOMIC_HCA_REPLY_BE (64)
        log atomic arg sizes (mask)             3c
        max fetch and add bit boundary  64
        log max atomic inline           5
        max_ee:                         0
        max_rdd:                        0
        max_mw:                         0
        max_raw_ipv6_qp:                0
        max_raw_ethy_qp:                0
        max_mcast_grp:                  2097152
        max_mcast_qp_attach:            48
        max_total_mcast_qp_attach:      100663296
        max_ah:                         2147483647
        max_fmr:                        0
        max_srq:                        8388608
        max_srq_wr:                     16383
        max_srq_sge:                    31
        max_pkeys:                      128
        local_ca_ack_delay:             16
        hca_core_clock:                 0
        max_klm_list_size:              65536
        max_send_wqe_inline_klms:       20
        max_umr_recursion_depth:        4
        max_umr_stride_dimension:       1
        general_odp_caps:
        rc_odp_caps:
                                        NO SUPPORT
        uc_odp_caps:
                                        NO SUPPORT
        ud_odp_caps:
                                        NO SUPPORT
        dc_odp_caps:
                                        NO SUPPORT
        xrc_odp_caps:
                                        NO SUPPORT
        raw_eth_odp_caps:
                                        NO SUPPORT
        max_dct:                        262144
                port:   1
                        state:                  PORT_ACTIVE (4)
                        max_mtu:                4096 (5)
                        active_mtu:             4096 (5)
                        sm_lid:                 19
                        port_lid:               1
                        port_lmc:               0x00
                        link_layer:             InfiniBand
                        max_msg_sz:             0x40000000
                        port_cap_flags:         0x2651e848
                        max_vl_num:             4 (3)
                        bad_pkey_cntr:          0x0
                        qkey_viol_cntr:         0x0
                        sm_sl:                  0
                        pkey_tbl_len:           128
                        gid_tbl_len:            8
                        subnet_timeout:         18
                        init_type_reply:        0
                        active_width:           4X (2)
                        active_speed:           25.0 Gbps (32)
                        phys_state:             LINK_UP (5)
                        GID[  0]:               fe80:0000:0000:0000:e41d:2d03:0060:7652
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;</description>
                <environment></environment>
        <key id="32012">LU-7124</key>
            <summary>MLX5: Limit hit in cap.max_send_wr</summary>
                <type id="1" iconUrl="https://jira.whamcloud.com/secure/viewavatar?size=xsmall&amp;avatarId=11303&amp;avatarType=issuetype">Bug</type>
                                            <priority id="4" iconUrl="https://jira.whamcloud.com/images/icons/priorities/minor.svg">Minor</priority>
                        <status id="5" iconUrl="https://jira.whamcloud.com/images/icons/statuses/resolved.png" description="A resolution has been taken, and it is awaiting verification by reporter. From here issues are either reopened, or are closed.">Resolved</status>
                    <statusCategory id="3" key="done" colorName="success"/>
                                    <resolution id="1">Fixed</resolution>
                                        <assignee username="ashehata">Amir Shehata</assignee>
                                    <reporter username="ashehata">Amir Shehata</reporter>
                        <labels>
                    </labels>
                <created>Wed, 9 Sep 2015 17:14:00 +0000</created>
                <updated>Tue, 24 Nov 2020 19:33:58 +0000</updated>
                            <resolved>Mon, 14 Mar 2016 03:13:37 +0000</resolved>
                                                    <fixVersion>Lustre 2.9.0</fixVersion>
                                        <due></due>
                            <votes>0</votes>
                                    <watches>18</watches>
                                                                            <comments>
                            <comment id="126815" author="ashehata" created="Wed, 9 Sep 2015 19:15:37 +0000"  >&lt;p&gt;Here is the reply I got from a Mellanox engineer:&lt;/p&gt;

&lt;p&gt;Hi,&lt;/p&gt;

&lt;p&gt;I sent in the past an explanation to this list and I am going to repeat it.&lt;/p&gt;

&lt;p&gt;The number reported for max_qp_wr is the maximum value the HCA supports. But it is not guaranteed that this maximum is supported for any configuration of a QP. For example, the number of send SGEs and the transport service can affect this max value.&lt;/p&gt;

&lt;p&gt;From the spec:&lt;/p&gt;

&lt;p&gt;11.2.1.2 QUERY HCA&lt;br/&gt;
Description:&lt;br/&gt;
Returns the attributes for the specified HCA.&lt;br/&gt;
The maximum values defined in this section are guaranteed not-to-exceed&lt;br/&gt;
values. It is possible for an implementation to allocate some HCA&lt;br/&gt;
resources from the same space. In that case, the maximum values returned&lt;br/&gt;
are not guaranteed for all of those resources simultaneously&lt;/p&gt;

&lt;p&gt;Mlx5 supported devices work as described above. Mlx4 supported devices has some flexibility allowing it to user larger work queues so this is why you can define 16K WRs for mlx4 and for mlx5 you can do only 8K (in your specific case).&lt;/p&gt;</comment>
                            <comment id="129772" author="simmonsja" created="Thu, 8 Oct 2015 01:05:36 +0000"  >&lt;p&gt;I found if I used map_on_demand you can increase the peer_credits for mlx5. Now that ko2iblnd no longer supports PMR you can&apos;t do this anymore.  Now if we implement Fast Registration API we could possible push the peer_credits higher using map_on_demand again. As a bonus Fast Registration API is supported with mlx4 so it can be tested more broadly.&lt;/p&gt;</comment>
                            <comment id="141501" author="gerrit" created="Mon, 8 Feb 2016 13:04:50 +0000"  >&lt;p&gt;Dmitry Eremin (dmitry.eremin@intel.com) uploaded a new patch: &lt;a href=&quot;http://review.whamcloud.com/18347&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/18347&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-7124&quot; title=&quot;MLX5: Limit hit in cap.max_send_wr&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-7124&quot;&gt;&lt;del&gt;LU-7124&lt;/del&gt;&lt;/a&gt; o2iblnd: limit cap.max_send_wr for MLX5&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: master&lt;br/&gt;
Current Patch Set: 1&lt;br/&gt;
Commit: e11441614b76bbc2887cc28fd4dfae7c2128963f&lt;/p&gt;</comment>
                            <comment id="141735" author="shadow" created="Wed, 10 Feb 2016 05:07:55 +0000"  >&lt;p&gt;first of all, don&apos;t file a new ticket when an older one exists. Please start your work from a search.&lt;/p&gt;

&lt;p&gt;second - it&apos;s not a solution. If someone want to avoid ENOMEM in that case, it may do via tunable, but you silence a horror that settings. &lt;/p&gt;

&lt;p&gt;Real fix in that case will implementing a shared receive queue for an o2iblnd and new memory registration model which able to dramatically reduce a number work requests.&lt;/p&gt;
</comment>
                            <comment id="145069" author="chunteraa" created="Wed, 9 Mar 2016 21:23:00 +0000"  >&lt;p&gt;Is the problem memory fragmentation as in &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-5718&quot; title=&quot;RDMA too fragmented with router&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-5718&quot;&gt;&lt;del&gt;LU-5718&lt;/del&gt;&lt;/a&gt; or matching o2iblnd settings between client &amp;amp; server ?&lt;/p&gt;</comment>
                            <comment id="145074" author="simmonsja" created="Wed, 9 Mar 2016 22:17:20 +0000"  >&lt;p&gt;For me the problem was not being able to set my peer credit setting higher than 16.&lt;/p&gt;</comment>
                            <comment id="145372" author="gerrit" created="Mon, 14 Mar 2016 02:42:09 +0000"  >&lt;p&gt;Oleg Drokin (oleg.drokin@intel.com) merged in patch &lt;a href=&quot;http://review.whamcloud.com/18347/&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/18347/&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-7124&quot; title=&quot;MLX5: Limit hit in cap.max_send_wr&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-7124&quot;&gt;&lt;del&gt;LU-7124&lt;/del&gt;&lt;/a&gt; o2iblnd: limit cap.max_send_wr for MLX5&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: master&lt;br/&gt;
Current Patch Set: &lt;br/&gt;
Commit: 4083806828a94ee09c2dadf2cca8c224547d5ebc&lt;/p&gt;</comment>
                            <comment id="148236" author="sergey" created="Fri, 8 Apr 2016 15:43:24 +0000"  >&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;[49672.067906] mlx5_ib:mlx5_0:calc_sq_size:485:(pid 8297): wqe_size 192
[49672.067908] mlx5_ib:mlx5_0:calc_sq_size:507:(pid 8297): wqe count(65536) exceeds limits(16384)
[49672.067910] mlx5_ib:mlx5_0:create_kernel_qp:1051:(pid 8297): err -12
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;
&lt;p&gt;Don&apos;t think that &lt;a href=&quot;http://review.whamcloud.com/18347/&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/18347/&lt;/a&gt; is right solution.&lt;br/&gt;
It hides information that mlx5 doesn&apos;t support peer_credits &amp;gt; 16.&lt;br/&gt;
At least warning should be added there.&lt;/p&gt;

&lt;p&gt;Moreover patch leads to several memory free/allocation and mutex locking/unlocking inside rdma_create_qp-&amp;gt;mlx5_ib_create_qp...:&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;mlx5_ib_create_qp: attrx = kzalloc(sizeof(*attrx), GFP_KERNEL); 
__create_qp: qp = kzalloc(sizeof(*qp), GFP_KERNEL);
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;Also there is a chance that ENOMEM could be returned in case of low system memory. I mean not from calc_sq_size.&lt;br/&gt;
In such a case it is a bad idea to alloc and free small pieces of memory.&lt;/p&gt;</comment>
                    </comments>
                <issuelinks>
                            <issuelinktype id="10011">
                    <name>Related</name>
                                            <outwardlinks description="is related to ">
                                        <issuelink>
            <issuekey id="27145">LU-5783</issuekey>
        </issuelink>
                            </outwardlinks>
                                                                <inwardlinks description="is related to">
                                        <issuelink>
            <issuekey id="49201">LU-10213</issuekey>
        </issuelink>
            <issuelink>
            <issuekey id="57232">LU-12901</issuekey>
        </issuelink>
                            </inwardlinks>
                                    </issuelinktype>
                    </issuelinks>
                <attachments>
                    </attachments>
                <subtasks>
                    </subtasks>
                <customfields>
                                                                                                                                                                                            <customfield id="customfield_10890" key="com.atlassian.jira.plugins.jira-development-integration-plugin:devsummary">
                        <customfieldname>Development</customfieldname>
                        <customfieldvalues>
                            
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        <customfield id="customfield_10390" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                        <customfieldname>Rank</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>1|hzxmzr:</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10090" key="com.pyxis.greenhopper.jira:gh-global-rank">
                        <customfieldname>Rank (Obsolete)</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>9223372036854775807</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                                                                                            <customfield id="customfield_10060" key="com.atlassian.jira.plugin.system.customfieldtypes:select">
                        <customfieldname>Severity</customfieldname>
                        <customfieldvalues>
                                <customfieldvalue key="10022"><![CDATA[3]]></customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        </customfields>
    </item>
</channel>
</rss>