<!-- 
RSS generated by JIRA (9.4.14#940014-sha1:734e6822bbf0d45eff9af51f82432957f73aa32c) at Sat Feb 10 02:34:29 UTC 2024

It is possible to restrict the fields that are returned in this document by specifying the 'field' parameter in your request.
For example, to request only the issue key and summary append 'field=key&field=summary' to the URL of your request.
-->
<rss version="0.92" >
<channel>
    <title>Whamcloud Community JIRA</title>
    <link>https://jira.whamcloud.com</link>
    <description>This file is an XML representation of an issue</description>
    <language>en-us</language>
    <build-info>
        <version>9.4.14</version>
        <build-number>940014</build-number>
        <build-date>05-12-2023</build-date>
    </build-info>


<item>
            <title>[LU-10373] LNet OPA Performance Drop</title>
                <link>https://jira.whamcloud.com/browse/LU-10373</link>
                <project id="10000" key="LU">Lustre</project>
                    <description>&lt;p&gt;A drop in OPA LNet bandwidth has occurred since Lustre 2.10.0.&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;# lctl --version
lctl 2.10.0

----------------------------------------------------------
Running test: lst add_test --batch rperf --concurrency 32 --distribute 1:1 --from clients --to servers brw read size=1M
Client Read RPC/s: 23426.6428571429
Client Write RPC/s: 11714.1428571429
Client Read MiB/s: 11713.6164285714
Client Write MiB/s: 1.78714285714286
----------------------------------------------------------
Running test: lst add_test --batch rperf --concurrency 64 --distribute 1:1 --from clients --to servers brw read size=1M
Client Read RPC/s: 23577.5714285714
Client Write RPC/s: 11790.2857142857
Client Read MiB/s: 11789.2135714286
Client Write MiB/s: 1.79928571428571
----------------------------------------------------------
Running test: lst add_test --batch rperf --concurrency 128 --distribute 1:1 --from clients --to servers brw read size=1M
Client Read RPC/s: 23595.5714285714
Client Write RPC/s: 11798.2857142857
Client Read MiB/s: 11799.1114285714
Client Write MiB/s: 1.8
----------------------------------------------------------
Running test: lst add_test --batch wperf --concurrency 32 --distribute 1:1 --from clients --to servers brw write size=1M
Client Read RPC/s: 21268.3571428571
Client Write RPC/s: 10635.2142857143
Client Read MiB/s: 1.62357142857143
Client Write MiB/s: 10634.2071428571
----------------------------------------------------------
Running test: lst add_test --batch wperf --concurrency 64 --distribute 1:1 --from clients --to servers brw write size=1M
Client Read RPC/s: 22236.9285714286
Client Write RPC/s: 11118.9285714286
Client Read MiB/s: 1.69714285714286
Client Write MiB/s: 11118.7914285714
----------------------------------------------------------
Running test: lst add_test --batch wperf --concurrency 128 --distribute 1:1 --from clients --to servers brw write size=1M
Client Read RPC/s: 22178.6428571429
Client Write RPC/s: 11087.2142857143
Client Read MiB/s: 1.69142857142857
Client Write MiB/s: 11089.0557142857


&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;# lctl --version
lctl 2.10.55_127_g063a83a

----------------------------------------------------------
Running test: lst add_test --batch rperf --concurrency 32 --distribute 1:1 --from clients --to servers brw read size=1M
Client Read RPC/s: 16879.5
Client Write RPC/s: 8441.14285714286
Client Read MiB/s: 8439.57857142857
Client Write MiB/s: 1.28785714285714
----------------------------------------------------------
Running test: lst add_test --batch rperf --concurrency 64 --distribute 1:1 --from clients --to servers brw read size=1M
Client Read RPC/s: 21844
Client Write RPC/s: 10923.2857142857
Client Read MiB/s: 10922.4635714286
Client Write MiB/s: 1.66714285714286
----------------------------------------------------------
Running test: lst add_test --batch rperf --concurrency 128 --distribute 1:1 --from clients --to servers brw read size=1M
Client Read RPC/s: 21928.4285714286
Client Write RPC/s: 10964.7857142857
Client Read MiB/s: 10965.17
Client Write MiB/s: 1.67357142857143
----------------------------------------------------------
Running test: lst add_test --batch wperf --concurrency 32 --distribute 1:1 --from clients --to servers brw write size=1M
Client Read RPC/s: 17288.2142857143
Client Write RPC/s: 8645.07142857143
Client Read MiB/s: 1.32
Client Write MiB/s: 8643.84928571428
----------------------------------------------------------
Running test: lst add_test --batch wperf --concurrency 64 --distribute 1:1 --from clients --to servers brw write size=1M
Client Read RPC/s: 18382.8571428571
Client Write RPC/s: 9192.92857142857
Client Read MiB/s: 1.40214285714286
Client Write MiB/s: 9191.25285714285
----------------------------------------------------------
Running test: lst add_test --batch wperf --concurrency 128 --distribute 1:1 --from clients --to servers brw write size=1M
Client Read RPC/s: 14966.3571428571
Client Write RPC/s: 7486.07142857143
Client Read MiB/s: 1.14285714285714
Client Write MiB/s: 7482.79071428571


&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;
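&lt;p&gt;For context, each &quot;Running test&quot; line above is one step of a standard lnet_selftest session; a minimal sketch of such a session is shown below (the client NID 10.2.0.41@o2ib1 is only a placeholder):&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;# minimal lnet_selftest session sketch; NIDs are illustrative
modprobe lnet_selftest
export LST_SESSION=$$
lst new_session rw_perf
lst add_group servers 10.2.0.40@o2ib1
lst add_group clients 10.2.0.41@o2ib1
lst add_batch rperf
lst add_test --batch rperf --concurrency 32 --distribute 1:1 --from clients --to servers brw read size=1M
lst run rperf
lst stat clients servers
lst end_session

&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;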
&lt;p&gt;LNet configuration is:&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;# cat /etc/lnet.conf
net:
    - net type: o2ib1
      local NI(s):
        - nid: 10.2.0.40@o2ib1
          interfaces:
              0: ib0
          tunables:
              peer_timeout: 180
              peer_credits: 128
              peer_buffer_credits: 0
              credits: 1024
          lnd tunables:
              peercredits_hiw: 64
              map_on_demand: 256
              concurrent_sends: 256
              fmr_pool_size: 2048
              fmr_flush_trigger: 512
              fmr_cache: 1
              ntx: 2048
              conns_per_peer: 2
          CPT: &quot;[0,1]&quot;

&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;
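&lt;p&gt;As a minimal sketch (assuming the stock lnetctl utility), this YAML can be re-applied and cross-checked by hand; the show command should echo back the tunables listed above:&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;# apply the YAML configuration and dump the configured NIs with their lnd tunables
lnetctl import /etc/lnet.conf
lnetctl net show -v

&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;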
&lt;p&gt;OPA driver configuration is:&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;# cat /etc/modprobe.d/hfi1.conf
options hfi1 piothreshold=0 sge_copy_mode=2 wss_threshold=70
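# (sketch) once hfi1 is loaded, the values actually in effect can be read back from sysfs:
#   cat /sys/module/hfi1/parameters/piothreshold
#   cat /sys/module/hfi1/parameters/sge_copy_mode
#   cat /sys/module/hfi1/parameters/wss_threshold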

&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;</description>
                <environment>CentOS 7.3</environment>
        <key id="49697">LU-10373</key>
            <summary>LNet OPA Performance Drop</summary>
                <type id="1" iconUrl="https://jira.whamcloud.com/secure/viewavatar?size=xsmall&amp;avatarId=11303&amp;avatarType=issuetype">Bug</type>
                                            <priority id="3" iconUrl="https://jira.whamcloud.com/images/icons/priorities/major.svg">Major</priority>
                        <status id="5" iconUrl="https://jira.whamcloud.com/images/icons/statuses/resolved.png" description="A resolution has been taken, and it is awaiting verification by reporter. From here issues are either reopened, or are closed.">Resolved</status>
                    <statusCategory id="3" key="done" colorName="success"/>
                                    <resolution id="1">Fixed</resolution>
                                        <assignee username="ashehata">Amir Shehata</assignee>
                                    <reporter username="iziemba">Ian Ziemba</reporter>
                        <labels>
                    </labels>
                <created>Tue, 12 Dec 2017 17:08:24 +0000</created>
                <updated>Wed, 17 Jan 2018 16:36:04 +0000</updated>
                            <resolved>Wed, 17 Jan 2018 16:35:42 +0000</resolved>
                                                    <fixVersion>Lustre 2.11.0</fixVersion>
                                        <due></due>
                            <votes>0</votes>
                                    <watches>9</watches>
                                                                            <comments>
                            <comment id="216071" author="iziemba" created="Tue, 12 Dec 2017 17:19:28 +0000"  >&lt;p&gt;For Lustre 2.10.0 results, map_on_demand was set to 32.&lt;/p&gt;</comment>
                            <comment id="216770" author="pjones" created="Tue, 19 Dec 2017 19:22:16 +0000"  >&lt;p&gt;Amir&lt;/p&gt;

&lt;p&gt;Please can you advise&lt;/p&gt;

&lt;p&gt;Peter&lt;/p&gt;</comment>
                            <comment id="216862" author="iziemba" created="Wed, 20 Dec 2017 17:53:15 +0000"  >&lt;p&gt;It looks like with master, 256 RDMA fragments are used for a 1M OPA LNet transfer whereas Lustre 2.10 used a single RDMA fragment. Could this be a possible reason for the performance drop?&lt;/p&gt;</comment>
                            <comment id="216866" author="dougo" created="Wed, 20 Dec 2017 18:18:12 +0000"  >&lt;p&gt;That&apos;s strange.  I would have thought switching from 256 fragments to one would be better for performance.&lt;/p&gt;</comment>
                            <comment id="216870" author="iziemba" created="Wed, 20 Dec 2017 18:29:58 +0000"  >&lt;p&gt;Doug - That is what I seeing. A single RDMA fragment (Lustre 2.10) does perform much better than 256 RDMA fragments (Lustre master). Sorry if my prior comment did not make that clear.&lt;/p&gt;</comment>
                            <comment id="216877" author="iziemba" created="Wed, 20 Dec 2017 19:10:55 +0000"  >&lt;p&gt;Here is the the latest data I have with CentOS 7.4. Note that Lustre 2.10.2 does not experience the issues the performance issues that master does.&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;[root@client01 lst_performance]# uname -r
3.10.0-693.11.1.el7.x86_64
[root@client01 lst_performance]# lctl --version
lctl 2.10.2
[root@client01 lst_performance]# opaconfig -V
10.6.1.0.2

----------------------------------------------------------
Running test: lst add_test --batch rperf --concurrency 32 --distribute 1:1 --from clients --to servers brw read size=1m
Client Read RPC/s: 16600.1428571429
Client Write RPC/s: 8301.85714285714
Client Read MiB/s: 8299.86857142857
Client Write MiB/s: 1.26785714285714
----------------------------------------------------------
Running test: lst add_test --batch rperf --concurrency 64 --distribute 1:1 --from clients --to servers brw read size=1m
Client Read RPC/s: 16048.0714285714
Client Write RPC/s: 8025.28571428571
Client Read MiB/s: 8023.49428571428
Client Write MiB/s: 1.22428571428571
----------------------------------------------------------
Running test: lst add_test --batch rperf --concurrency 128 --distribute 1:1 --from clients --to servers brw read size=1m
Client Read RPC/s: 16942.7857142857
Client Write RPC/s: 8471.21428571429
Client Read MiB/s: 8471.78357142857
Client Write MiB/s: 1.29428571428571
----------------------------------------------------------
Running test: lst add_test --batch wperf --concurrency 32 --distribute 1:1 --from clients --to servers brw write size=1m
Client Read RPC/s: 21703.3571428571
Client Write RPC/s: 10852.9285714286
Client Read MiB/s: 1.65571428571429
Client Write MiB/s: 10851.7657142857
----------------------------------------------------------
Running test: lst add_test --batch wperf --concurrency 64 --distribute 1:1 --from clients --to servers brw write size=1m
Client Read RPC/s: 21922.0714285714
Client Write RPC/s: 10961.4285714286
Client Read MiB/s: 1.67214285714286
Client Write MiB/s: 10961.2514285714
----------------------------------------------------------
Running test: lst add_test --batch wperf --concurrency 128 --distribute 1:1 --from clients --to servers brw write size=1m
Client Read RPC/s: 21912.2857142857
Client Write RPC/s: 10953.8571428571
Client Read MiB/s: 1.67071428571429
Client Write MiB/s: 10956.0221428571



&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;[root@client01 lst_performance]# uname -r
3.10.0-693.11.1.el7.x86_64
[root@client01 lst_performance]# lctl --version
lctl 2.10.56_39_gbe4507f
[root@client01 lst_performance]# opaconfig -V
10.6.1.0.2

----------------------------------------------------------
Running test: lst add_test --batch rperf --concurrency 32 --distribute 1:1 --from clients --to servers brw read size=1m
Client Read RPC/s: 14908.8571428571
Client Write RPC/s: 7456
Client Read MiB/s: 7453.895
Client Write MiB/s: 1.13928571428571
----------------------------------------------------------
Running test: lst add_test --batch rperf --concurrency 64 --distribute 1:1 --from clients --to servers brw read size=1m
Client Read RPC/s: 14782.8571428571
Client Write RPC/s: 7393.5
Client Read MiB/s: 7390.86071428571
Client Write MiB/s: 1.12928571428571
----------------------------------------------------------
Running test: lst add_test --batch rperf --concurrency 128 --distribute 1:1 --from clients --to servers brw read size=1m
Client Read RPC/s: 14793.1428571429
Client Write RPC/s: 7397.5
Client Read MiB/s: 7396.55285714286
Client Write MiB/s: 1.13
----------------------------------------------------------
Running test: lst add_test --batch wperf --concurrency 32 --distribute 1:1 --from clients --to servers brw write size=1m
Client Read RPC/s: 14475.2857142857
Client Write RPC/s: 7238.64285714286
Client Read MiB/s: 1.10642857142857
Client Write MiB/s: 7237.25142857143
----------------------------------------------------------
Running test: lst add_test --batch wperf --concurrency 64 --distribute 1:1 --from clients --to servers brw write size=1m
Client Read RPC/s: 18805
Client Write RPC/s: 9403.14285714286
Client Read MiB/s: 1.43428571428571
Client Write MiB/s: 9402.445
----------------------------------------------------------
Running test: lst add_test --batch wperf --concurrency 128 --distribute 1:1 --from clients --to servers brw write size=1m
Client Read RPC/s: 14235
Client Write RPC/s: 7115.71428571429
Client Read MiB/s: 1.08714285714286
Client Write MiB/s: 7116.90714285714



&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;
&lt;p&gt;In addition, I am seeing ECONNABORTED with Lustre master that I do not see with 2.10:&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;00000800:00000100:0.0F:1513796499.976702:0:117:0:(o2iblnd_cb.c:1920:kiblnd_close_conn_locked()) Closing conn to 10.2.0.40@o2ib1: error 0(waiting)
00000400:00000100:11.0F:1513796499.977076:0:2289:0:(rpc.c:1418:srpc_lnet_ev_handler()) LNet event status -103 type 5, RPC errors 1
00000400:00000100:11.0:1513796499.977081:0:2289:0:(rpc.c:1418:srpc_lnet_ev_handler()) LNet event status -103 type 3, RPC errors 2
00000001:00020000:13.0F:1513796499.977088:0:2329:0:(brw_test.c:415:brw_bulk_ready()) BRW bulk WRITE failed for RPC from 12345-10.2.0.40@o2ib1: -103
00000400:00000100:11.0:1513796499.977114:0:2289:0:(rpc.c:1418:srpc_lnet_ev_handler()) LNet event status -103 type 5, RPC errors 3
00000400:00000100:11.0:1513796499.977116:0:2289:0:(rpc.c:1418:srpc_lnet_ev_handler()) LNet event status -103 type 3, RPC errors 4
00000001:00020000:1.0F:1513796499.977122:0:2325:0:(brw_test.c:415:brw_bulk_ready()) BRW bulk WRITE failed for RPC from 12345-10.2.0.40@o2ib1: -103
00000400:00000100:1.0:1513796499.977125:0:2325:0:(rpc.c:905:srpc_server_rpc_done()) Server RPC ffff881049cd9400 done: service brw_test, peer 12345-10.2.0.40@o2ib1, status SWI_STATE_BULK_STARTED:-5
00000001:00020000:1.0:1513796499.977128:0:2325:0:(brw_test.c:389:brw_server_rpc_done()) Bulk transfer from 12345-10.2.0.40@o2ib1 has failed: -5
00000400:00000100:19.0F:1513796499.977146:0:2289:0:(rpc.c:1418:srpc_lnet_ev_handler()) LNet event status -103 type 5, RPC errors 5
00000400:00000100:19.0:1513796499.977149:0:2289:0:(rpc.c:1418:srpc_lnet_ev_handler()) LNet event status -103 type 3, RPC errors 6
00000001:00020000:5.0F:1513796499.977155:0:2330:0:(brw_test.c:415:brw_bulk_ready()) BRW bulk WRITE failed for RPC from 12345-10.2.0.40@o2ib1: -103


&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;</comment>
                            <comment id="217609" author="ashehata" created="Fri, 5 Jan 2018 18:27:23 +0000"  >&lt;p&gt;Can you let me know how you determined it&apos;s using 256 fragments? Did you conclude that by looking at the map_on_demand value in the stats?&lt;/p&gt;

&lt;p&gt;This value indicates the maximum number of fragments being negotiated between the peers. However, with OPA we should always be collapsing everything into one fragment.&lt;/p&gt;

&lt;p&gt;There is also a Cray OPA Bugzilla ticket open that indicates a ~2GB/s performance drop between different IFS versions: Bug 142506.&lt;/p&gt;

&lt;p&gt;Is this the same issue?&lt;/p&gt;</comment>
                            <comment id="218035" author="iziemba" created="Thu, 11 Jan 2018 19:54:59 +0000"  >&lt;p&gt;Hi Amir,&lt;/p&gt;

&lt;p&gt;I was monitoring the number of work requests of the transmit message when kiblnd_init_rdma finished. I pulled the latest master version, and have verified that only 1 RDMA fragment is being used with OPA. It appears that the &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-10129&quot; title=&quot;map-on-demand set to 32 doesn&amp;#39;t work on OPA&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-10129&quot;&gt;&lt;del&gt;LU-10129&lt;/del&gt;&lt;/a&gt; patch resolved the issue I was seeing. Just to verify, I built Lustre from the commit before the &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-10129&quot; title=&quot;map-on-demand set to 32 doesn&amp;#39;t work on OPA&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-10129&quot;&gt;&lt;del&gt;LU-10129&lt;/del&gt;&lt;/a&gt; patch, and confirmed I was seeing 256 RDMA fragments with 1M messages.&lt;/p&gt;

&lt;p&gt;I think we can close this ticket.&lt;/p&gt;</comment>
                            <comment id="218051" author="ashehata" created="Thu, 11 Jan 2018 22:48:39 +0000"  >&lt;p&gt;Hi Ian,&lt;/p&gt;

&lt;p&gt;Thanks for verifying. And you are correct that prior to &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-10129&quot; title=&quot;map-on-demand set to 32 doesn&amp;#39;t work on OPA&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-10129&quot;&gt;&lt;del&gt;LU-10129&lt;/del&gt;&lt;/a&gt;, 256 fragments would be used. &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-10129&quot; title=&quot;map-on-demand set to 32 doesn&amp;#39;t work on OPA&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-10129&quot;&gt;&lt;del&gt;LU-10129&lt;/del&gt;&lt;/a&gt; re-worked the map-on-demand code to behave more appropriately. But it&apos;s quite interesting that using more fragments causes reduced performance.&lt;/p&gt;</comment>
                            <comment id="218429" author="jgmitter" created="Wed, 17 Jan 2018 16:35:42 +0000"  >&lt;p&gt;Issue fixed by patch for &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-10394&quot; title=&quot;IB_MR_TYPE_SG_GAPS mlx5 LNet performance drop&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-10394&quot;&gt;&lt;del&gt;LU-10394&lt;/del&gt;&lt;/a&gt;&lt;/p&gt;</comment>
                    </comments>
                <issuelinks>
                            <issuelinktype id="10011">
                    <name>Related</name>
                                            <outwardlinks description="is related to ">
                                        <issuelink>
            <issuekey id="49757">LU-10394</issuekey>
        </issuelink>
                            </outwardlinks>
                                                        </issuelinktype>
                    </issuelinks>
                <attachments>
                    </attachments>
                <subtasks>
                    </subtasks>
                <customfields>
                                                                                                                                                                                            <customfield id="customfield_10890" key="com.atlassian.jira.plugins.jira-development-integration-plugin:devsummary">
                        <customfieldname>Development</customfieldname>
                        <customfieldvalues>
                            
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        <customfield id="customfield_10390" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                        <customfieldname>Rank</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>1|hzzp7z:</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10090" key="com.pyxis.greenhopper.jira:gh-global-rank">
                        <customfieldname>Rank (Obsolete)</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>9223372036854775807</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                                                                                            <customfield id="customfield_10060" key="com.atlassian.jira.plugin.system.customfieldtypes:select">
                        <customfieldname>Severity</customfieldname>
                        <customfieldvalues>
                                <customfieldvalue key="10022"><![CDATA[3]]></customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                </customfields>
    </item>
</channel>
</rss>