<!-- 
RSS generated by JIRA (9.4.14#940014-sha1:734e6822bbf0d45eff9af51f82432957f73aa32c) at Sat Feb 10 02:01:01 UTC 2024

It is possible to restrict the fields that are returned in this document by specifying the 'field' parameter in your request.
For example, to request only the issue key and summary append 'field=key&field=summary' to the URL of your request.
-->
<rss version="0.92" >
<channel>
    <title>Whamcloud Community JIRA</title>
    <link>https://jira.whamcloud.com</link>
    <description>This file is an XML representation of an issue</description>
    <language>en-us</language>    <build-info>
        <version>9.4.14</version>
        <build-number>940014</build-number>
        <build-date>05-12-2023</build-date>
    </build-info>


<item>
            <title>[LU-6531] Fujitsu&apos;s o2iblnd Channel Bonding Solution</title>
                <link>https://jira.whamcloud.com/browse/LU-6531</link>
                <project id="10000" key="LU">Lustre</project>
                    <description>&lt;p&gt;Work on Fujitsu&apos;s o2iblnd channel bonding solution.&lt;/p&gt;</description>
                <environment></environment>
        <key id="29737">LU-6531</key>
            <summary>Fujitsu&apos;s o2iblnd Channel Bonding Solution</summary>
                <type id="2" iconUrl="https://jira.whamcloud.com/secure/viewavatar?size=xsmall&amp;avatarId=11311&amp;avatarType=issuetype">New Feature</type>
                                            <priority id="3" iconUrl="https://jira.whamcloud.com/images/icons/priorities/major.svg">Major</priority>
                        <status id="5" iconUrl="https://jira.whamcloud.com/images/icons/statuses/resolved.png" description="A resolution has been taken, and it is awaiting verification by reporter. From here issues are either reopened, or are closed.">Resolved</status>
                    <statusCategory id="3" key="done" colorName="success"/>
                                    <resolution id="10200">Won&apos;t Do</resolution>
                                        <assignee username="ashehata">Amir Shehata</assignee>
                                    <reporter username="ashehata">Amir Shehata</reporter>
                        <labels>
                    </labels>
                <created>Mon, 27 Apr 2015 22:39:54 +0000</created>
                <updated>Sat, 16 Sep 2017 07:54:00 +0000</updated>
                            <resolved>Sat, 16 Sep 2017 07:54:00 +0000</resolved>
                                                                        <due>Tue, 30 Jun 2015 00:00:00 +0000</due>
                            <votes>0</votes>
                                    <watches>20</watches>
                                                                            <comments>
                            <comment id="113563" author="ashehata" created="Mon, 27 Apr 2015 22:47:40 +0000"  >&lt;p&gt;Duplicate of &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-6495&quot; title=&quot;push Fujitsu IB multi-rail patch to Gerrit&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-6495&quot;&gt;&lt;del&gt;LU-6495&lt;/del&gt;&lt;/a&gt;&lt;/p&gt;</comment>
                            <comment id="113567" author="gerrit" created="Tue, 28 Apr 2015 00:52:10 +0000"  >&lt;p&gt;Amir Shehata (amir.shehata@intel.com) uploaded a new patch: &lt;a href=&quot;http://review.whamcloud.com/14625&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/14625&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-6531&quot; title=&quot;Fujitsu&amp;#39;s o2iblnd Channel Bonding Solution&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-6531&quot;&gt;&lt;del&gt;LU-6531&lt;/del&gt;&lt;/a&gt; lnet: Fujitsu&apos;s Channel Bonding Solution&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: master&lt;br/&gt;
Current Patch Set: 1&lt;br/&gt;
Commit: 2528a0d791fe8c6f2b046905725d12ec56c9bf6a&lt;/p&gt;</comment>
                            <comment id="116188" author="nozaki" created="Fri, 22 May 2015 08:40:24 +0000"  >&lt;p&gt;Sorry I made a mistake. Please forget about the above patch.&lt;/p&gt;</comment>
                            <comment id="117686" author="gerrit" created="Sat, 6 Jun 2015 23:24:12 +0000"  >&lt;p&gt;Amir Shehata (amir.shehata@intel.com) uploaded a new patch: &lt;a href=&quot;http://review.whamcloud.com/15170&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/15170&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-6531&quot; title=&quot;Fujitsu&amp;#39;s o2iblnd Channel Bonding Solution&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-6531&quot;&gt;&lt;del&gt;LU-6531&lt;/del&gt;&lt;/a&gt; lnet: DLC interface for o2iblnd Channel Bonding&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: master&lt;br/&gt;
Current Patch Set: 1&lt;br/&gt;
Commit: db91f5a2fb066a316e383807ad8fec1633237a55&lt;/p&gt;</comment>
                            <comment id="118349" author="fsaunier" created="Fri, 12 Jun 2015 12:27:31 +0000"  >&lt;p&gt;I&apos;ve experimented with the lnet channel bonding solution patch using lnet-selftest with the following configuration:&lt;/p&gt;
&lt;ul class=&quot;alternate&quot; type=&quot;square&quot;&gt;
	&lt;li&gt;4 clients having a single IB interface&lt;/li&gt;
	&lt;li&gt;1 server having two IB interfaces&lt;/li&gt;
	&lt;li&gt;all IB interfaces are connected to the same switch&lt;br/&gt;
Here are the test results:
&lt;blockquote&gt;&lt;p&gt;size=1M duration=10 check= concurrency=16&lt;br/&gt;
Lnet data bandwidth of all the servers (MB/s)&lt;/p&gt;
&lt;div class=&apos;table-wrap&apos;&gt;
&lt;table class=&apos;confluenceTable&apos;&gt;&lt;tbody&gt;
&lt;tr&gt;
&lt;th class=&apos;confluenceTh&apos;&gt;#clients&lt;/th&gt;
&lt;th class=&apos;confluenceTh&apos;&gt;write&lt;/th&gt;
&lt;th class=&apos;confluenceTh&apos;&gt;read&lt;/th&gt;
&lt;/tr&gt;
&lt;tr&gt;
&lt;td class=&apos;confluenceTd&apos;&gt;1&lt;/td&gt;
&lt;td class=&apos;confluenceTd&apos;&gt;5610&lt;/td&gt;
&lt;td class=&apos;confluenceTd&apos;&gt;5816&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
&lt;td class=&apos;confluenceTd&apos;&gt;2&lt;/td&gt;
&lt;td class=&apos;confluenceTd&apos;&gt;10773&lt;/td&gt;
&lt;td class=&apos;confluenceTd&apos;&gt;10155&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
&lt;td class=&apos;confluenceTd&apos;&gt;3&lt;/td&gt;
&lt;td class=&apos;confluenceTd&apos;&gt;12069&lt;/td&gt;
&lt;td class=&apos;confluenceTd&apos;&gt;6358&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
&lt;td class=&apos;confluenceTd&apos;&gt;4&lt;/td&gt;
&lt;td class=&apos;confluenceTd&apos;&gt;12044&lt;/td&gt;
&lt;td class=&apos;confluenceTd&apos;&gt;6332&lt;/td&gt;
&lt;/tr&gt;
&lt;/tbody&gt;&lt;/table&gt;
&lt;/div&gt;
&lt;/blockquote&gt;
&lt;p&gt;Write figures are good with respect to hardware capabilities, but I&apos;m puzzled by figures for read.&lt;/p&gt;&lt;/li&gt;
&lt;/ul&gt;
</comment>
                            <comment id="119029" author="ashehata" created="Thu, 18 Jun 2015 20:35:26 +0000"  >&lt;p&gt;hi Frederic,&lt;/p&gt;

&lt;p&gt;Is it possible to share your full configuration?&lt;/p&gt;

&lt;p&gt;thanks&lt;br/&gt;
amir&lt;/p&gt;</comment>
                            <comment id="119392" author="fsaunier" created="Tue, 23 Jun 2015 14:34:27 +0000"  >&lt;p&gt;The server&apos;s lnet.conf is:&lt;br/&gt;
  options lnet networks=o2ib0(ib0,ib1)&lt;br/&gt;
And each client has the following lnet.conf&lt;br/&gt;
  options lnet networks=o2ib0(ib0)&lt;br/&gt;
All nodes were using the same lustre_o2ibs_config input file:&lt;br/&gt;
  10.1.0.41@o2ib0 10.1.0.41 10.1.0.101&lt;br/&gt;
  10.1.0.31@o2ib0 10.1.0.31&lt;br/&gt;
  10.1.0.32@o2ib0 10.1.0.32&lt;br/&gt;
  10.1.0.35@o2ib0 10.1.0.35&lt;br/&gt;
I also tried changing ko2iblnd parameters on all nodes:&lt;br/&gt;
  options ko2iblnd credits=2048 peer_credits=126 concurrent_sends=63 peer_buffer_credits=128&lt;/p&gt;</comment>
                            <comment id="119526" author="ashehata" created="Wed, 24 Jun 2015 20:21:04 +0000"  >&lt;p&gt;This is a high-level design document I wrote based on the Fujitsu Channel Bonding solution.  It also describes the new DLC interface I added to configure the Channel bonding solution.&lt;/p&gt;</comment>
                            <comment id="119594" author="olaf" created="Thu, 25 Jun 2015 12:58:57 +0000"  >&lt;p&gt;The design document is very useful, thanks.&lt;/p&gt;

&lt;p&gt;I do have one concern: the code looks through lists of routes while holding a spinlock and with interrupts disabled on the CPU (spin_lock_irqsave() and friends). This will definitely be a problem if these lists become large, because a system becomes unstable if one or more of the CPU cores runs for a long time with interrupts disabled.&lt;/p&gt;

&lt;p&gt;Trying to figure how large these lists can become, if we have a cluster with N clients, M MDS, O OSS, and ignoring routers, assuming just one interface for each system I get something like this:&lt;/p&gt;

&lt;ul class=&quot;alternate&quot; type=&quot;square&quot;&gt;
	&lt;li&gt;on a client: M + O&lt;/li&gt;
	&lt;li&gt;on an MDS: N + M + O&lt;/li&gt;
	&lt;li&gt;on an OSS: N + M&lt;/li&gt;
&lt;/ul&gt;


&lt;p&gt;This shouldn&apos;t be much of a problem in a small cluster, but in a large cluster it would be the MDS and OSS in particular that have large lists. So my concern is that there is a scaling problem that will render MDS and OSS unstable in large clusters, but will be invisible in the small clusters typically used for testing.&lt;/p&gt;</comment>
                            <comment id="119618" author="morrone" created="Thu, 25 Jun 2015 18:23:57 +0000"  >&lt;p&gt;To take a step back for a moment, I think we need to have a good answer to the following question:&lt;/p&gt;

&lt;p&gt;Why is implementing channel bonding at the LND level the right thing to do rather than implementing channel bonding at the LNet level?&lt;/p&gt;

&lt;p&gt;It is not clear to me that the current configuration approach when done at the LND level is very robust or system administration friendly, and ways to fix that don&apos;t seem terribly easy to do since this implementation is all hacked into a single LND component.  I think that I can envision a system at the LNet level that would be much easier for system administrators to work with (because NIDs are already shared between nodes).  I am also concerned about how credits at the LNet layer are going to interact with multiple invisible peer connections in the LND layer.&lt;/p&gt;</comment>
                            <comment id="119983" author="doug" created="Tue, 30 Jun 2015 22:41:17 +0000"  >&lt;p&gt;Hi Chris,&lt;/p&gt;

&lt;p&gt;I agree with the validity of your question/concerns.  I would add one other potential issue with this solution: it does not allow the bonded HCAs to be on different networks.  That means it cannot protect you from a switch failure.  An LNet layer solution could be developed to allow HCAs on different networks to be bonded.&lt;/p&gt;

&lt;p&gt;When we first started looking at channel bonding, the solution was based in the LNet layer.  That was put on hold when the Fujitsu solution came to light as something already running in production and being offered to the community tree.  The philosophy: we are better off with a working known than putting in the effort re-doing a new solution (an unknown).  Also, having two solutions would muddy the Lustre waters so with the Fujitsu solution coming to the community tree, we backed off our own approach.&lt;/p&gt;

&lt;p&gt;Now, seeing the patch comments and your concerns, I feel we should re-review which solution is favoured for the community tree.&lt;/p&gt;

&lt;p&gt;For now, Intel is going to back off pushing this patch (and the DLC adaptation patch) given 1- it won&apos;t make 2.8 feature freeze, and 2- a need to reconsider what is the proper solution.  The patches are left in Gerrit for community consideration and guidance as to one way to approach Channel Bonding.&lt;/p&gt;</comment>
                            <comment id="208564" author="adilger" created="Sat, 16 Sep 2017 07:52:58 +0000"  >&lt;p&gt;Closing this ticket, as LNet multi-rail support landed in 2.10.&lt;/p&gt;</comment>
                    </comments>
                <issuelinks>
                            <issuelinktype id="10010">
                    <name>Duplicate</name>
                                            <outwardlinks description="duplicates">
                                        <issuelink>
            <issuekey id="45981">LU-9480</issuekey>
        </issuelink>
                            </outwardlinks>
                                                        </issuelinktype>
                            <issuelinktype id="10011">
                    <name>Related</name>
                                            <outwardlinks description="is related to ">
                                        <issuelink>
            <issuekey id="29582">LU-6480</issuekey>
        </issuelink>
                            </outwardlinks>
                                                        </issuelinktype>
                    </issuelinks>
                <attachments>
                            <attachment id="18324" name="Fujitsu_Channel_Bonding.pdf" size="242930" author="ashehata" created="Wed, 24 Jun 2015 20:21:04 +0000"/>
                    </attachments>
                <subtasks>
                    </subtasks>
                <customfields>
                                                                                                                                                                <customfield id="customfield_10890" key="com.atlassian.jira.plugins.jira-development-integration-plugin:devsummary">
                        <customfieldname>Development</customfieldname>
                        <customfieldvalues>
                            
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        <customfield id="customfield_10390" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                        <customfieldname>Rank</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>1|hzxbw7:</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10090" key="com.pyxis.greenhopper.jira:gh-global-rank">
                        <customfieldname>Rank (Obsolete)</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>9223372036854775807</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                                                                                </customfields>
    </item>
</channel>
</rss>