<!-- 
RSS generated by JIRA (9.4.14#940014-sha1:734e6822bbf0d45eff9af51f82432957f73aa32c) at Sat Feb 10 03:12:57 UTC 2024

It is possible to restrict the fields that are returned in this document by specifying the 'field' parameter in your request.
For example, to request only the issue key and summary append 'field=key&field=summary' to the URL of your request.
-->
<rss version="0.92" >
<channel>
    <title>Whamcloud Community JIRA</title>
    <link>https://jira.whamcloud.com</link>
    <description>This file is an XML representation of an issue</description>
    <language>en-us</language>    <build-info>
        <version>9.4.14</version>
        <build-number>940014</build-number>
        <build-date>05-12-2023</build-date>
    </build-info>


<item>
            <title>[LU-14806] o2iblnd: IB HCA failover with o2ib bonding is broken </title>
                <link>https://jira.whamcloud.com/browse/LU-14806</link>
                <project id="10000" key="LU">Lustre</project>
                    <description>&lt;p&gt;It has been observed that &quot;ko2iblnd dev_failover=1&quot; option used on a node with o2ib bonding doesn&apos;t behave properly.&lt;/p&gt;</description>
                <environment></environment>
        <key id="64955">LU-14806</key>
            <summary>o2iblnd: IB HCA failover with o2ib bonding is broken </summary>
                <type id="1" iconUrl="https://jira.whamcloud.com/secure/viewavatar?size=xsmall&amp;avatarId=11303&amp;avatarType=issuetype">Bug</type>
                                            <priority id="4" iconUrl="https://jira.whamcloud.com/images/icons/priorities/minor.svg">Minor</priority>
                        <status id="5" iconUrl="https://jira.whamcloud.com/images/icons/statuses/resolved.png" description="A resolution has been taken, and it is awaiting verification by reporter. From here issues are either reopened, or are closed.">Resolved</status>
                    <statusCategory id="3" key="done" colorName="success"/>
                                    <resolution id="1">Fixed</resolution>
                                        <assignee username="ssmirnov">Serguei Smirnov</assignee>
                                    <reporter username="ssmirnov">Serguei Smirnov</reporter>
                        <labels>
                            <label>bonding</label>
                            <label>failover</label>
                            <label>lnet</label>
                            <label>o2iblnd</label>
                    </labels>
                <created>Fri, 2 Jul 2021 01:08:40 +0000</created>
                <updated>Fri, 17 Sep 2021 20:26:42 +0000</updated>
                            <resolved>Sat, 31 Jul 2021 13:25:40 +0000</resolved>
                                                    <fixVersion>Lustre 2.15.0</fixVersion>
                                        <due></due>
                            <votes>0</votes>
                                    <watches>3</watches>
                                                                            <comments>
                            <comment id="306234" author="ssmirnov" created="Mon, 5 Jul 2021 18:41:35 +0000"  >&lt;p&gt;Here are the steps to reproduce (2 ib-enabled nodes are required, one with two ib interfaces):&lt;/p&gt;
&lt;ul&gt;
	&lt;li&gt;Configure IB bonding. Here&apos;s the example of &quot;ip a&quot; output:&lt;/li&gt;
&lt;/ul&gt;


&lt;div class=&quot;code panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;codeContent panelContent&quot;&gt;
&lt;pre class=&quot;code-java&quot;&gt;
 3: ib0: &amp;lt;BROADCAST,MULTICAST,SLAVE,UP,LOWER_UP&amp;gt; mtu 2044 qdisc mq master bond0 state UP group &lt;span class=&quot;code-keyword&quot;&gt;default&lt;/span&gt; qlen 256
    link/infiniband a0:00:02:10:fe:80:00:00:00:00:00:00:00:02:c9:03:00:5a:63:2b brd 00:ff:ff:ff:ff:12:40:1b:ff:ff:00:00:00:00:00:00:ff:ff:ff:ff
4: ib1: &amp;lt;BROADCAST,MULTICAST,SLAVE,UP,LOWER_UP&amp;gt; mtu 65520 qdisc mq master bond0 state UP group &lt;span class=&quot;code-keyword&quot;&gt;default&lt;/span&gt; qlen 256
    link/infiniband a0:00:02:20:fe:80:00:00:00:00:00:00:00:02:c9:03:00:5a:63:2c brd 00:ff:ff:ff:ff:12:40:1b:ff:ff:00:00:00:00:00:00:ff:ff:ff:ff
5: bond0: &amp;lt;BROADCAST,MULTICAST,MASTER,UP,LOWER_UP&amp;gt; mtu 2044 qdisc noqueue state UP group &lt;span class=&quot;code-keyword&quot;&gt;default&lt;/span&gt; qlen 1000
    link/infiniband a0:00:02:10:fe:80:00:00:00:00:00:00:00:02:c9:03:00:5a:63:2b brd 00:ff:ff:ff:ff:12:40:1b:ff:ff:00:00:00:00:00:00:ff:ff:ff:ff
    inet 10.1.0.9/24 brd 10.1.0.255 scope global noprefixroute bond0
       valid_lft forever preferred_lft forever
    inet6 fe80::202:c903:5a:632b/64 scope link 
       valid_lft forever preferred_lft forever&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;
&lt;ul&gt;
	&lt;li&gt;Add &quot;options ko2iblnd dev_failover=1&quot; to lnet.conf&lt;/li&gt;
	&lt;li&gt;Reload LNet modules and configure o2ib net on the bonded interface:&lt;/li&gt;
&lt;/ul&gt;


&lt;div class=&quot;code panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;codeContent panelContent&quot;&gt;
&lt;pre class=&quot;code-java&quot;&gt;
lnetctl lnet configure
lnetctl net add --net o2ib --&lt;span class=&quot;code-keyword&quot;&gt;if&lt;/span&gt; bond0&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;
&lt;ul&gt;
	&lt;li&gt;Discover the other node:&lt;/li&gt;
&lt;/ul&gt;


&lt;div class=&quot;code panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;codeContent panelContent&quot;&gt;
&lt;pre class=&quot;code-java&quot;&gt;
lnetctl discover 10.1.0.10@o2ib  &lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;
&lt;ul&gt;
	&lt;li&gt;Make sure lnetctl ping is reliable:&lt;/li&gt;
&lt;/ul&gt;


&lt;div class=&quot;code panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;codeContent panelContent&quot;&gt;
&lt;pre class=&quot;code-java&quot;&gt;
 lnetctl ping 10.1.0.10@o2ib                  (repeat multiple times)&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;
&lt;ul&gt;
	&lt;li&gt;Find out which ib link is active:&lt;/li&gt;
&lt;/ul&gt;


&lt;div class=&quot;code panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;codeContent panelContent&quot;&gt;
&lt;pre class=&quot;code-java&quot;&gt;
cat /proc/net/bonding/bond0

Ethernet Channel Bonding Driver: v3.7.1 (April 27, 2011)

Bonding Mode: fault-tolerance (active-backup) (fail_over_mac active)
Primary Slave: ib0 (primary_reselect always)
Currently Active Slave: ib0
MII Status: up
MII Polling Interval (ms): 100
Up Delay (ms): 100
Down Delay (ms): 100
Peer Notification Delay (ms): 0

Slave Interface: ib0
MII Status: up
Speed: 20000 Mbps
Duplex: full
Link Failure Count: 18
Permanent HW addr: a0:00:02:10:fe:80:00:00:00:00:00:00:00:02:c9:03:00:5a:63:2b
Slave queue ID: 0

Slave Interface: ib1
MII Status: up
Speed: 20000 Mbps
Duplex: full
Link Failure Count: 7
Permanent HW addr: a0:00:02:20:fe:80:00:00:00:00:00:00:00:02:c9:03:00:5a:63:2c
Slave queue ID: 0&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;
&lt;ul&gt;
	&lt;li&gt;Pull the cable corresponding to the active link&lt;/li&gt;
	&lt;li&gt;Try pinging the other node (this will work in a second or so once failover completes):&lt;/li&gt;
&lt;/ul&gt;


&lt;div class=&quot;code panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;codeContent panelContent&quot;&gt;
&lt;pre class=&quot;code-java&quot;&gt;
 ping 10.1.0.10&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;
&lt;ul&gt;
	&lt;li&gt;Try lnetctl pinging the other node like before (this won&apos;t succeed anymore)&lt;/li&gt;
&lt;/ul&gt;
</comment>
                            <comment id="306235" author="gerrit" created="Mon, 5 Jul 2021 18:43:16 +0000"  >&lt;p&gt;Serguei Smirnov (ssmirnov@whamcloud.com) uploaded a new patch: &lt;a href=&quot;https://review.whamcloud.com/44139&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/44139&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-14806&quot; title=&quot;o2iblnd: IB HCA failover with o2ib bonding is broken &quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-14806&quot;&gt;&lt;del&gt;LU-14806&lt;/del&gt;&lt;/a&gt; o2iblnd: clear fatal error on successful failover&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: master&lt;br/&gt;
Current Patch Set: 1&lt;br/&gt;
Commit: eb174ff461244ee6aabb68d39dce9e73ad7a085e&lt;/p&gt;</comment>
                            <comment id="308995" author="gerrit" created="Sat, 31 Jul 2021 06:39:14 +0000"  >&lt;p&gt;Oleg Drokin (green@whamcloud.com) merged in patch &lt;a href=&quot;https://review.whamcloud.com/44139/&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/44139/&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-14806&quot; title=&quot;o2iblnd: IB HCA failover with o2ib bonding is broken &quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-14806&quot;&gt;&lt;del&gt;LU-14806&lt;/del&gt;&lt;/a&gt; o2iblnd: clear fatal error on successful failover&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: master&lt;br/&gt;
Current Patch Set: &lt;br/&gt;
Commit: 4668283cd13079dd6d86482704aef593f5c01dff&lt;/p&gt;</comment>
                            <comment id="309008" author="pjones" created="Sat, 31 Jul 2021 13:25:40 +0000"  >&lt;p&gt;Landed for 2.15&lt;/p&gt;</comment>
                    </comments>
                <issuelinks>
                            <issuelinktype id="10011">
                    <name>Related</name>
                                                                <inwardlinks description="is related to">
                                        <issuelink>
            <issuekey id="66122">LU-15018</issuekey>
        </issuelink>
                            </inwardlinks>
                                    </issuelinktype>
                    </issuelinks>
                <attachments>
                    </attachments>
                <subtasks>
                    </subtasks>
                <customfields>
                                                                                                                                                                                            <customfield id="customfield_10890" key="com.atlassian.jira.plugins.jira-development-integration-plugin:devsummary">
                        <customfieldname>Development</customfieldname>
                        <customfieldvalues>
                            
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        <customfield id="customfield_10390" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                        <customfieldname>Rank</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>1|i01ybr:</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10090" key="com.pyxis.greenhopper.jira:gh-global-rank">
                        <customfieldname>Rank (Obsolete)</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>9223372036854775807</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                                                                                            <customfield id="customfield_10060" key="com.atlassian.jira.plugin.system.customfieldtypes:select">
                        <customfieldname>Severity</customfieldname>
                        <customfieldvalues>
                                <customfieldvalue key="10022"><![CDATA[3]]></customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        </customfields>
    </item>
</channel>
</rss>