<!-- 
RSS generated by JIRA (9.4.14#940014-sha1:734e6822bbf0d45eff9af51f82432957f73aa32c) at Sat Feb 10 03:34:52 UTC 2024

It is possible to restrict the fields that are returned in this document by specifying the 'field' parameter in your request.
For example, to request only the issue key and summary append 'field=key&field=summary' to the URL of your request.
-->
<rss version="0.92" >
<channel>
    <title>Whamcloud Community JIRA</title>
    <link>https://jira.whamcloud.com</link>
    <description>This file is an XML representation of an issue</description>
    <language>en-us</language>    <build-info>
        <version>9.4.14</version>
        <build-number>940014</build-number>
        <build-date>05-12-2023</build-date>
    </build-info>


<item>
            <title>[LU-17367] Failover on master: Invalid NID string</title>
                <link>https://jira.whamcloud.com/browse/LU-17367</link>
                <project id="10000" key="LU">Lustre</project>
                    <description>&lt;p&gt;All failover sessions on master have been failing Lustre init since build 4455 with Invalid NID string:&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;2023-12-14T08:22:36 CMD: onyx-96vm2 mkfs.lustre --mgsnode=onyx-82vm11:onyx-82vm13 --fsname=lustre --ost --index=0 --param=sys.timeout=20 --backfstype=zfs --device-size=8388608 --reformat lustre-ost1/ost1 /dev/vg_Role_OSS/ost1
2023-12-14T08:22:36 onyx-96vm2: mkfs.lustre: Invalid NID string &apos;onyx-82vm11:onyx-82vm13&apos;
2023-12-14T08:22:36 onyx-96vm2: mkfs.lustre: Can&apos;t parse NID &apos;onyx-82vm11:onyx-82vm13&apos;
2023-12-14T08:22:36 onyx-96vm2: mkfs.lustre: exiting with 1 (Operation not permitted)
2023-12-14T08:22:36 pdsh@onyx-82vm4: onyx-96vm2: ssh exited with exit code 1 &lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;
&lt;p&gt;&#160;&lt;/p&gt;

&lt;p&gt;Latest master build:&#160;&lt;/p&gt;

&lt;p&gt;&lt;a href=&quot;https://testing.whamcloud.com/test_sessions/related?jobs=lustre-master&amp;amp;builds=4486#redirect&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://testing.whamcloud.com/test_sessions/related?jobs=lustre-master&amp;amp;builds=4486#redirect&lt;/a&gt;&lt;/p&gt;

&lt;p&gt;&#160;&lt;/p&gt;

&lt;p&gt;4455 revision: 2a498f06ccc975fb57214961db6e20a6c1cc2ec7&lt;/p&gt;

&lt;p&gt;4454 revision: aa8df6a4a3f50dc86554764f6ccb72db027633f8&lt;/p&gt;</description>
                <environment></environment>
        <key id="79594">LU-17367</key>
            <summary>Failover on master: Invalid NID string</summary>
                <type id="1" iconUrl="https://jira.whamcloud.com/secure/viewavatar?size=xsmall&amp;avatarId=11303&amp;avatarType=issuetype">Bug</type>
                                            <priority id="2" iconUrl="https://jira.whamcloud.com/images/icons/priorities/critical.svg">Critical</priority>
                        <status id="1" iconUrl="https://jira.whamcloud.com/images/icons/statuses/open.png" description="The issue is open and ready for the assignee to start work on it.">Open</status>
                    <statusCategory id="2" key="new" colorName="default"/>
                                    <resolution id="-1">Unresolved</resolution>
                                        <assignee username="simmonsja">James A Simmons</assignee>
                                    <reporter username="colmstea">Charlie Olmstead</reporter>
                        <labels>
                    </labels>
                <created>Thu, 14 Dec 2023 21:18:11 +0000</created>
                <updated>Thu, 18 Jan 2024 10:51:37 +0000</updated>
                                            <version>Lustre 2.16.0</version>
                                    <fixVersion>Lustre 2.16.0</fixVersion>
                                        <due></due>
                            <votes>0</votes>
                                    <watches>5</watches>
                                                                            <comments>
                            <comment id="396934" author="adilger" created="Fri, 15 Dec 2023 00:10:45 +0000"  >&lt;p&gt;Charlie, if the NIDs are given as &quot;&lt;tt&gt;HOSTNAME@tcp0&lt;/tt&gt;&quot; does this work again?&lt;/p&gt;

&lt;p&gt;James, traditionally the &quot;&lt;tt&gt;@tcp0&lt;/tt&gt;&quot; has been assumed as part of the NID if no nettype is provided. &lt;/p&gt;</comment>
                            <comment id="396940" author="simmonsja" created="Fri, 15 Dec 2023 01:12:23 +0000"  >&lt;p&gt;Can you point to an exact test that always fails with this. I was looking at the logs and there are many unrelated failures. Normally if you call libcfs_nidstr() and the string lacks @nettype it should fill it in.&lt;/p&gt;

&lt;p&gt;Oh I see. It&apos;s the problem of how to tell &apos;:&apos; as a delimiter from &apos;:&apos; being used in IPv6 addresses. The use of &apos;:&apos;&#160; as a delimiter causes so many headaches.&lt;/p&gt;</comment>
                            <comment id="396955" author="adilger" created="Fri, 15 Dec 2023 04:50:38 +0000"  >&lt;p&gt;Could we exclude IPv6 NIDs for names that contain chars other than hex digits?  Not perfect, but would handle most cases...&lt;/p&gt;</comment>
                            <comment id="396986" author="colmstea" created="Fri, 15 Dec 2023 14:17:17 +0000"  >&lt;p&gt;&lt;a href=&quot;https://jira.whamcloud.com/secure/ViewProfile.jspa?name=simmonsja&quot; class=&quot;user-hover&quot; rel=&quot;simmonsja&quot;&gt;simmonsja&lt;/a&gt; - &quot;Can you point to an exact test that always fails with this.&quot;&lt;/p&gt;

&lt;p&gt;Essentially every failover-part-x or failover-zfs-part-x session triggered by lustre-master going back to 8/24/2023 will have the invalid NID error.&lt;/p&gt;

&lt;p&gt;&lt;a href=&quot;https://testing.whamcloud.com/search?horizon=15552000&amp;amp;jobs%5B%5D=lustre-master&amp;amp;test_groups%5B%5D=failover-part-1&amp;amp;test_groups%5B%5D=failover-part-2&amp;amp;test_groups%5B%5D=failover-part-3&amp;amp;test_groups%5B%5D=failover-zfs-part-1&amp;amp;test_groups%5B%5D=failover-zfs-part-2&amp;amp;test_groups%5B%5D=failover-zfs-part-3&amp;amp;test_set_script_id=5e9346a2-09e0-11e9-a2cc-52540065bddc&amp;amp;source=test_sets#redirect&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://testing.whamcloud.com/search?horizon=15552000&amp;amp;jobs%5B%5D=lustre-master&amp;amp;test_groups%5B%5D=failover-part-1&amp;amp;test_groups%5B%5D=failover-part-2&amp;amp;test_groups%5B%5D=failover-part-3&amp;amp;test_groups%5B%5D=failover-zfs-part-1&amp;amp;test_groups%5B%5D=failover-zfs-part-2&amp;amp;test_groups%5B%5D=failover-zfs-part-3&amp;amp;test_set_script_id=5e9346a2-09e0-11e9-a2cc-52540065bddc&amp;amp;source=test_sets#redirect&lt;/a&gt;&lt;/p&gt;</comment>
                            <comment id="400098" author="yujian" created="Wed, 17 Jan 2024 20:11:32 +0000"  >&lt;p&gt;The regression failure was introduced by the following commit on master branch:&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;commit 101f6e84889a9b48238ca320557101058d935fb0
Author:     James Simmons &amp;lt;jsimmons@infradead.org&amp;gt;
AuthorDate: Thu Aug 3 16:57:02 2023 -0400
Commit:     Oleg Drokin &amp;lt;green@whamcloud.com&amp;gt;
CommitDate: Thu Aug 24 04:32:05 2023 +0000
    
    LU-10391 obdclass: handle large NIDs for mount strings
    
    Mount strings support using &apos;:&apos; as a delimiter but this is also
    a part of the some NID strings like IPv6, so rework class_parse_value()
    to only look at &apos;:&apos; when it occurs after &apos;@&apos;.
    
    The mount utilities use the function convert_hostnames() to ensure
    the mount string containing an NID is valid. This only works for
    small size nids so migrate the function to handle large NIDs. This
    should allow mounting with IPv6 or other large NID addresses.
    
    In testing the userland  libcfs_ip_str2addr_size() had bugs that
    rendered incorrect NID strings. Fix those issues.
    
    Fixes: b6c702df5d4 (&quot;LU-10391 libcfs: add large-nid string conversion functions.&quot;)
    Change-Id: Ic9b2a368456ba75ceb5911ac7f75ae00d6123870
    Signed-off-by: James Simmons &amp;lt;jsimmons@infradead.org&amp;gt;
    Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/50362
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;
&lt;p&gt;The failover test sessions on master branch and the patches with failover test parameters are blocked.&lt;br/&gt;
&#160;&lt;/p&gt;</comment>
                            <comment id="400101" author="simmonsja" created="Wed, 17 Jan 2024 21:04:25 +0000"  >&lt;p&gt;The issue is that the delimiter &apos;:&apos; is used and it is also part of the IPv6 address spec.&#160; Currently there is no way to tell the difference between the two. The way NFS handled this was to introduce&#160; &quot;[]&quot; around the addresses. Perhaps we should implement this approach. Sadly I don&apos;t see people doing &lt;span class=&quot;error&quot;&gt;&amp;#91;myhost1&amp;#93;&lt;/span&gt;: &lt;span class=&quot;error&quot;&gt;&amp;#91;myhost2&amp;#93;&lt;/span&gt; which is a problem. Mixing addresses with hostnames can happen, which makes this totally blow up. The problem is that people don&apos;t want to change their ways by adding &quot;[]&quot; around myhost, for example. Suggestions?&lt;/p&gt;</comment>
                            <comment id="400178" author="adilger" created="Thu, 18 Jan 2024 10:51:24 +0000"  >&lt;p&gt;As I mentioned earlier, checking for non-hex characters allows distinguishing between IPv6 and hostnames in most cases.  Not perfect, but an improvement.  Similarly, &quot;.&quot; is not used in IPv6 addresses, so we could detect IPv4 addresses similarly.  &lt;/p&gt;

&lt;p&gt;If there is something that needs &quot;[]&quot; around it, then it should be the IPv6 NID itself, since that is the &quot;new&quot; case that nobody is using, while &quot;hostname:hostname&quot; or &quot;NID:NID&quot; is the existing case that shouldn&apos;t break.  My main objection against &quot;[]&quot; in your previous patch was that it was putting &quot;[]&quot; around &lt;b&gt;all&lt;/b&gt; of the NIDs like &quot;&lt;tt&gt;&lt;span class=&quot;error&quot;&gt;&amp;#91;nid1:nid2:nid3:nid4&amp;#93;&lt;/span&gt;&lt;/tt&gt;&quot; (where it doesn&apos;t actually help parsing the separate NIDs) instead of around each individual NID like &quot;&lt;tt&gt;&lt;span class=&quot;error&quot;&gt;&amp;#91;nid1&amp;#93;&lt;/span&gt;:&lt;span class=&quot;error&quot;&gt;&amp;#91;nid2&amp;#93;&lt;/span&gt;:&lt;span class=&quot;error&quot;&gt;&amp;#91;nid3&amp;#93;&lt;/span&gt;:&lt;span class=&quot;error&quot;&gt;&amp;#91;nid4&amp;#93;&lt;/span&gt;&lt;/tt&gt;&quot; where it would make sense.  It should be optional for IPv4 NIDs and hostnames, but possibly required around IPv6 NIDs if they do not also have an &quot;@tcp&quot; to separate them.&lt;/p&gt;</comment>
                    </comments>
                    <attachments>
                    </attachments>
                <subtasks>
                    </subtasks>
                <customfields>
                                                                                                                                                                                            <customfield id="customfield_10890" key="com.atlassian.jira.plugins.jira-development-integration-plugin:devsummary">
                        <customfieldname>Development</customfieldname>
                        <customfieldvalues>
                            
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        <customfield id="customfield_10390" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                        <customfieldname>Rank</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>1|i0451z:</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10090" key="com.pyxis.greenhopper.jira:gh-global-rank">
                        <customfieldname>Rank (Obsolete)</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>9223372036854775807</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                                                                                            <customfield id="customfield_10060" key="com.atlassian.jira.plugin.system.customfieldtypes:select">
                        <customfieldname>Severity</customfieldname>
                        <customfieldvalues>
                                <customfieldvalue key="10022"><![CDATA[3]]></customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        </customfields>
    </item>
</channel>
</rss>