<!-- 
RSS generated by JIRA (9.4.14#940014-sha1:734e6822bbf0d45eff9af51f82432957f73aa32c) at Sat Feb 10 02:46:58 UTC 2024

It is possible to restrict the fields that are returned in this document by specifying the 'field' parameter in your request.
For example, to request only the issue key and summary append 'field=key&field=summary' to the URL of your request.
-->
<rss version="0.92">
<channel>
    <title>Whamcloud Community JIRA</title>
    <link>https://jira.whamcloud.com</link>
    <description>This file is an XML representation of an issue</description>
    <language>en-us</language>
    <build-info>
        <version>9.4.14</version>
        <build-number>940014</build-number>
        <build-date>05-12-2023</build-date>
    </build-info>


<item>
            <title>[LU-11791] recovery-mds-scale test failover_ost fails with &apos;test_failover_ost returned 1&apos;</title>
                <link>https://jira.whamcloud.com/browse/LU-11791</link>
                <project id="10000" key="LU">Lustre</project>
                    <description>&lt;p&gt;recovery-mds-scale test_failover_ost fails with &apos;test_failover_ost returned 1&apos;&lt;/p&gt;

&lt;p&gt;Looking at the client test_log from &lt;a href=&quot;https://testing.whamcloud.com/test_sets/e36f9e0c-fea5-11e8-b837-52540065bddc&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://testing.whamcloud.com/test_sets/e36f9e0c-fea5-11e8-b837-52540065bddc&lt;/a&gt;, we see that there were several successful OST failovers with one failure&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;Found the END_RUN_FILE file: /autotest/trevis/2018-12-10/lustre-master-el7_6-x86_64--failover--1_32_1__3837___6af7940a-41a2-4a12-b890-ae54e8237ab3/shared_dir/end_run_file
trevis-25vm8.trevis.whamcloud.com
Client load  failed on node trevis-25vm8.trevis.whamcloud.com:
/autotest/trevis/2018-12-10/lustre-master-el7_6-x86_64--failover--1_32_1__3837___6af7940a-41a2-4a12-b890-ae54e8237ab3/recovery-mds-scale.test_failover_ost.run__stdout.trevis-25vm8.trevis.whamcloud.com.log
/autotest/trevis/2018-12-10/lustre-master-el7_6-x86_64--failover--1_32_1__3837___6af7940a-41a2-4a12-b890-ae54e8237ab3/recovery-mds-scale.test_failover_ost.run__debug.trevis-25vm8.trevis.whamcloud.com.log
2018-12-11 23:22:47 Terminating clients loads ...
Duration:               86400
Server failover period: 1200 seconds
Exited after:           21768 seconds
Number of failovers before exit:
mds1: 0 times
ost1: 3 times
ost2: 1 times
ost3: 6 times
ost4: 1 times
ost5: 6 times
ost6: 0 times
ost7: 2 times
Status: FAIL: rc=1
CMD: trevis-25vm7,trevis-25vm8 test -f /tmp/client-load.pid &amp;amp;&amp;amp;
        { kill -s TERM \$(cat /tmp/client-load.pid); rm -f /tmp/client-load.pid; }
trevis-25vm8: sh: line 1: kill: (11606) - No such process
trevis-25vm7: sh: line 1: kill: (18301) - No such process
Dumping lctl log to /autotest/trevis/2018-12-10/lustre-master-el7_6-x86_64--failover--1_32_1__3837___6af7940a-41a2-4a12-b890-ae54e8237ab3/recovery-mds-scale.test_failover_ost.*.1544570568.log
CMD: trevis-25vm10,trevis-25vm11,trevis-25vm12,trevis-25vm8.trevis.whamcloud.com,trevis-25vm9 /usr/sbin/lctl dk &amp;gt; /autotest/trevis/2018-12-10/lustre-master-el7_6-x86_64--failover--1_32_1__3837___6af7940a-41a2-4a12-b890-ae54e8237ab3/recovery-mds-scale.test_failover_ost.debug_log.\$(hostname -s).1544570568.log;
         dmesg &amp;gt; /autotest/trevis/2018-12-10/lustre-master-el7_6-x86_64--failover--1_32_1__3837___6af7940a-41a2-4a12-b890-ae54e8237ab3/recovery-mds-scale.test_failover_ost.dmesg.\$(hostname -s).1544570568.log
trevis-25vm9: invalid parameter &apos;dump_kernel&apos;
trevis-25vm9: open(dump_kernel) failed: No such file or directory
trevis-25vm12: invalid parameter &apos;dump_kernel&apos;
trevis-25vm12: open(dump_kernel) failed: No such file or directory
test_failover_ost returned 1
FAIL failover_ost (22821s)
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;
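&lt;p&gt;For context, the harness and the client loads coordinate through the END_RUN_FILE named above: when a load&apos;s workload fails, the load script appends its hostname to that shared file and exits, and the master failover loop stops as soon as the file appears (the &apos;No such process&apos; messages from kill likely just mean the loads had already exited by the time the harness sent TERM). A minimal bash sketch of that handshake; the variable names and the failover helper are illustrative, not the actual test-framework code:&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;#!/bin/bash
# Hypothetical reconstruction of the END_RUN_FILE handshake.
END_RUN_FILE=/tmp/shared_dir/end_run_file   # lives in the shared autotest dir
DURATION=86400                              # total run time (s), per the log
SERVER_FAILOVER_PERIOD=1200                 # failover interval (s), per the log

# Client-load side (cf. run_tar.sh): on failure, record this node and exit.
report_failure() {
    hostname &amp;gt;&amp;gt; $END_RUN_FILE
    exit $1
}

# Master-loop side: keep failing servers over until the time budget is
# spent or some client load reports a failure.
start=$SECONDS
while [ $((SECONDS - start)) -lt $DURATION ]; do
    [ -f $END_RUN_FILE ] &amp;amp;&amp;amp; break    # a client load failed
    # failover_random_ost                   # placeholder for the real step
    sleep $SERVER_FAILOVER_PERIOD
done
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;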

&lt;p&gt;Looking at the logs from Client 3 (vm8), we can see some issues with tar. From the run_tar_debug log, we see the client load exit with a non-zero return code&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;2018-12-11 22:58:05: tar run starting
+ mkdir -p /mnt/lustre/d0.tar-trevis-25vm8.trevis.whamcloud.com
+ cd /mnt/lustre/d0.tar-trevis-25vm8.trevis.whamcloud.com
+ sync
++ du -s /etc
++ awk &apos;{print $1}&apos;
+ USAGE=34864
+ /usr/sbin/lctl set_param &apos;llite.*.lazystatfs=0&apos;
+ df /mnt/lustre/d0.tar-trevis-25vm8.trevis.whamcloud.com
+ sleep 2
++ df /mnt/lustre/d0.tar-trevis-25vm8.trevis.whamcloud.com
++ awk &apos;/:/ { print $4 }&apos;
+ FREE_SPACE=9359360
+ AVAIL=4211712
+ &apos;[&apos; 4211712 -lt 34864 &apos;]&apos;
+ do_tar
+ tar cf - /etc
+ tar xf -
tar: Removing leading `/&apos; from member names
+ return 2
+ RC=2
++ grep &apos;exit delayed from previous errors&apos; /autotest/trevis/2018-12-10/lustre-master-el7_6-x86_64--failover--1_32_1__3837___6af7940a-41a2-4a12-b890-ae54e8237ab3/recovery-mds-scale.test_failover_ost.run_tar_stdout.trevis-25vm8.log
+ PREV_ERRORS=
+ true
+ &apos;[&apos; 2 -ne 0 -a &apos;&apos; -a &apos;&apos; &apos;]&apos;
+ &apos;[&apos; 2 -eq 0 &apos;]&apos;
++ date &apos;+%F %H:%M:%S&apos;
+ echoerr &apos;2018-12-11 23:17:05: tar failed&apos;
+ echo &apos;2018-12-11 23:17:05: tar failed&apos;
2018-12-11 23:17:05: tar failed
+ &apos;[&apos; -z &apos;&apos; &apos;]&apos;
++ hostname
+ echo trevis-25vm8.trevis.whamcloud.com
+ &apos;[&apos; &apos;]&apos;
+ &apos;[&apos; &apos;!&apos; -e /autotest/trevis/2018-12-10/lustre-master-el7_6-x86_64--failover--1_32_1__3837___6af7940a-41a2-4a12-b890-ae54e8237ab3/shared_dir/end_run_file &apos;]&apos;
++ date &apos;+%F %H:%M:%S&apos;
+ echoerr &apos;2018-12-11 23:17:05: tar run exiting&apos;
+ echo &apos;2018-12-11 23:17:05: tar run exiting&apos;
2018-12-11 23:17:05: tar run exiting
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;
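&lt;p&gt;Reading the trace above, the load is an /etc copy-in over the Lustre mount, and the pipeline&apos;s result is the extracting tar&apos;s exit status (RC=2 here; GNU tar exits 2 on fatal errors). A rough bash reconstruction of the failing step, assuming the usual run_tar.sh shape; the log path is illustrative:&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;#!/bin/bash
# Illustrative reconstruction of the traced tar load step.
TESTDIR=/mnt/lustre/d0.tar-$(hostname)
LOG=/tmp/run_tar_stdout.log   # stand-in for the autotest stdout log

do_tar() {
    # Stream /etc through a pipe and unpack it under the Lustre mount;
    # report the extracting tar&apos;s status, which is what the trace checks.
    tar cf - /etc | tar xf - 2&amp;gt;&amp;amp;1 | tee $LOG
    return ${PIPESTATUS[1]}
}

mkdir -p $TESTDIR
cd $TESTDIR || exit 1
do_tar
RC=$?
# &apos;exit delayed from previous errors&apos; in the stdout log indicates only
# non-fatal problems (e.g. utime), which the trace shows being tolerated;
# a hard write error (EIO during OST failover) is not.
PREV_ERRORS=$(grep &apos;exit delayed from previous errors&apos; $LOG)
if [ $RC -ne 0 ] &amp;amp;&amp;amp; [ -z &quot;$PREV_ERRORS&quot; ]; then
    echo &quot;$(date &apos;+%F %H:%M:%S&apos;): tar failed&quot;
fi
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;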


&lt;p&gt;From the run_tar_stdout log, we see some write errors&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;tar: etc/mke2fs.conf: Cannot write: Input/output error
tar: Exiting with failure status due to previous errors
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;</description>
        <environment></environment>
        <key id="54315">LU-11791</key>
        <summary>recovery-mds-scale test failover_ost fails with &apos;test_failover_ost returned 1&apos;</summary>
        <type id="1" iconUrl="https://jira.whamcloud.com/secure/viewavatar?size=xsmall&amp;avatarId=11303&amp;avatarType=issuetype">Bug</type>
        <priority id="4" iconUrl="https://jira.whamcloud.com/images/icons/priorities/minor.svg">Minor</priority>
        <status id="1" iconUrl="https://jira.whamcloud.com/images/icons/statuses/open.png" description="The issue is open and ready for the assignee to start work on it.">Open</status>
        <statusCategory id="2" key="new" colorName="default"/>
        <resolution id="-1">Unresolved</resolution>
        <assignee username="Deiter">Alex Deiter</assignee>
        <reporter username="jamesanunez">James Nunez</reporter>
        <labels>
            <label>failover</label>
        </labels>
        <created>Mon, 17 Dec 2018 02:53:10 +0000</created>
        <updated>Tue, 30 Jan 2024 19:53:34 +0000</updated>
        <version>Lustre 2.12.0</version>
        <version>Lustre 2.10.7</version>
        <version>Lustre 2.12.3</version>
        <version>Lustre 2.14.0</version>
        <version>Lustre 2.12.4</version>
        <version>Lustre 2.15.3</version>
        <due></due>
        <votes>0</votes>
        <watches>4</watches>
        <comments>
                            <comment id="243885" author="jamesanunez" created="Wed, 13 Mar 2019 23:12:10 +0000"  >&lt;p&gt;I see a similar failure for 2.10.7 RC1 failover testing with logs at &lt;a href=&quot;https://testing.whamcloud.com/test_sets/fefc3968-43fc-11e9-9720-52540065bddc&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://testing.whamcloud.com/test_sets/fefc3968-43fc-11e9-9720-52540065bddc&lt;/a&gt; . &lt;/p&gt;

&lt;p&gt;The errors in the client (vm4) run_tar_stdout are&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;tar: etc/yum.repos.d/lustre-e2fsprogs.repo: Cannot close: Input/output error
tar: etc/rsyncd.conf: Cannot write: No such file or directory
tar: Exiting with failure status due to previous errors
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;</comment>
                            <comment id="262581" author="jamesanunez" created="Tue, 4 Feb 2020 23:02:44 +0000"  >&lt;p&gt;We see similar failures with 2.12.4 RHEL8 client failover testing at &lt;a href=&quot;https://testing.whamcloud.com/test_sets/673f11d2-4378-11ea-bffa-52540065bddc&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://testing.whamcloud.com/test_sets/673f11d2-4378-11ea-bffa-52540065bddc&lt;/a&gt; with the following in run_tar_stdout&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;tar: etc/lvm/profile/cache-smq.profile: Cannot utime: Input/output error
tar: etc/lvm/profile/cache-smq.profile: Cannot close: Input/output error
tar: Exiting with failure status due to previous errors
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;</comment>
        </comments>
        <issuelinks>
            <issuelinktype id="10011">
                <name>Related</name>
                <outwardlinks description="is related to">
                    <issuelink>
                        <issuekey id="55494">LU-12224</issuekey>
                    </issuelink>
                    <issuelink>
                        <issuekey id="57149">LU-12858</issuekey>
                    </issuelink>
                </outwardlinks>
                <inwardlinks description="is related to">
                    <issuelink>
                        <issuekey id="25061">LU-5158</issuekey>
                    </issuelink>
                </inwardlinks>
            </issuelinktype>
        </issuelinks>
        <attachments>
        </attachments>
        <subtasks>
        </subtasks>
        <customfields>
            <customfield id="customfield_10890" key="com.atlassian.jira.plugins.jira-development-integration-plugin:devsummary">
                <customfieldname>Development</customfieldname>
                <customfieldvalues>
                </customfieldvalues>
            </customfield>
            <customfield id="customfield_10390" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                <customfieldname>Rank</customfieldname>
                <customfieldvalues>
                    <customfieldvalue>1|i0085j:</customfieldvalue>
                </customfieldvalues>
            </customfield>
            <customfield id="customfield_10090" key="com.pyxis.greenhopper.jira:gh-global-rank">
                <customfieldname>Rank (Obsolete)</customfieldname>
                <customfieldvalues>
                    <customfieldvalue>9223372036854775807</customfieldvalue>
                </customfieldvalues>
            </customfield>
            <customfield id="customfield_10060" key="com.atlassian.jira.plugin.system.customfieldtypes:select">
                <customfieldname>Severity</customfieldname>
                <customfieldvalues>
                    <customfieldvalue key="10022"><![CDATA[3]]></customfieldvalue>
                </customfieldvalues>
            </customfield>
        </customfields>
    </item>
</channel>
</rss>