<?xml version="1.0" encoding="UTF-8"?>
<!-- 
RSS generated by JIRA (9.4.14#940014-sha1:734e6822bbf0d45eff9af51f82432957f73aa32c) at Sat Feb 10 02:10:46 UTC 2024

It is possible to restrict the fields that are returned in this document by specifying the 'field' parameter in your request.
For example, to request only the issue key and summary append 'field=key&field=summary' to the URL of your request.
-->
<rss version="0.92" >
<channel>
    <title>Whamcloud Community JIRA</title>
    <link>https://jira.whamcloud.com</link>
    <description>This file is an XML representation of an issue</description>
    <language>en-us</language>    <build-info>
        <version>9.4.14</version>
        <build-number>940014</build-number>
        <build-date>05-12-2023</build-date>
    </build-info>


<item>
            <title>[LU-7653] replay-single/test_110f test failed Lustre: DEBUG MARKER: replay-single test_110f: FAIL: 1 != 2 after recovery</title>
                <link>https://jira.whamcloud.com/browse/LU-7653</link>
                <project id="10000" key="LU">Lustre</project>
                    <description>&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;== replay-single test 110f: DNE: create striped dir, fail MDT1/MDT2 == 04:57:43 (1452229063)
Filesystem          1K-blocks  Used Available Use% Mounted on
fre1225@tcp:/lustre   1377952 68464   1236816   6% /mnt/lustre
Filesystem          1K-blocks  Used Available Use% Mounted on
fre1225@tcp:/lustre   1377952 68464   1236816   6% /mnt/lustre
Failing mds1 on fre1225
Stopping /mnt/mds1 (opts:) on fre1225
pdsh@fre1227: fre1225: ssh exited with exit code 1
Failing mds2 on fre1225
Stopping /mnt/mds2 (opts:) on fre1225
pdsh@fre1227: fre1225: ssh exited with exit code 1
reboot facets: mds1
Failover mds1 to fre1225
04:57:56 (1452229076) waiting for fre1225 network 900 secs ...
04:57:56 (1452229076) network interface is UP
mount facets: mds1
Starting mds1: -o rw,user_xattr  /dev/vdb /mnt/mds1
pdsh@fre1227: fre1225: ssh exited with exit code 1
pdsh@fre1227: fre1225: ssh exited with exit code 1
Started lustre-MDT0000
reboot facets: mds2
Failover mds2 to fre1225
04:58:07 (1452229087) waiting for fre1225 network 900 secs ...
04:58:07 (1452229087) network interface is UP
mount facets: mds2
Starting mds2: -o rw,user_xattr  /dev/vdc /mnt/mds2
pdsh@fre1227: fre1225: ssh exited with exit code 1
pdsh@fre1227: fre1225: ssh exited with exit code 1
Started lustre-MDT0001
fre1228: mdc.lustre-MDT0000-mdc-*.mds_server_uuid in FULL state after 5 sec
fre1227: mdc.lustre-MDT0000-mdc-*.mds_server_uuid in FULL state after 5 sec
fre1228: mdc.lustre-MDT0001-mdc-*.mds_server_uuid in FULL state after 0 sec
fre1227: mdc.lustre-MDT0001-mdc-*.mds_server_uuid in FULL state after 0 sec
/mnt/lustre/d110f.replay-single/striped_dir has type dir OK
 replay-single test_110f: @@@@@@ FAIL: 1 != 2 after recovery 
  Trace dump:
  = /usr/lib64/lustre/tests/test-framework.sh:4767:error_noexit()
  = /usr/lib64/lustre/tests/test-framework.sh:4798:error()
  = /usr/lib64/lustre/tests/replay-single.sh:3600:check_striped_dir_110()
  = /usr/lib64/lustre/tests/replay-single.sh:3725:test_110f()
  = /usr/lib64/lustre/tests/test-framework.sh:5045:run_one()
  = /usr/lib64/lustre/tests/test-framework.sh:5082:run_one_logged()
  = /usr/lib64/lustre/tests/test-framework.sh:4899:run_test()
  = /usr/lib64/lustre/tests/replay-single.sh:3731:main()
Dumping lctl log to /tmp/test_logs/1452229057/replay-single.test_110f.*.1452229095.log
fre1228: Warning: Permanently added &apos;fre1227,192.168.112.27&apos; (RSA) to the list of known hosts.

fre1225: Warning: Permanently added &apos;fre1227,192.168.112.27&apos; (RSA) to the list of known hosts.

fre1226: Warning: Permanently added &apos;fre1227,192.168.112.27&apos; (RSA) to the list of known hosts.

FAIL 110f (33s)
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;</description>
                <environment>Configuration : 4 Node - ( 1 MDS/1 OSS/2 Clients)_dne_singlemds&lt;br/&gt;
Release&lt;br/&gt;
2.6.32_431.29.2.el6_lustremaster_9267_2_g959f8f7 Build Date: Sat 02 Jan 2016 05:21:40 PM UTC&lt;br/&gt;
Server 2.7.64&lt;br/&gt;
Client 2.7.64</environment>
        <key id="34061">LU-7653</key>
            <summary>replay-single/test_110f test failed Lustre: DEBUG MARKER: replay-single test_110f: FAIL: 1 != 2 after recovery</summary>
                <type id="1" iconUrl="https://jira.whamcloud.com/secure/viewavatar?size=xsmall&amp;avatarId=11303&amp;avatarType=issuetype">Bug</type>
                                            <priority id="4" iconUrl="https://jira.whamcloud.com/images/icons/priorities/minor.svg">Minor</priority>
                        <status id="5" iconUrl="https://jira.whamcloud.com/images/icons/statuses/resolved.png" description="A resolution has been taken, and it is awaiting verification by reporter. From here issues are either reopened, or are closed.">Resolved</status>
                    <statusCategory id="3" key="done" colorName="success"/>
                                    <resolution id="1">Fixed</resolution>
                                        <assignee username="aboyko">Alexander Boyko</assignee>
                                    <reporter username="parinay">parinay v kondekar</reporter>
                        <labels>
                    </labels>
                <created>Tue, 12 Jan 2016 04:34:16 +0000</created>
                <updated>Tue, 14 Apr 2020 14:28:13 +0000</updated>
                            <resolved>Tue, 14 Apr 2020 14:28:13 +0000</resolved>
                                    <version>Lustre 2.8.0</version>
                                    <fixVersion>Lustre 2.14.0</fixVersion>
                                        <due></due>
                            <votes>0</votes>
                                    <watches>6</watches>
                                                                            <comments>
                            <comment id="138694" author="jamesanunez" created="Tue, 12 Jan 2016 18:17:48 +0000"  >&lt;p&gt;Parinay - We don&apos;t see this test fail in our testing. How often do you see this failure and does it fail consistently for you?&lt;/p&gt;</comment>
                            <comment id="138768" author="parinay" created="Wed, 13 Jan 2016 03:15:56 +0000"  >&lt;p&gt;James,&lt;br/&gt;
Its very consistent. I am attaching PTLDEBUG=-1 logs here.&lt;/p&gt;

&lt;p&gt;Thanks.&lt;/p&gt;</comment>
                            <comment id="156671" author="ys" created="Thu, 23 Jun 2016 15:06:06 +0000"  >&lt;p&gt;This issue is very easy to reproduce when mds1 &amp;amp; mds2 setup on same node. Looks like it can be fixed just changing failover order. &lt;/p&gt;

&lt;p&gt;Thanks,&lt;br/&gt;
YangSheng&lt;/p&gt;</comment>
                            <comment id="201093" author="red" created="Thu, 6 Jul 2017 02:17:02 +0000"  >&lt;p&gt;We reproduce this issue, and as Mr Yang said, this issue can be fixed just changing the script as follows:&lt;/p&gt;

&lt;div class=&quot;code panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;codeContent panelContent&quot;&gt;
&lt;pre class=&quot;code-java&quot;&gt;test_110f() {
         ...
        mkdir -p $DIR/$tdir
        replay_barrier mds1 
        replay_barrier mds2 
        $LFS mkdir -i1 -c$MDSCOUNT $DIR/$tdir/striped_dir
       * fail mds2,mds1*

        check_striped_dir_110 || error &lt;span class=&quot;code-quote&quot;&gt;&quot;check striped_dir failed&quot;&lt;/span&gt;

        rm -rf $DIR/$tdir || error &lt;span class=&quot;code-quote&quot;&gt;&quot;rmdir failed&quot;&lt;/span&gt;

        &lt;span class=&quot;code-keyword&quot;&gt;return&lt;/span&gt; 0
}
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;
&lt;p&gt;And Our Lustre version is 2.9. Can some one help about this issue ? &lt;/p&gt;</comment>
                            <comment id="201094" author="gerrit" created="Thu, 6 Jul 2017 03:20:05 +0000"  >&lt;p&gt;Parinay Kondekar (parinay.kondekar@seagate.com) uploaded a new patch: &lt;a href=&quot;https://review.whamcloud.com/27940&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/27940&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-7653&quot; title=&quot;replay-single/test_110f test failed Lustre: DEBUG MARKER: replay-single test_110f: FAIL: 1 != 2 after recovery&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-7653&quot;&gt;&lt;del&gt;LU-7653&lt;/del&gt;&lt;/a&gt; tests: replay-single/110f fails for mdts on same MDS&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: master&lt;br/&gt;
Current Patch Set: 1&lt;br/&gt;
Commit: 10e2e6ab2a1c449ccd2cd5cacac1efcbefed29fb&lt;/p&gt;</comment>
                            <comment id="201146" author="red" created="Thu, 6 Jul 2017 12:21:51 +0000"  >&lt;p&gt;But, why this issue can be fixed just change the fail order of mds1 and mds2 ? Can someone explain this ?&lt;/p&gt;</comment>
                            <comment id="201309" author="ys" created="Fri, 7 Jul 2017 06:30:33 +0000"  >&lt;p&gt;Hi, ZhangWei,&lt;/p&gt;

&lt;p&gt;As my understanding, getdirstripe will try to get status from mds1 in DNE. If mds1 finished failover but mds2 does not, then we may get a stripecount less than mdscount. So keep mds1 finished failover in last can fix this issue especially when mds1 &amp;amp; mds2 setup on same node.&lt;/p&gt;

&lt;p&gt;Thanks,&lt;br/&gt;
YangSheng&lt;/p&gt;</comment>
                            <comment id="202630" author="gerrit" created="Wed, 19 Jul 2017 03:32:25 +0000"  >&lt;p&gt;Oleg Drokin (oleg.drokin@intel.com) merged in patch &lt;a href=&quot;https://review.whamcloud.com/27940/&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/27940/&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-7653&quot; title=&quot;replay-single/test_110f test failed Lustre: DEBUG MARKER: replay-single test_110f: FAIL: 1 != 2 after recovery&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-7653&quot;&gt;&lt;del&gt;LU-7653&lt;/del&gt;&lt;/a&gt; tests: replay-single/110f fails for mdts on same MDS&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: master&lt;br/&gt;
Current Patch Set: &lt;br/&gt;
Commit: c245e87fad249622b1974dd64f3e497653269ee6&lt;/p&gt;</comment>
                            <comment id="202668" author="ys" created="Wed, 19 Jul 2017 04:44:23 +0000"  >&lt;p&gt;Landed to 2.10.&lt;/p&gt;</comment>
                            <comment id="209716" author="jgmitter" created="Wed, 27 Sep 2017 14:21:40 +0000"  >&lt;p&gt;This is actually landed to master for 2.11.0.&lt;br/&gt;
(fixing the fixVersion)&lt;/p&gt;</comment>
                            <comment id="255979" author="adilger" created="Mon, 7 Oct 2019 01:41:59 +0000"  >&lt;p&gt;Still seeing this failure very frequently on Oleg&apos;s test system:&lt;/p&gt;

&lt;p&gt;Test session:&lt;br/&gt;
&lt;a href=&quot;http://testing.linuxhacker.ru:3333/lustre-reports/3524/testresults/replay-single-ldiskfs-DNE-centos7_x86_64-centos7_x86_64/&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://testing.linuxhacker.ru:3333/lustre-reports/3524/testresults/replay-single-ldiskfs-DNE-centos7_x86_64-centos7_x86_64/&lt;/a&gt;&lt;/p&gt;

&lt;p&gt;Subtest log:&lt;br/&gt;
&lt;a href=&quot;http://testing.linuxhacker.ru:3333/lustre-reports/3524/testresults/replay-single-ldiskfs-DNE-centos7_x86_64-centos7_x86_64/replay-single.test_110f.test_log.oleg264-client.log&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://testing.linuxhacker.ru:3333/lustre-reports/3524/testresults/replay-single-ldiskfs-DNE-centos7_x86_64-centos7_x86_64/replay-single.test_110f.test_log.oleg264-client.log&lt;/a&gt;&lt;/p&gt;</comment>
                            <comment id="266877" author="gerrit" created="Mon, 6 Apr 2020 07:09:37 +0000"  >&lt;p&gt;Alexander Boyko (c17825@cray.com) uploaded a new patch: &lt;a href=&quot;https://review.whamcloud.com/38137&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/38137&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-7653&quot; title=&quot;replay-single/test_110f test failed Lustre: DEBUG MARKER: replay-single test_110f: FAIL: 1 != 2 after recovery&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-7653&quot;&gt;&lt;del&gt;LU-7653&lt;/del&gt;&lt;/a&gt; lod: fix stripe allocation during recovery&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: master&lt;br/&gt;
Current Patch Set: 1&lt;br/&gt;
Commit: 990d9d8cf607b035d2b341588212b77faf99f309&lt;/p&gt;</comment>
                            <comment id="267534" author="gerrit" created="Tue, 14 Apr 2020 08:11:17 +0000"  >&lt;p&gt;Oleg Drokin (green@whamcloud.com) merged in patch &lt;a href=&quot;https://review.whamcloud.com/38137/&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/38137/&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-7653&quot; title=&quot;replay-single/test_110f test failed Lustre: DEBUG MARKER: replay-single test_110f: FAIL: 1 != 2 after recovery&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-7653&quot;&gt;&lt;del&gt;LU-7653&lt;/del&gt;&lt;/a&gt; lod: fix stripe allocation during recovery&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: master&lt;br/&gt;
Current Patch Set: &lt;br/&gt;
Commit: 858c0c6959c1319e83a18be5ef6cb50251542052&lt;/p&gt;</comment>
                            <comment id="267564" author="pjones" created="Tue, 14 Apr 2020 14:28:13 +0000"  >&lt;p&gt;Landed for 2.14&lt;/p&gt;</comment>
                    </comments>
                    <attachments>
                            <attachment id="20086" name="110f.lctl.tgz" size="981285" author="parinay" created="Tue, 12 Jan 2016 04:34:16 +0000"/>
                            <attachment id="20097" name="110f__0.console.MDS.log" size="152965" author="parinay" created="Wed, 13 Jan 2016 03:17:11 +0000"/>
                            <attachment id="20096" name="110f__0.messages.MDS.log" size="240070" author="parinay" created="Wed, 13 Jan 2016 03:17:11 +0000"/>
                            <attachment id="20095" name="110f__0.stdout.log" size="6904" author="parinay" created="Wed, 13 Jan 2016 03:17:11 +0000"/>
                            <attachment id="20098" name="110f__PTLDEBUG.lctl.tgz" size="1770484" author="parinay" created="Wed, 13 Jan 2016 03:17:11 +0000"/>
                    </attachments>
                <subtasks>
                    </subtasks>
                <customfields>
                                                                                                                                                                                            <customfield id="customfield_10890" key="com.atlassian.jira.plugins.jira-development-integration-plugin:devsummary">
                        <customfieldname>Development</customfieldname>
                        <customfieldvalues>
                            
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        <customfield id="customfield_10390" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                        <customfieldname>Rank</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>1|hzxxw7:</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10090" key="com.pyxis.greenhopper.jira:gh-global-rank">
                        <customfieldname>Rank (Obsolete)</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>9223372036854775807</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                                                                                            <customfield id="customfield_10060" key="com.atlassian.jira.plugin.system.customfieldtypes:select">
                        <customfieldname>Severity</customfieldname>
                        <customfieldvalues>
                                <customfieldvalue key="10022"><![CDATA[3]]></customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        </customfields>
    </item>
</channel>
</rss>