<!-- 
RSS generated by JIRA (9.4.14#940014-sha1:734e6822bbf0d45eff9af51f82432957f73aa32c) at Sat Feb 10 01:48:39 UTC 2024

It is possible to restrict the fields that are returned in this document by specifying the 'field' parameter in your request.
For example, to request only the issue key and summary append 'field=key&field=summary' to the URL of your request.
-->
<rss version="0.92" >
<channel>
    <title>Whamcloud Community JIRA</title>
    <link>https://jira.whamcloud.com</link>
    <description>This file is an XML representation of an issue</description>
    <language>en-us</language>    <build-info>
        <version>9.4.14</version>
        <build-number>940014</build-number>
        <build-date>05-12-2023</build-date>
    </build-info>


<item>
            <title>[LU-5115] replay-single test_73b: @@@@@@ FAIL: post-failover df: 1</title>
                <link>https://jira.whamcloud.com/browse/LU-5115</link>
                <project id="10000" key="LU">Lustre</project>
                    <description>&lt;p&gt;&lt;a href=&quot;https://maloo.whamcloud.com/test_sets/82faf914-e65d-11e3-9a4f-52540035b04c&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://maloo.whamcloud.com/test_sets/82faf914-e65d-11e3-9a4f-52540035b04c&lt;/a&gt;&lt;/p&gt;</description>
                <environment></environment>
        <key id="24843">LU-5115</key>
            <summary>replay-single test_73b: @@@@@@ FAIL: post-failover df: 1</summary>
                <type id="1" iconUrl="https://jira.whamcloud.com/secure/viewavatar?size=xsmall&amp;avatarId=11303&amp;avatarType=issuetype">Bug</type>
                                            <priority id="4" iconUrl="https://jira.whamcloud.com/images/icons/priorities/minor.svg">Minor</priority>
                        <status id="5" iconUrl="https://jira.whamcloud.com/images/icons/statuses/resolved.png" description="A resolution has been taken, and it is awaiting verification by reporter. From here issues are either reopened, or are closed.">Resolved</status>
                    <statusCategory id="3" key="done" colorName="success"/>
                                    <resolution id="3">Duplicate</resolution>
                                        <assignee username="wc-triage">WC Triage</assignee>
                                    <reporter username="niu">Niu Yawei</reporter>
                        <labels>
                    </labels>
                <created>Wed, 28 May 2014 14:32:23 +0000</created>
                <updated>Wed, 7 Sep 2016 11:09:46 +0000</updated>
                            <resolved>Wed, 7 Sep 2016 11:09:46 +0000</resolved>
                                    <version>Lustre 2.9.0</version>
                                                        <due></due>
                            <votes>0</votes>
                                    <watches>6</watches>
                                                                            <comments>
                            <comment id="85056" author="adilger" created="Wed, 28 May 2014 18:36:07 +0000"  >&lt;p&gt;In the future, please use &quot;Raise bug&quot; from the Maloo page so that it will automatically triage this failure in the future. &lt;/p&gt;</comment>
                            <comment id="85095" author="niu" created="Thu, 29 May 2014 02:52:20 +0000"  >&lt;blockquote&gt;
&lt;p&gt;In the future, please use &quot;Raise bug&quot; from the Maloo page so that it will automatically triage this failure in the future.&lt;/p&gt;&lt;/blockquote&gt;
&lt;p&gt;Ok, thanks.&lt;/p&gt;

&lt;p&gt;This looks like a test script problem:&lt;/p&gt;

&lt;div class=&quot;code panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;codeContent panelContent&quot;&gt;
&lt;pre class=&quot;code-java&quot;&gt;shadow-40vm5: CMD: shadow-40vm5.shadow.whamcloud.com lctl get_param -n at_max
shadow-40vm6: CMD: shadow-40vm6.shadow.whamcloud.com lctl get_param -n at_max
shadow-40vm5: mdc.lustre-MDT0000-mdc-*.mds_server_uuid in FULL state after 0 sec
shadow-40vm6: mdc.lustre-MDT0000-mdc-*.mds_server_uuid in FULL state after 76 sec
shadow-40vm5: stat: cannot read file system information &lt;span class=&quot;code-keyword&quot;&gt;for&lt;/span&gt; `/mnt/lustre&apos;: Input/output error
 replay-single test_73b: @@@@@@ FAIL: post-failover df: 1
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;wait_clients_import_state() reported that the shadow-40vm5 client recovered in 0 seconds? That looks suspicious to me. Look at the log for this client:&lt;/p&gt;

&lt;div class=&quot;code panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;codeContent panelContent&quot;&gt;
&lt;pre class=&quot;code-java&quot;&gt;00000001:02000400:1.0:1401256333.271532:0:23799:0:(debug.c:445:libcfs_debug_mark_buffer()) DEBUG MARKER: /usr/sbin/lctl mark mdc.lustre-MDT0000-mdc-*.mds_server_uuid in FULL state after 0 sec
00000001:02000400:0.0:1401256333.271547:0:23800:0:(debug.c:445:libcfs_debug_mark_buffer()) DEBUG MARKER: /usr/sbin/lctl mark mdc.lustre-MDT0000-mdc-*.mds_server_uuid in FULL state after 0 sec
00000001:02000400:1.0:1401256333.422776:0:23883:0:(debug.c:445:libcfs_debug_mark_buffer()) DEBUG MARKER: mdc.lustre-MDT0000-mdc-*.mds_server_uuid in FULL state after 0 sec
00000001:02000400:0.0:1401256333.422803:0:23882:0:(debug.c:445:libcfs_debug_mark_buffer()) DEBUG MARKER: mdc.lustre-MDT0000-mdc-*.mds_server_uuid in FULL state after 0 sec
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;The vm5 MDC state was checked and found to be FULL.&lt;/p&gt;

&lt;div class=&quot;code panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;codeContent panelContent&quot;&gt;
&lt;pre class=&quot;code-java&quot;&gt;00000001:02000400:1.0:1401256409.934172:0:23908:0:(debug.c:445:libcfs_debug_mark_buffer()) DEBUG MARKER: /usr/sbin/lctl mark mdc.lustre-MDT0000-mdc-*.mds_server_uuid in FULL state after 76 sec
00000001:02000400:1.0:1401256410.078821:0:23932:0:(debug.c:445:libcfs_debug_mark_buffer()) DEBUG MARKER: mdc.lustre-MDT0000-mdc-*.mds_server_uuid in FULL state after 76 sec
00000080:00200000:0.0:1401256411.194194:0:23955:0:(llite_lib.c:1657:ll_statfs()) VFS Op: at 4299377545 jiffies
00000080:00000004:0.0:1401256411.194198:0:23955:0:(obd_class.h:1294:obd_statfs()) osfs 4299275701, max_age 4299376545
00800000:00000004:0.0:1401256411.194203:0:23955:0:(obd_class.h:1294:obd_statfs()) osfs 4299275699, max_age 4299376545
00000100:00080000:0.0:1401256411.194224:0:23955:0:(client.c:1403:ptlrpc_send_new_req()) @@@ req from PID 23955 waiting &lt;span class=&quot;code-keyword&quot;&gt;for&lt;/span&gt; recovery: (FULL != REPLAY)  req@ffff88006e57f400 x1469319256038592/t0(0) o41-&amp;gt;lustre-MDT0000-mdc-ffff88007d721000@10.1.5.230@tcp:12/10 lens 224/368 e 0 to 0 dl 0 ref 2 fl Rpc:W/0/ffffffff rc 0/-1
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;The vm6 MDC state became FULL and statfs was issued; however, vm5 was actually in REPLAY state, which is why the statfs on vm5 failed.&lt;/p&gt;

&lt;div class=&quot;code panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;codeContent panelContent&quot;&gt;
&lt;pre class=&quot;code-java&quot;&gt;00000100:00080000:1.0:1401256419.512383:0:23956:0:(&lt;span class=&quot;code-keyword&quot;&gt;import&lt;/span&gt;.c:1283:ptlrpc_invalidate_import_thread()) ffff88007dac8000 lustre-MDT0000_UUID: changing &lt;span class=&quot;code-keyword&quot;&gt;import&lt;/span&gt; state from EVICTED to RECOVER
00000100:00080000:1.0:1401256419.512384:0:23956:0:(&lt;span class=&quot;code-keyword&quot;&gt;import&lt;/span&gt;.c:1394:ptlrpc_import_recovery_state_machine()) reconnected to lustre-MDT0000_UUID@10.1.5.230@tcp
00000100:00080000:1.0:1401256419.512386:0:23956:0:(&lt;span class=&quot;code-keyword&quot;&gt;import&lt;/span&gt;.c:1399:ptlrpc_import_recovery_state_machine()) ffff88007dac8000 lustre-MDT0000_UUID: changing &lt;span class=&quot;code-keyword&quot;&gt;import&lt;/span&gt; state from RECOVER to FULL
00000100:02000000:1.0:1401256419.512389:0:23956:0:(&lt;span class=&quot;code-keyword&quot;&gt;import&lt;/span&gt;.c:1407:ptlrpc_import_recovery_state_machine()) lustre-MDT0000-mdc-ffff88007d721000: Connection restored to lustre-MDT0000 (at 10.1.5.230@tcp)
00000080:00020000:0.0:1401256419.513378:0:23955:0:(llite_lib.c:1611:ll_statfs_internal()) md_statfs fails: rc = -5
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;Finally, vm5 recovered; however, the statfs failed.&lt;/p&gt;

&lt;p&gt;Look at the following script:&lt;/p&gt;
&lt;div class=&quot;code panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;codeContent panelContent&quot;&gt;
&lt;pre class=&quot;code-java&quot;&gt;fail() {
        local facets=$1
        local clients=${CLIENTS:-$HOSTNAME}

        facet_failover $* || error &lt;span class=&quot;code-quote&quot;&gt;&quot;failover: $?&quot;&lt;/span&gt;
        wait_clients_import_state &lt;span class=&quot;code-quote&quot;&gt;&quot;$clients&quot;&lt;/span&gt; &lt;span class=&quot;code-quote&quot;&gt;&quot;$facets&quot;&lt;/span&gt; FULL
        clients_up || error &lt;span class=&quot;code-quote&quot;&gt;&quot;post-failover df: $?&quot;&lt;/span&gt;
}
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;
&lt;p&gt;It&apos;s quite possible that when wait_clients_import_state() is called, some clients&apos; MDC state is still FULL. I&apos;m not sure what the best way to improve the script is; maybe wait longer before calling wait_clients_import_state()?&lt;/p&gt;</comment>
                            <comment id="162149" author="yujian" created="Wed, 17 Aug 2016 01:54:42 +0000"  >&lt;p&gt;More failure instances on master branch:&lt;br/&gt;
&lt;a href=&quot;https://testing.hpdd.intel.com/test_sets/f29243fe-627f-11e6-b5b1-5254006e85c2&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://testing.hpdd.intel.com/test_sets/f29243fe-627f-11e6-b5b1-5254006e85c2&lt;/a&gt;&lt;br/&gt;
&lt;a href=&quot;https://testing.hpdd.intel.com/test_sets/38583f22-63ea-11e6-b5b1-5254006e85c2&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://testing.hpdd.intel.com/test_sets/38583f22-63ea-11e6-b5b1-5254006e85c2&lt;/a&gt;&lt;/p&gt;</comment>
                            <comment id="165003" author="standan" created="Tue, 6 Sep 2016 18:36:27 +0000"  >&lt;p&gt;Is it a duplicate of &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-6670&quot; title=&quot;Hard Failover recovery-small test_28: post-failover df: 1&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-6670&quot;&gt;LU-6670&lt;/a&gt;?&lt;/p&gt;</comment>
                            <comment id="165044" author="niu" created="Wed, 7 Sep 2016 02:08:49 +0000"  >&lt;p&gt;yes, it&apos;s quite possible they are related.&lt;/p&gt;</comment>
                            <comment id="165051" author="hongchao.zhang" created="Wed, 7 Sep 2016 07:36:10 +0000"  >&lt;p&gt;In &quot;fail()&quot;, the import state of all clients is checked and waited on until it reaches the specified state, and if some client&apos;s MDC state is FULL, it should not affect&lt;br/&gt;
the check of the other clients&apos; MDC imports. Will &quot;pdsh&quot; or other similar tools wait for all the nodes to finish executing the command?&lt;/p&gt;</comment>
                            <comment id="165065" author="niu" created="Wed, 7 Sep 2016 11:08:26 +0000"  >&lt;p&gt;Indeed, I think you are right.&lt;/p&gt;</comment>
                            <comment id="165066" author="niu" created="Wed, 7 Sep 2016 11:09:46 +0000"  >&lt;p&gt;Dup of &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-6670&quot; title=&quot;Hard Failover recovery-small test_28: post-failover df: 1&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-6670&quot;&gt;LU-6670&lt;/a&gt;&lt;/p&gt;</comment>
                    </comments>
                <issuelinks>
                            <issuelinktype id="10011">
                    <name>Related</name>
                                            <outwardlinks description="is related to ">
                                        <issuelink>
            <issuekey id="30454">LU-6670</issuekey>
        </issuelink>
                            </outwardlinks>
                                                        </issuelinktype>
                    </issuelinks>
                <attachments>
                    </attachments>
                <subtasks>
                    </subtasks>
                <customfields>
                                                                                                                                                                                            <customfield id="customfield_10890" key="com.atlassian.jira.plugins.jira-development-integration-plugin:devsummary">
                        <customfieldname>Development</customfieldname>
                        <customfieldvalues>
                            
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        <customfield id="customfield_10390" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                        <customfieldname>Rank</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>1|hzwn7j:</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10090" key="com.pyxis.greenhopper.jira:gh-global-rank">
                        <customfieldname>Rank (Obsolete)</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>14102</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                                                                                            <customfield id="customfield_10060" key="com.atlassian.jira.plugin.system.customfieldtypes:select">
                        <customfieldname>Severity</customfieldname>
                        <customfieldvalues>
                                <customfieldvalue key="10022"><![CDATA[3]]></customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        </customfields>
    </item>
</channel>
</rss>