<?xml version="1.0" encoding="UTF-8"?>
<!-- 
RSS generated by JIRA (9.4.14#940014-sha1:734e6822bbf0d45eff9af51f82432957f73aa32c) at Sat Feb 10 01:44:19 UTC 2024

It is possible to restrict the fields that are returned in this document by specifying the 'field' parameter in your request.
For example, to request only the issue key and summary append 'field=key&field=summary' to the URL of your request.
-->
<rss version="0.92">
<channel>
    <title>Whamcloud Community JIRA</title>
    <link>https://jira.whamcloud.com</link>
    <description>This file is an XML representation of an issue</description>
<language>en-us</language>
    <build-info>
        <version>9.4.14</version>
        <build-number>940014</build-number>
        <build-date>05-12-2023</build-date>
    </build-info>


<item>
            <title>[LU-4613] Failure on test suite sanity-hsm test_12o: request on 0x200000bd1:0xf:0x0 is not SUCCEED on mds1</title>
                <link>https://jira.whamcloud.com/browse/LU-4613</link>
                <project id="10000" key="LU">Lustre</project>
                    <description>&lt;p&gt;This issue was created by maloo for sarah &amp;lt;sarah@whamcloud.com&amp;gt;&lt;/p&gt;

&lt;p&gt;This issue relates to the following test suite run: &lt;a href=&quot;http://maloo.whamcloud.com/test_sets/6ac389ea-90ce-11e3-91ee-52540035b04c&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://maloo.whamcloud.com/test_sets/6ac389ea-90ce-11e3-91ee-52540035b04c&lt;/a&gt;.&lt;/p&gt;

&lt;p&gt;The sub-test test_12o failed with the following error:&lt;/p&gt;
&lt;blockquote&gt;
&lt;p&gt;request on 0x200000bd1:0xf:0x0 is not SUCCEED on mds1&lt;/p&gt;&lt;/blockquote&gt;

&lt;p&gt;Info required for matching: sanity-hsm 12o&lt;/p&gt;</description>
                <environment>client and server: lustre-master build # 1876&lt;br/&gt;
client is SLES11 SP3</environment>
        <key id="23105">LU-4613</key>
            <summary>Failure on test suite sanity-hsm test_12o: request on 0x200000bd1:0xf:0x0 is not SUCCEED on mds1</summary>
                <type id="1" iconUrl="https://jira.whamcloud.com/secure/viewavatar?size=xsmall&amp;avatarId=11303&amp;avatarType=issuetype">Bug</type>
                                            <priority id="4" iconUrl="https://jira.whamcloud.com/images/icons/priorities/minor.svg">Minor</priority>
                        <status id="5" iconUrl="https://jira.whamcloud.com/images/icons/statuses/resolved.png" description="A resolution has been taken, and it is awaiting verification by reporter. From here issues are either reopened, or are closed.">Resolved</status>
                    <statusCategory id="3" key="done" colorName="success"/>
                                    <resolution id="1">Fixed</resolution>
                                        <assignee username="bfaccini">Bruno Faccini</assignee>
                                    <reporter username="maloo">Maloo</reporter>
                        <labels>
                            <label>HSM</label>
                    </labels>
                <created>Tue, 11 Feb 2014 22:21:52 +0000</created>
                <updated>Fri, 21 Feb 2014 13:47:51 +0000</updated>
                            <resolved>Fri, 21 Feb 2014 13:47:51 +0000</resolved>
                                    <version>Lustre 2.6.0</version>
                    <version>Lustre 2.5.1</version>
                                    <fixVersion>Lustre 2.6.0</fixVersion>
                    <fixVersion>Lustre 2.5.1</fixVersion>
                                        <due></due>
                            <votes>0</votes>
                                    <watches>7</watches>
                                                                            <comments>
                            <comment id="76809" author="bfaccini" created="Wed, 12 Feb 2014 09:36:11 +0000"  >&lt;p&gt;+1 for &lt;a href=&quot;https://maloo.whamcloud.com/test_sets/e17437d2-934f-11e3-9f1b-52540035b04c&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://maloo.whamcloud.com/test_sets/e17437d2-934f-11e3-9f1b-52540035b04c&lt;/a&gt;, during auto-tests session for patch &lt;a href=&quot;http://review.whamcloud.com/9212&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/9212&lt;/a&gt; (b2_5 version for &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-3834&quot; title=&quot;hsm_cdt_request_completed() may clear HS_RELEASED on failed restore&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-3834&quot;&gt;&lt;del&gt;LU-3834&lt;/del&gt;&lt;/a&gt;).&lt;/p&gt;

&lt;p&gt;sanity-hsm/test_12o (I introduced for &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-3834&quot; title=&quot;hsm_cdt_request_completed() may clear HS_RELEASED on failed restore&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-3834&quot;&gt;&lt;del&gt;LU-3834&lt;/del&gt;&lt;/a&gt; !!&#8230;) reports a failure waiting for the success of the 2nd diff command (ie, when fault-injection of layouts-swap has been disabled) for the same file/FID, and we can see the following traces in the tests log :&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;
&#8230;&#8230;&#8230;

CMD: client-12vm3 lctl set_param fail_loc=0x152
fail_loc=0x152
CMD: client-12vm3 /usr/sbin/lctl set_param mdt.lustre-MDT0000.hsm.policy=+NRA
mdt.lustre-MDT0000.hsm.policy=+NRA
diff: /mnt/lustre/d12o.sanity-hsm/f12o.sanity-hsm: No data available
CMD: client-12vm3 /usr/sbin/lctl get_param -n mdt.lustre-MDT0000.hsm.actions | awk &apos;/&apos;0x200000bd1:0xf:0x0&apos;.*action=&apos;RESTORE&apos;/ {print \$13}&apos; | cut -f2 -d=
CMD: client-12vm3 /usr/sbin/lctl get_param -n mdt.lustre-MDT0000.hsm.actions | awk &apos;/&apos;0x200000bd1:0xf:0x0&apos;.*action=&apos;RESTORE&apos;/ {print \$13}&apos; | cut -f2 -d=
CMD: client-12vm3 /usr/sbin/lctl set_param mdt.lustre-MDT0000.hsm.policy=-NRA
mdt.lustre-MDT0000.hsm.policy=-NRA
CMD: client-12vm3 lctl set_param fail_loc=0
fail_loc=0
CMD: client-12vm3 /usr/sbin/lctl get_param -n mdt.lustre-MDT0000.hsm.actions | awk &apos;/&apos;0x200000bd1:0xf:0x0&apos;.*action=&apos;RESTORE&apos;/ {print \$13}&apos; | cut -f2 -d=
CMD: client-12vm3 /usr/sbin/lctl get_param -n mdt.lustre-MDT0000.hsm.actions | awk &apos;/&apos;0x200000bd1:0xf:0x0&apos;.*action=&apos;RESTORE&apos;/ {print \$13}&apos; | cut -f2 -d=
Waiting 100 secs for update
CMD: client-12vm3 /usr/sbin/lctl get_param -n mdt.lustre-MDT0000.hsm.actions | awk &apos;/&apos;0x200000bd1:0xf:0x0&apos;.*action=&apos;RESTORE&apos;/ {print \$13}&apos; | cut -f2 -d=
CMD: client-12vm3 /usr/sbin/lctl get_param -n mdt.lustre-MDT0000.hsm.actions | awk &apos;/&apos;0x200000bd1:0xf:0x0&apos;.*action=&apos;RESTORE&apos;/ {print \$13}&apos; | cut -f2 -d=
CMD: client-12vm3 /usr/sbin/lctl get_param -n mdt.lustre-MDT0000.hsm.actions | awk &apos;/&apos;0x200000bd1:0xf:0x0&apos;.*action=&apos;RESTORE&apos;/ {print \$13}&apos; | cut -f2 -d=
CMD: client-12vm3 /usr/sbin/lctl get_param -n mdt.lustre-MDT0000.hsm.actions | awk &apos;/&apos;0x200000bd1:0xf:0x0&apos;.*action=&apos;RESTORE&apos;/ {print \$13}&apos; | cut -f2 -d=
CMD: client-12vm3 /usr/sbin/lctl get_param -n mdt.lustre-MDT0000.hsm.actions | awk &apos;/&apos;0x200000bd1:0xf:0x0&apos;.*action=&apos;RESTORE&apos;/ {print \$13}&apos; | cut -f2 -d=
CMD: client-12vm3 /usr/sbin/lctl get_param -n mdt.lustre-MDT0000.hsm.actions | awk &apos;/&apos;0x200000bd1:0xf:0x0&apos;.*action=&apos;RESTORE&apos;/ {print \$13}&apos; | cut -f2 -d=
CMD: client-12vm3 /usr/sbin/lctl get_param -n mdt.lustre-MDT0000.hsm.actions | awk &apos;/&apos;0x200000bd1:0xf:0x0&apos;.*action=&apos;RESTORE&apos;/ {print \$13}&apos; | cut -f2 -d=
CMD: client-12vm3 /usr/sbin/lctl get_param -n mdt.lustre-MDT0000.hsm.actions | awk &apos;/&apos;0x200000bd1:0xf:0x0&apos;.*action=&apos;RESTORE&apos;/ {print \$13}&apos; | cut -f2 -d=
CMD: client-12vm3 /usr/sbin/lctl get_param -n mdt.lustre-MDT0000.hsm.actions | awk &apos;/&apos;0x200000bd1:0xf:0x0&apos;.*action=&apos;RESTORE&apos;/ {print \$13}&apos; | cut -f2 -d=
CMD: client-12vm3 /usr/sbin/lctl get_param -n mdt.lustre-MDT0000.hsm.actions | awk &apos;/&apos;0x200000bd1:0xf:0x0&apos;.*action=&apos;RESTORE&apos;/ {print \$13}&apos; | cut -f2 -d=
Waiting 90 secs for update
CMD: client-12vm3 /usr/sbin/lctl get_param -n mdt.lustre-MDT0000.hsm.actions | awk &apos;/&apos;0x200000bd1:0xf:0x0&apos;.*action=&apos;RESTORE&apos;/ {print \$13}&apos; | cut -f2 -d=
CMD: client-12vm3 /usr/sbin/lctl get_param -n mdt.lustre-MDT0000.hsm.actions | awk &apos;/&apos;0x200000bd1:0xf:0x0&apos;.*action=&apos;RESTORE&apos;/ {print \$13}&apos; | cut -f2 -d=
CMD: client-12vm3 /usr/sbin/lctl get_param -n mdt.lustre-MDT0000.hsm.actions | awk &apos;/&apos;0x200000bd1:0xf:0x0&apos;.*action=&apos;RESTORE&apos;/ {print \$13}&apos; | cut -f2 -d=
CMD: client-12vm3 /usr/sbin/lctl get_param -n mdt.lustre-MDT0000.hsm.actions | awk &apos;/&apos;0x200000bd1:0xf:0x0&apos;.*action=&apos;RESTORE&apos;/ {print \$13}&apos; | cut -f2 -d=
CMD: client-12vm3 /usr/sbin/lctl get_param -n mdt.lustre-MDT0000.hsm.actions | awk &apos;/&apos;0x200000bd1:0xf:0x0&apos;.*action=&apos;RESTORE&apos;/ {print \$13}&apos; | cut -f2 -d=
CMD: client-12vm3 /usr/sbin/lctl get_param -n mdt.lustre-MDT0000.hsm.actions | awk &apos;/&apos;0x200000bd1:0xf:0x0&apos;.*action=&apos;RESTORE&apos;/ {print \$13}&apos; | cut -f2 -d=
CMD: client-12vm3 /usr/sbin/lctl get_param -n mdt.lustre-MDT0000.hsm.actions | awk &apos;/&apos;0x200000bd1:0xf:0x0&apos;.*action=&apos;RESTORE&apos;/ {print \$13}&apos; | cut -f2 -d=
Changed after 17s: from &apos;FAILED
SUCCEED&apos; to &apos;&apos;
CMD: client-12vm3 /usr/sbin/lctl get_param -n mdt.lustre-MDT0000.hsm.actions | awk &apos;/&apos;0x200000bd1:0xf:0x0&apos;.*action=&apos;RESTORE&apos;/ {print \$13}&apos; | cut -f2 -d=

&#8230;&#8230;&#8230;.

&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;
&lt;p&gt;so, it seems that since it searches for the hsm operation result for the same FID as a previous failed operation that may not have been purged from the log, it gets puzzled when receiving both old/new results!&lt;/p&gt;

&lt;p&gt;Here is sanity-hsm/test_12o current code :&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;test_12o() {
        # test needs a running copytool
        copytool_setup

        mkdir -p $DIR/$tdir
        local f=$DIR/$tdir/$tfile
        local fid=$(copy_file /etc/hosts $f)

        $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f
        wait_request_state $fid ARCHIVE SUCCEED
        $LFS hsm_release $f || error &quot;release of $f failed&quot;

#define OBD_FAIL_MDS_HSM_SWAP_LAYOUTS           0x152
        do_facet $SINGLEMDS lctl set_param fail_loc=0x152

        # set no retry action mode
        cdt_set_no_retry

        diff -q /etc/hosts $f
        local st=$?

        # we check we had a restore failure
        wait_request_state $fid RESTORE FAILED

        [[ $st -eq 0 ]] &amp;amp;&amp;amp; error &quot;Restore must fail&quot;

        # remove no retry action mode
        cdt_clear_no_retry

        # check file is still released
        check_hsm_flags $f &quot;0x0000000d&quot;

        # retry w/o failure injection
        do_facet $SINGLEMDS lctl set_param fail_loc=0

        diff -q /etc/hosts $f
        st=$?

        # we check we had a restore done
        wait_request_state $fid RESTORE SUCCEED

        [[ $st -eq 0 ]] || error &quot;Restored file differs&quot;

        copytool_cleanup
}
run_test 12o &quot;Layout-swap failure during Restore leaves file released&quot;
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;I will push a patch to introduce a &quot;cdt_purge + wait_for_grace_delay&quot; between both the failing (due to fault-injection) and succeeding (normal conditions) RESTORE attempts, to allow the old/1st result to disappear from the log. Current grace_delay is set to 10s by sanity-hsm which is too long between both RESTORE requests here.&lt;/p&gt;</comment>
                            <comment id="76812" author="bfaccini" created="Wed, 12 Feb 2014 09:59:09 +0000"  >&lt;p&gt;Patch is at &lt;a href=&quot;http://review.whamcloud.com/9235&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/9235&lt;/a&gt;.&lt;/p&gt;</comment>
                            <comment id="77221" author="yujian" created="Tue, 18 Feb 2014 10:09:34 +0000"  >&lt;p&gt;While testing patch &lt;a href=&quot;http://review.whamcloud.com/9288&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/9288&lt;/a&gt; on Lustre b2_5 branch, the same failure occurred:&lt;br/&gt;
&lt;a href=&quot;https://maloo.whamcloud.com/test_sets/a20caf5e-984d-11e3-8a38-52540035b04c&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://maloo.whamcloud.com/test_sets/a20caf5e-984d-11e3-8a38-52540035b04c&lt;/a&gt;&lt;/p&gt;

&lt;p&gt;As per Bruno, this regression failure was introduced by the patch for &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-3834&quot; title=&quot;hsm_cdt_request_completed() may clear HS_RELEASED on failed restore&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-3834&quot;&gt;&lt;del&gt;LU-3834&lt;/del&gt;&lt;/a&gt; landed on Lustre b2_5 build #25.&lt;/p&gt;

&lt;p&gt;So, the failure is also needed to be fixed on Lustre b2_5 branch.&lt;/p&gt;</comment>
                            <comment id="77231" author="yujian" created="Tue, 18 Feb 2014 13:29:16 +0000"  >&lt;p&gt;Patch for b2_5 branch: &lt;a href=&quot;http://review.whamcloud.com/9295&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/9295&lt;/a&gt;&lt;/p&gt;</comment>
                            <comment id="77248" author="bogl" created="Tue, 18 Feb 2014 15:40:24 +0000"  >&lt;p&gt;hit again:&lt;br/&gt;
&lt;a href=&quot;https://maloo.whamcloud.com/test_sets/d436c73c-9855-11e3-8828-52540035b04c&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://maloo.whamcloud.com/test_sets/d436c73c-9855-11e3-8828-52540035b04c&lt;/a&gt;&lt;/p&gt;</comment>
                            <comment id="77592" author="pjones" created="Fri, 21 Feb 2014 13:47:51 +0000"  >&lt;p&gt;Landed for 2.5.1 and 2.6&lt;/p&gt;</comment>
                    </comments>
                    <attachments>
                    </attachments>
                <subtasks>
                    </subtasks>
                <customfields>
                                                                                                                                                                                            <customfield id="customfield_10890" key="com.atlassian.jira.plugins.jira-development-integration-plugin:devsummary">
                        <customfieldname>Development</customfieldname>
                        <customfieldvalues>
                            
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        <customfield id="customfield_10390" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                        <customfieldname>Rank</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>1|hzweu7:</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10090" key="com.pyxis.greenhopper.jira:gh-global-rank">
                        <customfieldname>Rank (Obsolete)</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>12623</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                                                                                            <customfield id="customfield_10060" key="com.atlassian.jira.plugin.system.customfieldtypes:select">
                        <customfieldname>Severity</customfieldname>
                        <customfieldvalues>
                                <customfieldvalue key="10022"><![CDATA[3]]></customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        </customfields>
    </item>
</channel>
</rss>