<!-- 
RSS generated by JIRA (9.4.14#940014-sha1:734e6822bbf0d45eff9af51f82432957f73aa32c) at Sat Feb 10 01:40:22 UTC 2024

It is possible to restrict the fields that are returned in this document by specifying the 'field' parameter in your request.
For example, to request only the issue key and summary append 'field=key&field=summary' to the URL of your request.
-->
<rss version="0.92" >
<channel>
    <title>Whamcloud Community JIRA</title>
    <link>https://jira.whamcloud.com</link>
    <description>This file is an XML representation of an issue</description>
    <language>en-us</language>    <build-info>
        <version>9.4.14</version>
        <build-number>940014</build-number>
        <build-date>05-12-2023</build-date>
    </build-info>


<item>
            <title>[LU-4178] Test failure on test suite sanity-hsm, subtest test_200</title>
                <link>https://jira.whamcloud.com/browse/LU-4178</link>
                <project id="10000" key="LU">Lustre</project>
                    <description>&lt;p&gt;This issue was created by maloo for Bob Glossman &amp;lt;bob.glossman@intel.com&amp;gt;&lt;/p&gt;

&lt;p&gt;This issue relates to the following test suite run: &lt;a href=&quot;http://maloo.whamcloud.com/test_sets/4c3bcdec-4025-11e3-bfaf-52540035b04c&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://maloo.whamcloud.com/test_sets/4c3bcdec-4025-11e3-bfaf-52540035b04c&lt;/a&gt;.&lt;/p&gt;

&lt;p&gt;test_200 was only recently enabled by commit 38695729d61958ab10e9e108175298f8a7d40536. Before that it was always skipped due to being in ALWAYS_EXCEPT.  I&apos;m wondering if it was a mistake to turn this test on at all.  maloo reports:&lt;/p&gt;

&lt;p&gt;Failure Rate: 66.00% of last 100 executions &lt;span class=&quot;error&quot;&gt;&amp;#91;all branches&amp;#93;&lt;/span&gt;&lt;/p&gt;

&lt;p&gt;This failure looks not at all related to the change under test, at least in this case.&lt;/p&gt;

&lt;p&gt;The sub-test test_200 failed with the following error:&lt;/p&gt;
&lt;blockquote&gt;
&lt;p&gt;request on sanity-hsm is not @@@@@@&lt;/p&gt;&lt;/blockquote&gt;

&lt;p&gt;Info required for matching: sanity-hsm 200&lt;/p&gt;</description>
                <environment></environment>
        <key id="21704">LU-4178</key>
            <summary>Test failure on test suite sanity-hsm, subtest test_200</summary>
                <type id="1" iconUrl="https://jira.whamcloud.com/secure/viewavatar?size=xsmall&amp;avatarId=11303&amp;avatarType=issuetype">Bug</type>
                                            <priority id="4" iconUrl="https://jira.whamcloud.com/images/icons/priorities/minor.svg">Minor</priority>
                        <status id="5" iconUrl="https://jira.whamcloud.com/images/icons/statuses/resolved.png" description="A resolution has been taken, and it is awaiting verification by reporter. From here issues are either reopened, or are closed.">Resolved</status>
                    <statusCategory id="3" key="done" colorName="success"/>
                                    <resolution id="1">Fixed</resolution>
                                        <assignee username="bfaccini">Bruno Faccini</assignee>
                                    <reporter username="maloo">Maloo</reporter>
                        <labels>
                            <label>HSM</label>
                    </labels>
                <created>Tue, 29 Oct 2013 14:49:41 +0000</created>
                <updated>Thu, 31 Aug 2017 18:29:06 +0000</updated>
                            <resolved>Wed, 25 Feb 2015 23:52:12 +0000</resolved>
                                    <version>Lustre 2.5.0</version>
                                    <fixVersion>Lustre 2.6.0</fixVersion>
                    <fixVersion>Lustre 2.7.0</fixVersion>
                    <fixVersion>Lustre 2.8.0</fixVersion>
                    <fixVersion>Lustre 2.5.4</fixVersion>
                                        <due></due>
                            <votes>0</votes>
                                    <watches>14</watches>
                                                                            <comments>
                            <comment id="70333" author="adilger" created="Thu, 31 Oct 2013 05:26:57 +0000"  >&lt;p&gt;Failed in &lt;a href=&quot;https://maloo.whamcloud.com/test_sets/96e2dc3e-41d1-11e3-85f0-52540035b04c&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://maloo.whamcloud.com/test_sets/96e2dc3e-41d1-11e3-85f0-52540035b04c&lt;/a&gt; also.&lt;/p&gt;</comment>
                            <comment id="70334" author="adilger" created="Thu, 31 Oct 2013 05:38:40 +0000"  >&lt;p&gt;This failed about 10 times in the past week.  Should this subtest be disabled again, or is there some obvious fix for this problem?&lt;/p&gt;</comment>
                            <comment id="70342" author="jcl" created="Thu, 31 Oct 2013 09:33:29 +0000"  >&lt;p&gt;test 200 and 221 are using large files, so patch &lt;a href=&quot;http://review.whamcloud.com/#/c/7915/&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/#/c/7915/&lt;/a&gt; will help them to pass (not sure it is always the failure origin). I will try to add more messages to understand the failure (maybe MOUNT2 is missing)&lt;/p&gt;</comment>
                            <comment id="70343" author="jcl" created="Thu, 31 Oct 2013 09:58:28 +0000"  >&lt;p&gt;Patch to understand at &lt;a href=&quot;http://review.whamcloud.com/#/c/8113/&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/#/c/8113/&lt;/a&gt; (not to be reviewed)&lt;/p&gt;</comment>
                            <comment id="70401" author="adilger" created="Thu, 31 Oct 2013 16:55:39 +0000"  >&lt;p&gt;For the short term I&apos;d like to just skip these tests. The sanity-hsm test is causing 50% test failures. If there is a fix to the problem it can re-enable the tests again. &lt;/p&gt;</comment>
                            <comment id="70413" author="bogl" created="Thu, 31 Oct 2013 18:18:08 +0000"  >&lt;p&gt;patch to turn off offending subtests: &lt;a href=&quot;http://review.whamcloud.com/8122&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/8122&lt;/a&gt;&lt;/p&gt;

&lt;p&gt;please add comments or suggestions there&lt;/p&gt;</comment>
                            <comment id="75155" author="yujian" created="Fri, 17 Jan 2014 05:04:46 +0000"  >&lt;p&gt;While testing patch &lt;a href=&quot;http://review.whamcloud.com/8834&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/8834&lt;/a&gt; on Lustre b2_5 branch, sanity-hsm test 200, 221, 250, 251 failed with &quot;No space left on device&quot; failure:&lt;/p&gt;

&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;dd: writing `/mnt/lustre2/d0.sanity-hsm/d200/f.sanity-hsm.200&apos;: No space left on device
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;Maloo report: &lt;a href=&quot;https://maloo.whamcloud.com/test_sets/6379144a-7f24-11e3-925a-52540035b04c&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://maloo.whamcloud.com/test_sets/6379144a-7f24-11e3-925a-52540035b04c&lt;/a&gt;&lt;/p&gt;</comment>
                            <comment id="76944" author="yujian" created="Thu, 13 Feb 2014 07:30:17 +0000"  >&lt;p&gt;Patch &lt;a href=&quot;http://review.whamcloud.com/9134&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/9134&lt;/a&gt; landed on Lustre b2_5 branch for 2.5.1.&lt;/p&gt;</comment>
                            <comment id="77593" author="pjones" created="Fri, 21 Feb 2014 14:08:24 +0000"  >&lt;p&gt;Tests are now skipped for both 2.5.1 and 2.6. We should track fixing the individual tests separately.&lt;/p&gt;</comment>
                            <comment id="78980" author="bfaccini" created="Tue, 11 Mar 2014 10:46:23 +0000"  >&lt;p&gt;Like for &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-3852&quot; title=&quot;sanity-hsm test_251: client26-vm &amp;quot;dd: no space left on device&amp;quot;&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-3852&quot;&gt;&lt;del&gt;LU-3852&lt;/del&gt;&lt;/a&gt;, the root cause for this ticket is Toro/client-26vm* very small sized Lustre filesystem, that even recent cleanup_large_files() usage is not enough to fix. Next action will be to find a way to handle this &quot;small fs size vs file big enough for timing need&quot; requirement, and also to re-allow test_200/221/223b in same patch.&lt;/p&gt;

&lt;p&gt;Since I am in charge of &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-3852&quot; title=&quot;sanity-hsm test_251: client26-vm &amp;quot;dd: no space left on device&amp;quot;&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-3852&quot;&gt;&lt;del&gt;LU-3852&lt;/del&gt;&lt;/a&gt; that is about the same root-cause, but for test_251, I also assign this ticket to me.&lt;/p&gt;</comment>
                            <comment id="79029" author="bfaccini" created="Tue, 11 Mar 2014 19:28:32 +0000"  >&lt;p&gt;Oops, I did not notice ticket was closed/fixed before I commented and re-assigned to me !!&lt;br/&gt;
So, I will continue to track the main+same issue in &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-3852&quot; title=&quot;sanity-hsm test_251: client26-vm &amp;quot;dd: no space left on device&amp;quot;&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-3852&quot;&gt;&lt;del&gt;LU-3852&lt;/del&gt;&lt;/a&gt;. test_&lt;span class=&quot;error&quot;&gt;&amp;#91;200,221,223b&amp;#93;&lt;/span&gt; will also be re-enabled in same patch/fix.&lt;/p&gt;</comment>
                            <comment id="95492" author="adilger" created="Thu, 2 Oct 2014 05:45:59 +0000"  >&lt;p&gt;Tests are still being skipped due to this bug. Reopen until the tests are passing again. &lt;/p&gt;</comment>
                            <comment id="95640" author="bfaccini" created="Fri, 3 Oct 2014 15:45:58 +0000"  >&lt;p&gt;Oops, seems that despite I said I will re-enable test_&lt;span class=&quot;error&quot;&gt;&amp;#91;200,221,223b&amp;#93;&lt;/span&gt; in same patch than test_251 for &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-3852&quot; title=&quot;sanity-hsm test_251: client26-vm &amp;quot;dd: no space left on device&amp;quot;&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-3852&quot;&gt;&lt;del&gt;LU-3852&lt;/del&gt;&lt;/a&gt;, I have simply still not make it ... Thanks to re-open so I will not forget about these 3 sub-tests.&lt;/p&gt;</comment>
                            <comment id="98217" author="bfaccini" created="Mon, 3 Nov 2014 20:17:36 +0000"  >&lt;p&gt;test_&lt;span class=&quot;error&quot;&gt;&amp;#91;200,221,223b&amp;#93;&lt;/span&gt; have finally, and as expected!, been re-enabled in patch &lt;a href=&quot;http://review.whamcloud.com/12456&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/12456&lt;/a&gt; for &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-3852&quot; title=&quot;sanity-hsm test_251: client26-vm &amp;quot;dd: no space left on device&amp;quot;&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-3852&quot;&gt;&lt;del&gt;LU-3852&lt;/del&gt;&lt;/a&gt;.&lt;/p&gt;</comment>
                            <comment id="101833" author="gerrit" created="Wed, 17 Dec 2014 17:49:13 +0000"  >&lt;p&gt;Oleg Drokin (oleg.drokin@intel.com) merged in patch &lt;a href=&quot;http://review.whamcloud.com/8113/&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/8113/&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-4178&quot; title=&quot;Test failure on test suite sanity-hsm, subtest test_200&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-4178&quot;&gt;&lt;del&gt;LU-4178&lt;/del&gt;&lt;/a&gt; tests: add messages to sanity-hsm&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: master&lt;br/&gt;
Current Patch Set: &lt;br/&gt;
Commit: 597214dc586ae9d329f2bb819600ac4b9ddfcc06&lt;/p&gt;</comment>
                            <comment id="102058" author="jhammond" created="Fri, 19 Dec 2014 15:16:21 +0000"  >&lt;p&gt;Here is another test_200 failure on v2_6_91_0-49-ge0ece89 &lt;a href=&quot;https://testing.hpdd.intel.com/test_sets/cc1dae4c-8703-11e4-87d3-5254006e85c2&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://testing.hpdd.intel.com/test_sets/cc1dae4c-8703-11e4-87d3-5254006e85c2&lt;/a&gt;. I don&apos;t think that this failure is about file size. I ran this locally 10 times and never saw the cancel action succeed.&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;== sanity-hsm test 200: Register/Cancel archive == 09:08:31 (1419001711)
Only wakeup running copytool agt1 on t
103+0 records in
103+0 records out
108003328 bytes (108 MB) copied, 18.2603 s, 5.9 MB/s
mdt.lustre-MDT0000.hsm_control=disabled
mdt.lustre-MDT0000.hsm_control=enabled
Waiting 100 secs for update
Waiting 90 secs for update
Waiting 80 secs for update
Waiting 70 secs for update
Waiting 60 secs for update
Waiting 50 secs for update
Waiting 40 secs for update
Waiting 30 secs for update
Waiting 20 secs for update
Waiting 10 secs for update
Update not seen after 100s: wanted &apos;SUCCEED&apos; got &apos;STARTED
STARTED&apos;
 sanity-hsm test_200: @@@@@@ FAIL: request on 0x200000401:0x1:0x0 is not SUCCEED on mds1 
  Trace dump:
  = /root/lustre-release/lustre/tests/test-framework.sh:4667:error_noexit()
  = /root/lustre-release/lustre/tests/test-framework.sh:4698:error()
  = lustre/tests/sanity-hsm.sh:697:wait_request_state()
  = lustre/tests/sanity-hsm.sh:3482:test_200()
  = /root/lustre-release/lustre/tests/test-framework.sh:4945:run_one()
  = /root/lustre-release/lustre/tests/test-framework.sh:4982:run_one_logged()
  = /root/lustre-release/lustre/tests/test-framework.sh:4799:run_test()
  = lustre/tests/sanity-hsm.sh:3486:main()
Dumping lctl log to /tmp/test_logs/1419001709/sanity-hsm.test_200.*.1419001832.log
Dumping logs only on local client.
FAIL 200 (121s)
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;</comment>
                            <comment id="102130" author="bfaccini" created="Fri, 19 Dec 2014 23:44:40 +0000"  >&lt;p&gt;Hello John,&lt;br/&gt;
I agree with you that the new failures you reported don&apos;t look as file size nor free space related as previously (even if my patch for &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-3852&quot; title=&quot;sanity-hsm test_251: client26-vm &amp;quot;dd: no space left on device&amp;quot;&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-3852&quot;&gt;&lt;del&gt;LU-3852&lt;/del&gt;&lt;/a&gt; that has recently landed unfortunately does not correctly detect/report lack of free space, as you brilliantly demonstrated in &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-6056&quot; title=&quot;sanity-hsm uses bash&amp;#39;s local built-in incorrectly&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-6056&quot;&gt;&lt;del&gt;LU-6056&lt;/del&gt;&lt;/a&gt;!!), but they seem to be the consequence of a new/different problem.&lt;br/&gt;
Could this be somewhat related to &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-3873&quot; title=&quot;HSM cancel actions never removed from agent llog&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-3873&quot;&gt;LU-3873&lt;/a&gt; and/or &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-4545&quot; title=&quot;Test failure sanity-hsm test_223a: request on 0x200000402:0x13f:0x0 is not SUCCEED&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-4545&quot;&gt;&lt;del&gt;LU-4545&lt;/del&gt;&lt;/a&gt;, at least with their comments regarding the status change of CANCEL requests and their handling by copytool?&lt;/p&gt;</comment>
                            <comment id="102146" author="jhammond" created="Sat, 20 Dec 2014 16:17:46 +0000"  >&lt;p&gt;I have not carefully verified this but here is my idea. From the CT logs &lt;a href=&quot;https://testing.hpdd.intel.com/test_logs/268d8b72-8704-11e4-87d3-5254006e85c2&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://testing.hpdd.intel.com/test_logs/268d8b72-8704-11e4-87d3-5254006e85c2&lt;/a&gt; I see that the cancel is handled by the CT before the archive request:&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;1418932953.617511 lhsmtool_posix[19200]: action=0 src=(null) dst=(null) mount_point=/mnt/lustre
1418932953.626199 lhsmtool_posix[19201]: waiting for message from kernel
1418932972.656373 lhsmtool_posix[19201]: copytool fs=lustre archive#=2 item_count=1
1418932972.656586 lhsmtool_posix[19201]: waiting for message from kernel
1418932972.656652 lhsmtool_posix[19203]: &apos;[0x400000401:0x211:0x0]&apos; action CANCEL reclen 72, cookie=0x549332b1
1418932972.657448 lhsmtool_posix[19201]: copytool fs=lustre archive#=2 item_count=1
1418932972.657528 lhsmtool_posix[19201]: waiting for message from kernel
1418932972.657618 lhsmtool_posix[19204]: &apos;[0x400000401:0x211:0x0]&apos; action ARCHIVE reclen 72, cookie=0x549332b1
1418932972.659924 lhsmtool_posix[19203]: processing file &apos;d200.sanity-hsm/f200.sanity-hsm&apos;
1418932972.659961 lhsmtool_posix[19203]: cancel not implemented for file system &apos;/mnt/lustre&apos;
1418932972.659984 lhsmtool_posix[19204]: processing file &apos;d200.sanity-hsm/f200.sanity-hsm&apos;
1418932972.731465 lhsmtool_posix[19204]: archiving &apos;/mnt/lustre/.lustre/fid/0x400000401:0x211:0x0&apos; to &apos;/home/autotest/.autotest/shared_dir/2014-12-18/080743-70100366414020/arc1/0211/0000/0401/0000/0004/0000/0x400000401:0x211:0x0_tmp&apos;
1418932987.009995 lhsmtool_posix[19204]: saving stripe info of &apos;/mnt/lustre/.lustre/fid/0x400000401:0x211:0x0&apos; in /home/autotest/.autotest/shared_dir/2014-12-18/080743-70100366414020/arc1/0211/0000/0401/0000/0004/0000/0x400000401:0x211:0x0_tmp.lov
1418932988.535890 lhsmtool_posix[19204]: start copy of 108003328 bytes from &apos;/mnt/lustre/.lustre/fid/0x400000401:0x211:0x0&apos; to &apos;/home/autotest/.autotest/shared_dir/2014-12-18/080743-70100366414020/arc1/0211/0000/0401/0000/0004/0000/0x400000401:0x211:0x0_tmp&apos;
1418933018.865594 lhsmtool_posix[19204]: %29 
1418933018.874360 lhsmtool_posix[19204]: bandwith control: 1048576B/s excess=1048576 sleep for 1.000000000s
1418933048.043158 lhsmtool_posix[19204]: %58 
1418933048.050016 lhsmtool_posix[19204]: bandwith control: 1048576B/s excess=1048576 sleep for 1.000000000s
1418933078.126741 lhsmtool_posix[19204]: %87 
1418933078.130387 lhsmtool_posix[19204]: bandwith control: 1048576B/s excess=1048576 sleep for 1.000000000s
1418933091.161624 lhsmtool_posix[19204]: copied 108003328 bytes in 102.857832 seconds
1418933097.333515 lhsmtool_posix[19204]: data archiving for &apos;/mnt/lustre/.lustre/fid/0x400000401:0x211:0x0&apos; to &apos;/home/autotest/.autotest/shared_dir/2014-12-18/080743-70100366414020/arc1/0211/0000/0401/0000/0004/0000/0x400000401:0x211:0x0_tmp&apos; done
1418933098.588120 lhsmtool_posix[19204]: attr file for &apos;/mnt/lustre/.lustre/fid/0x400000401:0x211:0x0&apos; saved to archive &apos;/home/autotest/.autotest/shared_dir/2014-12-18/080743-70100366414020/arc1/0211/0000/0401/0000/0004/0000/0x400000401:0x211:0x0_tmp&apos;
1418933098.590178 lhsmtool_posix[19204]: fsetxattr of &apos;trusted.hsm&apos; on &apos;/home/autotest/.autotest/shared_dir/2014-12-18/080743-70100366414020/arc1/0211/0000/0401/0000/0004/0000/0x400000401:0x211:0x0_tmp&apos; rc=-1 (Operation not supported)
1418933098.590223 lhsmtool_posix[19204]: fsetxattr of &apos;trusted.link&apos; on &apos;/home/autotest/.autotest/shared_dir/2014-12-18/080743-70100366414020/arc1/0211/0000/0401/0000/0004/0000/0x400000401:0x211:0x0_tmp&apos; rc=-1 (Operation not supported)
1418933098.590258 lhsmtool_posix[19204]: fsetxattr of &apos;trusted.lov&apos; on &apos;/home/autotest/.autotest/shared_dir/2014-12-18/080743-70100366414020/arc1/0211/0000/0401/0000/0004/0000/0x400000401:0x211:0x0_tmp&apos; rc=-1 (Operation not supported)
1418933098.590308 lhsmtool_posix[19204]: fsetxattr of &apos;trusted.lma&apos; on &apos;/home/autotest/.autotest/shared_dir/2014-12-18/080743-70100366414020/arc1/0211/0000/0401/0000/0004/0000/0x400000401:0x211:0x0_tmp&apos; rc=-1 (Operation not supported)
1418933098.692510 lhsmtool_posix[19204]: fsetxattr of &apos;lustre.lov&apos; on &apos;/home/autotest/.autotest/shared_dir/2014-12-18/080743-70100366414020/arc1/0211/0000/0401/0000/0004/0000/0x400000401:0x211:0x0_tmp&apos; rc=-1 (Operation not supported)
1418933098.692559 lhsmtool_posix[19204]: xattr file for &apos;/mnt/lustre/.lustre/fid/0x400000401:0x211:0x0&apos; saved to archive &apos;/home/autotest/.autotest/shared_dir/2014-12-18/080743-70100366414020/arc1/0211/0000/0401/0000/0004/0000/0x400000401:0x211:0x0_tmp&apos;
1418933099.066853 lhsmtool_posix[19204]: symlink &apos;/home/autotest/.autotest/shared_dir/2014-12-18/080743-70100366414020/arc1/shadow/d200.sanity-hsm/f200.sanity-hsm&apos; to &apos;../../0211/0000/0401/0000/0004/0000/0x400000401:0x211:0x0&apos; done
1418933099.068225 lhsmtool_posix[19204]: Action completed, notifying coordinator cookie=0x549332b1, FID=[0x400000401:0x211:0x0], hp_flags=0 err=0
1418933099.070061 lhsmtool_posix[19204]: llapi_hsm_action_end() on &apos;/mnt/lustre/.lustre/fid/0x400000401:0x211:0x0&apos; ok (rc=0)
exiting: Interrupt
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;I wonder if the wrong ordering is due to thread scheduling on the CT or if it comes from somewhere else. Perhaps the CDT processes the requests in the wrong order. When does a cancel succeed? IIRC when the CT makes a progress report on the action. Is that correct?&lt;/p&gt;

&lt;p&gt;Since the CT does not implement cancel, I also wonder what precisely we are testing here.&lt;/p&gt;</comment>
                            <comment id="102238" author="blakecaldwell" created="Tue, 23 Dec 2014 09:19:55 +0000"  >&lt;p&gt;Here&apos;s a failure instance: &lt;a href=&quot;https://testing.hpdd.intel.com/test_sets/b63defe6-8a7e-11e4-9b75-5254006e85c2&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://testing.hpdd.intel.com/test_sets/b63defe6-8a7e-11e4-9b75-5254006e85c2&lt;/a&gt;. Appears to be a transient failure?&lt;/p&gt;</comment>
                            <comment id="102240" author="gerrit" created="Tue, 23 Dec 2014 10:34:13 +0000"  >&lt;p&gt;Faccini Bruno (bruno.faccini@intel.com) uploaded a new patch: &lt;a href=&quot;http://review.whamcloud.com/13173&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/13173&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-4178&quot; title=&quot;Test failure on test suite sanity-hsm, subtest test_200&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-4178&quot;&gt;&lt;del&gt;LU-4178&lt;/del&gt;&lt;/a&gt; tests: Wait requests to reach CDT before Cancel&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: master&lt;br/&gt;
Current Patch Set: 1&lt;br/&gt;
Commit: 918ccc4184b91d61b53016f8e3375bf9d4698afc&lt;/p&gt;</comment>
                            <comment id="102241" author="bfaccini" created="Tue, 23 Dec 2014 10:36:26 +0000"  >&lt;p&gt;John,&lt;br/&gt;
I think that sub-tests 200-202 verify that requests can be canceled when CDT operations have been started but disabled.&lt;br/&gt;
BTW, your finding that sometimes (very likely due to some threading cause at Agent and/or MDS side!) the Cancel request is treated by CDT before the action it targets needs to be addressed and also explains why these failures are not solid.&lt;/p&gt;

&lt;p&gt;Patch that adds verification that the operation has already been registered at CDT before to send the Cancel in sanity-hsm/test_&lt;span class=&quot;error&quot;&gt;&amp;#91;200-202&amp;#93;&lt;/span&gt;, is at &lt;a href=&quot;http://review.whamcloud.com/13173&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/13173&lt;/a&gt;.&lt;/p&gt;</comment>
                            <comment id="102348" author="jcl" created="Mon, 29 Dec 2014 06:36:21 +0000"  >&lt;p&gt;The analysis and patch look good but I am surprised because the CDT command registration should be synchronous. So after hsm_archive the CDT entry should be recorded.&lt;/p&gt;</comment>
                            <comment id="102409" author="gerrit" created="Tue, 30 Dec 2014 15:11:27 +0000"  >&lt;p&gt;John L. Hammond (john.hammond@intel.com) uploaded a new patch: &lt;a href=&quot;http://review.whamcloud.com/13206&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/13206&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-4178&quot; title=&quot;Test failure on test suite sanity-hsm, subtest test_200&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-4178&quot;&gt;&lt;del&gt;LU-4178&lt;/del&gt;&lt;/a&gt; tests: increase sanity-hsm wait_request_state tiemout&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: master&lt;br/&gt;
Current Patch Set: 1&lt;br/&gt;
Commit: 278c1fb845c2cdd7905f717435176e94a4ad7057&lt;/p&gt;</comment>
                            <comment id="102410" author="jhammond" created="Tue, 30 Dec 2014 15:12:12 +0000"  >&lt;p&gt;The cancel action does not succeed until the CT reports that the archive is complete. In test_200 we use make_large_for_cancel() which gives us a 100MB file. Because of the 1MB/s bandwidth limit the CT will take at least 100s to archive the file. Since wait_request_state() uses a 100s timeout this makes for a very racy test. And since most of these tests still use NFS for the archive there can be additional delays.&lt;/p&gt;

&lt;p&gt;I suggest that we double the timeout in wait_request_state(). Please see &lt;a href=&quot;http://review.whamcloud.com/13206&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/13206&lt;/a&gt;.&lt;/p&gt;</comment>
                            <comment id="102508" author="gerrit" created="Sun, 4 Jan 2015 18:44:51 +0000"  >&lt;p&gt;Oleg Drokin (oleg.drokin@intel.com) merged in patch &lt;a href=&quot;http://review.whamcloud.com/13206/&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/13206/&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-4178&quot; title=&quot;Test failure on test suite sanity-hsm, subtest test_200&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-4178&quot;&gt;&lt;del&gt;LU-4178&lt;/del&gt;&lt;/a&gt; tests: increase sanity-hsm wait_request_state tiemout&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: master&lt;br/&gt;
Current Patch Set: &lt;br/&gt;
Commit: 4cb51c76ed2afa168f19e999190a315803580258&lt;/p&gt;</comment>
                            <comment id="106656" author="jlevi" created="Wed, 11 Feb 2015 17:41:26 +0000"  >&lt;p&gt;Patches landed to Master.&lt;/p&gt;</comment>
                            <comment id="107423" author="jamesanunez" created="Thu, 19 Feb 2015 21:18:50 +0000"  >&lt;p&gt;Reopening ticket because there is one more patch for this ticket that has not landed. The patch is at: &lt;a href=&quot;http://review.whamcloud.com/#/c/13173/&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/#/c/13173/&lt;/a&gt;&lt;/p&gt;</comment>
                            <comment id="107498" author="gerrit" created="Fri, 20 Feb 2015 16:06:35 +0000"  >&lt;p&gt;James Nunez (james.a.nunez@intel.com) uploaded a new patch: &lt;a href=&quot;http://review.whamcloud.com/13825&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/13825&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-4178&quot; title=&quot;Test failure on test suite sanity-hsm, subtest test_200&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-4178&quot;&gt;&lt;del&gt;LU-4178&lt;/del&gt;&lt;/a&gt; tests: add messages to sanity-hsm&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: b2_5&lt;br/&gt;
Current Patch Set: 1&lt;br/&gt;
Commit: 75d6d35bcc48eefe490e8b4efd673c58b3373507&lt;/p&gt;</comment>
                            <comment id="107501" author="gerrit" created="Fri, 20 Feb 2015 16:11:06 +0000"  >&lt;p&gt;James Nunez (james.a.nunez@intel.com) uploaded a new patch: &lt;a href=&quot;http://review.whamcloud.com/13826&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/13826&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-4178&quot; title=&quot;Test failure on test suite sanity-hsm, subtest test_200&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-4178&quot;&gt;&lt;del&gt;LU-4178&lt;/del&gt;&lt;/a&gt; tests: increase sanity-hsm wait_request_state tiemout&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: b2_5&lt;br/&gt;
Current Patch Set: 1&lt;br/&gt;
Commit: 9c56a8f64d2ab4b8db8b3f38dff2d019b8cd3e40&lt;/p&gt;</comment>
                            <comment id="107671" author="jamesanunez" created="Mon, 23 Feb 2015 18:30:21 +0000"  >&lt;p&gt;Closing ticket because sanity-hsm tests 200, 201 and 202 are passing on master for the past month. If any more work needs to be done for this ticket, please open a new ticket and we&apos;ll track the work there.&lt;/p&gt;</comment>
                            <comment id="129407" author="gerrit" created="Tue, 6 Oct 2015 01:56:53 +0000"  >&lt;p&gt;Oleg Drokin (oleg.drokin@intel.com) merged in patch &lt;a href=&quot;http://review.whamcloud.com/13173/&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/13173/&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-4178&quot; title=&quot;Test failure on test suite sanity-hsm, subtest test_200&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-4178&quot;&gt;&lt;del&gt;LU-4178&lt;/del&gt;&lt;/a&gt; tests: Wait requests to reach CDT before Cancel&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: master&lt;br/&gt;
Current Patch Set: &lt;br/&gt;
Commit: 6a31cf92555182a23f14d3385c8c14266887070a&lt;/p&gt;</comment>
                    </comments>
                <issuelinks>
                            <issuelinktype id="10011">
                    <name>Related</name>
                                            <outwardlinks description="is related to ">
                                        <issuelink>
            <issuekey id="20667">LU-3852</issuekey>
        </issuelink>
                            </outwardlinks>
                                                                <inwardlinks description="is related to">
                                                        </inwardlinks>
                                    </issuelinktype>
                    </issuelinks>
                <attachments>
                    </attachments>
                <subtasks>
                    </subtasks>
                <customfields>
                                                                                                                                                                                            <customfield id="customfield_10890" key="com.atlassian.jira.plugins.jira-development-integration-plugin:devsummary">
                        <customfieldname>Development</customfieldname>
                        <customfieldvalues>
                            
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        <customfield id="customfield_10390" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                        <customfieldname>Rank</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>1|hzw76v:</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10090" key="com.pyxis.greenhopper.jira:gh-global-rank">
                        <customfieldname>Rank (Obsolete)</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>11309</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                                                                                            <customfield id="customfield_10060" key="com.atlassian.jira.plugin.system.customfieldtypes:select">
                        <customfieldname>Severity</customfieldname>
                        <customfieldvalues>
                                <customfieldvalue key="10022"><![CDATA[3]]></customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        </customfields>
    </item>
</channel>
</rss>