<!-- 
RSS generated by JIRA (9.4.14#940014-sha1:734e6822bbf0d45eff9af51f82432957f73aa32c) at Sat Feb 10 01:44:35 UTC 2024

It is possible to restrict the fields that are returned in this document by specifying the 'field' parameter in your request.
For example, to request only the issue key and summary append 'field=key&field=summary' to the URL of your request.
-->
<rss version="0.92" >
<channel>
    <title>Whamcloud Community JIRA</title>
    <link>https://jira.whamcloud.com</link>
    <description>This file is an XML representation of an issue</description>
    <language>en-us</language>    <build-info>
        <version>9.4.14</version>
        <build-number>940014</build-number>
        <build-date>05-12-2023</build-date>
    </build-info>


<item>
            <title>[LU-4643] sanity-hsm test_60: FAIL: Expected progress update within 10 seconds</title>
                <link>https://jira.whamcloud.com/browse/LU-4643</link>
                <project id="10000" key="LU">Lustre</project>
                    <description>&lt;p&gt;While validating patch &lt;a href=&quot;http://review.whamcloud.com/9290&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/9290&lt;/a&gt; on Lustre b2_5 branch, sanity-hsm test 60 failed as follows:&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;Updated after 9s: wanted &apos;5242880&apos; got &apos;5242880&apos;
 sanity-hsm test_60: @@@@@@ FAIL: Expected progress update within 10 seconds 
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;Maloo report: &lt;a href=&quot;https://maloo.whamcloud.com/test_sets/b8823022-987c-11e3-98a2-52540035b04c&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://maloo.whamcloud.com/test_sets/b8823022-987c-11e3-98a2-52540035b04c&lt;/a&gt;&lt;/p&gt;

&lt;p&gt;More instances occurred while validating patches on Lustre b2_5 branch:&lt;br/&gt;
&lt;a href=&quot;http://review.whamcloud.com/9260&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/9260&lt;/a&gt;&lt;br/&gt;
&lt;a href=&quot;http://review.whamcloud.com/9103&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/9103&lt;/a&gt;&lt;br/&gt;
&lt;a href=&quot;http://review.whamcloud.com/9071&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/9071&lt;/a&gt;&lt;/p&gt;

&lt;p&gt;The same failure also occurred while validating patches on master branch:&lt;br/&gt;
&lt;a href=&quot;http://review.whamcloud.com/9243&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/9243&lt;/a&gt;&lt;br/&gt;
&lt;a href=&quot;http://review.whamcloud.com/9221&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/9221&lt;/a&gt;&lt;/p&gt;

&lt;p&gt;The test was introduced by the patches for &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-4512&quot; title=&quot;POSIX copytool option &amp;quot;--report&amp;quot; is non-obvious and functionality is broken&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-4512&quot;&gt;&lt;del&gt;LU-4512&lt;/del&gt;&lt;/a&gt;.&lt;/p&gt;</description>
                <environment></environment>
        <key id="23189">LU-4643</key>
            <summary>sanity-hsm test_60: FAIL: Expected progress update within 10 seconds</summary>
                <type id="1" iconUrl="https://jira.whamcloud.com/secure/viewavatar?size=xsmall&amp;avatarId=11303&amp;avatarType=issuetype">Bug</type>
                                            <priority id="2" iconUrl="https://jira.whamcloud.com/images/icons/priorities/critical.svg">Critical</priority>
                        <status id="5" iconUrl="https://jira.whamcloud.com/images/icons/statuses/resolved.png" description="A resolution has been taken, and it is awaiting verification by reporter. From here issues are either reopened, or are closed.">Resolved</status>
                    <statusCategory id="3" key="done" colorName="success"/>
                                    <resolution id="1">Fixed</resolution>
                                        <assignee username="bfaccini">Bruno Faccini</assignee>
                                    <reporter username="yujian">Jian Yu</reporter>
                        <labels>
                    </labels>
                <created>Tue, 18 Feb 2014 14:31:17 +0000</created>
                <updated>Tue, 29 Sep 2015 12:49:02 +0000</updated>
                            <resolved>Tue, 29 Sep 2015 12:48:18 +0000</resolved>
                                    <version>Lustre 2.6.0</version>
                    <version>Lustre 2.5.1</version>
                                    <fixVersion>Lustre 2.6.0</fixVersion>
                    <fixVersion>Lustre 2.5.1</fixVersion>
                                        <due></due>
                            <votes>0</votes>
                                    <watches>11</watches>
                                                                            <comments>
                            <comment id="77242" author="yujian" created="Tue, 18 Feb 2014 14:36:23 +0000"  >&lt;p&gt;Hi Michael,&lt;/p&gt;

&lt;p&gt;This failure is a regression introduced by the patch for &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-4512&quot; title=&quot;POSIX copytool option &amp;quot;--report&amp;quot; is non-obvious and functionality is broken&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-4512&quot;&gt;&lt;del&gt;LU-4512&lt;/del&gt;&lt;/a&gt;. Could you please take a look at this failure? Thanks.&lt;/p&gt;</comment>
                            <comment id="77244" author="yujian" created="Tue, 18 Feb 2014 14:42:50 +0000"  >&lt;p&gt;Another similar failure:&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;Update not seen after 100s: wanted &apos;5242880&apos; got &apos;&apos;
 sanity-hsm test_60: @@@@@@ FAIL: request on 0x400000401:0x1c1:0x0 has not made progress 5242880 on mds1
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;Maloo report: &lt;a href=&quot;https://maloo.whamcloud.com/test_sets/8e7ec8f2-97b0-11e3-a9f6-52540035b04c&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://maloo.whamcloud.com/test_sets/8e7ec8f2-97b0-11e3-a9f6-52540035b04c&lt;/a&gt;&lt;/p&gt;</comment>
                            <comment id="77332" author="yujian" created="Wed, 19 Feb 2014 08:55:36 +0000"  >&lt;p&gt;For &quot;Expected progress update within 10 seconds&quot; failure, I checked the outputs of all of the failed tests and found that:&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;Updated after 9s: wanted &apos;5242880&apos; got &apos;5242880&apos;
Updated after 9s: wanted &apos;5242880&apos; got &apos;5242880&apos;
Updated after 12s: wanted &apos;5242880&apos; got &apos;5242880&apos;
Updated after 12s: wanted &apos;5242880&apos; got &apos;5242880&apos;
Updated after 11s: wanted &apos;5242880&apos; got &apos;5242880&apos;
Updated after 11s: wanted &apos;5242880&apos; got &apos;5242880&apos;
Updated after 11s: wanted &apos;5242880&apos; got &apos;5242880&apos;
Updated after 13s: wanted &apos;5242880&apos; got &apos;5242880&apos;
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;The current values of &quot;interval&quot; and &quot;progress_timeout&quot; in sanity-hsm test_60() are as follows:&lt;/p&gt;
&lt;div class=&quot;code panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;codeContent panelContent&quot;&gt;
&lt;pre class=&quot;code-java&quot;&gt;        local interval=5
        local progress_timeout=$((interval * 2))
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;I just uploaded patches to increase the value of &quot;progress_timeout&quot;.&lt;br/&gt;
For master branch: &lt;a href=&quot;http://review.whamcloud.com/9304&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/9304&lt;/a&gt;&lt;br/&gt;
For b2_5 branch: &lt;a href=&quot;http://review.whamcloud.com/9305&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/9305&lt;/a&gt;&lt;/p&gt;</comment>
                            <comment id="77334" author="yujian" created="Wed, 19 Feb 2014 09:56:35 +0000"  >&lt;p&gt;For &quot;Update not seen after 100s: wanted &apos;5242880&apos; got &apos;&apos;&quot; failure, I checked the outputs of all of the failed tests and found that the updating of &quot;done=&quot; values were all as follows:&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;Changed after 0s: from &apos;&apos; to &apos;0&apos;
Changed after 4s: from &apos;0&apos; to &apos;4194304&apos;
Changed after 9s: from &apos;4194304&apos; to &apos;13631488&apos;
Changed after 13s: from &apos;13631488&apos; to &apos;28311552&apos;
Changed after 17s: from &apos;28311552&apos; to &apos;48234496&apos;
Changed after 22s: from &apos;48234496&apos; to &apos;73400320&apos;
Changed after 26s: from &apos;73400320&apos; to &apos;103809024&apos;
Changed after 30s: from &apos;103809024&apos; to &apos;139460608&apos;
Changed after 34s: from &apos;139460608&apos; to &apos;&apos;
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;In sanity-hsm test_60(), the expected value of &quot;data_moved&quot; is:&lt;/p&gt;
&lt;div class=&quot;code panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;codeContent panelContent&quot;&gt;
&lt;pre class=&quot;code-java&quot;&gt;        wait_request_progress $fid ARCHIVE 5242880
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;I&apos;m confused here about how &quot;5242880&quot; was calculated because the size of archived file was 39000000 bytes:&lt;/p&gt;
&lt;div class=&quot;code panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;codeContent panelContent&quot;&gt;
&lt;pre class=&quot;code-java&quot;&gt;        dd &lt;span class=&quot;code-keyword&quot;&gt;if&lt;/span&gt;=/dev/urandom of=$file2 count=39 bs=1000000 conv=fsync ||
                error &lt;span class=&quot;code-quote&quot;&gt;&quot;cannot create $file2&quot;&lt;/span&gt;
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt; </comment>
                            <comment id="77386" author="bfaccini" created="Wed, 19 Feb 2014 18:03:32 +0000"  >&lt;p&gt;To allow for further debugging of sanity-hsm/test_60 issues, I pushed the 2 patches &lt;a href=&quot;http://review.whamcloud.com/9313&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/9313&lt;/a&gt; and &lt;a href=&quot;http://review.whamcloud.com/9314&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/9314&lt;/a&gt;, respectively for master and b2_5, that will disable it from inside sanity-hsm suite itself. Then it could be re-enabled as a further patch that will fix the issues.&lt;/p&gt;

&lt;p&gt;Possible issues to be addressed then are 1) a mismatch between the way the CT reports progress and how the CDT interprets/reports it in the log, 2) a too-short grace_period request causing the request to disappear from the log, 3) a wrong 5242880 size to be checked, &#8230;??&lt;/p&gt;</comment>
                            <comment id="77749" author="mjmac" created="Mon, 24 Feb 2014 21:12:26 +0000"  >&lt;p&gt;Hi, sorry I&apos;m just now replying. Was on vacation last week.&lt;/p&gt;

&lt;p&gt;So, I think the problem with this test is that it&apos;s too fragile. The intent of the test is to ensure that changing the copytool reporting interval via command-line arguments works. The bug that &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-4512&quot; title=&quot;POSIX copytool option &amp;quot;--report&amp;quot; is non-obvious and functionality is broken&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-4512&quot;&gt;&lt;del&gt;LU-4512&lt;/del&gt;&lt;/a&gt; fixed was that the --report argument wasn&apos;t parsed correctly and I believe what happened was that the interval was interpreted as milliseconds rather than seconds. In any case, it wasn&apos;t working, and it works now.&lt;/p&gt;

&lt;p&gt;I believe that this is an intermittent failure due to load variations on the test VMs. The tests in this suite artificially limit bandwidth to 1MB/sec (see line 164 in copytool_setup()) which is fine when the cluster is actually capable of 1MB/sec. When it&apos;s not, test_60 can fail because the expected value isn&apos;t seen.&lt;/p&gt;

&lt;p&gt;In hindsight, I see now that assuming the test could always count on 1MB/sec was a mistake. Also, the test is sort of testing the wrong thing. We don&apos;t really care what the value is, just that there was a progress report within the expected window. I will work on a patch to make this test more robust.&lt;/p&gt;</comment>
                            <comment id="77796" author="mjmac" created="Tue, 25 Feb 2014 11:57:11 +0000"  >&lt;p&gt;proposed improvement:&lt;br/&gt;
master: &lt;a href=&quot;http://review.whamcloud.com/#/c/9376/&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/#/c/9376/&lt;/a&gt;&lt;br/&gt;
b2_5: &lt;a href=&quot;http://review.whamcloud.com/#/c/9378/&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/#/c/9378/&lt;/a&gt;&lt;/p&gt;</comment>
                            <comment id="78091" author="bfaccini" created="Fri, 28 Feb 2014 13:58:47 +0000"  >&lt;p&gt;Michael, not sure if you have seen that patches #9313/#9314, respectively for master and b2_5, should land soon to disable sanity-hsm/test_60 until the problem is fixed.&lt;br/&gt;
This means that once they have landed, you will need to re-enable the test inside your patches #9376/#9378.&lt;/p&gt;</comment>
                            <comment id="78094" author="mjmac" created="Fri, 28 Feb 2014 14:22:32 +0000"  >&lt;p&gt;Given that my patches have +1s for code review and are working through testing, I wonder why we would land the patches to disable the test? Wouldn&apos;t it be better to just land the fixed version of the test and not bother with disabling it?&lt;/p&gt;</comment>
                            <comment id="78110" author="bfaccini" created="Fri, 28 Feb 2014 18:04:42 +0000"  >&lt;p&gt;Landing a patch to disable the failing test is a common approach we now use to reduce the time/frequency with which test sessions are impacted by the issue, and to allow enough time, without pressure, for debugging the test. It is then easier to re-enable the test with an additional line/change (to modify the ALWAYS_EXCEPT setting) in the final patch/fix.&lt;/p&gt;</comment>
                            <comment id="78113" author="mjmac" created="Fri, 28 Feb 2014 18:14:02 +0000"  >&lt;p&gt;I understand, but as a hypothetical example, if the patch to disable the test and the patch to fix the test are both ready at the same time for landing, it doesn&apos;t make sense to me to disable the test and then re-enable it with two separate patches. Am I missing something?&lt;/p&gt;

&lt;p&gt;&lt;a href=&quot;http://review.whamcloud.com/#/c/9376/&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/#/c/9376/&lt;/a&gt; has 2 +1 code-reviews, a +1 from jenkins, and a +1 from maloo. Just waiting for the gatekeeper to cherry-pick it and land it.&lt;/p&gt;</comment>
                            <comment id="78116" author="bfaccini" created="Fri, 28 Feb 2014 18:20:37 +0000"  >&lt;p&gt;I have no problem at all with that Michael, if you are sure/confident about imminent landing of #9376/#9378 just abandon #9313/#9314 in this case.&lt;/p&gt;</comment>
                            <comment id="78991" author="pjones" created="Tue, 11 Mar 2014 13:54:51 +0000"  >&lt;p&gt;Landed for 2.5.1 and 2.6&lt;/p&gt;</comment>
                            <comment id="128702" author="standan" created="Tue, 29 Sep 2015 01:33:55 +0000"  >&lt;p&gt;Encountered similar issue for interop testing for 2.7.60 tag&lt;br/&gt;
Client : master branch/ build 3194, RHEL 7&lt;br/&gt;
Server: b2_7 branch&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;5242880 bytes copied in 3 seconds.
 sanity-hsm test_60: @@@@@@ FAIL: Expected progress update after at least 5 seconds 
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;</comment>
                            <comment id="128736" author="pjones" created="Tue, 29 Sep 2015 12:49:02 +0000"  >&lt;p&gt;Saurabh&lt;/p&gt;

&lt;p&gt;Given how old this issue is it would be much better to open a new ticket even if the issue does appear similar to this older one&lt;/p&gt;

&lt;p&gt;Peter&lt;/p&gt;</comment>
                    </comments>
                    <attachments>
                    </attachments>
                <subtasks>
                    </subtasks>
                <customfields>
                                                                                                                                                                                            <customfield id="customfield_10890" key="com.atlassian.jira.plugins.jira-development-integration-plugin:devsummary">
                        <customfieldname>Development</customfieldname>
                        <customfieldvalues>
                            
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        <customfield id="customfield_10390" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                        <customfieldname>Rank</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>1|hzwfaf:</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10090" key="com.pyxis.greenhopper.jira:gh-global-rank">
                        <customfieldname>Rank (Obsolete)</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>12700</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                                                                                            <customfield id="customfield_10060" key="com.atlassian.jira.plugin.system.customfieldtypes:select">
                        <customfieldname>Severity</customfieldname>
                        <customfieldvalues>
                                <customfieldvalue key="10022"><![CDATA[3]]></customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        </customfields>
    </item>
</channel>
</rss>