<!-- 
RSS generated by JIRA (9.4.14#940014-sha1:734e6822bbf0d45eff9af51f82432957f73aa32c) at Sat Feb 10 02:37:53 UTC 2024

It is possible to restrict the fields that are returned in this document by specifying the 'field' parameter in your request.
For example, to request only the issue key and summary, append 'field=key&field=summary' to the URL of your request.
-->
<rss version="0.92" >
<channel>
    <title>Whamcloud Community JIRA</title>
    <link>https://jira.whamcloud.com</link>
    <description>This file is an XML representation of an issue</description>
    <language>en-us</language>    <build-info>
        <version>9.4.14</version>
        <build-number>940014</build-number>
        <build-date>05-12-2023</build-date>
    </build-info>


<item>
            <title>[LU-10754] sanityn test 47b fails with &apos;create must fail&apos; </title>
                <link>https://jira.whamcloud.com/browse/LU-10754</link>
                <project id="10000" key="LU">Lustre</project>
                    <description>&lt;p&gt;sanityn test_47b fails for DNE/ZFS testing. From the client test_log, we see&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;== sanityn test 47b: pdirops: remote mkdir vs create ================================================= 09:50:51 (1519725051)
CMD: onyx-32vm9 lctl set_param fail_loc=0x80000145
fail_loc=0x80000145
 sanityn test_47b: @@@@@@ FAIL: create must fail 
lfs mkdir: error on LL_IOC_LMV_SETSTRIPE &apos;/mnt/lustre/f47b.sanityn&apos; (3): stripe already set
lfs setdirstripe: cannot create stripe dir &apos;/mnt/lustre/f47b.sanityn&apos;: File exists
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;This test started failing on 2018-01-19.&lt;/p&gt;

&lt;p&gt;The console and dmesg logs don&#8217;t have any more information in them for this failure than what is in the suite_log.&lt;/p&gt;

&lt;p&gt;Logs for test sessions with this failure are at&lt;br/&gt;
&lt;a href=&quot;https://testing.hpdd.intel.com/test_sets/b9932fbe-fd24-11e7-bd00-52540065bddc&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://testing.hpdd.intel.com/test_sets/b9932fbe-fd24-11e7-bd00-52540065bddc&lt;/a&gt;&lt;br/&gt;
&lt;a href=&quot;https://testing.hpdd.intel.com/test_sets/316d208c-1bb4-11e8-a7cd-52540065bddc&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://testing.hpdd.intel.com/test_sets/316d208c-1bb4-11e8-a7cd-52540065bddc&lt;/a&gt;&lt;br/&gt;
&lt;a href=&quot;https://testing.hpdd.intel.com/test_sets/5ce20dce-1905-11e8-a6ad-52540065bddc&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://testing.hpdd.intel.com/test_sets/5ce20dce-1905-11e8-a6ad-52540065bddc&lt;/a&gt;&lt;/p&gt;
</description>
                <environment></environment>
        <key id="51081">LU-10754</key>
            <summary>sanityn test 47b fails with &apos;create must fail&apos; </summary>
                <type id="1" iconUrl="https://jira.whamcloud.com/secure/viewavatar?size=xsmall&amp;avatarId=11303&amp;avatarType=issuetype">Bug</type>
                                            <priority id="3" iconUrl="https://jira.whamcloud.com/images/icons/priorities/major.svg">Major</priority>
                        <status id="5" iconUrl="https://jira.whamcloud.com/images/icons/statuses/resolved.png" description="A resolution has been taken, and it is awaiting verification by reporter. From here issues are either reopened, or are closed.">Resolved</status>
                    <statusCategory id="3" key="done" colorName="success"/>
                                    <resolution id="1">Fixed</resolution>
                                        <assignee username="pfarrell">Patrick Farrell</assignee>
                                    <reporter username="jamesanunez">James Nunez</reporter>
                        <labels>
                            <label>dne</label>
                            <label>zfs</label>
                    </labels>
                <created>Fri, 2 Mar 2018 00:22:09 +0000</created>
                <updated>Tue, 26 Oct 2021 18:49:48 +0000</updated>
                            <resolved>Wed, 22 May 2019 21:54:32 +0000</resolved>
                                    <version>Lustre 2.11.0</version>
                    <version>Lustre 2.12.2</version>
                                    <fixVersion>Lustre 2.13.0</fixVersion>
                                        <due></due>
                            <votes>0</votes>
                                    <watches>7</watches>
                                                                            <comments>
                            <comment id="240783" author="bzzz" created="Mon, 28 Jan 2019 04:40:23 +0000"  >&lt;p&gt;&lt;a href=&quot;https://testing.whamcloud.com/test_sessions/b9d6b421-a589-4a3c-8daa-a9e2e08e5b31&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://testing.whamcloud.com/test_sessions/b9d6b421-a589-4a3c-8daa-a9e2e08e5b31&lt;/a&gt;&lt;/p&gt;</comment>
                            <comment id="241111" author="pfarrell" created="Thu, 31 Jan 2019 20:27:49 +0000"  >&lt;p&gt;&lt;a href=&quot;https://testing.whamcloud.com/test_sessions/bc2f83f6-83cc-4baa-8854-25a3f29ecb0c&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://testing.whamcloud.com/test_sessions/bc2f83f6-83cc-4baa-8854-25a3f29ecb0c&lt;/a&gt;&lt;/p&gt;</comment>
                            <comment id="241116" author="pfarrell" created="Thu, 31 Jan 2019 21:30:25 +0000"  >&lt;p&gt;So what I learned after looking at these logs for a bit is that the DNE2 protocol is complex and I don&apos;t understand it, but what I did learn is that we are hanging such that the lfs mkdir is not running until after the multiop has started:&lt;/p&gt;

&lt;p&gt;&amp;#8212;&lt;/p&gt;

&lt;p&gt;$LFS mkdir -i 1 $DIR1/$tfile &amp;amp;&lt;br/&gt;
 PID1=$!&lt;br/&gt;
 sleep 1&lt;br/&gt;
 multiop $DIR2/$tfile oO_CREAT:O_EXCL:c &amp;amp;&amp;amp; error &quot;create must fail&quot;&lt;/p&gt;

&lt;p&gt;&amp;#8212;&lt;/p&gt;

&lt;p&gt;The lfs mkdir hangs waiting on a lock (not the intended PDO lock, I think - much earlier...?).&#160; The sequence of events is a bit beyond me right now, but it involves cancelling locks on MDT1, which are presumably held because of previous tests.&#160; Somewhere in there, something gets stuck, and the lock cancellation does not complete (on MDT0) until after the create operation has started, hence the mkdir failing with EEXIST.&lt;/p&gt;</comment>
                            <comment id="241301" author="pfarrell" created="Mon, 4 Feb 2019 16:48:55 +0000"  >&lt;p&gt;I can&apos;t prove it, but I have a guess on this.&lt;/p&gt;



&lt;p&gt;The lock it&apos;s waiting for was created by/during a previous test, and I can&apos;t prove it, but I think there&apos;s some uncommitted stuff under it that&apos;s getting sync&apos;ed.&#160; In that case, &apos;sleep 1&apos; simply isn&apos;t long enough for that to happen due to our lack of the ZIL and the long commit intervals on ZFS (once per second).&lt;/p&gt;

&lt;p&gt;&#160;&lt;/p&gt;

&lt;p&gt;I&apos;m going to push a patch to change the sleeps to 2 seconds.&lt;/p&gt;</comment>
                            <comment id="241308" author="pfarrell" created="Mon, 4 Feb 2019 16:58:07 +0000"  >&lt;p&gt;Discussed with &lt;a href=&quot;https://jira.whamcloud.com/secure/ViewProfile.jspa?name=bzzz&quot; class=&quot;user-hover&quot; rel=&quot;bzzz&quot;&gt;bzzz&lt;/a&gt;, he&apos;s going to take a look and see what we can do, maybe we can do better than just increasing the sleep.&#160; (Which isn&apos;t 100% reliable, but we also want to avoid syncs...)&lt;/p&gt;</comment>
                            <comment id="246989" author="adilger" created="Fri, 10 May 2019 20:07:13 +0000"  >&lt;p&gt;What about just cancelling all of the MDC locks at the start of the test, so that we aren&apos;t waiting on that in the middle of the test?  That avoids having to make the sleep long enough for the 1% case, and so avoids wasting time by making it too long for the other 99% of runs.  We still need a small sleep due to fork/exec, but that can be &lt;tt&gt;sleep 0.2&lt;/tt&gt; or similar.&lt;/p&gt;</comment>
                            <comment id="246990" author="pfarrell" created="Fri, 10 May 2019 20:11:36 +0000"  >&lt;p&gt;That makes a &lt;b&gt;lot&lt;/b&gt; more sense than my suggestion.&lt;/p&gt;

&lt;p&gt;Andreas, has this failed recently?&#160; I ignored it because I saw it a few times, then hadn&apos;t seen it since.&lt;/p&gt;</comment>
                            <comment id="246991" author="adilger" created="Fri, 10 May 2019 20:16:34 +0000"  >&lt;p&gt;PS: I post here because it seems like there has been an uptick in the frequency of test failures:&lt;/p&gt;
&lt;ul class=&quot;alternate&quot; type=&quot;square&quot;&gt;
	&lt;li&gt;2019-05-10: 4 failures already today&lt;/li&gt;
	&lt;li&gt;2019-05-09: 4 failures in the week ending on that date&lt;/li&gt;
	&lt;li&gt;2019-05-02: 2 failures that week&lt;/li&gt;
	&lt;li&gt;2019-04-25: 2 failures that week&lt;/li&gt;
&lt;/ul&gt;


&lt;p&gt;There were a number of patches landed on 2019-05-08, so it is likely one of them is involved (possibly just increasing the size of the race window by making some piece of code/test slower).&lt;/p&gt;</comment>
                            <comment id="246992" author="pfarrell" created="Fri, 10 May 2019 20:25:17 +0000"  >&lt;p&gt;Presumably:&lt;/p&gt;

&lt;p&gt;&lt;a href=&quot;https://review.whamcloud.com/4392&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/4392&lt;/a&gt;&lt;/p&gt;

&lt;p&gt;Which modifies these &lt;img class=&quot;emoticon&quot; src=&quot;https://jira.whamcloud.com/images/icons/emoticons/smile.png&quot; height=&quot;16&quot; width=&quot;16&quot; align=&quot;absmiddle&quot; alt=&quot;&quot; border=&quot;0&quot;/&gt;&lt;/p&gt;</comment>
                            <comment id="246994" author="adilger" created="Fri, 10 May 2019 20:29:06 +0000"  >&lt;p&gt;It looks like the recent uptick in failures is caused by the landing on 2019-05-10 of patch &lt;a href=&quot;https://review.whamcloud.com/4392&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/4392&lt;/a&gt; &quot;&lt;tt&gt;&lt;a href=&quot;https://jira.whamcloud.com/browse/LU-2233&quot; title=&quot;sanityn/40-46 takes ~800-900s&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-2233&quot;&gt;&lt;del&gt;LU-2233&lt;/del&gt;&lt;/a&gt; tests: improve tests sanityn/40-47&lt;/tt&gt;&quot;, which reduced the &quot;sleep&quot; time to 0.2s, which increased the probability of other lock&lt;br/&gt;
operations still being blocked when this test was run.&lt;/p&gt;</comment>
                            <comment id="246995" author="gerrit" created="Fri, 10 May 2019 20:37:02 +0000"  >&lt;p&gt;Patrick Farrell (pfarrell@whamcloud.com) uploaded a new patch: &lt;a href=&quot;https://review.whamcloud.com/34848&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/34848&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-10754&quot; title=&quot;sanityn test 47b fails with &amp;#39;create must fail&amp;#39; &quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-10754&quot;&gt;&lt;del&gt;LU-10754&lt;/del&gt;&lt;/a&gt; tests: Clear mdc locks before tests&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: master&lt;br/&gt;
Current Patch Set: 1&lt;br/&gt;
Commit: 1a4b819075e2936ed9dc5d4b3600a7be37270c8a&lt;/p&gt;</comment>
                            <comment id="247036" author="gerrit" created="Sun, 12 May 2019 01:28:12 +0000"  >&lt;p&gt;Oleg Drokin (green@whamcloud.com) merged in patch &lt;a href=&quot;https://review.whamcloud.com/34848/&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/34848/&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-10754&quot; title=&quot;sanityn test 47b fails with &amp;#39;create must fail&amp;#39; &quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-10754&quot;&gt;&lt;del&gt;LU-10754&lt;/del&gt;&lt;/a&gt; tests: Clear mdc locks before tests&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: master&lt;br/&gt;
Current Patch Set: &lt;br/&gt;
Commit: 26a7abe812834663a6d42ba0d04606650c71cb9f&lt;/p&gt;</comment>
                            <comment id="247070" author="gerrit" created="Mon, 13 May 2019 17:51:17 +0000"  >&lt;p&gt;Alex Zhuravlev (bzzz@whamcloud.com) uploaded a new patch: &lt;a href=&quot;https://review.whamcloud.com/34853&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/34853&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-10754&quot; title=&quot;sanityn test 47b fails with &amp;#39;create must fail&amp;#39; &quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-10754&quot;&gt;&lt;del&gt;LU-10754&lt;/del&gt;&lt;/a&gt; debug: try to sleep for 1s&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: master&lt;br/&gt;
Current Patch Set: 1&lt;br/&gt;
Commit: 24860d1c1948a2a7ccec0be8eb1ea8ee78e63cb6&lt;/p&gt;</comment>
                            <comment id="247546" author="simmonsja" created="Wed, 22 May 2019 19:26:33 +0000"  >&lt;p&gt;This is causing about 1/3 of all test suite runs to fail.&lt;/p&gt;</comment>
                            <comment id="247547" author="gerrit" created="Wed, 22 May 2019 20:00:34 +0000"  >&lt;p&gt;Andreas Dilger (adilger@whamcloud.com) merged in patch &lt;a href=&quot;https://review.whamcloud.com/34853/&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/34853/&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-10754&quot; title=&quot;sanityn test 47b fails with &amp;#39;create must fail&amp;#39; &quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-10754&quot;&gt;&lt;del&gt;LU-10754&lt;/del&gt;&lt;/a&gt; tests: sanityn/47b to sleep for 1s&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: master&lt;br/&gt;
Current Patch Set: &lt;br/&gt;
Commit: 85ac0da36748311528aa7b89f62679f087b7445c&lt;/p&gt;</comment>
                            <comment id="250454" author="gerrit" created="Mon, 1 Jul 2019 21:13:29 +0000"  >&lt;p&gt;Patrick Farrell (pfarrell@whamcloud.com) uploaded a new patch: &lt;a href=&quot;https://review.whamcloud.com/35399&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/35399&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-10754&quot; title=&quot;sanityn test 47b fails with &amp;#39;create must fail&amp;#39; &quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-10754&quot;&gt;&lt;del&gt;LU-10754&lt;/del&gt;&lt;/a&gt; tests: Debug full testing failure&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: master&lt;br/&gt;
Current Patch Set: 1&lt;br/&gt;
Commit: c6779689cbcd658b600933425db1d5365b3a3337&lt;/p&gt;</comment>
                            <comment id="250859" author="pfarrell" created="Mon, 8 Jul 2019 21:10:56 +0000"  >&lt;p&gt;OK, figured it out:&lt;br/&gt;
&lt;a href=&quot;https://review.whamcloud.com/#/c/4392/&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/#/c/4392/&lt;/a&gt;&lt;br/&gt;
breaks interop.&lt;/p&gt;

&lt;p&gt;Wondering if we can just land it to b2_12?&lt;/p&gt;</comment>
                            <comment id="250868" author="adilger" created="Tue, 9 Jul 2019 00:20:37 +0000"  >&lt;blockquote&gt;
&lt;p&gt;Wondering if we can just land it to b2_12?&lt;/p&gt;&lt;/blockquote&gt;
&lt;p&gt;If this patch is landed to b2_12, will it then cause interop testing problems for 2.12 vs. 2.10?  Not that I&apos;m totally against this, since we run far more testing on master than b2_12, but ideally we should just add a version interop check, or just skip this test for interop testing on b2_12 so that there aren&apos;t gratuitous errors that need to be looked at.&lt;/p&gt;</comment>
                            <comment id="252623" author="yujian" created="Tue, 6 Aug 2019 18:27:59 +0000"  >&lt;p&gt;The failure still occurred on master branch:&lt;br/&gt;
&lt;a href=&quot;https://testing.whamcloud.com/test_sets/c376ce50-b86c-11e9-b753-52540065bddc&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://testing.whamcloud.com/test_sets/c376ce50-b86c-11e9-b753-52540065bddc&lt;/a&gt;&lt;br/&gt;
&lt;a href=&quot;https://testing.whamcloud.com/test_sets/3e5c96d8-b7c6-11e9-b88c-52540065bddc&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://testing.whamcloud.com/test_sets/3e5c96d8-b7c6-11e9-b88c-52540065bddc&lt;/a&gt;&lt;br/&gt;
&lt;a href=&quot;https://testing.whamcloud.com/test_sets/29aab538-b66a-11e9-b88c-52540065bddc&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://testing.whamcloud.com/test_sets/29aab538-b66a-11e9-b88c-52540065bddc&lt;/a&gt;&lt;/p&gt;</comment>
                            <comment id="252627" author="pfarrell" created="Tue, 6 Aug 2019 18:41:09 +0000"  >&lt;p&gt;&lt;a href=&quot;https://jira.whamcloud.com/secure/ViewProfile.jspa?name=yujian&quot; class=&quot;user-hover&quot; rel=&quot;yujian&quot;&gt;yujian&lt;/a&gt;,&lt;/p&gt;

&lt;p&gt;There are a number of bugs for this, unfortunately.&#160; Here&apos;s the one we&apos;re using now, which has an unlanded fix: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-12470&quot; title=&quot;sanityn test_47b: create isn&amp;#39;t blocked&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-12470&quot;&gt;&lt;del&gt;LU-12470&lt;/del&gt;&lt;/a&gt;&lt;/p&gt;</comment>
                            <comment id="252646" author="yujian" created="Wed, 7 Aug 2019 03:49:22 +0000"  >&lt;p&gt;Thank you, Patrick.&lt;/p&gt;</comment>
                    </comments>
                <issuelinks>
                            <issuelinktype id="10010">
                    <name>Duplicate</name>
                                                                <inwardlinks description="is duplicated by">
                                        <issuelink>
            <issuekey id="57689">LU-13097</issuekey>
        </issuelink>
                            </inwardlinks>
                                    </issuelinktype>
                            <issuelinktype id="10011">
                    <name>Related</name>
                                            <outwardlinks description="is related to ">
                                        <issuelink>
            <issuekey id="56188">LU-12470</issuekey>
        </issuelink>
            <issuelink>
            <issuekey id="16431">LU-2233</issuekey>
        </issuelink>
                            </outwardlinks>
                                                        </issuelinktype>
                    </issuelinks>
                <attachments>
                    </attachments>
                <subtasks>
                    </subtasks>
                <customfields>
                                                                                                                                                                                            <customfield id="customfield_10890" key="com.atlassian.jira.plugins.jira-development-integration-plugin:devsummary">
                        <customfieldname>Development</customfieldname>
                        <customfieldvalues>
                            
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        <customfield id="customfield_10390" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                        <customfieldname>Rank</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>1|hzztnj:</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10090" key="com.pyxis.greenhopper.jira:gh-global-rank">
                        <customfieldname>Rank (Obsolete)</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>9223372036854775807</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                                                                                            <customfield id="customfield_10060" key="com.atlassian.jira.plugin.system.customfieldtypes:select">
                        <customfieldname>Severity</customfieldname>
                        <customfieldvalues>
                                <customfieldvalue key="10022"><![CDATA[3]]></customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        </customfields>
    </item>
</channel>
</rss>