<!-- 
RSS generated by JIRA (9.4.14#940014-sha1:734e6822bbf0d45eff9af51f82432957f73aa32c) at Sat Feb 10 02:47:41 UTC 2024

It is possible to restrict the fields that are returned in this document by specifying the 'field' parameter in your request.
For example, to request only the issue key and summary, append 'field=key&field=summary' to the URL of your request.
-->
<rss version="0.92" >
<channel>
    <title>Whamcloud Community JIRA</title>
    <link>https://jira.whamcloud.com</link>
    <description>This file is an XML representation of an issue</description>
    <language>en-us</language>    <build-info>
        <version>9.4.14</version>
        <build-number>940014</build-number>
        <build-date>05-12-2023</build-date>
    </build-info>


<item>
            <title>[LU-11873] sanity test_801a: FAIL: (2) unexpected barrier status &apos;expired&apos;</title>
                <link>https://jira.whamcloud.com/browse/LU-11873</link>
                <project id="10000" key="LU">Lustre</project>
                    <description>&lt;p&gt;sanity test 801a failed in review-ldiskfs test session on master branch as follows:&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;trevis-34vm4: Fail to freeze barrier for lustre: Timer expired
CMD: trevis-34vm4 /usr/sbin/lctl get_param -n version 2&amp;gt;/dev/null ||
				/usr/sbin/lctl lustre_build_version 2&amp;gt;/dev/null ||
				/usr/sbin/lctl --version 2&amp;gt;/dev/null | cut -d&apos; &apos; -f2
CMD: trevis-34vm4 /usr/sbin/lctl barrier_stat -s lustre
 sanity test_801a: @@@@@@ FAIL: (2) unexpected barrier status &apos;expired&apos; 
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;Maloo reports:&lt;br/&gt;
&lt;a href=&quot;https://testing.whamcloud.com/test_sets/f49fde42-1b26-11e9-8388-52540065bddc&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://testing.whamcloud.com/test_sets/f49fde42-1b26-11e9-8388-52540065bddc&lt;/a&gt;&lt;br/&gt;
&lt;a href=&quot;https://testing.whamcloud.com/test_sets/900f4ab4-1ac0-11e9-8388-52540065bddc&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://testing.whamcloud.com/test_sets/900f4ab4-1ac0-11e9-8388-52540065bddc&lt;/a&gt;&lt;/p&gt;</description>
                <environment></environment>
        <key id="54605">LU-11873</key>
            <summary>sanity test_801a: FAIL: (2) unexpected barrier status &apos;expired&apos;</summary>
                <type id="1" iconUrl="https://jira.whamcloud.com/secure/viewavatar?size=xsmall&amp;avatarId=11303&amp;avatarType=issuetype">Bug</type>
                                            <priority id="4" iconUrl="https://jira.whamcloud.com/images/icons/priorities/minor.svg">Minor</priority>
                        <status id="5" iconUrl="https://jira.whamcloud.com/images/icons/statuses/resolved.png" description="A resolution has been taken, and it is awaiting verification by reporter. From here issues are either reopened, or are closed.">Resolved</status>
                    <statusCategory id="3" key="done" colorName="success"/>
                                    <resolution id="1">Fixed</resolution>
                                        <assignee username="pfarrell">Patrick Farrell</assignee>
                                    <reporter username="yujian">Jian Yu</reporter>
                        <labels>
                    </labels>
                <created>Fri, 18 Jan 2019 16:41:04 +0000</created>
                <updated>Thu, 12 Sep 2019 03:59:41 +0000</updated>
                            <resolved>Wed, 21 Aug 2019 12:03:52 +0000</resolved>
                                    <version>Lustre 2.13.0</version>
                    <version>Lustre 2.12.1</version>
                                    <fixVersion>Lustre 2.13.0</fixVersion>
                    <fixVersion>Lustre 2.12.3</fixVersion>
                                        <due></due>
                            <votes>0</votes>
                                    <watches>7</watches>
                                                                            <comments>
                            <comment id="244318" author="mdiep" created="Wed, 20 Mar 2019 15:44:36 +0000"  >&lt;p&gt;+1 on b2_12 &lt;a href=&quot;https://testing.whamcloud.com/test_sets/e298fc1e-4ad4-11e9-92fe-52540065bddc&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://testing.whamcloud.com/test_sets/e298fc1e-4ad4-11e9-92fe-52540065bddc&lt;/a&gt;&lt;/p&gt;</comment>
                            <comment id="246235" author="bzzz" created="Tue, 23 Apr 2019 18:46:28 +0000"  >&lt;p&gt;I see this issue with master on a local setup very frequently.&lt;/p&gt;</comment>
                            <comment id="249972" author="hornc" created="Tue, 25 Jun 2019 15:02:57 +0000"  >&lt;p&gt;+1 on master &lt;a href=&quot;https://testing.whamcloud.com/test_sets/9d49e0f0-9756-11e9-8262-52540065bddc&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://testing.whamcloud.com/test_sets/9d49e0f0-9756-11e9-8262-52540065bddc&lt;/a&gt;&lt;/p&gt;</comment>
                            <comment id="250268" author="pfarrell" created="Fri, 28 Jun 2019 15:31:09 +0000"  >&lt;p&gt;Logs from&#160;&lt;a href=&quot;https://testing.whamcloud.com/test_sets/c432303a-9988-11e9-af8b-52540065bddc&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://testing.whamcloud.com/test_sets/c432303a-9988-11e9-af8b-52540065bddc&lt;/a&gt;&lt;br/&gt;
(&lt;a href=&quot;https://review.whamcloud.com/#/c/35352/&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/#/c/35352/&lt;/a&gt;)&lt;/p&gt;

&lt;p&gt;00000004:00080000:0.0:1561700209.521557:0:11001:0:(osp_sync.c:1615:osp_sync_add_commit_cb()) lustre-OST0000-osc-MDT0001: add commit cb at 12164268321642ns, next at 11075831108412ns, rc = 0&lt;/p&gt;


&lt;p&gt;12164268321642-11075831108412&lt;/p&gt;

&lt;p&gt;On the node where the barrier command is being done, we start setting the barrier:&lt;br/&gt;
20000000:00000001:0.0:1561700207.130890:0:28927:0:(mgs_barrier.c:306:mgs_barrier_freeze()) Process entered&lt;br/&gt;
20000000:00000001:0.0:1561700209.536789:0:28927:0:(mgs_barrier.c:128:mgs_barrier_glimpse_lock()) Process entered&lt;/p&gt;

&lt;p&gt;And it doesn&apos;t complete until almost 10 seconds later:&lt;br/&gt;
20000000:00000001:1.0:1561700218.029393:0:28927:0:(mgs_barrier.c:205:mgs_barrier_glimpse_lock()) Process leaving via out (rc=0 : 0 : 0x0)&lt;br/&gt;
By which time our barrier has expired.&lt;/p&gt;

&lt;p&gt;Looking at the glimpses, this one:&lt;br/&gt;
00000100:00000040:0.0:1561700209.537058:0:28927:0:(lustre_net.h:2491:ptlrpc_rqphase_move()) @@@ move req &quot;New&quot; -&amp;gt; &quot;Rpc&quot; req@ffff93ba50f0cd80 x1637548785242880/t0(0) o106-&amp;gt;MGS@10.2.8.58@tcp:15/16 lens 368/224 e 0 to 0 dl 0 ref 1 fl New:QU/0/ffffffff rc 0/-1&lt;br/&gt;
is sent normally, nice and quick...&lt;/p&gt;

&lt;p&gt;But it doesn&apos;t finish until:&lt;br/&gt;
00000100:00000200:1.0:1561700218.029267:0:28927:0:(events.c:93:reply_in_callback()) @@@ type 6, status 0 req@ffff93ba50f0cd80 x1637548785242880/t0(0) o106-&amp;gt;MGS@10.2.8.58@tcp:15/16 lens 368/224 e 1 to 0 dl 1561700241 ref 1 fl Rpc:RQ/0/ffffffff rc 0/-1&lt;/p&gt;

&lt;p&gt;Much later, after the barrier has expired.&lt;/p&gt;

&lt;p&gt;Looking at the node where this glimpse was sent, we can see it arriving, and then generating a sync operation as part of turning on the barrier:&lt;br/&gt;
00000100:00000200:1.0:1561700209.521162:0:24526:0:(service.c:2234:ptlrpc_server_handle_request()) got req 1637548785242880&lt;br/&gt;
&lt;span class=&quot;error&quot;&gt;&amp;#91;...&amp;#93;&lt;/span&gt;&lt;br/&gt;
00000004:00080000:1.0:1561700209.521565:0:24526:0:(osp_sync.c:1615:osp_sync_add_commit_cb()) lustre-OST0004-osc-MDT0003: add commit cb at 12164268329849ns, next at 12152647346967ns, rc = 0&lt;br/&gt;
12164268329849ns-12152647346967ns --&amp;gt; ~11 seconds&lt;/p&gt;

&lt;p&gt;And we see:&lt;br/&gt;
00080000:00000020:1.0:1561700209.521610:0:24526:0:(osd_handler.c:658:osd_sync()) syncing OSD osd-zfs&lt;/p&gt;

&lt;p&gt;And then, after a nice long wait:&lt;br/&gt;
00080000:00000020:0.0:1561700218.012522:0:24526:0:(osd_handler.c:660:osd_sync()) synced OSD osd-zfs&lt;br/&gt;
00000004:00000001:0.0:1561700218.012525:0:24526:0:(lod_dev.c:1583:lod_sync()) Process leaving (rc=0 : 0 : 0)&lt;br/&gt;
00200000:40000000:0.0:1561700218.012528:0:24526:0:(barrier.c:221:barrier_freeze()) lustre-MDT0003-osd: barrier freezing phase1 done.&lt;br/&gt;
00200000:00000001:0.0:1561700218.012530:0:24526:0:(barrier.c:226:barrier_freeze()) Process leaving (rc=0 : 0 : 0)&lt;br/&gt;
00200000:00000001:0.0:1561700218.012531:0:24526:0:(barrier.c:344:barrier_handler()) Process leaving via fini (rc=0 : 0 : 0x0)&lt;/p&gt;

&lt;p&gt;And then (finally) we reply.&lt;/p&gt;

&lt;p&gt;So the issue is the ZFS sync interval being around 10 seconds, which is the same as the length of our barrier. So if we get unlucky, we&apos;ll overrun it.&lt;/p&gt;

&lt;p&gt;It&apos;s probably enough to change the barrier length to 15 seconds.&lt;/p&gt;</comment>
                            <comment id="250269" author="gerrit" created="Fri, 28 Jun 2019 15:35:25 +0000"  >&lt;p&gt;Patrick Farrell (pfarrell@whamcloud.com) uploaded a new patch: &lt;a href=&quot;https://review.whamcloud.com/35361&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/35361&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-11873&quot; title=&quot;sanity test_801a: FAIL: (2) unexpected barrier status &amp;#39;expired&amp;#39;&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-11873&quot;&gt;&lt;del&gt;LU-11873&lt;/del&gt;&lt;/a&gt; tests: Increase barrier freeze time&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: master&lt;br/&gt;
Current Patch Set: 1&lt;br/&gt;
Commit: f34ecbc3b15c6897be545f8241bf0f626997f183&lt;/p&gt;</comment>
                            <comment id="253347" author="gerrit" created="Wed, 21 Aug 2019 04:58:56 +0000"  >&lt;p&gt;Oleg Drokin (green@whamcloud.com) merged in patch &lt;a href=&quot;https://review.whamcloud.com/35361/&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/35361/&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-11873&quot; title=&quot;sanity test_801a: FAIL: (2) unexpected barrier status &amp;#39;expired&amp;#39;&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-11873&quot;&gt;&lt;del&gt;LU-11873&lt;/del&gt;&lt;/a&gt; tests: Increase barrier freeze time&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: master&lt;br/&gt;
Current Patch Set: &lt;br/&gt;
Commit: 96771280b330af07781326ff8811facd1ca39deb&lt;/p&gt;</comment>
                            <comment id="253373" author="pjones" created="Wed, 21 Aug 2019 12:03:52 +0000"  >&lt;p&gt;Landed for 2.13&lt;/p&gt;</comment>
                            <comment id="253764" author="gerrit" created="Wed, 28 Aug 2019 15:42:52 +0000"  >&lt;p&gt;Minh Diep (mdiep@whamcloud.com) uploaded a new patch: &lt;a href=&quot;https://review.whamcloud.com/35952&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/35952&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-11873&quot; title=&quot;sanity test_801a: FAIL: (2) unexpected barrier status &amp;#39;expired&amp;#39;&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-11873&quot;&gt;&lt;del&gt;LU-11873&lt;/del&gt;&lt;/a&gt; tests: Increase barrier freeze time&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: b2_12&lt;br/&gt;
Current Patch Set: 1&lt;br/&gt;
Commit: b35866abc21a854e75cde15a6562ee2004c64b20&lt;/p&gt;</comment>
                            <comment id="254575" author="gerrit" created="Thu, 12 Sep 2019 03:47:42 +0000"  >&lt;p&gt;Oleg Drokin (green@whamcloud.com) merged in patch &lt;a href=&quot;https://review.whamcloud.com/35952/&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/35952/&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-11873&quot; title=&quot;sanity test_801a: FAIL: (2) unexpected barrier status &amp;#39;expired&amp;#39;&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-11873&quot;&gt;&lt;del&gt;LU-11873&lt;/del&gt;&lt;/a&gt; tests: Increase barrier freeze time&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: b2_12&lt;br/&gt;
Current Patch Set: &lt;br/&gt;
Commit: 3a164a8d81395092add1426549821159db2e33a7&lt;/p&gt;</comment>
                    </comments>
                <issuelinks>
                            <issuelinktype id="10011">
                    <name>Related</name>
                                                                <inwardlinks description="is related to">
                                                        </inwardlinks>
                                    </issuelinktype>
                    </issuelinks>
                <attachments>
                    </attachments>
                <subtasks>
                    </subtasks>
                <customfields>
                                                                                                                                                                                            <customfield id="customfield_10890" key="com.atlassian.jira.plugins.jira-development-integration-plugin:devsummary">
                        <customfieldname>Development</customfieldname>
                        <customfieldvalues>
                            
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        <customfield id="customfield_10390" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                        <customfieldname>Rank</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>1|i009xj:</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10090" key="com.pyxis.greenhopper.jira:gh-global-rank">
                        <customfieldname>Rank (Obsolete)</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>9223372036854775807</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                                                                                            <customfield id="customfield_10060" key="com.atlassian.jira.plugin.system.customfieldtypes:select">
                        <customfieldname>Severity</customfieldname>
                        <customfieldvalues>
                                <customfieldvalue key="10022"><![CDATA[3]]></customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        </customfields>
    </item>
</channel>
</rss>