<!-- 
RSS generated by JIRA (9.4.14#940014-sha1:734e6822bbf0d45eff9af51f82432957f73aa32c) at Sat Feb 10 02:36:16 UTC 2024

It is possible to restrict the fields that are returned in this document by specifying the 'field' parameter in your request.
For example, to request only the issue key and summary append 'field=key&field=summary' to the URL of your request.
-->
<rss version="0.92" >
<channel>
    <title>Whamcloud Community JIRA</title>
    <link>https://jira.whamcloud.com</link>
    <description>This file is an XML representation of an issue</description>
    <language>en-us</language>    <build-info>
        <version>9.4.14</version>
        <build-number>940014</build-number>
        <build-date>05-12-2023</build-date>
    </build-info>


<item>
            <title>[LU-10570] sanity test_27y: Error:  &apos;Of 2 OSTs, only 1 is available&apos;</title>
                <link>https://jira.whamcloud.com/browse/LU-10570</link>
                <project id="10000" key="LU">Lustre</project>
                    <description>&lt;p&gt;== sanity test 27y: create files while OST0 is degraded and the rest inactive ======================== 06:05:43 (1516946743)&lt;br/&gt;
CMD: onyx-37vm9 lctl get_param -n osc.lustre-OST0000-osc-MDT0000.prealloc_last_id&lt;br/&gt;
CMD: onyx-37vm9 lctl get_param -n osc.lustre-OST0000-osc-MDT0000.prealloc_next_id&lt;br/&gt;
CMD: onyx-37vm9 lctl dl&lt;br/&gt;
lustre-OST0001-osc-MDT0000 is Deactivated:&lt;br/&gt;
CMD: onyx-37vm9 lctl --device %lustre-OST0001-osc-MDT0000 deactivate&lt;br/&gt;
lustre-OST0000 is degraded:&lt;br/&gt;
CMD: onyx-37vm8 lctl set_param -n obdfilter.lustre-OST0000.degraded=1&lt;br/&gt;
CMD: onyx-37vm9 lctl get_param -n lov.*.qos_maxage&lt;br/&gt;
total: 2 open/close in 0.00 seconds: 437.82 ops/second&lt;br/&gt;
lustre-OST0000 is recovered from degraded:&lt;br/&gt;
CMD: onyx-37vm8 lctl set_param -n obdfilter.lustre-OST0000.degraded=0&lt;br/&gt;
CMD: onyx-37vm9 lctl --device %lustre-OST0001-osc-MDT0000 activate&lt;br/&gt;
CMD: onyx-37vm9 lctl get_param -n lov.*.qos_maxage&lt;br/&gt;
 sanity test_27y: @@@@@@ FAIL: Of 2 OSTs, only 1 is available &lt;br/&gt;
  Trace dump:&lt;br/&gt;
  = /usr/lib64/lustre/tests/test-framework.sh:5336:error()&lt;br/&gt;
  = /usr/lib64/lustre/tests/sanity.sh:1840:test_27y()&lt;br/&gt;
  = /usr/lib64/lustre/tests/test-framework.sh:5612:run_one()&lt;br/&gt;
  = /usr/lib64/lustre/tests/test-framework.sh:5651:run_one_logged()&lt;br/&gt;
  = /usr/lib64/lustre/tests/test-framework.sh:5498:run_test()&lt;br/&gt;
  = /usr/lib64/lustre/tests/sanity.sh:1843:main()&lt;/p&gt;

&lt;p&gt;This issue was created by maloo for Jinshan Xiong &amp;lt;jinshan.xiong@intel.com&amp;gt;&lt;/p&gt;

&lt;p&gt;This issue relates to the following test suite run: &lt;/p&gt;

&lt;p&gt;&amp;lt;&amp;lt;Please provide additional information about the failure here&amp;gt;&amp;gt;&lt;/p&gt;</description>
                <environment></environment>
        <key id="50410">LU-10570</key>
            <summary>sanity test_27y: Error:  &apos;Of 2 OSTs, only 1 is available&apos;</summary>
                <type id="1" iconUrl="https://jira.whamcloud.com/secure/viewavatar?size=xsmall&amp;avatarId=11303&amp;avatarType=issuetype">Bug</type>
                                            <priority id="4" iconUrl="https://jira.whamcloud.com/images/icons/priorities/minor.svg">Minor</priority>
                        <status id="5" iconUrl="https://jira.whamcloud.com/images/icons/statuses/resolved.png" description="A resolution has been taken, and it is awaiting verification by reporter. From here issues are either reopened, or are closed.">Resolved</status>
                    <statusCategory id="3" key="done" colorName="success"/>
                                    <resolution id="1">Fixed</resolution>
                                        <assignee username="simmonsja">James A Simmons</assignee>
                                    <reporter username="maloo">Maloo</reporter>
                        <labels>
                    </labels>
                <created>Fri, 26 Jan 2018 18:34:37 +0000</created>
                <updated>Tue, 27 Feb 2018 04:35:17 +0000</updated>
                            <resolved>Tue, 27 Feb 2018 04:35:17 +0000</resolved>
                                    <version>Lustre 2.11.0</version>
                                    <fixVersion>Lustre 2.11.0</fixVersion>
                                        <due></due>
                            <votes>0</votes>
                                    <watches>11</watches>
                                                                            <comments>
                            <comment id="219282" author="jay" created="Fri, 26 Jan 2018 18:35:30 +0000"  >&lt;p&gt;This issue should be introduced by a recent commit because I have never seen it before&lt;/p&gt;</comment>
                            <comment id="219366" author="jamesanunez" created="Mon, 29 Jan 2018 18:19:46 +0000"  >&lt;p&gt;We&apos;ve seen this test fail four times since January 26, 2018 and all of those failures are in review-zfs. Here are links to some of the logs for these failures:&lt;br/&gt;
&lt;a href=&quot;https://testing.hpdd.intel.com/test_sets/7e9510c2-050c-11e8-a10a-52540065bddc&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://testing.hpdd.intel.com/test_sets/7e9510c2-050c-11e8-a10a-52540065bddc&lt;/a&gt;&lt;br/&gt;
&lt;a href=&quot;https://testing.hpdd.intel.com/test_sets/4ba8b3e8-0486-11e8-a7cd-52540065bddc&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://testing.hpdd.intel.com/test_sets/4ba8b3e8-0486-11e8-a7cd-52540065bddc&lt;/a&gt;&lt;br/&gt;
&lt;a href=&quot;https://testing.hpdd.intel.com/test_sets/de800ac0-02da-11e8-a6ad-52540065bddc&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://testing.hpdd.intel.com/test_sets/de800ac0-02da-11e8-a6ad-52540065bddc&lt;/a&gt;&lt;/p&gt;</comment>
                            <comment id="219401" author="jhammond" created="Mon, 29 Jan 2018 23:43:50 +0000"  >&lt;p&gt;Bisection shows that this was introduced by &lt;a href=&quot;https://review.whamcloud.com/30867&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/30867&lt;/a&gt; &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-9019&quot; title=&quot;Migrate lustre to standard 64 bit time kernel API&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-9019&quot;&gt;&lt;del&gt;LU-9019&lt;/del&gt;&lt;/a&gt; libcfs: remove cfs_time_XXX_64 wrappers.&lt;/p&gt;</comment>
                            <comment id="219402" author="pjones" created="Tue, 30 Jan 2018 00:20:35 +0000"  >&lt;p&gt;James&lt;/p&gt;

&lt;p&gt;Should we revert this change or is it possible to easily fix this issue?&lt;/p&gt;

&lt;p&gt;Peter&lt;/p&gt;</comment>
                            <comment id="219440" author="simmonsja" created="Tue, 30 Jan 2018 16:13:56 +0000"  >&lt;p&gt;It should be easy to fix. I have a feeling it&apos;s one of those cases where using seconds resolution is not good enough. I bet it is only seen on VMs.&lt;/p&gt;</comment>
                            <comment id="219478" author="simmonsja" created="Tue, 30 Jan 2018 22:20:40 +0000"  >&lt;p&gt;Can&apos;t reproduce on ldiskfs locally. Attempted to bring up ZFS for the test suite but I have run into some issues with setup. I get device is too small (smaller than 64MB) errors. Looking around, I don&apos;t see any docs on how to set up the test suite with ZFS. Any pointers? Can anyone reproduce this easily?&lt;/p&gt;</comment>
                            <comment id="219481" author="jay" created="Tue, 30 Jan 2018 22:37:15 +0000"  >&lt;p&gt;I saw the same issue before. On my side, it worked after I used dd to create bigger Lustre target files &lt;tt&gt;/tmp/lustre-{ost,mdt}X&lt;/tt&gt;, like:&lt;/p&gt;

&lt;div class=&quot;code panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;codeContent panelContent&quot;&gt;
&lt;pre class=&quot;code-java&quot;&gt;dd &lt;span class=&quot;code-keyword&quot;&gt;if&lt;/span&gt;=/dev/zero of=/tmp/lustre-mdt1 bs=1M count=1 seek=8191
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;</comment>
                            <comment id="219526" author="jhammond" created="Wed, 31 Jan 2018 13:28:34 +0000"  >&lt;p&gt;James,&lt;/p&gt;

&lt;p&gt;I was able to reproduce this using a llmount.sh filesystem with FSTYPE=ldiskfs on a single RHEL 7.4 VM. It didn&apos;t fail every time but it would consistently fail within a minute when I ran &lt;tt&gt;while ONLY=27y bash lustre/tests/sanity.sh; do true; done&lt;/tt&gt;.&lt;/p&gt;

&lt;p&gt;For the FSTYPE=zfs issue, see &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-10424&quot; title=&quot;FSTYPE=zfs bash lustre/tests/llmount.sh fails unless /tmp/lustre-{mdt1,ost1,ost2} all exist and have size at least 64M&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-10424&quot;&gt;LU-10424&lt;/a&gt;.&lt;/p&gt;
</comment>
                            <comment id="219678" author="simmonsja" created="Thu, 1 Feb 2018 16:36:43 +0000"  >&lt;p&gt;I have a patch cooked up. Will push after I&apos;m done testing.&lt;/p&gt;</comment>
                            <comment id="219737" author="gerrit" created="Thu, 1 Feb 2018 19:20:59 +0000"  >&lt;p&gt;James Simmons (uja.ornl@yahoo.com) uploaded a new patch: &lt;a href=&quot;https://review.whamcloud.com/31127&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/31127&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-10570&quot; title=&quot;sanity test_27y: Error:  &amp;#39;Of 2 OSTs, only 1 is available&amp;#39;&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-10570&quot;&gt;&lt;del&gt;LU-10570&lt;/del&gt;&lt;/a&gt; obd: use ktime_t for statfs handling&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: master&lt;br/&gt;
Current Patch Set: 1&lt;br/&gt;
Commit: e4441d88f18882bf3b1441ddf02ea59f843ca207&lt;/p&gt;</comment>
                            <comment id="219742" author="jay" created="Thu, 1 Feb 2018 19:26:23 +0000"  >&lt;p&gt;James - can you explain why ktime_t change would cause the problem?&lt;/p&gt;</comment>
                            <comment id="219910" author="gerrit" created="Fri, 2 Feb 2018 21:12:17 +0000"  >&lt;p&gt;James Simmons (uja.ornl@yahoo.com) uploaded a new patch: &lt;a href=&quot;https://review.whamcloud.com/31158&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/31158&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-10570&quot; title=&quot;sanity test_27y: Error:  &amp;#39;Of 2 OSTs, only 1 is available&amp;#39;&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-10570&quot;&gt;&lt;del&gt;LU-10570&lt;/del&gt;&lt;/a&gt; obd: fix statfs handling&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: master&lt;br/&gt;
Current Patch Set: 1&lt;br/&gt;
Commit: b6cd8307c488cffe2bbd5819b42583ab340610f7&lt;/p&gt;</comment>
                            <comment id="219915" author="simmonsja" created="Fri, 2 Feb 2018 22:37:26 +0000"  >&lt;p&gt;So in the original code the handling of stat refreshing was using 64 bit jiffies, which means we usually had time resolution in the milliseconds. Being the 64 bit version of jiffies didn&apos;t mean it had better time resolution like ktime_t does. Looking at the code it feels natural to use seconds resolution since OBD_STATFS_CACHE_SECONDS is one second and the qos max_age is also in seconds, which is why I moved in that direction. Also the comments above obd_statfs() pointed to a more second resolution approach.&#160; What is causing the pain is that in&#160;lod_qos_statfs_updates() we test twice if the stats need to be refreshed due to the millisecond resolution: once before lq_rw_sem is taken and then again after taking the semaphore. For the case of using jiffies level resolution the chances that condition one is false and condition two is true are pretty slim. When the code moved to using time64_t that chance greatly increased.&lt;/p&gt;

&lt;p&gt;So I have approached this problem in two ways. The first one was to move to ktime_t and maintain the original behavior of the code. The second was to remove the second test for the need to refresh the cache, which seems to work. I have posted both options since there might be other behavior&#160; changes with the removal of the second stale stats test in&#160;lod_qos_statfs_updates(). We can ponder which is the better approach.&lt;/p&gt;</comment>
                            <comment id="220436" author="yujian" created="Thu, 8 Feb 2018 17:19:52 +0000"  >&lt;p&gt;+1 on master branch:&lt;br/&gt;
&lt;a href=&quot;https://testing.hpdd.intel.com/test_sets/28ef2abe-0ccc-11e8-a7cd-52540065bddc&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://testing.hpdd.intel.com/test_sets/28ef2abe-0ccc-11e8-a7cd-52540065bddc&lt;/a&gt;&lt;/p&gt;</comment>
                            <comment id="220499" author="simmonsja" created="Thu, 8 Feb 2018 21:02:31 +0000"  >&lt;p&gt;I have a fix at&#160;&lt;a href=&quot;https://review.whamcloud.com/#/c/31158/&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/#/c/31158/&lt;/a&gt;&lt;/p&gt;</comment>
                            <comment id="220578" author="bogl" created="Fri, 9 Feb 2018 15:23:10 +0000"  >&lt;p&gt;more on master:&lt;br/&gt;
&lt;a href=&quot;https://testing.hpdd.intel.com/test_sets/497dd194-0d2c-11e8-bd00-52540065bddc&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://testing.hpdd.intel.com/test_sets/497dd194-0d2c-11e8-bd00-52540065bddc&lt;/a&gt;&lt;br/&gt;
&lt;a href=&quot;https://testing.hpdd.intel.com/test_sets/2c909e06-0dbb-11e8-bd00-52540065bddc&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://testing.hpdd.intel.com/test_sets/2c909e06-0dbb-11e8-bd00-52540065bddc&lt;/a&gt;&lt;/p&gt;</comment>
                            <comment id="221479" author="bogl" created="Thu, 22 Feb 2018 17:19:20 +0000"  >&lt;p&gt;more on master:&lt;br/&gt;
&lt;a href=&quot;https://testing.hpdd.intel.com/test_sets/9ff21932-17f3-11e8-a7cd-52540065bddc&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://testing.hpdd.intel.com/test_sets/9ff21932-17f3-11e8-a7cd-52540065bddc&lt;/a&gt;&lt;br/&gt;
&lt;a href=&quot;https://testing.hpdd.intel.com/test_sets/9ea2f9ec-1808-11e8-a10a-52540065bddc&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://testing.hpdd.intel.com/test_sets/9ea2f9ec-1808-11e8-a10a-52540065bddc&lt;/a&gt;&lt;br/&gt;
&lt;a href=&quot;https://testing.hpdd.intel.com/test_sets/7bcf5f50-1817-11e8-bd00-52540065bddc&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://testing.hpdd.intel.com/test_sets/7bcf5f50-1817-11e8-bd00-52540065bddc&lt;/a&gt;&lt;/p&gt;</comment>
                            <comment id="221481" author="simmonsja" created="Thu, 22 Feb 2018 17:28:51 +0000"  >&lt;p&gt;Patch is in master-next so the fix should land soon.&lt;/p&gt;</comment>
                            <comment id="221758" author="gerrit" created="Tue, 27 Feb 2018 03:46:40 +0000"  >&lt;p&gt;Oleg Drokin (oleg.drokin@intel.com) merged in patch &lt;a href=&quot;https://review.whamcloud.com/31158/&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/31158/&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-10570&quot; title=&quot;sanity test_27y: Error:  &amp;#39;Of 2 OSTs, only 1 is available&amp;#39;&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-10570&quot;&gt;&lt;del&gt;LU-10570&lt;/del&gt;&lt;/a&gt; obd: fix statfs handling&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: master&lt;br/&gt;
Current Patch Set: &lt;br/&gt;
Commit: 87577f4988c1814dae1a1274880e20f1991e7b94&lt;/p&gt;</comment>
                            <comment id="221782" author="pjones" created="Tue, 27 Feb 2018 04:35:17 +0000"  >&lt;p&gt;Landed for 2.11&lt;/p&gt;</comment>
                    </comments>
                <issuelinks>
                            <issuelinktype id="10011">
                    <name>Related</name>
                                            <outwardlinks description="is related to ">
                                        <issuelink>
            <issuekey id="42994">LU-9019</issuekey>
        </issuelink>
                            </outwardlinks>
                                                                <inwardlinks description="is related to">
                                        <issuelink>
            <issuekey id="50429">LU-10576</issuekey>
        </issuelink>
                            </inwardlinks>
                                    </issuelinktype>
                    </issuelinks>
                <attachments>
                    </attachments>
                <subtasks>
                    </subtasks>
                <customfields>
                                                                                                                                                                                            <customfield id="customfield_10890" key="com.atlassian.jira.plugins.jira-development-integration-plugin:devsummary">
                        <customfieldname>Development</customfieldname>
                        <customfieldvalues>
                            
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        <customfield id="customfield_10390" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                        <customfieldname>Rank</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>1|hzzrsn:</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10090" key="com.pyxis.greenhopper.jira:gh-global-rank">
                        <customfieldname>Rank (Obsolete)</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>9223372036854775807</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                                                                                            <customfield id="customfield_10060" key="com.atlassian.jira.plugin.system.customfieldtypes:select">
                        <customfieldname>Severity</customfieldname>
                        <customfieldvalues>
                                <customfieldvalue key="10022"><![CDATA[3]]></customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        </customfields>
    </item>
</channel>
</rss>