<!-- 
RSS generated by JIRA (9.4.14#940014-sha1:734e6822bbf0d45eff9af51f82432957f73aa32c) at Sat Feb 10 01:50:57 UTC 2024

It is possible to restrict the fields that are returned in this document by specifying the 'field' parameter in your request.
For example, to request only the issue key and summary append 'field=key&field=summary' to the URL of your request.
-->
<rss version="0.92" >
<channel>
    <title>Whamcloud Community JIRA</title>
    <link>https://jira.whamcloud.com</link>
    <description>This file is an XML representation of an issue</description>
    <language>en-us</language>    <build-info>
        <version>9.4.14</version>
        <build-number>940014</build-number>
        <build-date>05-12-2023</build-date>
    </build-info>


<item>
            <title>[LU-5375] Failure on test suite sanity test_151 test_156: roc_hit is not safe to use</title>
                <link>https://jira.whamcloud.com/browse/LU-5375</link>
                <project id="10000" key="LU">Lustre</project>
                    <description>&lt;p&gt;This issue was created by maloo for sarah &amp;lt;sarah@whamcloud.com&amp;gt;&lt;/p&gt;

&lt;p&gt;This issue relates to the following test suite run: &lt;a href=&quot;https://testing.hpdd.intel.com/test_sets/97672104-0dca-11e4-b3f5-5254006e85c2&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://testing.hpdd.intel.com/test_sets/97672104-0dca-11e4-b3f5-5254006e85c2&lt;/a&gt;.&lt;/p&gt;

&lt;p&gt;The sub-test test_151 failed with the following error:&lt;/p&gt;
&lt;blockquote&gt;
&lt;p&gt;roc_hit is not safe to use&lt;/p&gt;&lt;/blockquote&gt;

&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;== sanity test 151: test cache on oss and controls ================================= 19:31:03 (1405477863)
CMD: onyx-40vm8 /usr/sbin/lctl get_param -n obdfilter.lustre-OST*.read_cache_enable 		osd-*.lustre-OST*.read_cache_enable 2&amp;gt;&amp;amp;1
CMD: onyx-40vm8 /usr/sbin/lctl get_param -n obdfilter.lustre-OST*.read_cache_enable 		osd-*.lustre-OST*.read_cache_enable 2&amp;gt;&amp;amp;1
CMD: onyx-40vm8 /usr/sbin/lctl set_param -n obdfilter.lustre-OST*.writethrough_cache_enable=1 		osd-*.lustre-OST*.writethrough_cache_enable=1 2&amp;gt;&amp;amp;1
CMD: onyx-40vm8 /usr/sbin/lctl get_param -n obdfilter.lustre-OST*.writethrough_cache_enable 		osd-*.lustre-OST*.writethrough_cache_enable 2&amp;gt;&amp;amp;1
4+0 records in
4+0 records out
16384 bytes (16 kB) copied, 0.00947514 s, 1.7 MB/s
CMD: onyx-40vm8 /usr/sbin/lctl get_param -n obdfilter.*OST*0000.stats 		osd-*.*OST*0000.stats 2&amp;gt;&amp;amp;1
CMD: onyx-40vm8 /usr/sbin/lctl get_param -n obdfilter.*OST*0000.stats 		osd-*.*OST*0000.stats 2&amp;gt;&amp;amp;1
BEFORE:11 AFTER:12
 sanity test_151: @@@@@@ FAIL: roc_hit is not safe to use 
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;</description>
                <environment>client and server: lustre-b2_6-rc2 ldiskfs &lt;br/&gt;
client is SLES11 SP3</environment>
        <key id="25657">LU-5375</key>
            <summary>Failure on test suite sanity test_151 test_156: roc_hit is not safe to use</summary>
                <type id="1" iconUrl="https://jira.whamcloud.com/secure/viewavatar?size=xsmall&amp;avatarId=11303&amp;avatarType=issuetype">Bug</type>
                                            <priority id="4" iconUrl="https://jira.whamcloud.com/images/icons/priorities/minor.svg">Minor</priority>
                        <status id="5" iconUrl="https://jira.whamcloud.com/images/icons/statuses/resolved.png" description="A resolution has been taken, and it is awaiting verification by reporter. From here issues are either reopened, or are closed.">Resolved</status>
                    <statusCategory id="3" key="done" colorName="success"/>
                                    <resolution id="5">Cannot Reproduce</resolution>
                                        <assignee username="wc-triage">WC Triage</assignee>
                                    <reporter username="maloo">Maloo</reporter>
                        <labels>
                    </labels>
                <created>Sat, 19 Jul 2014 00:22:45 +0000</created>
                <updated>Sun, 27 Jan 2019 21:10:56 +0000</updated>
                            <resolved>Wed, 12 Sep 2018 00:28:57 +0000</resolved>
                                    <version>Lustre 2.6.0</version>
                    <version>Lustre 2.8.0</version>
                                                        <due></due>
                            <votes>0</votes>
                                    <watches>9</watches>
                                                                            <comments>
                            <comment id="89651" author="green" created="Mon, 21 Jul 2014 17:51:15 +0000"  >&lt;p&gt;So this failing code was added as part of &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-2902&quot; title=&quot;sanity test_156: NOT IN CACHE: before: , after: &quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-2902&quot;&gt;&lt;del&gt;LU-2902&lt;/del&gt;&lt;/a&gt; so whoever is going to look at it, you might want to look there too.&lt;/p&gt;</comment>
                            <comment id="89919" author="sarah" created="Thu, 24 Jul 2014 02:24:07 +0000"  >&lt;p&gt;Also seen this during rolling upgrade from 2.5 ldiskfs to 2.6.  &lt;br/&gt;
After the MDS and OSS were upgraded to 2.6, both clients were kept at 2.5; sanity test_151 was then run and failed with the same error.&lt;/p&gt;

&lt;p&gt;before upgrade: 2.5.2&lt;br/&gt;
after upgrade: b2_6-rc2&lt;/p&gt;</comment>
                            <comment id="116578" author="adilger" created="Wed, 27 May 2015 18:21:23 +0000"  >&lt;p&gt;Haven&apos;t seen this in a long time.&lt;/p&gt;</comment>
                            <comment id="120200" author="sarah" created="Thu, 2 Jul 2015 18:49:18 +0000"  >&lt;p&gt;hit this again in interop testing with lustre-master server(EL7) and 2.5.3 client&lt;/p&gt;

&lt;p&gt;&lt;a href=&quot;https://testing.hpdd.intel.com/test_sets/e006681c-1250-11e5-bec9-5254006e85c2&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://testing.hpdd.intel.com/test_sets/e006681c-1250-11e5-bec9-5254006e85c2&lt;/a&gt;&lt;/p&gt;</comment>
                            <comment id="129655" author="adilger" created="Wed, 7 Oct 2015 01:54:35 +0000"  >&lt;p&gt;This is failing between 0-5 times per day, maybe twice per day on average.  It looks like most of these recent failures (excluding those attributable to &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-5030&quot; title=&quot;&amp;quot;lctl {get,set}_param&amp;quot; should also check in /sys/fs/{lnet,lustre}&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-5030&quot;&gt;&lt;del&gt;LU-5030&lt;/del&gt;&lt;/a&gt; breaking /proc access completely) are of the form:&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;BEFORE:18720 AFTER:18721
 sanity test_151: @@@@@@ FAIL: roc_hit is not safe to use 
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;so the before/after values are only off by one.  I suspect this is just a problem with the test script - the &lt;tt&gt;roc_hit_init()&lt;/tt&gt; function is just using &lt;tt&gt;cat $DIR/$tfile&lt;/tt&gt; to read the file and with proper readahead of files smaller than &lt;tt&gt;max_readahead_whole&lt;/tt&gt; it should only do a single read.  So &lt;tt&gt;roc_hit_init()&lt;/tt&gt; should be changed to use something like:&lt;/p&gt;
&lt;div class=&quot;code panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;codeContent panelContent&quot;&gt;
&lt;pre class=&quot;code-java&quot;&gt;                &lt;span class=&quot;code-keyword&quot;&gt;if&lt;/span&gt; (( AFTER - BEFORE == 0 || AFTER - BEFORE &amp;gt; 4)); then
                        rm -rf $dir
                        error &lt;span class=&quot;code-quote&quot;&gt;&quot;roc_hit is not safe to use: BEFORE=$BEFORE, AFTER=$AFTER&quot;&lt;/span&gt;
                fi
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;The &lt;tt&gt;rm -rf $dir&lt;/tt&gt; at the end of &lt;tt&gt;roc_hit_init()&lt;/tt&gt; should also be changed to just use &lt;tt&gt;rmdir $dir&lt;/tt&gt; since this directory should be empty at this point because $file is deleted for each loop.&lt;/p&gt;</comment>
                            <comment id="136071" author="sarah" created="Fri, 11 Dec 2015 18:25:04 +0000"  >&lt;p&gt;also hit this issue after rolling downgrade from master/3264 RHEL6.7  to 2.5.5 RHEL6.6&lt;/p&gt;</comment>
                            <comment id="136392" author="standan" created="Tue, 15 Dec 2015 18:43:40 +0000"  >&lt;p&gt;Encountered another instance for Interop master&amp;lt;-&amp;gt;2.5.5&lt;br/&gt;
Server: Master, Build# 3266, Tag 2.7.64 &lt;br/&gt;
Client: 2.5.5, b2_5_fe/62&lt;br/&gt;
&lt;a href=&quot;https://testing.hpdd.intel.com/test_sets/ac332386-9fcc-11e5-a33d-5254006e85c2&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://testing.hpdd.intel.com/test_sets/ac332386-9fcc-11e5-a33d-5254006e85c2&lt;/a&gt;&lt;/p&gt;</comment>
                            <comment id="136489" author="standan" created="Wed, 16 Dec 2015 01:25:23 +0000"  >&lt;p&gt;Server: Master, Build# 3266, Tag 2.7.64 , RHEL 7 &lt;br/&gt;
Client: 2.5.5, b2_5_fe/62&lt;br/&gt;
&lt;a href=&quot;https://testing.hpdd.intel.com/test_sets/e4f27f18-9fff-11e5-a33d-5254006e85c2&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://testing.hpdd.intel.com/test_sets/e4f27f18-9fff-11e5-a33d-5254006e85c2&lt;/a&gt;&lt;/p&gt;</comment>
                            <comment id="138996" author="simmonsja" created="Fri, 15 Jan 2016 01:36:43 +0000"  >&lt;p&gt;I know why this test is failing. Symlinks are being created by the obdfilter into the osd-* layer for writethrough_cache_enable, readcache_max_filesize, read_cache_enable and brw_stats. This works for ldiskfs but not ZFS. ZFS only has brw_stats but lacks the rest. This is why sanity test 151 fails for ZFS.&lt;/p&gt;</comment>
                            <comment id="139000" author="adilger" created="Fri, 15 Jan 2016 02:23:17 +0000"  >&lt;p&gt;James, wouldn&apos;t that cause review-zfs to fail all the time?  It appears that test_151 has checks for &lt;tt&gt;read_cache_enable&lt;/tt&gt; and &lt;tt&gt;writethrough_cache_enable&lt;/tt&gt;, though it does &quot;return 0&quot; instead of &quot;skip&quot; as it probably should.&lt;/p&gt;</comment>
                            <comment id="139002" author="simmonsja" created="Fri, 15 Jan 2016 02:53:42 +0000"  >&lt;p&gt;The source of the failure is get_osd_param(). It should be reporting that those parameters don&apos;t exist. It&apos;s doing a &quot;grep -v &apos;Found no match&apos;&quot; but that message is not being reported by lctl get_param. Running the command manually gives:&lt;/p&gt;

&lt;p&gt;[root@ninja11 lustre-OST0000]# lctl get_param -n obdfilter.lustre-OST0000.read_cache_enable&lt;br/&gt;
error: list_param: obdfilter/lustre-OST0000/read_cache_enable: No such file or directory&lt;/p&gt;

&lt;p&gt;Ah yes I moved from the custom globerrstr() to the standard strerror(...). The failure in this case is due to the &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-5030&quot; title=&quot;&amp;quot;lctl {get,set}_param&amp;quot; should also check in /sys/fs/{lnet,lustre}&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-5030&quot;&gt;&lt;del&gt;LU-5030&lt;/del&gt;&lt;/a&gt; changes.&lt;/p&gt;</comment>
                            <comment id="139003" author="simmonsja" created="Fri, 15 Jan 2016 03:20:46 +0000"  >&lt;p&gt;I&apos;m thinking the &quot;grep -v &apos;Found no match&apos;&quot; test might not always work. I&apos;m exploring testing the return value &quot;$?&quot; of the command. I like to test to see if &quot;$?&quot; is less than zero. Would something like this work?&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;do_nodes $nodes &quot;$LCTL set_param -n obdfilter.$device.$name=$value \
-              osd-*.$device.$name=$value 2&amp;gt;&amp;amp;1&quot; | grep -v &apos;Found no match&apos;
+              osd-*.$device.$name=$value 2&amp;gt;&amp;amp;1&quot; || return [ $? -lt 0 ]
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;
&lt;p&gt;Sorry not the greatest bash scripter.&lt;/p&gt;
</comment>
                            <comment id="139014" author="adilger" created="Fri, 15 Jan 2016 11:38:50 +0000"  >&lt;p&gt;No, because bash always returns positive error numbers and not negative ones.  You could check &lt;tt&gt;[ $? -ne 0 ]&lt;/tt&gt; but that might as well just be &lt;tt&gt;return $?&lt;/tt&gt;, which is also the default behaviour when returning from a function - to return the exit code of the last command executed.  The other question is whether &quot;lctl set_param&quot; actually returns an error code on errors, or just prints a message?&lt;/p&gt;

&lt;p&gt;In this case, you might be better off using &lt;tt&gt;| egrep -v &apos;Found no match|No such file or directory&apos;&lt;/tt&gt; or similar, to ensure it works for both old and new lctl, since this will also run in interop mode with servers that do not have your patches.  Is there a reason you got rid of globerrstr() and went to strerror()?&lt;/p&gt;</comment>
                            <comment id="139020" author="simmonsja" created="Fri, 15 Jan 2016 15:09:30 +0000"  >&lt;p&gt;globerrstr() only handled 3 error cases. The move to cfs_get_paths() expanded the possible errors. I have a working solution now. Just pushed the patch.&lt;/p&gt;</comment>
                            <comment id="139336" author="standan" created="Tue, 19 Jan 2016 23:07:12 +0000"  >&lt;p&gt;Another instance found for interop : EL6.7 Server/2.5.5 Client&lt;br/&gt;
Server: master, build# 3303, RHEL 6.7&lt;br/&gt;
Client: 2.5.5, b2_5_fe/62&lt;br/&gt;
&lt;a href=&quot;https://testing.hpdd.intel.com/test_sets/24b4b54e-bad6-11e5-9137-5254006e85c2&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://testing.hpdd.intel.com/test_sets/24b4b54e-bad6-11e5-9137-5254006e85c2&lt;/a&gt;&lt;/p&gt;</comment>
                            <comment id="141872" author="standan" created="Wed, 10 Feb 2016 22:20:29 +0000"  >&lt;p&gt;Another instance found for interop tag 2.7.66 - EL6.7 Server/2.5.5 Client, build# 3316&lt;br/&gt;
&lt;a href=&quot;https://testing.hpdd.intel.com/test_sets/9ed7c1d8-cc9f-11e5-963e-5254006e85c2&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://testing.hpdd.intel.com/test_sets/9ed7c1d8-cc9f-11e5-963e-5254006e85c2&lt;/a&gt;&lt;/p&gt;

&lt;p&gt;Another instance found for interop tag 2.7.66 - EL7 Server/2.5.5 Client, build# 3316&lt;br/&gt;
&lt;a href=&quot;https://testing.hpdd.intel.com/test_sets/5ea975e2-cc46-11e5-901d-5254006e85c2&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://testing.hpdd.intel.com/test_sets/5ea975e2-cc46-11e5-901d-5254006e85c2&lt;/a&gt;&lt;/p&gt;</comment>
                            <comment id="143599" author="standan" created="Wed, 24 Feb 2016 17:25:48 +0000"  >&lt;p&gt;Another instance found for interop - EL6.7 Server/2.5.5 Client, tag 2.7.90. &lt;br/&gt;
&lt;a href=&quot;https://testing.hpdd.intel.com/test_sessions/f99a2d60-d567-11e5-bc47-5254006e85c2&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://testing.hpdd.intel.com/test_sessions/f99a2d60-d567-11e5-bc47-5254006e85c2&lt;/a&gt;&lt;br/&gt;
Another instance found for interop - EL7 Server/2.5.5 Client, tag 2.7.90. &lt;br/&gt;
&lt;a href=&quot;https://testing.hpdd.intel.com/test_sessions/93baffee-d2ae-11e5-8697-5254006e85c2&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://testing.hpdd.intel.com/test_sessions/93baffee-d2ae-11e5-8697-5254006e85c2&lt;/a&gt;&lt;/p&gt;</comment>
                            <comment id="233254" author="simmonsja" created="Mon, 10 Sep 2018 16:41:24 +0000"  >&lt;p&gt;Can we close this?&lt;/p&gt;</comment>
                            <comment id="233364" author="adilger" created="Wed, 12 Sep 2018 00:28:57 +0000"  >&lt;p&gt;Recent failures reporting this ticket were caused by &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-11347&quot; title=&quot;Do not use pagecache for SSD I/O when read/write cache are disabled&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-11347&quot;&gt;&lt;del&gt;LU-11347&lt;/del&gt;&lt;/a&gt; patch, this hasn&apos;t been hit in a long time.&lt;/p&gt;</comment>
                            <comment id="240733" author="simmonsja" created="Fri, 25 Jan 2019 19:33:03 +0000"  >&lt;p&gt;I&apos;m seeing this bug again &lt;img class=&quot;emoticon&quot; src=&quot;https://jira.whamcloud.com/images/icons/emoticons/sad.png&quot; height=&quot;16&quot; width=&quot;16&quot; align=&quot;absmiddle&quot; alt=&quot;&quot; border=&quot;0&quot;/&gt;&#160;in 2.12.50 testing.&lt;/p&gt;</comment>
                            <comment id="240771" author="adilger" created="Sun, 27 Jan 2019 21:10:56 +0000"  >&lt;p&gt;Recent failures were triggered by &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-11607&quot; title=&quot;Reduce repeated function calls in Lustre test suites&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-11607&quot;&gt;&lt;del&gt;LU-11607&lt;/del&gt;&lt;/a&gt; patch landing, but it turns out the problem was in the original &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-2261&quot; title=&quot;Add cache stats to zfs osd&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-2261&quot;&gt;&lt;del&gt;LU-2261&lt;/del&gt;&lt;/a&gt; patch.  See &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-11889&quot; title=&quot;sanity test 156 fails on ZFS: roc_hit not safe to use&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-11889&quot;&gt;LU-11889&lt;/a&gt; for details.&lt;/p&gt;</comment>
                    </comments>
                <issuelinks>
                            <issuelinktype id="10011">
                    <name>Related</name>
                                            <outwardlinks description="is related to ">
                                        <issuelink>
            <issuekey id="54667">LU-11889</issuekey>
        </issuelink>
            <issuelink>
            <issuekey id="17754">LU-2902</issuekey>
        </issuelink>
            <issuelink>
            <issuekey id="53231">LU-11347</issuekey>
        </issuelink>
            <issuelink>
            <issuekey id="16546">LU-2261</issuekey>
        </issuelink>
            <issuelink>
            <issuekey id="53901">LU-11607</issuekey>
        </issuelink>
                            </outwardlinks>
                                                        </issuelinktype>
                    </issuelinks>
                <attachments>
                    </attachments>
                <subtasks>
                    </subtasks>
                <customfields>
                                                                                                                                                                                            <customfield id="customfield_10890" key="com.atlassian.jira.plugins.jira-development-integration-plugin:devsummary">
                        <customfieldname>Development</customfieldname>
                        <customfieldvalues>
                            
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        <customfield id="customfield_10390" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                        <customfieldname>Rank</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>1|hzwrtr:</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10090" key="com.pyxis.greenhopper.jira:gh-global-rank">
                        <customfieldname>Rank (Obsolete)</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>14980</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                                                                                            <customfield id="customfield_10060" key="com.atlassian.jira.plugin.system.customfieldtypes:select">
                        <customfieldname>Severity</customfieldname>
                        <customfieldvalues>
                                <customfieldvalue key="10022"><![CDATA[3]]></customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        </customfields>
    </item>
</channel>
</rss>