<!-- 
RSS generated by JIRA (9.4.14#940014-sha1:734e6822bbf0d45eff9af51f82432957f73aa32c) at Sat Feb 10 03:15:19 UTC 2024

It is possible to restrict the fields that are returned in this document by specifying the 'field' parameter in your request.
For example, to request only the issue key and summary append 'field=key&field=summary' to the URL of your request.
-->
<rss version="0.92" >
<channel>
    <title>Whamcloud Community JIRA</title>
    <link>https://jira.whamcloud.com</link>
    <description>This file is an XML representation of an issue</description>
    <language>en-us</language>    <build-info>
        <version>9.4.14</version>
        <build-number>940014</build-number>
        <build-date>05-12-2023</build-date>
    </build-info>


<item>
            <title>[LU-15086] replay-dual: test_10: @@@@@@ FAIL: test_10 failed with 2 </title>
                <link>https://jira.whamcloud.com/browse/LU-15086</link>
                <project id="10000" key="LU">Lustre</project>
                    <description>&lt;p&gt;This issue was created by maloo for Vladimir Saveliev &amp;lt;vlaidimir.saveliev@hpe.com&amp;gt;&lt;/p&gt;

&lt;p&gt;This issue relates to the following test suite run: &lt;a href=&quot;https://testing.whamcloud.com/test_sets/2f1edc30-7ddf-4a6c-bc7a-fffbf58435b8&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://testing.whamcloud.com/test_sets/2f1edc30-7ddf-4a6c-bc7a-fffbf58435b8&lt;/a&gt;&lt;/p&gt;

&lt;p&gt;This is a relatively regular failure:&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;Error: &apos;test_10 failed with 2&apos; 
Failure Rate: 6.00% of most recent 100 runs, 0 skipped (all branches)
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;</description>
                <environment></environment>
        <key id="66594">LU-15086</key>
            <summary>replay-dual: test_10: @@@@@@ FAIL: test_10 failed with 2 </summary>
                <type id="1" iconUrl="https://jira.whamcloud.com/secure/viewavatar?size=xsmall&amp;avatarId=11303&amp;avatarType=issuetype">Bug</type>
                                            <priority id="4" iconUrl="https://jira.whamcloud.com/images/icons/priorities/minor.svg">Minor</priority>
                        <status id="5" iconUrl="https://jira.whamcloud.com/images/icons/statuses/resolved.png" description="A resolution has been taken, and it is awaiting verification by reporter. From here issues are either reopened, or are closed.">Resolved</status>
                    <statusCategory id="3" key="done" colorName="success"/>
                                    <resolution id="1">Fixed</resolution>
                                        <assignee username="bzzz">Alex Zhuravlev</assignee>
                                    <reporter username="maloo">Maloo</reporter>
                        <labels>
                    </labels>
                <created>Tue, 12 Oct 2021 15:42:55 +0000</created>
                <updated>Sat, 20 Nov 2021 14:49:22 +0000</updated>
                            <resolved>Sat, 20 Nov 2021 14:49:22 +0000</resolved>
                                                    <fixVersion>Lustre 2.15.0</fixVersion>
                                        <due></due>
                            <votes>0</votes>
                                    <watches>6</watches>
                                                                            <comments>
                            <comment id="315324" author="adilger" created="Tue, 12 Oct 2021 17:52:16 +0000"  >&lt;p&gt;It looks like this has been failing intermittently (once per 4-5 days on full sessions only) since at least 2021-01-01 (the earliest I checked).  Since 2021-10-10 it has also failed on 4 review test sessions, and as such is failing more frequently (6x in 2 days).  It would be worthwhile to check what changes landed on 10-10 that might have triggered this. &lt;/p&gt;</comment>
                            <comment id="316047" author="bzzz" created="Wed, 20 Oct 2021 10:29:49 +0000"  >&lt;p&gt;my bisection points to one patch:&lt;/p&gt;
&lt;div class=&quot;code panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;codeContent panelContent&quot;&gt;
&lt;pre class=&quot;code-java&quot;&gt;
COMMIT		TESTED	PASSED	 FAILED		COMMIT DESCRIPTION
8a770616a5	5	4	1	BAD	 LU-14797 sec: add projid to nodemap
bbfdc7c167	17	15	2	BAD	 LU-14739 quota: fix quota with root squash enabled
da1d93513f	7	6	1	BAD	 LU-14475 log: Rewrite some log messages
885b494632	18	17	1	BAD	 LU-12362 ptlrpc: use wait_woken() in ptlrpcd()
9ec5e2329b	90	90	0		 LU-6142 lod: &lt;span class=&quot;code-keyword&quot;&gt;return&lt;/span&gt; pools_hash_params to being &lt;span class=&quot;code-keyword&quot;&gt;static&lt;/span&gt;.
2a24b6ec67	90	90	0		 LU-14734 ldiskfs: improve message &lt;span class=&quot;code-keyword&quot;&gt;for&lt;/span&gt; large_dir
f7f31f8f96	90	90	0		 LU-12567 ptlrpc: handle reply and resend reorder
1a409a3e6a	90	90	0		 LU-14711 osc: Do not attempt sending empty pages
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;I &lt;em&gt;think&lt;/em&gt; there are two potential issues in &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-12362&quot; title=&quot;kernel warning &amp;#39;do not call blocking ops when !TASK_RUNNING &amp;#39; in ptlrpcd&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-12362&quot;&gt;&lt;del&gt;LU-12362&lt;/del&gt;&lt;/a&gt;:&lt;/p&gt;
&lt;div class=&quot;code panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;codeContent panelContent&quot;&gt;
&lt;pre class=&quot;code-java&quot;&gt;
-               &lt;span class=&quot;code-keyword&quot;&gt;if&lt;/span&gt; (timeout == 0)
-                       wait_event_idle(set-&amp;gt;set_waitq,
-                                       ptlrpcd_check(&amp;amp;env, pc));
-               &lt;span class=&quot;code-keyword&quot;&gt;else&lt;/span&gt; &lt;span class=&quot;code-keyword&quot;&gt;if&lt;/span&gt; (wait_event_idle_timeout(set-&amp;gt;set_waitq,
-                                                ptlrpcd_check(&amp;amp;env, pc),
-                                                cfs_time_seconds(timeout))
-                        == 0)
+
+               add_wait_queue(&amp;amp;set-&amp;gt;set_waitq, &amp;amp;wait);
+               &lt;span class=&quot;code-keyword&quot;&gt;while&lt;/span&gt; (!ptlrpcd_check(&amp;amp;env, pc)) {
+                       &lt;span class=&quot;code-object&quot;&gt;int&lt;/span&gt; ret;
+
+                       &lt;span class=&quot;code-keyword&quot;&gt;if&lt;/span&gt; (timeout == 0)
+                               ret = wait_woken(&amp;amp;wait, TASK_IDLE,
+                                                MAX_SCHEDULE_TIMEOUT);
+                       &lt;span class=&quot;code-keyword&quot;&gt;else&lt;/span&gt;
+                               ret = wait_woken(&amp;amp;wait, TASK_IDLE,
+                                                cfs_time_seconds(timeout));
+                       &lt;span class=&quot;code-keyword&quot;&gt;if&lt;/span&gt; (ret != 0)
+                               &lt;span class=&quot;code-keyword&quot;&gt;continue&lt;/span&gt;;
+                       &lt;span class=&quot;code-comment&quot;&gt;/* Timed out */&lt;/span&gt;
                        ptlrpc_expired_set(set);
+                       &lt;span class=&quot;code-keyword&quot;&gt;break&lt;/span&gt;;
+               }
+               remove_wait_queue(&amp;amp;set-&amp;gt;set_waitq, &amp;amp;wait);
+
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;1) the full timeout is reused after a spurious wakeup, so the wait can last longer than expected&lt;br/&gt;
2) ptlrpc_expired_set() can be called even when timeout=0&lt;/p&gt;
</comment>
                            <comment id="316051" author="bzzz" created="Wed, 20 Oct 2021 11:02:01 +0000"  >&lt;p&gt;added a simple check to ptlrpcd to catch too long waiting:&lt;/p&gt;

&lt;div class=&quot;code panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;codeContent panelContent&quot;&gt;
&lt;pre class=&quot;code-java&quot;&gt;LustreError: 5019:0:(ptlrpcd.c:515:ptlrpcd()) ASSERTION( timeout == 0 || end - start &amp;lt; timeout + 4 ) failed: timeout 10, end 97, start 83, diff 14, count 3&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;i.e. a 10-second wait was requested, but 14 seconds were actually spent waiting; wait_woken() was called 3 times.&lt;/p&gt;

&lt;p&gt;yet another interesting example from failed replay-dual:&lt;/p&gt;
&lt;div class=&quot;code panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;codeContent panelContent&quot;&gt;
&lt;pre class=&quot;code-java&quot;&gt;LustreError: 5009:0:(ptlrpcd.c:513:ptlrpcd()) timeout 19, end 711, start 620, diff 91, count 9&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;</comment>
                            <comment id="316054" author="gerrit" created="Wed, 20 Oct 2021 11:13:58 +0000"  >&lt;p&gt;&quot;Alex Zhuravlev &amp;lt;bzzz@whamcloud.com&amp;gt;&quot; uploaded a new patch: &lt;a href=&quot;https://review.whamcloud.com/45308&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/45308&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-15086&quot; title=&quot;replay-dual: test_10: @@@@@@ FAIL: test_10 failed with 2 &quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-15086&quot;&gt;&lt;del&gt;LU-15086&lt;/del&gt;&lt;/a&gt; ptlrpc: fix timeout after spurious wakeup&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: master&lt;br/&gt;
Current Patch Set: 1&lt;br/&gt;
Commit: a495c00874105caa299fdf4dfaa482df9b24ad2e&lt;/p&gt;</comment>
                            <comment id="316058" author="pjones" created="Wed, 20 Oct 2021 12:08:27 +0000"  >&lt;p&gt;Would we be better off reverting &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-12362&quot; title=&quot;kernel warning &amp;#39;do not call blocking ops when !TASK_RUNNING &amp;#39; in ptlrpcd&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-12362&quot;&gt;&lt;del&gt;LU-12362&lt;/del&gt;&lt;/a&gt; from master?&lt;/p&gt;</comment>
                            <comment id="316060" author="bzzz" created="Wed, 20 Oct 2021 12:21:28 +0000"  >&lt;p&gt;with the patch above I can&apos;t reproduce the problem. would like to hear Neil&apos;s opinion.&lt;/p&gt;</comment>
                            <comment id="318742" author="gerrit" created="Sat, 20 Nov 2021 06:26:31 +0000"  >&lt;p&gt;&quot;Oleg Drokin &amp;lt;green@whamcloud.com&amp;gt;&quot; merged in patch &lt;a href=&quot;https://review.whamcloud.com/45308/&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/45308/&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-15086&quot; title=&quot;replay-dual: test_10: @@@@@@ FAIL: test_10 failed with 2 &quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-15086&quot;&gt;&lt;del&gt;LU-15086&lt;/del&gt;&lt;/a&gt; ptlrpc: fix timeout after spurious wakeup&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: master&lt;br/&gt;
Current Patch Set: &lt;br/&gt;
Commit: b8383035406a4b7bee2e6d8674eaef480b3e3b35&lt;/p&gt;</comment>
                            <comment id="318782" author="pjones" created="Sat, 20 Nov 2021 14:49:22 +0000"  >&lt;p&gt;Landed for 2.15&lt;/p&gt;</comment>
                    </comments>
                    <attachments>
                    </attachments>
                <subtasks>
                    </subtasks>
                <customfields>
                                                                                                                                                                                            <customfield id="customfield_10890" key="com.atlassian.jira.plugins.jira-development-integration-plugin:devsummary">
                        <customfieldname>Development</customfieldname>
                        <customfieldvalues>
                            
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        <customfield id="customfield_10390" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                        <customfieldname>Rank</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>1|i026y7:</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10090" key="com.pyxis.greenhopper.jira:gh-global-rank">
                        <customfieldname>Rank (Obsolete)</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>9223372036854775807</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                                                                                            <customfield id="customfield_10060" key="com.atlassian.jira.plugin.system.customfieldtypes:select">
                        <customfieldname>Severity</customfieldname>
                        <customfieldvalues>
                                <customfieldvalue key="10022"><![CDATA[3]]></customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        </customfields>
    </item>
</channel>
</rss>