<!-- 
RSS generated by JIRA (9.4.14#940014-sha1:734e6822bbf0d45eff9af51f82432957f73aa32c) at Sat Feb 10 03:06:13 UTC 2024

It is possible to restrict the fields that are returned in this document by specifying the 'field' parameter in your request.
For example, to request only the issue key and summary append 'field=key&field=summary' to the URL of your request.
-->
<rss version="0.92" >
<channel>
    <title>Whamcloud Community JIRA</title>
    <link>https://jira.whamcloud.com</link>
    <description>This file is an XML representation of an issue</description>
    <language>en-us</language>    <build-info>
        <version>9.4.14</version>
        <build-number>940014</build-number>
        <build-date>05-12-2023</build-date>
    </build-info>


<item>
            <title>[LU-14027] Client recovery statemachine hangs in recovery disconnected during lock reply</title>
                <link>https://jira.whamcloud.com/browse/LU-14027</link>
                <project id="10000" key="LU">Lustre</project>
                    <description>&lt;p&gt;&lt;a href=&quot;https://jira.whamcloud.com/browse/LU-13600&quot; title=&quot;limit number of RPCs in flight during recovery&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-13600&quot;&gt;&lt;del&gt;LU-13600&lt;/del&gt;&lt;/a&gt; introduced lock rate-limiting logic, but it did not take into account that if there&apos;s a disconnection in the REPLAY_LOCKS phase, the not-yet-sent locks get stuck in the sending queue, so the replay locks thread hangs with imp_replay_inflight elevated above zero.&lt;/p&gt;

&lt;p&gt;The direct consequence of that is that the recovery state machine never advances from the REPLAY to the REPLAY_LOCKS state while imp_replay_inflight is non-zero:&lt;/p&gt;

&lt;div class=&quot;code panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;codeContent panelContent&quot;&gt;
&lt;pre class=&quot;code-java&quot;&gt;
        &lt;span class=&quot;code-keyword&quot;&gt;if&lt;/span&gt; (imp-&amp;gt;imp_state == LUSTRE_IMP_REPLAY) {
                CDEBUG(D_HA, &lt;span class=&quot;code-quote&quot;&gt;&quot;replay requested by %s\n&quot;&lt;/span&gt;,
                       obd2cli_tgt(imp-&amp;gt;imp_obd));
                rc = ptlrpc_replay_next(imp, &amp;amp;inflight);
                &lt;span class=&quot;code-keyword&quot;&gt;if&lt;/span&gt; (inflight == 0 &amp;amp;&amp;amp;
                    atomic_read(&amp;amp;imp-&amp;gt;imp_replay_inflight) == 0) {
                        import_set_state(imp, LUSTRE_IMP_REPLAY_LOCKS);
                        rc = ldlm_replay_locks(imp);
                        &lt;span class=&quot;code-keyword&quot;&gt;if&lt;/span&gt; (rc)
                                GOTO(out, rc);
                }
                rc = 0;
        }
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;To break this we either need to check import state in the replay locks thread before attempting any sending or make sure replay_one_lock() prepares resend requests in such a state that they are never stuck.&lt;/p&gt;</description>
                <environment></environment>
        <key id="61196">LU-14027</key>
            <summary>Client recovery statemachine hangs in recovery disconnected during lock reply</summary>
                <type id="1" iconUrl="https://jira.whamcloud.com/secure/viewavatar?size=xsmall&amp;avatarId=11303&amp;avatarType=issuetype">Bug</type>
                                            <priority id="1" iconUrl="https://jira.whamcloud.com/images/icons/priorities/blocker.svg">Blocker</priority>
                        <status id="5" iconUrl="https://jira.whamcloud.com/images/icons/statuses/resolved.png" description="A resolution has been taken, and it is awaiting verification by reporter. From here issues are either reopened, or are closed.">Resolved</status>
                    <statusCategory id="3" key="done" colorName="success"/>
                                    <resolution id="1">Fixed</resolution>
                                        <assignee username="green">Oleg Drokin</assignee>
                                    <reporter username="green">Oleg Drokin</reporter>
                        <labels>
                    </labels>
                <created>Wed, 14 Oct 2020 03:48:32 +0000</created>
                <updated>Wed, 25 Oct 2023 17:45:12 +0000</updated>
                            <resolved>Thu, 19 Nov 2020 15:14:45 +0000</resolved>
                                    <version>Lustre 2.14.0</version>
                    <version>Lustre 2.12.6</version>
                                    <fixVersion>Lustre 2.14.0</fixVersion>
                    <fixVersion>Lustre 2.12.7</fixVersion>
                                        <due></due>
                            <votes>0</votes>
                                    <watches>8</watches>
                                                                            <comments>
                            <comment id="282182" author="gerrit" created="Wed, 14 Oct 2020 04:00:49 +0000"  >&lt;p&gt;Oleg Drokin (green@whamcloud.com) uploaded a new patch: &lt;a href=&quot;https://review.whamcloud.com/40238&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/40238&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-14027&quot; title=&quot;Client recovery statemachine hangs in recovery disconnected during lock reply&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-14027&quot;&gt;&lt;del&gt;LU-14027&lt;/del&gt;&lt;/a&gt; ldlm: Do not hang if recovery restarted during lock replay&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: master&lt;br/&gt;
Current Patch Set: 1&lt;br/&gt;
Commit: 1ed765025e4a2b34ff992cb5c461557bc35ad154&lt;/p&gt;</comment>
                            <comment id="282432" author="gerrit" created="Fri, 16 Oct 2020 14:32:34 +0000"  >&lt;p&gt;Oleg Drokin (green@whamcloud.com) uploaded a new patch: &lt;a href=&quot;https://review.whamcloud.com/40272&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/40272&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-14027&quot; title=&quot;Client recovery statemachine hangs in recovery disconnected during lock reply&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-14027&quot;&gt;&lt;del&gt;LU-14027&lt;/del&gt;&lt;/a&gt; ldlm: Do not wait for lock replay sending if import dsconnected&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: master&lt;br/&gt;
Current Patch Set: 1&lt;br/&gt;
Commit: bffa4ae3a3c38f7cd3bea2b7fbf8e09df98e46a0&lt;/p&gt;</comment>
                            <comment id="285547" author="gerrit" created="Thu, 19 Nov 2020 10:20:42 +0000"  >&lt;p&gt;Oleg Drokin (green@whamcloud.com) merged in patch &lt;a href=&quot;https://review.whamcloud.com/40272/&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/40272/&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-14027&quot; title=&quot;Client recovery statemachine hangs in recovery disconnected during lock reply&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-14027&quot;&gt;&lt;del&gt;LU-14027&lt;/del&gt;&lt;/a&gt; ldlm: Do not wait for lock replay sending if import dsconnected&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: master&lt;br/&gt;
Current Patch Set: &lt;br/&gt;
Commit: f06a4efe13faca21ae2a6afcf5718d748bb6ac5d&lt;/p&gt;</comment>
                            <comment id="285583" author="gerrit" created="Thu, 19 Nov 2020 15:11:16 +0000"  >&lt;p&gt;Oleg Drokin (green@whamcloud.com) merged in patch &lt;a href=&quot;https://review.whamcloud.com/40238/&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/40238/&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-14027&quot; title=&quot;Client recovery statemachine hangs in recovery disconnected during lock reply&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-14027&quot;&gt;&lt;del&gt;LU-14027&lt;/del&gt;&lt;/a&gt; ldlm: Do not hang if recovery restarted during lock replay&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: master&lt;br/&gt;
Current Patch Set: &lt;br/&gt;
Commit: 7ca495ec67f474e10352077fc40123e4818b8e69&lt;/p&gt;</comment>
                            <comment id="285585" author="pjones" created="Thu, 19 Nov 2020 15:14:45 +0000"  >&lt;p&gt;Landed for 2.14&lt;/p&gt;</comment>
                            <comment id="289487" author="gerrit" created="Thu, 14 Jan 2021 15:58:51 +0000"  >&lt;p&gt;Etienne AUJAMES (eaujames@ddn.com) uploaded a new patch: &lt;a href=&quot;https://review.whamcloud.com/41223&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/41223&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-14027&quot; title=&quot;Client recovery statemachine hangs in recovery disconnected during lock reply&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-14027&quot;&gt;&lt;del&gt;LU-14027&lt;/del&gt;&lt;/a&gt; ldlm: Do not wait for lock replay sending if import dsconnected&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: b2_12&lt;br/&gt;
Current Patch Set: 1&lt;br/&gt;
Commit: d5f9742667dab11393c602807e918c9eb8793b2b&lt;/p&gt;</comment>
                            <comment id="289488" author="gerrit" created="Thu, 14 Jan 2021 15:58:52 +0000"  >&lt;p&gt;Etienne AUJAMES (eaujames@ddn.com) uploaded a new patch: &lt;a href=&quot;https://review.whamcloud.com/41224&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/41224&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-14027&quot; title=&quot;Client recovery statemachine hangs in recovery disconnected during lock reply&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-14027&quot;&gt;&lt;del&gt;LU-14027&lt;/del&gt;&lt;/a&gt; ldlm: Do not hang if recovery restarted during lock replay&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: b2_12&lt;br/&gt;
Current Patch Set: 1&lt;br/&gt;
Commit: c54bb57e1687f0db23753eea0b100cc5071d916a&lt;/p&gt;</comment>
                            <comment id="289494" author="eaujames" created="Thu, 14 Jan 2021 16:20:04 +0000"  >&lt;p&gt;The patches above fix &lt;a href=&quot;https://review.whamcloud.com/39111/&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/39111/&lt;/a&gt; (&quot;&lt;a href=&quot;https://jira.whamcloud.com/browse/LU-13600&quot; title=&quot;limit number of RPCs in flight during recovery&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-13600&quot;&gt;&lt;del&gt;LU-13600&lt;/del&gt;&lt;/a&gt; ptlrpc: limit rate of lock replays&quot;) on the b2_12 branch.&lt;/p&gt;</comment>
                            <comment id="289502" author="gerrit" created="Thu, 14 Jan 2021 17:18:07 +0000"  >&lt;p&gt;Etienne AUJAMES (eaujames@ddn.com) uploaded a new patch: &lt;a href=&quot;https://review.whamcloud.com/41227&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/41227&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-14027&quot; title=&quot;Client recovery statemachine hangs in recovery disconnected during lock reply&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-14027&quot;&gt;&lt;del&gt;LU-14027&lt;/del&gt;&lt;/a&gt; tests: Fix test_135 of replay-single&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: master&lt;br/&gt;
Current Patch Set: 1&lt;br/&gt;
Commit: a2d9b877521a0198f333228b117380c5c855e6e8&lt;/p&gt;</comment>
                            <comment id="293920" author="gerrit" created="Thu, 4 Mar 2021 08:36:41 +0000"  >&lt;p&gt;Oleg Drokin (green@whamcloud.com) merged in patch &lt;a href=&quot;https://review.whamcloud.com/41223/&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/41223/&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-14027&quot; title=&quot;Client recovery statemachine hangs in recovery disconnected during lock reply&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-14027&quot;&gt;&lt;del&gt;LU-14027&lt;/del&gt;&lt;/a&gt; ldlm: Do not wait for lock replay sending if import dsconnected&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: b2_12&lt;br/&gt;
Current Patch Set: &lt;br/&gt;
Commit: 2bcc166b0a660afab62d96ede496f42c31ada94b&lt;/p&gt;</comment>
                            <comment id="293921" author="gerrit" created="Thu, 4 Mar 2021 08:36:45 +0000"  >&lt;p&gt;Oleg Drokin (green@whamcloud.com) merged in patch &lt;a href=&quot;https://review.whamcloud.com/41224/&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/41224/&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-14027&quot; title=&quot;Client recovery statemachine hangs in recovery disconnected during lock reply&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-14027&quot;&gt;&lt;del&gt;LU-14027&lt;/del&gt;&lt;/a&gt; ldlm: Do not hang if recovery restarted during lock replay&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: b2_12&lt;br/&gt;
Current Patch Set: &lt;br/&gt;
Commit: 5fa7c8f24e71187a0c3ac70a04a8b566de5a76f3&lt;/p&gt;</comment>
                            <comment id="390554" author="gerrit" created="Wed, 25 Oct 2023 17:45:12 +0000"  >&lt;p&gt;&quot;Oleg Drokin &amp;lt;green@whamcloud.com&amp;gt;&quot; merged in patch &lt;a href=&quot;https://review.whamcloud.com/c/fs/lustre-release/+/41227/&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/c/fs/lustre-release/+/41227/&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-14027&quot; title=&quot;Client recovery statemachine hangs in recovery disconnected during lock reply&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-14027&quot;&gt;&lt;del&gt;LU-14027&lt;/del&gt;&lt;/a&gt; tests: Fix test_135 of replay-single&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: master&lt;br/&gt;
Current Patch Set: &lt;br/&gt;
Commit: fab71963c2513ec8f4eff2c1636c767c47a46034&lt;/p&gt;</comment>
                    </comments>
                <issuelinks>
                            <issuelinktype id="10120">
                    <name>Blocker</name>
                                            <outwardlinks description="is blocking">
                                                        </outwardlinks>
                                                        </issuelinktype>
                            <issuelinktype id="10011">
                    <name>Related</name>
                                            <outwardlinks description="is related to ">
                                        <issuelink>
            <issuekey id="59319">LU-13600</issuekey>
        </issuelink>
                            </outwardlinks>
                                                                <inwardlinks description="is related to">
                                        <issuelink>
            <issuekey id="76849">LU-16943</issuekey>
        </issuelink>
                            </inwardlinks>
                                    </issuelinktype>
                    </issuelinks>
                <attachments>
                    </attachments>
                <subtasks>
                    </subtasks>
                <customfields>
                                                                                                                                                                                            <customfield id="customfield_10890" key="com.atlassian.jira.plugins.jira-development-integration-plugin:devsummary">
                        <customfieldname>Development</customfieldname>
                        <customfieldvalues>
                            
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        <customfield id="customfield_10390" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                        <customfieldname>Rank</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>1|i01caf:</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10090" key="com.pyxis.greenhopper.jira:gh-global-rank">
                        <customfieldname>Rank (Obsolete)</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>9223372036854775807</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                                                                                            <customfield id="customfield_10060" key="com.atlassian.jira.plugin.system.customfieldtypes:select">
                        <customfieldname>Severity</customfieldname>
                        <customfieldvalues>
                                <customfieldvalue key="10022"><![CDATA[3]]></customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        </customfields>
    </item>
</channel>
</rss>